rexml 3.2.5 → 3.2.8

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of rexml might be problematic. Click here for more details.

@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: false
2
+
2
3
  require_relative '../namespace'
3
4
  require_relative '../xmltokens'
4
5
 
@@ -38,108 +39,143 @@ module REXML
38
39
  parsed
39
40
  end
40
41
 
41
- def abbreviate( path )
42
- path = path.kind_of?(String) ? parse( path ) : path
43
- string = ""
44
- document = false
45
- while path.size > 0
46
- op = path.shift
42
+ def abbreviate(path_or_parsed)
43
+ if path_or_parsed.kind_of?(String)
44
+ parsed = parse(path_or_parsed)
45
+ else
46
+ parsed = path_or_parsed
47
+ end
48
+ components = []
49
+ component = nil
50
+ while parsed.size > 0
51
+ op = parsed.shift
47
52
  case op
48
53
  when :node
54
+ component << "node()"
49
55
  when :attribute
50
- string << "/" if string.size > 0
51
- string << "@"
56
+ component = "@"
57
+ components << component
52
58
  when :child
53
- string << "/" if string.size > 0
59
+ component = ""
60
+ components << component
54
61
  when :descendant_or_self
55
- string << "/"
62
+ next_op = parsed[0]
63
+ if next_op == :node
64
+ parsed.shift
65
+ component = ""
66
+ components << component
67
+ else
68
+ component = "descendant-or-self::"
69
+ components << component
70
+ end
56
71
  when :self
57
- string << "."
72
+ next_op = parsed[0]
73
+ if next_op == :node
74
+ parsed.shift
75
+ components << "."
76
+ else
77
+ component = "self::"
78
+ components << component
79
+ end
58
80
  when :parent
59
- string << ".."
81
+ next_op = parsed[0]
82
+ if next_op == :node
83
+ parsed.shift
84
+ components << ".."
85
+ else
86
+ component = "parent::"
87
+ components << component
88
+ end
60
89
  when :any
61
- string << "*"
90
+ component << "*"
62
91
  when :text
63
- string << "text()"
92
+ component << "text()"
64
93
  when :following, :following_sibling,
65
94
  :ancestor, :ancestor_or_self, :descendant,
66
95
  :namespace, :preceding, :preceding_sibling
67
- string << "/" unless string.size == 0
68
- string << op.to_s.tr("_", "-")
69
- string << "::"
96
+ component = op.to_s.tr("_", "-") << "::"
97
+ components << component
70
98
  when :qname
71
- prefix = path.shift
72
- name = path.shift
73
- string << prefix+":" if prefix.size > 0
74
- string << name
99
+ prefix = parsed.shift
100
+ name = parsed.shift
101
+ component << prefix+":" if prefix.size > 0
102
+ component << name
75
103
  when :predicate
76
- string << '['
77
- string << predicate_to_string( path.shift ) {|x| abbreviate( x ) }
78
- string << ']'
104
+ component << '['
105
+ component << predicate_to_path(parsed.shift) {|x| abbreviate(x)}
106
+ component << ']'
79
107
  when :document
80
- document = true
108
+ components << ""
81
109
  when :function
82
- string << path.shift
83
- string << "( "
84
- string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )}
85
- string << " )"
110
+ component << parsed.shift
111
+ component << "( "
112
+ component << predicate_to_path(parsed.shift[0]) {|x| abbreviate(x)}
113
+ component << " )"
86
114
  when :literal
87
- string << %Q{ "#{path.shift}" }
115
+ component << quote_literal(parsed.shift)
88
116
  else
89
- string << "/" unless string.size == 0
90
- string << "UNKNOWN("
91
- string << op.inspect
92
- string << ")"
117
+ component << "UNKNOWN("
118
+ component << op.inspect
119
+ component << ")"
93
120
  end
94
121
  end
95
- string = "/"+string if document
96
- return string
122
+ case components
123
+ when [""]
124
+ "/"
125
+ when ["", ""]
126
+ "//"
127
+ else
128
+ components.join("/")
129
+ end
97
130
  end
98
131
 
99
- def expand( path )
100
- path = path.kind_of?(String) ? parse( path ) : path
101
- string = ""
132
+ def expand(path_or_parsed)
133
+ if path_or_parsed.kind_of?(String)
134
+ parsed = parse(path_or_parsed)
135
+ else
136
+ parsed = path_or_parsed
137
+ end
138
+ path = ""
102
139
  document = false
103
- while path.size > 0
104
- op = path.shift
140
+ while parsed.size > 0
141
+ op = parsed.shift
105
142
  case op
106
143
  when :node
107
- string << "node()"
144
+ path << "node()"
108
145
  when :attribute, :child, :following, :following_sibling,
109
146
  :ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
110
147
  :namespace, :preceding, :preceding_sibling, :self, :parent
111
- string << "/" unless string.size == 0
112
- string << op.to_s.tr("_", "-")
113
- string << "::"
148
+ path << "/" unless path.size == 0
149
+ path << op.to_s.tr("_", "-")
150
+ path << "::"
114
151
  when :any
115
- string << "*"
152
+ path << "*"
116
153
  when :qname
117
- prefix = path.shift
118
- name = path.shift
119
- string << prefix+":" if prefix.size > 0
120
- string << name
154
+ prefix = parsed.shift
155
+ name = parsed.shift
156
+ path << prefix+":" if prefix.size > 0
157
+ path << name
121
158
  when :predicate
122
- string << '['
123
- string << predicate_to_string( path.shift ) { |x| expand(x) }
124
- string << ']'
159
+ path << '['
160
+ path << predicate_to_path( parsed.shift ) { |x| expand(x) }
161
+ path << ']'
125
162
  when :document
126
163
  document = true
127
164
  else
128
- string << "/" unless string.size == 0
129
- string << "UNKNOWN("
130
- string << op.inspect
131
- string << ")"
165
+ path << "UNKNOWN("
166
+ path << op.inspect
167
+ path << ")"
132
168
  end
133
169
  end
134
- string = "/"+string if document
135
- return string
170
+ path = "/"+path if document
171
+ path
136
172
  end
137
173
 
138
- def predicate_to_string( path, &block )
139
- string = ""
140
- case path[0]
174
+ def predicate_to_path(parsed, &block)
175
+ path = ""
176
+ case parsed[0]
141
177
  when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
142
- op = path.shift
178
+ op = parsed.shift
143
179
  case op
144
180
  when :eq
145
181
  op = "="
@@ -156,36 +192,50 @@ module REXML
156
192
  when :union
157
193
  op = "|"
158
194
  end
159
- left = predicate_to_string( path.shift, &block )
160
- right = predicate_to_string( path.shift, &block )
161
- string << " "
162
- string << left
163
- string << " "
164
- string << op.to_s
165
- string << " "
166
- string << right
167
- string << " "
195
+ left = predicate_to_path( parsed.shift, &block )
196
+ right = predicate_to_path( parsed.shift, &block )
197
+ path << left
198
+ path << " "
199
+ path << op.to_s
200
+ path << " "
201
+ path << right
168
202
  when :function
169
- path.shift
170
- name = path.shift
171
- string << name
172
- string << "( "
173
- string << predicate_to_string( path.shift, &block )
174
- string << " )"
203
+ parsed.shift
204
+ name = parsed.shift
205
+ path << name
206
+ path << "("
207
+ parsed.shift.each_with_index do |argument, i|
208
+ path << ", " if i > 0
209
+ path << predicate_to_path(argument, &block)
210
+ end
211
+ path << ")"
175
212
  when :literal
176
- path.shift
177
- string << " "
178
- string << path.shift.inspect
179
- string << " "
213
+ parsed.shift
214
+ path << quote_literal(parsed.shift)
180
215
  else
181
- string << " "
182
- string << yield( path )
183
- string << " "
216
+ path << yield( parsed )
184
217
  end
185
- return string.squeeze(" ")
218
+ return path.squeeze(" ")
186
219
  end
220
+ # For backward compatibility
221
+ alias_method :preciate_to_string, :predicate_to_path
187
222
 
188
223
  private
224
+ def quote_literal( literal )
225
+ case literal
226
+ when String
227
+ # XPath 1.0 does not support escape characters.
228
+ # Assumes literal does not contain both single and double quotes.
229
+ if literal.include?("'")
230
+ "\"#{literal}\""
231
+ else
232
+ "'#{literal}'"
233
+ end
234
+ else
235
+ literal.inspect
236
+ end
237
+ end
238
+
189
239
  #LocationPath
190
240
  # | RelativeLocationPath
191
241
  # | '/' RelativeLocationPath?
data/lib/rexml/rexml.rb CHANGED
@@ -26,10 +26,12 @@
26
26
  # - REXML::Document.
27
27
  # - REXML::Element.
28
28
  #
29
+ # There's also an {REXML tutorial}[doc/rexml/tutorial_rdoc.html].
30
+ #
29
31
  module REXML
30
32
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
31
33
  DATE = "2008/019"
32
- VERSION = "3.2.5"
34
+ VERSION = "3.2.8"
33
35
  REVISION = ""
34
36
 
35
37
  Copyright = COPYRIGHT
data/lib/rexml/source.rb CHANGED
@@ -30,8 +30,6 @@ module REXML
30
30
  # objects and provides consumption of text
31
31
  class Source
32
32
  include Encoding
33
- # The current buffer (what we're going to read next)
34
- attr_reader :buffer
35
33
  # The line number of the last consumed text
36
34
  attr_reader :line
37
35
  attr_reader :encoding
@@ -41,7 +39,8 @@ module REXML
41
39
  # @param encoding if non-null, sets the encoding of the source to this
42
40
  # value, overriding all encoding detection
43
41
  def initialize(arg, encoding=nil)
44
- @orig = @buffer = arg
42
+ @orig = arg
43
+ @scanner = StringScanner.new(@orig)
45
44
  if encoding
46
45
  self.encoding = encoding
47
46
  else
@@ -50,6 +49,14 @@ module REXML
50
49
  @line = 0
51
50
  end
52
51
 
52
+ # The current buffer (what we're going to read next)
53
+ def buffer
54
+ @scanner.rest
55
+ end
56
+
57
+ def buffer_encoding=(encoding)
58
+ @scanner.string.force_encoding(encoding)
59
+ end
53
60
 
54
61
  # Inherited from Encoding
55
62
  # Overridden to support optimized en/decoding
@@ -58,98 +65,72 @@ module REXML
58
65
  encoding_updated
59
66
  end
60
67
 
61
- # Scans the source for a given pattern. Note, that this is not your
62
- # usual scan() method. For one thing, the pattern argument has some
63
- # requirements; for another, the source can be consumed. You can easily
64
- # confuse this method. Originally, the patterns were easier
65
- # to construct and this method more robust, because this method
66
- # generated search regexps on the fly; however, this was
67
- # computationally expensive and slowed down the entire REXML package
68
- # considerably, since this is by far the most commonly called method.
69
- # @param pattern must be a Regexp, and must be in the form of
70
- # /^\s*(#{your pattern, with no groups})(.*)/. The first group
71
- # will be returned; the second group is used if the consume flag is
72
- # set.
73
- # @param consume if true, the pattern returned will be consumed, leaving
74
- # everything after it in the Source.
75
- # @return the pattern, if found, or nil if the Source is empty or the
76
- # pattern is not found.
77
- def scan(pattern, cons=false)
78
- return nil if @buffer.nil?
79
- rv = @buffer.scan(pattern)
80
- @buffer = $' if cons and rv.size>0
81
- rv
68
+ def read(term = nil)
82
69
  end
83
70
 
84
- def read
71
+ def read_until(term)
72
+ @scanner.scan_until(Regexp.union(term)) or @scanner.rest
85
73
  end
86
74
 
87
- def consume( pattern )
88
- @buffer = $' if pattern.match( @buffer )
75
+ def ensure_buffer
89
76
  end
90
77
 
91
- def match_to( char, pattern )
92
- return pattern.match(@buffer)
78
+ def match(pattern, cons=false)
79
+ if cons
80
+ @scanner.scan(pattern).nil? ? nil : @scanner
81
+ else
82
+ @scanner.check(pattern).nil? ? nil : @scanner
83
+ end
93
84
  end
94
85
 
95
- def match_to_consume( char, pattern )
96
- md = pattern.match(@buffer)
97
- @buffer = $'
98
- return md
86
+ def position
87
+ @scanner.pos
99
88
  end
100
89
 
101
- def match(pattern, cons=false)
102
- md = pattern.match(@buffer)
103
- @buffer = $' if cons and md
104
- return md
90
+ def position=(pos)
91
+ @scanner.pos = pos
105
92
  end
106
93
 
107
94
  # @return true if the Source is exhausted
108
95
  def empty?
109
- @buffer == ""
110
- end
111
-
112
- def position
113
- @orig.index( @buffer )
96
+ @scanner.eos?
114
97
  end
115
98
 
116
99
  # @return the current line in the source
117
100
  def current_line
118
101
  lines = @orig.split
119
- res = lines.grep @buffer[0..30]
102
+ res = lines.grep @scanner.rest[0..30]
120
103
  res = res[-1] if res.kind_of? Array
121
104
  lines.index( res ) if res
122
105
  end
123
106
 
124
107
  private
108
+
125
109
  def detect_encoding
126
- buffer_encoding = @buffer.encoding
110
+ scanner_encoding = @scanner.rest.encoding
127
111
  detected_encoding = "UTF-8"
128
112
  begin
129
- @buffer.force_encoding("ASCII-8BIT")
130
- if @buffer[0, 2] == "\xfe\xff"
131
- @buffer[0, 2] = ""
113
+ @scanner.string.force_encoding("ASCII-8BIT")
114
+ if @scanner.scan(/\xfe\xff/n)
132
115
  detected_encoding = "UTF-16BE"
133
- elsif @buffer[0, 2] == "\xff\xfe"
134
- @buffer[0, 2] = ""
116
+ elsif @scanner.scan(/\xff\xfe/n)
135
117
  detected_encoding = "UTF-16LE"
136
- elsif @buffer[0, 3] == "\xef\xbb\xbf"
137
- @buffer[0, 3] = ""
118
+ elsif @scanner.scan(/\xef\xbb\xbf/n)
138
119
  detected_encoding = "UTF-8"
139
120
  end
140
121
  ensure
141
- @buffer.force_encoding(buffer_encoding)
122
+ @scanner.string.force_encoding(scanner_encoding)
142
123
  end
143
124
  self.encoding = detected_encoding
144
125
  end
145
126
 
146
127
  def encoding_updated
147
128
  if @encoding != 'UTF-8'
148
- @buffer = decode(@buffer)
129
+ @scanner.string = decode(@scanner.rest)
149
130
  @to_utf = true
150
131
  else
151
132
  @to_utf = false
152
- @buffer.force_encoding ::Encoding::UTF_8
133
+ @scanner.string.force_encoding(::Encoding::UTF_8)
153
134
  end
154
135
  end
155
136
  end
@@ -172,7 +153,7 @@ module REXML
172
153
  end
173
154
 
174
155
  if !@to_utf and
175
- @buffer.respond_to?(:force_encoding) and
156
+ @orig.respond_to?(:force_encoding) and
176
157
  @source.respond_to?(:external_encoding) and
177
158
  @source.external_encoding != ::Encoding::UTF_8
178
159
  @force_utf8 = true
@@ -181,65 +162,57 @@ module REXML
181
162
  end
182
163
  end
183
164
 
184
- def scan(pattern, cons=false)
185
- rv = super
186
- # You'll notice that this next section is very similar to the same
187
- # section in match(), but just a liiittle different. This is
188
- # because it is a touch faster to do it this way with scan()
189
- # than the way match() does it; enough faster to warrant duplicating
190
- # some code
191
- if rv.size == 0
192
- until @buffer =~ pattern or @source.nil?
193
- begin
194
- @buffer << readline
195
- rescue Iconv::IllegalSequence
196
- raise
197
- rescue
198
- @source = nil
199
- end
200
- end
201
- rv = super
165
+ def read(term = nil)
166
+ begin
167
+ @scanner << readline(term)
168
+ true
169
+ rescue Exception, NameError
170
+ @source = nil
171
+ false
202
172
  end
203
- rv.taint if RUBY_VERSION < '2.7'
204
- rv
205
173
  end
206
174
 
207
- def read
175
+ def read_until(term)
176
+ pattern = Regexp.union(term)
208
177
  begin
209
- @buffer << readline
210
- rescue Exception, NameError
211
- @source = nil
178
+ until str = @scanner.scan_until(pattern)
179
+ @scanner << readline(term)
180
+ end
181
+ rescue EOFError
182
+ @scanner.rest
183
+ else
184
+ read if @scanner.eos? and !@source.eof?
185
+ str
212
186
  end
213
187
  end
214
188
 
215
- def consume( pattern )
216
- match( pattern, true )
189
+ def ensure_buffer
190
+ read if @scanner.eos? && @source
217
191
  end
218
192
 
193
+ # Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
194
+ # - ">"
195
+ # - "XXX>" (X is any string excluding '>')
219
196
  def match( pattern, cons=false )
220
- rv = pattern.match(@buffer)
221
- @buffer = $' if cons and rv
222
- while !rv and @source
223
- begin
224
- @buffer << readline
225
- rv = pattern.match(@buffer)
226
- @buffer = $' if cons and rv
227
- rescue
228
- @source = nil
197
+ while true
198
+ if cons
199
+ md = @scanner.scan(pattern)
200
+ else
201
+ md = @scanner.check(pattern)
229
202
  end
203
+ break if md
204
+ return nil if pattern.is_a?(String)
205
+ return nil if @source.nil?
206
+ return nil unless read
230
207
  end
231
- rv.taint if RUBY_VERSION < '2.7'
232
- rv
208
+
209
+ md.nil? ? nil : @scanner
233
210
  end
234
211
 
235
212
  def empty?
236
213
  super and ( @source.nil? || @source.eof? )
237
214
  end
238
215
 
239
- def position
240
- @er_source.pos rescue 0
241
- end
242
-
243
216
  # @return the current line in the source
244
217
  def current_line
245
218
  begin
@@ -263,8 +236,8 @@ module REXML
263
236
  end
264
237
 
265
238
  private
266
- def readline
267
- str = @source.readline(@line_break)
239
+ def readline(term = nil)
240
+ str = @source.readline(term || @line_break)
268
241
  if @pending_buffer
269
242
  if str.nil?
270
243
  str = @pending_buffer
@@ -290,7 +263,7 @@ module REXML
290
263
  @source.set_encoding(@encoding, @encoding)
291
264
  end
292
265
  @line_break = encode(">")
293
- @pending_buffer, @buffer = @buffer, ""
266
+ @pending_buffer, @scanner.string = @scanner.rest, ""
294
267
  @pending_buffer.force_encoding(@encoding)
295
268
  super
296
269
  end
data/lib/rexml/text.rb CHANGED
@@ -1,4 +1,4 @@
1
- # frozen_string_literal: false
1
+ # frozen_string_literal: true
2
2
  require_relative 'security'
3
3
  require_relative 'entity'
4
4
  require_relative 'doctype'
@@ -131,7 +131,7 @@ module REXML
131
131
  def Text.check string, pattern, doctype
132
132
 
133
133
  # illegal anywhere
134
- if string !~ VALID_XML_CHARS
134
+ if !string.match?(VALID_XML_CHARS)
135
135
  if String.method_defined? :encode
136
136
  string.chars.each do |c|
137
137
  case c.ord
@@ -371,7 +371,7 @@ module REXML
371
371
  copy = input.to_s
372
372
  # Doing it like this rather than in a loop improves the speed
373
373
  #copy = copy.gsub( EREFERENCE, '&amp;' )
374
- copy = copy.gsub( "&", "&amp;" )
374
+ copy = copy.gsub( "&", "&amp;" ) if copy.include?("&")
375
375
  if doctype
376
376
  # Replace all ampersands that aren't part of an entity
377
377
  doctype.entities.each_value do |entity|
@@ -382,7 +382,9 @@ module REXML
382
382
  else
383
383
  # Replace all ampersands that aren't part of an entity
384
384
  DocType::DEFAULT_ENTITIES.each_value do |entity|
385
- copy = copy.gsub(entity.value, "&#{entity.name};" )
385
+ if copy.include?(entity.value)
386
+ copy = copy.gsub(entity.value, "&#{entity.name};" )
387
+ end
386
388
  end
387
389
  end
388
390
  copy
@@ -590,6 +590,7 @@ module REXML
590
590
 
591
591
  def evaluate_predicate(expression, nodesets)
592
592
  enter(:predicate, expression, nodesets) if @debug
593
+ new_nodeset_count = 0
593
594
  new_nodesets = nodesets.collect do |nodeset|
594
595
  new_nodeset = []
595
596
  subcontext = { :size => nodeset.size }
@@ -606,17 +607,20 @@ module REXML
606
607
  result = result[0] if result.kind_of? Array and result.length == 1
607
608
  if result.kind_of? Numeric
608
609
  if result == node.position
609
- new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
610
+ new_nodeset_count += 1
611
+ new_nodeset << XPathNode.new(node, position: new_nodeset_count)
610
612
  end
611
613
  elsif result.instance_of? Array
612
614
  if result.size > 0 and result.inject(false) {|k,s| s or k}
613
615
  if result.size > 0
614
- new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
616
+ new_nodeset_count += 1
617
+ new_nodeset << XPathNode.new(node, position: new_nodeset_count)
615
618
  end
616
619
  end
617
620
  else
618
621
  if result
619
- new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
622
+ new_nodeset_count += 1
623
+ new_nodeset << XPathNode.new(node, position: new_nodeset_count)
620
624
  end
621
625
  end
622
626
  end