rexml 3.2.5 → 3.3.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of rexml might be problematic. Click here for more details.

@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: false
2
+
2
3
  require_relative '../namespace'
3
4
  require_relative '../xmltokens'
4
5
 
@@ -38,108 +39,143 @@ module REXML
38
39
  parsed
39
40
  end
40
41
 
41
- def abbreviate( path )
42
- path = path.kind_of?(String) ? parse( path ) : path
43
- string = ""
44
- document = false
45
- while path.size > 0
46
- op = path.shift
42
+ def abbreviate(path_or_parsed)
43
+ if path_or_parsed.kind_of?(String)
44
+ parsed = parse(path_or_parsed)
45
+ else
46
+ parsed = path_or_parsed
47
+ end
48
+ components = []
49
+ component = nil
50
+ while parsed.size > 0
51
+ op = parsed.shift
47
52
  case op
48
53
  when :node
54
+ component << "node()"
49
55
  when :attribute
50
- string << "/" if string.size > 0
51
- string << "@"
56
+ component = "@"
57
+ components << component
52
58
  when :child
53
- string << "/" if string.size > 0
59
+ component = ""
60
+ components << component
54
61
  when :descendant_or_self
55
- string << "/"
62
+ next_op = parsed[0]
63
+ if next_op == :node
64
+ parsed.shift
65
+ component = ""
66
+ components << component
67
+ else
68
+ component = "descendant-or-self::"
69
+ components << component
70
+ end
56
71
  when :self
57
- string << "."
72
+ next_op = parsed[0]
73
+ if next_op == :node
74
+ parsed.shift
75
+ components << "."
76
+ else
77
+ component = "self::"
78
+ components << component
79
+ end
58
80
  when :parent
59
- string << ".."
81
+ next_op = parsed[0]
82
+ if next_op == :node
83
+ parsed.shift
84
+ components << ".."
85
+ else
86
+ component = "parent::"
87
+ components << component
88
+ end
60
89
  when :any
61
- string << "*"
90
+ component << "*"
62
91
  when :text
63
- string << "text()"
92
+ component << "text()"
64
93
  when :following, :following_sibling,
65
94
  :ancestor, :ancestor_or_self, :descendant,
66
95
  :namespace, :preceding, :preceding_sibling
67
- string << "/" unless string.size == 0
68
- string << op.to_s.tr("_", "-")
69
- string << "::"
96
+ component = op.to_s.tr("_", "-") << "::"
97
+ components << component
70
98
  when :qname
71
- prefix = path.shift
72
- name = path.shift
73
- string << prefix+":" if prefix.size > 0
74
- string << name
99
+ prefix = parsed.shift
100
+ name = parsed.shift
101
+ component << prefix+":" if prefix.size > 0
102
+ component << name
75
103
  when :predicate
76
- string << '['
77
- string << predicate_to_string( path.shift ) {|x| abbreviate( x ) }
78
- string << ']'
104
+ component << '['
105
+ component << predicate_to_path(parsed.shift) {|x| abbreviate(x)}
106
+ component << ']'
79
107
  when :document
80
- document = true
108
+ components << ""
81
109
  when :function
82
- string << path.shift
83
- string << "( "
84
- string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )}
85
- string << " )"
110
+ component << parsed.shift
111
+ component << "( "
112
+ component << predicate_to_path(parsed.shift[0]) {|x| abbreviate(x)}
113
+ component << " )"
86
114
  when :literal
87
- string << %Q{ "#{path.shift}" }
115
+ component << quote_literal(parsed.shift)
88
116
  else
89
- string << "/" unless string.size == 0
90
- string << "UNKNOWN("
91
- string << op.inspect
92
- string << ")"
117
+ component << "UNKNOWN("
118
+ component << op.inspect
119
+ component << ")"
93
120
  end
94
121
  end
95
- string = "/"+string if document
96
- return string
122
+ case components
123
+ when [""]
124
+ "/"
125
+ when ["", ""]
126
+ "//"
127
+ else
128
+ components.join("/")
129
+ end
97
130
  end
98
131
 
99
- def expand( path )
100
- path = path.kind_of?(String) ? parse( path ) : path
101
- string = ""
132
+ def expand(path_or_parsed)
133
+ if path_or_parsed.kind_of?(String)
134
+ parsed = parse(path_or_parsed)
135
+ else
136
+ parsed = path_or_parsed
137
+ end
138
+ path = ""
102
139
  document = false
103
- while path.size > 0
104
- op = path.shift
140
+ while parsed.size > 0
141
+ op = parsed.shift
105
142
  case op
106
143
  when :node
107
- string << "node()"
144
+ path << "node()"
108
145
  when :attribute, :child, :following, :following_sibling,
109
146
  :ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
110
147
  :namespace, :preceding, :preceding_sibling, :self, :parent
111
- string << "/" unless string.size == 0
112
- string << op.to_s.tr("_", "-")
113
- string << "::"
148
+ path << "/" unless path.size == 0
149
+ path << op.to_s.tr("_", "-")
150
+ path << "::"
114
151
  when :any
115
- string << "*"
152
+ path << "*"
116
153
  when :qname
117
- prefix = path.shift
118
- name = path.shift
119
- string << prefix+":" if prefix.size > 0
120
- string << name
154
+ prefix = parsed.shift
155
+ name = parsed.shift
156
+ path << prefix+":" if prefix.size > 0
157
+ path << name
121
158
  when :predicate
122
- string << '['
123
- string << predicate_to_string( path.shift ) { |x| expand(x) }
124
- string << ']'
159
+ path << '['
160
+ path << predicate_to_path( parsed.shift ) { |x| expand(x) }
161
+ path << ']'
125
162
  when :document
126
163
  document = true
127
164
  else
128
- string << "/" unless string.size == 0
129
- string << "UNKNOWN("
130
- string << op.inspect
131
- string << ")"
165
+ path << "UNKNOWN("
166
+ path << op.inspect
167
+ path << ")"
132
168
  end
133
169
  end
134
- string = "/"+string if document
135
- return string
170
+ path = "/"+path if document
171
+ path
136
172
  end
137
173
 
138
- def predicate_to_string( path, &block )
139
- string = ""
140
- case path[0]
174
+ def predicate_to_path(parsed, &block)
175
+ path = ""
176
+ case parsed[0]
141
177
  when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
142
- op = path.shift
178
+ op = parsed.shift
143
179
  case op
144
180
  when :eq
145
181
  op = "="
@@ -156,36 +192,50 @@ module REXML
156
192
  when :union
157
193
  op = "|"
158
194
  end
159
- left = predicate_to_string( path.shift, &block )
160
- right = predicate_to_string( path.shift, &block )
161
- string << " "
162
- string << left
163
- string << " "
164
- string << op.to_s
165
- string << " "
166
- string << right
167
- string << " "
195
+ left = predicate_to_path( parsed.shift, &block )
196
+ right = predicate_to_path( parsed.shift, &block )
197
+ path << left
198
+ path << " "
199
+ path << op.to_s
200
+ path << " "
201
+ path << right
168
202
  when :function
169
- path.shift
170
- name = path.shift
171
- string << name
172
- string << "( "
173
- string << predicate_to_string( path.shift, &block )
174
- string << " )"
203
+ parsed.shift
204
+ name = parsed.shift
205
+ path << name
206
+ path << "("
207
+ parsed.shift.each_with_index do |argument, i|
208
+ path << ", " if i > 0
209
+ path << predicate_to_path(argument, &block)
210
+ end
211
+ path << ")"
175
212
  when :literal
176
- path.shift
177
- string << " "
178
- string << path.shift.inspect
179
- string << " "
213
+ parsed.shift
214
+ path << quote_literal(parsed.shift)
180
215
  else
181
- string << " "
182
- string << yield( path )
183
- string << " "
216
+ path << yield( parsed )
184
217
  end
185
- return string.squeeze(" ")
218
+ return path.squeeze(" ")
186
219
  end
220
+ # For backward compatibility
221
+ alias_method :preciate_to_string, :predicate_to_path
187
222
 
188
223
  private
224
+ def quote_literal( literal )
225
+ case literal
226
+ when String
227
+ # XPath 1.0 does not support escape characters.
228
+ # Assumes literal does not contain both single and double quotes.
229
+ if literal.include?("'")
230
+ "\"#{literal}\""
231
+ else
232
+ "'#{literal}'"
233
+ end
234
+ else
235
+ literal.inspect
236
+ end
237
+ end
238
+
189
239
  #LocationPath
190
240
  # | RelativeLocationPath
191
241
  # | '/' RelativeLocationPath?
data/lib/rexml/rexml.rb CHANGED
@@ -26,10 +26,12 @@
26
26
  # - REXML::Document.
27
27
  # - REXML::Element.
28
28
  #
29
+ # There's also an {REXML tutorial}[doc/rexml/tutorial_rdoc.html].
30
+ #
29
31
  module REXML
30
32
  COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
31
33
  DATE = "2008/019"
32
- VERSION = "3.2.5"
34
+ VERSION = "3.3.0"
33
35
  REVISION = ""
34
36
 
35
37
  Copyright = COPYRIGHT
data/lib/rexml/source.rb CHANGED
@@ -1,8 +1,28 @@
1
1
  # coding: US-ASCII
2
2
  # frozen_string_literal: false
3
+
4
+ require "strscan"
5
+
3
6
  require_relative 'encoding'
4
7
 
5
8
  module REXML
9
+ if StringScanner::Version < "1.0.0"
10
+ module StringScannerCheckScanString
11
+ refine StringScanner do
12
+ def check(pattern)
13
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
14
+ super(pattern)
15
+ end
16
+
17
+ def scan(pattern)
18
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
19
+ super(pattern)
20
+ end
21
+ end
22
+ end
23
+ using StringScannerCheckScanString
24
+ end
25
+
6
26
  # Generates Source-s. USE THIS CLASS.
7
27
  class SourceFactory
8
28
  # Generates a Source object
@@ -30,18 +50,27 @@ module REXML
30
50
  # objects and provides consumption of text
31
51
  class Source
32
52
  include Encoding
33
- # The current buffer (what we're going to read next)
34
- attr_reader :buffer
35
53
  # The line number of the last consumed text
36
54
  attr_reader :line
37
55
  attr_reader :encoding
38
56
 
57
+ module Private
58
+ PRE_DEFINED_TERM_PATTERNS = {}
59
+ pre_defined_terms = ["'", '"', "<"]
60
+ pre_defined_terms.each do |term|
61
+ PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
62
+ end
63
+ end
64
+ private_constant :Private
65
+ include Private
66
+
39
67
  # Constructor
40
68
  # @param arg must be a String, and should be a valid XML document
41
69
  # @param encoding if non-null, sets the encoding of the source to this
42
70
  # value, overriding all encoding detection
43
71
  def initialize(arg, encoding=nil)
44
- @orig = @buffer = arg
72
+ @orig = arg
73
+ @scanner = StringScanner.new(@orig)
45
74
  if encoding
46
75
  self.encoding = encoding
47
76
  else
@@ -50,6 +79,14 @@ module REXML
50
79
  @line = 0
51
80
  end
52
81
 
82
+ # The current buffer (what we're going to read next)
83
+ def buffer
84
+ @scanner.rest
85
+ end
86
+
87
+ def buffer_encoding=(encoding)
88
+ @scanner.string.force_encoding(encoding)
89
+ end
53
90
 
54
91
  # Inherited from Encoding
55
92
  # Overridden to support optimized en/decoding
@@ -58,98 +95,78 @@ module REXML
58
95
  encoding_updated
59
96
  end
60
97
 
61
- # Scans the source for a given pattern. Note, that this is not your
62
- # usual scan() method. For one thing, the pattern argument has some
63
- # requirements; for another, the source can be consumed. You can easily
64
- # confuse this method. Originally, the patterns were easier
65
- # to construct and this method more robust, because this method
66
- # generated search regexps on the fly; however, this was
67
- # computationally expensive and slowed down the entire REXML package
68
- # considerably, since this is by far the most commonly called method.
69
- # @param pattern must be a Regexp, and must be in the form of
70
- # /^\s*(#{your pattern, with no groups})(.*)/. The first group
71
- # will be returned; the second group is used if the consume flag is
72
- # set.
73
- # @param consume if true, the pattern returned will be consumed, leaving
74
- # everything after it in the Source.
75
- # @return the pattern, if found, or nil if the Source is empty or the
76
- # pattern is not found.
77
- def scan(pattern, cons=false)
78
- return nil if @buffer.nil?
79
- rv = @buffer.scan(pattern)
80
- @buffer = $' if cons and rv.size>0
81
- rv
98
+ def read(term = nil)
82
99
  end
83
100
 
84
- def read
101
+ def read_until(term)
102
+ pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
103
+ data = @scanner.scan_until(pattern)
104
+ unless data
105
+ data = @scanner.rest
106
+ @scanner.pos = @scanner.string.bytesize
107
+ end
108
+ data
85
109
  end
86
110
 
87
- def consume( pattern )
88
- @buffer = $' if pattern.match( @buffer )
111
+ def ensure_buffer
89
112
  end
90
113
 
91
- def match_to( char, pattern )
92
- return pattern.match(@buffer)
114
+ def match(pattern, cons=false)
115
+ if cons
116
+ @scanner.scan(pattern).nil? ? nil : @scanner
117
+ else
118
+ @scanner.check(pattern).nil? ? nil : @scanner
119
+ end
93
120
  end
94
121
 
95
- def match_to_consume( char, pattern )
96
- md = pattern.match(@buffer)
97
- @buffer = $'
98
- return md
122
+ def position
123
+ @scanner.pos
99
124
  end
100
125
 
101
- def match(pattern, cons=false)
102
- md = pattern.match(@buffer)
103
- @buffer = $' if cons and md
104
- return md
126
+ def position=(pos)
127
+ @scanner.pos = pos
105
128
  end
106
129
 
107
130
  # @return true if the Source is exhausted
108
131
  def empty?
109
- @buffer == ""
110
- end
111
-
112
- def position
113
- @orig.index( @buffer )
132
+ @scanner.eos?
114
133
  end
115
134
 
116
135
  # @return the current line in the source
117
136
  def current_line
118
137
  lines = @orig.split
119
- res = lines.grep @buffer[0..30]
138
+ res = lines.grep @scanner.rest[0..30]
120
139
  res = res[-1] if res.kind_of? Array
121
140
  lines.index( res ) if res
122
141
  end
123
142
 
124
143
  private
144
+
125
145
  def detect_encoding
126
- buffer_encoding = @buffer.encoding
146
+ scanner_encoding = @scanner.rest.encoding
127
147
  detected_encoding = "UTF-8"
128
148
  begin
129
- @buffer.force_encoding("ASCII-8BIT")
130
- if @buffer[0, 2] == "\xfe\xff"
131
- @buffer[0, 2] = ""
149
+ @scanner.string.force_encoding("ASCII-8BIT")
150
+ if @scanner.scan(/\xfe\xff/n)
132
151
  detected_encoding = "UTF-16BE"
133
- elsif @buffer[0, 2] == "\xff\xfe"
134
- @buffer[0, 2] = ""
152
+ elsif @scanner.scan(/\xff\xfe/n)
135
153
  detected_encoding = "UTF-16LE"
136
- elsif @buffer[0, 3] == "\xef\xbb\xbf"
137
- @buffer[0, 3] = ""
154
+ elsif @scanner.scan(/\xef\xbb\xbf/n)
138
155
  detected_encoding = "UTF-8"
139
156
  end
140
157
  ensure
141
- @buffer.force_encoding(buffer_encoding)
158
+ @scanner.string.force_encoding(scanner_encoding)
142
159
  end
143
160
  self.encoding = detected_encoding
144
161
  end
145
162
 
146
163
  def encoding_updated
147
164
  if @encoding != 'UTF-8'
148
- @buffer = decode(@buffer)
165
+ @scanner.string = decode(@scanner.rest)
149
166
  @to_utf = true
150
167
  else
151
168
  @to_utf = false
152
- @buffer.force_encoding ::Encoding::UTF_8
169
+ @scanner.string.force_encoding(::Encoding::UTF_8)
153
170
  end
154
171
  end
155
172
  end
@@ -172,7 +189,7 @@ module REXML
172
189
  end
173
190
 
174
191
  if !@to_utf and
175
- @buffer.respond_to?(:force_encoding) and
192
+ @orig.respond_to?(:force_encoding) and
176
193
  @source.respond_to?(:external_encoding) and
177
194
  @source.external_encoding != ::Encoding::UTF_8
178
195
  @force_utf8 = true
@@ -181,65 +198,62 @@ module REXML
181
198
  end
182
199
  end
183
200
 
184
- def scan(pattern, cons=false)
185
- rv = super
186
- # You'll notice that this next section is very similar to the same
187
- # section in match(), but just a liiittle different. This is
188
- # because it is a touch faster to do it this way with scan()
189
- # than the way match() does it; enough faster to warrant duplicating
190
- # some code
191
- if rv.size == 0
192
- until @buffer =~ pattern or @source.nil?
193
- begin
194
- @buffer << readline
195
- rescue Iconv::IllegalSequence
196
- raise
197
- rescue
198
- @source = nil
199
- end
200
- end
201
- rv = super
202
- end
203
- rv.taint if RUBY_VERSION < '2.7'
204
- rv
205
- end
206
-
207
- def read
201
+ def read(term = nil)
202
+ term = encode(term) if term
208
203
  begin
209
- @buffer << readline
204
+ @scanner << readline(term)
205
+ true
210
206
  rescue Exception, NameError
211
207
  @source = nil
208
+ false
209
+ end
210
+ end
211
+
212
+ def read_until(term)
213
+ pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
214
+ term = encode(term)
215
+ until str = @scanner.scan_until(pattern)
216
+ break if @source.nil?
217
+ break if @source.eof?
218
+ @scanner << readline(term)
219
+ end
220
+ if str
221
+ read if @scanner.eos? and !@source.eof?
222
+ str
223
+ else
224
+ rest = @scanner.rest
225
+ @scanner.pos = @scanner.string.bytesize
226
+ rest
212
227
  end
213
228
  end
214
229
 
215
- def consume( pattern )
216
- match( pattern, true )
230
+ def ensure_buffer
231
+ read if @scanner.eos? && @source
217
232
  end
218
233
 
234
+ # Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
235
+ # - ">"
236
+ # - "XXX>" (X is any string excluding '>')
219
237
  def match( pattern, cons=false )
220
- rv = pattern.match(@buffer)
221
- @buffer = $' if cons and rv
222
- while !rv and @source
223
- begin
224
- @buffer << readline
225
- rv = pattern.match(@buffer)
226
- @buffer = $' if cons and rv
227
- rescue
228
- @source = nil
238
+ while true
239
+ if cons
240
+ md = @scanner.scan(pattern)
241
+ else
242
+ md = @scanner.check(pattern)
229
243
  end
244
+ break if md
245
+ return nil if pattern.is_a?(String)
246
+ return nil if @source.nil?
247
+ return nil unless read
230
248
  end
231
- rv.taint if RUBY_VERSION < '2.7'
232
- rv
249
+
250
+ md.nil? ? nil : @scanner
233
251
  end
234
252
 
235
253
  def empty?
236
254
  super and ( @source.nil? || @source.eof? )
237
255
  end
238
256
 
239
- def position
240
- @er_source.pos rescue 0
241
- end
242
-
243
257
  # @return the current line in the source
244
258
  def current_line
245
259
  begin
@@ -263,8 +277,8 @@ module REXML
263
277
  end
264
278
 
265
279
  private
266
- def readline
267
- str = @source.readline(@line_break)
280
+ def readline(term = nil)
281
+ str = @source.readline(term || @line_break)
268
282
  if @pending_buffer
269
283
  if str.nil?
270
284
  str = @pending_buffer
@@ -290,7 +304,7 @@ module REXML
290
304
  @source.set_encoding(@encoding, @encoding)
291
305
  end
292
306
  @line_break = encode(">")
293
- @pending_buffer, @buffer = @buffer, ""
307
+ @pending_buffer, @scanner.string = @scanner.rest, ""
294
308
  @pending_buffer.force_encoding(@encoding)
295
309
  super
296
310
  end
data/lib/rexml/text.rb CHANGED
@@ -1,4 +1,4 @@
1
- # frozen_string_literal: false
1
+ # frozen_string_literal: true
2
2
  require_relative 'security'
3
3
  require_relative 'entity'
4
4
  require_relative 'doctype'
@@ -131,7 +131,7 @@ module REXML
131
131
  def Text.check string, pattern, doctype
132
132
 
133
133
  # illegal anywhere
134
- if string !~ VALID_XML_CHARS
134
+ if !string.match?(VALID_XML_CHARS)
135
135
  if String.method_defined? :encode
136
136
  string.chars.each do |c|
137
137
  case c.ord
@@ -371,7 +371,7 @@ module REXML
371
371
  copy = input.to_s
372
372
  # Doing it like this rather than in a loop improves the speed
373
373
  #copy = copy.gsub( EREFERENCE, '&amp;' )
374
- copy = copy.gsub( "&", "&amp;" )
374
+ copy = copy.gsub( "&", "&amp;" ) if copy.include?("&")
375
375
  if doctype
376
376
  # Replace all ampersands that aren't part of an entity
377
377
  doctype.entities.each_value do |entity|
@@ -382,7 +382,9 @@ module REXML
382
382
  else
383
383
  # Replace all ampersands that aren't part of an entity
384
384
  DocType::DEFAULT_ENTITIES.each_value do |entity|
385
- copy = copy.gsub(entity.value, "&#{entity.name};" )
385
+ if copy.include?(entity.value)
386
+ copy = copy.gsub(entity.value, "&#{entity.name};" )
387
+ end
386
388
  end
387
389
  end
388
390
  copy