rexml 3.2.5 → 3.3.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +204 -2
- data/README.md +10 -1
- data/doc/rexml/tasks/rdoc/element.rdoc +2 -2
- data/doc/rexml/tutorial.rdoc +1358 -0
- data/lib/rexml/attribute.rb +14 -9
- data/lib/rexml/document.rb +1 -1
- data/lib/rexml/element.rb +3 -3
- data/lib/rexml/entity.rb +25 -15
- data/lib/rexml/formatters/pretty.rb +2 -2
- data/lib/rexml/functions.rb +1 -2
- data/lib/rexml/namespace.rb +8 -4
- data/lib/rexml/node.rb +8 -4
- data/lib/rexml/parseexception.rb +1 -0
- data/lib/rexml/parsers/baseparser.rb +247 -229
- data/lib/rexml/parsers/xpathparser.rb +136 -86
- data/lib/rexml/rexml.rb +3 -1
- data/lib/rexml/source.rb +114 -100
- data/lib/rexml/text.rb +6 -4
- data/lib/rexml/xpath_parser.rb +7 -3
- metadata +12 -38
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: false
|
2
|
+
|
2
3
|
require_relative '../namespace'
|
3
4
|
require_relative '../xmltokens'
|
4
5
|
|
@@ -38,108 +39,143 @@ module REXML
|
|
38
39
|
parsed
|
39
40
|
end
|
40
41
|
|
41
|
-
def abbreviate(
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
42
|
+
def abbreviate(path_or_parsed)
|
43
|
+
if path_or_parsed.kind_of?(String)
|
44
|
+
parsed = parse(path_or_parsed)
|
45
|
+
else
|
46
|
+
parsed = path_or_parsed
|
47
|
+
end
|
48
|
+
components = []
|
49
|
+
component = nil
|
50
|
+
while parsed.size > 0
|
51
|
+
op = parsed.shift
|
47
52
|
case op
|
48
53
|
when :node
|
54
|
+
component << "node()"
|
49
55
|
when :attribute
|
50
|
-
|
51
|
-
|
56
|
+
component = "@"
|
57
|
+
components << component
|
52
58
|
when :child
|
53
|
-
|
59
|
+
component = ""
|
60
|
+
components << component
|
54
61
|
when :descendant_or_self
|
55
|
-
|
62
|
+
next_op = parsed[0]
|
63
|
+
if next_op == :node
|
64
|
+
parsed.shift
|
65
|
+
component = ""
|
66
|
+
components << component
|
67
|
+
else
|
68
|
+
component = "descendant-or-self::"
|
69
|
+
components << component
|
70
|
+
end
|
56
71
|
when :self
|
57
|
-
|
72
|
+
next_op = parsed[0]
|
73
|
+
if next_op == :node
|
74
|
+
parsed.shift
|
75
|
+
components << "."
|
76
|
+
else
|
77
|
+
component = "self::"
|
78
|
+
components << component
|
79
|
+
end
|
58
80
|
when :parent
|
59
|
-
|
81
|
+
next_op = parsed[0]
|
82
|
+
if next_op == :node
|
83
|
+
parsed.shift
|
84
|
+
components << ".."
|
85
|
+
else
|
86
|
+
component = "parent::"
|
87
|
+
components << component
|
88
|
+
end
|
60
89
|
when :any
|
61
|
-
|
90
|
+
component << "*"
|
62
91
|
when :text
|
63
|
-
|
92
|
+
component << "text()"
|
64
93
|
when :following, :following_sibling,
|
65
94
|
:ancestor, :ancestor_or_self, :descendant,
|
66
95
|
:namespace, :preceding, :preceding_sibling
|
67
|
-
|
68
|
-
|
69
|
-
string << "::"
|
96
|
+
component = op.to_s.tr("_", "-") << "::"
|
97
|
+
components << component
|
70
98
|
when :qname
|
71
|
-
prefix =
|
72
|
-
name =
|
73
|
-
|
74
|
-
|
99
|
+
prefix = parsed.shift
|
100
|
+
name = parsed.shift
|
101
|
+
component << prefix+":" if prefix.size > 0
|
102
|
+
component << name
|
75
103
|
when :predicate
|
76
|
-
|
77
|
-
|
78
|
-
|
104
|
+
component << '['
|
105
|
+
component << predicate_to_path(parsed.shift) {|x| abbreviate(x)}
|
106
|
+
component << ']'
|
79
107
|
when :document
|
80
|
-
|
108
|
+
components << ""
|
81
109
|
when :function
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
110
|
+
component << parsed.shift
|
111
|
+
component << "( "
|
112
|
+
component << predicate_to_path(parsed.shift[0]) {|x| abbreviate(x)}
|
113
|
+
component << " )"
|
86
114
|
when :literal
|
87
|
-
|
115
|
+
component << quote_literal(parsed.shift)
|
88
116
|
else
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
string << ")"
|
117
|
+
component << "UNKNOWN("
|
118
|
+
component << op.inspect
|
119
|
+
component << ")"
|
93
120
|
end
|
94
121
|
end
|
95
|
-
|
96
|
-
|
122
|
+
case components
|
123
|
+
when [""]
|
124
|
+
"/"
|
125
|
+
when ["", ""]
|
126
|
+
"//"
|
127
|
+
else
|
128
|
+
components.join("/")
|
129
|
+
end
|
97
130
|
end
|
98
131
|
|
99
|
-
def expand(
|
100
|
-
|
101
|
-
|
132
|
+
def expand(path_or_parsed)
|
133
|
+
if path_or_parsed.kind_of?(String)
|
134
|
+
parsed = parse(path_or_parsed)
|
135
|
+
else
|
136
|
+
parsed = path_or_parsed
|
137
|
+
end
|
138
|
+
path = ""
|
102
139
|
document = false
|
103
|
-
while
|
104
|
-
op =
|
140
|
+
while parsed.size > 0
|
141
|
+
op = parsed.shift
|
105
142
|
case op
|
106
143
|
when :node
|
107
|
-
|
144
|
+
path << "node()"
|
108
145
|
when :attribute, :child, :following, :following_sibling,
|
109
146
|
:ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
|
110
147
|
:namespace, :preceding, :preceding_sibling, :self, :parent
|
111
|
-
|
112
|
-
|
113
|
-
|
148
|
+
path << "/" unless path.size == 0
|
149
|
+
path << op.to_s.tr("_", "-")
|
150
|
+
path << "::"
|
114
151
|
when :any
|
115
|
-
|
152
|
+
path << "*"
|
116
153
|
when :qname
|
117
|
-
prefix =
|
118
|
-
name =
|
119
|
-
|
120
|
-
|
154
|
+
prefix = parsed.shift
|
155
|
+
name = parsed.shift
|
156
|
+
path << prefix+":" if prefix.size > 0
|
157
|
+
path << name
|
121
158
|
when :predicate
|
122
|
-
|
123
|
-
|
124
|
-
|
159
|
+
path << '['
|
160
|
+
path << predicate_to_path( parsed.shift ) { |x| expand(x) }
|
161
|
+
path << ']'
|
125
162
|
when :document
|
126
163
|
document = true
|
127
164
|
else
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
string << ")"
|
165
|
+
path << "UNKNOWN("
|
166
|
+
path << op.inspect
|
167
|
+
path << ")"
|
132
168
|
end
|
133
169
|
end
|
134
|
-
|
135
|
-
|
170
|
+
path = "/"+path if document
|
171
|
+
path
|
136
172
|
end
|
137
173
|
|
138
|
-
def
|
139
|
-
|
140
|
-
case
|
174
|
+
def predicate_to_path(parsed, &block)
|
175
|
+
path = ""
|
176
|
+
case parsed[0]
|
141
177
|
when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
|
142
|
-
op =
|
178
|
+
op = parsed.shift
|
143
179
|
case op
|
144
180
|
when :eq
|
145
181
|
op = "="
|
@@ -156,36 +192,50 @@ module REXML
|
|
156
192
|
when :union
|
157
193
|
op = "|"
|
158
194
|
end
|
159
|
-
left =
|
160
|
-
right =
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
string << right
|
167
|
-
string << " "
|
195
|
+
left = predicate_to_path( parsed.shift, &block )
|
196
|
+
right = predicate_to_path( parsed.shift, &block )
|
197
|
+
path << left
|
198
|
+
path << " "
|
199
|
+
path << op.to_s
|
200
|
+
path << " "
|
201
|
+
path << right
|
168
202
|
when :function
|
169
|
-
|
170
|
-
name =
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
203
|
+
parsed.shift
|
204
|
+
name = parsed.shift
|
205
|
+
path << name
|
206
|
+
path << "("
|
207
|
+
parsed.shift.each_with_index do |argument, i|
|
208
|
+
path << ", " if i > 0
|
209
|
+
path << predicate_to_path(argument, &block)
|
210
|
+
end
|
211
|
+
path << ")"
|
175
212
|
when :literal
|
176
|
-
|
177
|
-
|
178
|
-
string << path.shift.inspect
|
179
|
-
string << " "
|
213
|
+
parsed.shift
|
214
|
+
path << quote_literal(parsed.shift)
|
180
215
|
else
|
181
|
-
|
182
|
-
string << yield( path )
|
183
|
-
string << " "
|
216
|
+
path << yield( parsed )
|
184
217
|
end
|
185
|
-
return
|
218
|
+
return path.squeeze(" ")
|
186
219
|
end
|
220
|
+
# For backward compatibility
|
221
|
+
alias_method :preciate_to_string, :predicate_to_path
|
187
222
|
|
188
223
|
private
|
224
|
+
def quote_literal( literal )
|
225
|
+
case literal
|
226
|
+
when String
|
227
|
+
# XPath 1.0 does not support escape characters.
|
228
|
+
# Assumes literal does not contain both single and double quotes.
|
229
|
+
if literal.include?("'")
|
230
|
+
"\"#{literal}\""
|
231
|
+
else
|
232
|
+
"'#{literal}'"
|
233
|
+
end
|
234
|
+
else
|
235
|
+
literal.inspect
|
236
|
+
end
|
237
|
+
end
|
238
|
+
|
189
239
|
#LocationPath
|
190
240
|
# | RelativeLocationPath
|
191
241
|
# | '/' RelativeLocationPath?
|
data/lib/rexml/rexml.rb
CHANGED
@@ -26,10 +26,12 @@
|
|
26
26
|
# - REXML::Document.
|
27
27
|
# - REXML::Element.
|
28
28
|
#
|
29
|
+
# There's also an {REXML tutorial}[doc/rexml/tutorial_rdoc.html].
|
30
|
+
#
|
29
31
|
module REXML
|
30
32
|
COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
|
31
33
|
DATE = "2008/019"
|
32
|
-
VERSION = "3.
|
34
|
+
VERSION = "3.3.0"
|
33
35
|
REVISION = ""
|
34
36
|
|
35
37
|
Copyright = COPYRIGHT
|
data/lib/rexml/source.rb
CHANGED
@@ -1,8 +1,28 @@
|
|
1
1
|
# coding: US-ASCII
|
2
2
|
# frozen_string_literal: false
|
3
|
+
|
4
|
+
require "strscan"
|
5
|
+
|
3
6
|
require_relative 'encoding'
|
4
7
|
|
5
8
|
module REXML
|
9
|
+
if StringScanner::Version < "1.0.0"
|
10
|
+
module StringScannerCheckScanString
|
11
|
+
refine StringScanner do
|
12
|
+
def check(pattern)
|
13
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
14
|
+
super(pattern)
|
15
|
+
end
|
16
|
+
|
17
|
+
def scan(pattern)
|
18
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
19
|
+
super(pattern)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
using StringScannerCheckScanString
|
24
|
+
end
|
25
|
+
|
6
26
|
# Generates Source-s. USE THIS CLASS.
|
7
27
|
class SourceFactory
|
8
28
|
# Generates a Source object
|
@@ -30,18 +50,27 @@ module REXML
|
|
30
50
|
# objects and provides consumption of text
|
31
51
|
class Source
|
32
52
|
include Encoding
|
33
|
-
# The current buffer (what we're going to read next)
|
34
|
-
attr_reader :buffer
|
35
53
|
# The line number of the last consumed text
|
36
54
|
attr_reader :line
|
37
55
|
attr_reader :encoding
|
38
56
|
|
57
|
+
module Private
|
58
|
+
PRE_DEFINED_TERM_PATTERNS = {}
|
59
|
+
pre_defined_terms = ["'", '"', "<"]
|
60
|
+
pre_defined_terms.each do |term|
|
61
|
+
PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
|
62
|
+
end
|
63
|
+
end
|
64
|
+
private_constant :Private
|
65
|
+
include Private
|
66
|
+
|
39
67
|
# Constructor
|
40
68
|
# @param arg must be a String, and should be a valid XML document
|
41
69
|
# @param encoding if non-null, sets the encoding of the source to this
|
42
70
|
# value, overriding all encoding detection
|
43
71
|
def initialize(arg, encoding=nil)
|
44
|
-
@orig =
|
72
|
+
@orig = arg
|
73
|
+
@scanner = StringScanner.new(@orig)
|
45
74
|
if encoding
|
46
75
|
self.encoding = encoding
|
47
76
|
else
|
@@ -50,6 +79,14 @@ module REXML
|
|
50
79
|
@line = 0
|
51
80
|
end
|
52
81
|
|
82
|
+
# The current buffer (what we're going to read next)
|
83
|
+
def buffer
|
84
|
+
@scanner.rest
|
85
|
+
end
|
86
|
+
|
87
|
+
def buffer_encoding=(encoding)
|
88
|
+
@scanner.string.force_encoding(encoding)
|
89
|
+
end
|
53
90
|
|
54
91
|
# Inherited from Encoding
|
55
92
|
# Overridden to support optimized en/decoding
|
@@ -58,98 +95,78 @@ module REXML
|
|
58
95
|
encoding_updated
|
59
96
|
end
|
60
97
|
|
61
|
-
|
62
|
-
# usual scan() method. For one thing, the pattern argument has some
|
63
|
-
# requirements; for another, the source can be consumed. You can easily
|
64
|
-
# confuse this method. Originally, the patterns were easier
|
65
|
-
# to construct and this method more robust, because this method
|
66
|
-
# generated search regexps on the fly; however, this was
|
67
|
-
# computationally expensive and slowed down the entire REXML package
|
68
|
-
# considerably, since this is by far the most commonly called method.
|
69
|
-
# @param pattern must be a Regexp, and must be in the form of
|
70
|
-
# /^\s*(#{your pattern, with no groups})(.*)/. The first group
|
71
|
-
# will be returned; the second group is used if the consume flag is
|
72
|
-
# set.
|
73
|
-
# @param consume if true, the pattern returned will be consumed, leaving
|
74
|
-
# everything after it in the Source.
|
75
|
-
# @return the pattern, if found, or nil if the Source is empty or the
|
76
|
-
# pattern is not found.
|
77
|
-
def scan(pattern, cons=false)
|
78
|
-
return nil if @buffer.nil?
|
79
|
-
rv = @buffer.scan(pattern)
|
80
|
-
@buffer = $' if cons and rv.size>0
|
81
|
-
rv
|
98
|
+
def read(term = nil)
|
82
99
|
end
|
83
100
|
|
84
|
-
def
|
101
|
+
def read_until(term)
|
102
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
103
|
+
data = @scanner.scan_until(pattern)
|
104
|
+
unless data
|
105
|
+
data = @scanner.rest
|
106
|
+
@scanner.pos = @scanner.string.bytesize
|
107
|
+
end
|
108
|
+
data
|
85
109
|
end
|
86
110
|
|
87
|
-
def
|
88
|
-
@buffer = $' if pattern.match( @buffer )
|
111
|
+
def ensure_buffer
|
89
112
|
end
|
90
113
|
|
91
|
-
def
|
92
|
-
|
114
|
+
def match(pattern, cons=false)
|
115
|
+
if cons
|
116
|
+
@scanner.scan(pattern).nil? ? nil : @scanner
|
117
|
+
else
|
118
|
+
@scanner.check(pattern).nil? ? nil : @scanner
|
119
|
+
end
|
93
120
|
end
|
94
121
|
|
95
|
-
def
|
96
|
-
|
97
|
-
@buffer = $'
|
98
|
-
return md
|
122
|
+
def position
|
123
|
+
@scanner.pos
|
99
124
|
end
|
100
125
|
|
101
|
-
def
|
102
|
-
|
103
|
-
@buffer = $' if cons and md
|
104
|
-
return md
|
126
|
+
def position=(pos)
|
127
|
+
@scanner.pos = pos
|
105
128
|
end
|
106
129
|
|
107
130
|
# @return true if the Source is exhausted
|
108
131
|
def empty?
|
109
|
-
@
|
110
|
-
end
|
111
|
-
|
112
|
-
def position
|
113
|
-
@orig.index( @buffer )
|
132
|
+
@scanner.eos?
|
114
133
|
end
|
115
134
|
|
116
135
|
# @return the current line in the source
|
117
136
|
def current_line
|
118
137
|
lines = @orig.split
|
119
|
-
res = lines.grep @
|
138
|
+
res = lines.grep @scanner.rest[0..30]
|
120
139
|
res = res[-1] if res.kind_of? Array
|
121
140
|
lines.index( res ) if res
|
122
141
|
end
|
123
142
|
|
124
143
|
private
|
144
|
+
|
125
145
|
def detect_encoding
|
126
|
-
|
146
|
+
scanner_encoding = @scanner.rest.encoding
|
127
147
|
detected_encoding = "UTF-8"
|
128
148
|
begin
|
129
|
-
@
|
130
|
-
if @
|
131
|
-
@buffer[0, 2] = ""
|
149
|
+
@scanner.string.force_encoding("ASCII-8BIT")
|
150
|
+
if @scanner.scan(/\xfe\xff/n)
|
132
151
|
detected_encoding = "UTF-16BE"
|
133
|
-
elsif @
|
134
|
-
@buffer[0, 2] = ""
|
152
|
+
elsif @scanner.scan(/\xff\xfe/n)
|
135
153
|
detected_encoding = "UTF-16LE"
|
136
|
-
elsif @
|
137
|
-
@buffer[0, 3] = ""
|
154
|
+
elsif @scanner.scan(/\xef\xbb\xbf/n)
|
138
155
|
detected_encoding = "UTF-8"
|
139
156
|
end
|
140
157
|
ensure
|
141
|
-
@
|
158
|
+
@scanner.string.force_encoding(scanner_encoding)
|
142
159
|
end
|
143
160
|
self.encoding = detected_encoding
|
144
161
|
end
|
145
162
|
|
146
163
|
def encoding_updated
|
147
164
|
if @encoding != 'UTF-8'
|
148
|
-
@
|
165
|
+
@scanner.string = decode(@scanner.rest)
|
149
166
|
@to_utf = true
|
150
167
|
else
|
151
168
|
@to_utf = false
|
152
|
-
@
|
169
|
+
@scanner.string.force_encoding(::Encoding::UTF_8)
|
153
170
|
end
|
154
171
|
end
|
155
172
|
end
|
@@ -172,7 +189,7 @@ module REXML
|
|
172
189
|
end
|
173
190
|
|
174
191
|
if !@to_utf and
|
175
|
-
@
|
192
|
+
@orig.respond_to?(:force_encoding) and
|
176
193
|
@source.respond_to?(:external_encoding) and
|
177
194
|
@source.external_encoding != ::Encoding::UTF_8
|
178
195
|
@force_utf8 = true
|
@@ -181,65 +198,62 @@ module REXML
|
|
181
198
|
end
|
182
199
|
end
|
183
200
|
|
184
|
-
def
|
185
|
-
|
186
|
-
# You'll notice that this next section is very similar to the same
|
187
|
-
# section in match(), but just a liiittle different. This is
|
188
|
-
# because it is a touch faster to do it this way with scan()
|
189
|
-
# than the way match() does it; enough faster to warrant duplicating
|
190
|
-
# some code
|
191
|
-
if rv.size == 0
|
192
|
-
until @buffer =~ pattern or @source.nil?
|
193
|
-
begin
|
194
|
-
@buffer << readline
|
195
|
-
rescue Iconv::IllegalSequence
|
196
|
-
raise
|
197
|
-
rescue
|
198
|
-
@source = nil
|
199
|
-
end
|
200
|
-
end
|
201
|
-
rv = super
|
202
|
-
end
|
203
|
-
rv.taint if RUBY_VERSION < '2.7'
|
204
|
-
rv
|
205
|
-
end
|
206
|
-
|
207
|
-
def read
|
201
|
+
def read(term = nil)
|
202
|
+
term = encode(term) if term
|
208
203
|
begin
|
209
|
-
@
|
204
|
+
@scanner << readline(term)
|
205
|
+
true
|
210
206
|
rescue Exception, NameError
|
211
207
|
@source = nil
|
208
|
+
false
|
209
|
+
end
|
210
|
+
end
|
211
|
+
|
212
|
+
def read_until(term)
|
213
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
214
|
+
term = encode(term)
|
215
|
+
until str = @scanner.scan_until(pattern)
|
216
|
+
break if @source.nil?
|
217
|
+
break if @source.eof?
|
218
|
+
@scanner << readline(term)
|
219
|
+
end
|
220
|
+
if str
|
221
|
+
read if @scanner.eos? and !@source.eof?
|
222
|
+
str
|
223
|
+
else
|
224
|
+
rest = @scanner.rest
|
225
|
+
@scanner.pos = @scanner.string.bytesize
|
226
|
+
rest
|
212
227
|
end
|
213
228
|
end
|
214
229
|
|
215
|
-
def
|
216
|
-
|
230
|
+
def ensure_buffer
|
231
|
+
read if @scanner.eos? && @source
|
217
232
|
end
|
218
233
|
|
234
|
+
# Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
|
235
|
+
# - ">"
|
236
|
+
# - "XXX>" (X is any string excluding '>')
|
219
237
|
def match( pattern, cons=false )
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
rv = pattern.match(@buffer)
|
226
|
-
@buffer = $' if cons and rv
|
227
|
-
rescue
|
228
|
-
@source = nil
|
238
|
+
while true
|
239
|
+
if cons
|
240
|
+
md = @scanner.scan(pattern)
|
241
|
+
else
|
242
|
+
md = @scanner.check(pattern)
|
229
243
|
end
|
244
|
+
break if md
|
245
|
+
return nil if pattern.is_a?(String)
|
246
|
+
return nil if @source.nil?
|
247
|
+
return nil unless read
|
230
248
|
end
|
231
|
-
|
232
|
-
|
249
|
+
|
250
|
+
md.nil? ? nil : @scanner
|
233
251
|
end
|
234
252
|
|
235
253
|
def empty?
|
236
254
|
super and ( @source.nil? || @source.eof? )
|
237
255
|
end
|
238
256
|
|
239
|
-
def position
|
240
|
-
@er_source.pos rescue 0
|
241
|
-
end
|
242
|
-
|
243
257
|
# @return the current line in the source
|
244
258
|
def current_line
|
245
259
|
begin
|
@@ -263,8 +277,8 @@ module REXML
|
|
263
277
|
end
|
264
278
|
|
265
279
|
private
|
266
|
-
def readline
|
267
|
-
str = @source.readline(@line_break)
|
280
|
+
def readline(term = nil)
|
281
|
+
str = @source.readline(term || @line_break)
|
268
282
|
if @pending_buffer
|
269
283
|
if str.nil?
|
270
284
|
str = @pending_buffer
|
@@ -290,7 +304,7 @@ module REXML
|
|
290
304
|
@source.set_encoding(@encoding, @encoding)
|
291
305
|
end
|
292
306
|
@line_break = encode(">")
|
293
|
-
@pending_buffer, @
|
307
|
+
@pending_buffer, @scanner.string = @scanner.rest, ""
|
294
308
|
@pending_buffer.force_encoding(@encoding)
|
295
309
|
super
|
296
310
|
end
|
data/lib/rexml/text.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# frozen_string_literal:
|
1
|
+
# frozen_string_literal: true
|
2
2
|
require_relative 'security'
|
3
3
|
require_relative 'entity'
|
4
4
|
require_relative 'doctype'
|
@@ -131,7 +131,7 @@ module REXML
|
|
131
131
|
def Text.check string, pattern, doctype
|
132
132
|
|
133
133
|
# illegal anywhere
|
134
|
-
if string
|
134
|
+
if !string.match?(VALID_XML_CHARS)
|
135
135
|
if String.method_defined? :encode
|
136
136
|
string.chars.each do |c|
|
137
137
|
case c.ord
|
@@ -371,7 +371,7 @@ module REXML
|
|
371
371
|
copy = input.to_s
|
372
372
|
# Doing it like this rather than in a loop improves the speed
|
373
373
|
#copy = copy.gsub( EREFERENCE, '&amp;' )
|
374
|
-
copy = copy.gsub( "&", "&amp;" )
|
374
|
+
copy = copy.gsub( "&", "&amp;" ) if copy.include?("&")
|
375
375
|
if doctype
|
376
376
|
# Replace all ampersands that aren't part of an entity
|
377
377
|
doctype.entities.each_value do |entity|
|
@@ -382,7 +382,9 @@ module REXML
|
|
382
382
|
else
|
383
383
|
# Replace all ampersands that aren't part of an entity
|
384
384
|
DocType::DEFAULT_ENTITIES.each_value do |entity|
|
385
|
-
|
385
|
+
if copy.include?(entity.value)
|
386
|
+
copy = copy.gsub(entity.value, "&#{entity.name};" )
|
387
|
+
end
|
386
388
|
end
|
387
389
|
end
|
388
390
|
copy
|