rexml 3.2.6 → 3.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +423 -0
- data/lib/rexml/attribute.rb +3 -2
- data/lib/rexml/document.rb +5 -1
- data/lib/rexml/element.rb +16 -31
- data/lib/rexml/entity.rb +9 -48
- data/lib/rexml/formatters/pretty.rb +1 -1
- data/lib/rexml/functions.rb +1 -2
- data/lib/rexml/node.rb +8 -4
- data/lib/rexml/parseexception.rb +1 -0
- data/lib/rexml/parsers/baseparser.rb +466 -273
- data/lib/rexml/parsers/pullparser.rb +16 -0
- data/lib/rexml/parsers/sax2parser.rb +16 -19
- data/lib/rexml/parsers/streamparser.rb +16 -10
- data/lib/rexml/parsers/treeparser.rb +9 -21
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +185 -100
- data/lib/rexml/text.rb +54 -57
- data/lib/rexml/xpath_parser.rb +7 -3
- metadata +6 -50
@@ -47,6 +47,18 @@ module REXML
|
|
47
47
|
@listeners << listener
|
48
48
|
end
|
49
49
|
|
50
|
+
def entity_expansion_count
|
51
|
+
@parser.entity_expansion_count
|
52
|
+
end
|
53
|
+
|
54
|
+
def entity_expansion_limit=( limit )
|
55
|
+
@parser.entity_expansion_limit = limit
|
56
|
+
end
|
57
|
+
|
58
|
+
def entity_expansion_text_limit=( limit )
|
59
|
+
@parser.entity_expansion_text_limit = limit
|
60
|
+
end
|
61
|
+
|
50
62
|
def each
|
51
63
|
while has_next?
|
52
64
|
yield self.pull
|
@@ -81,6 +93,10 @@ module REXML
|
|
81
93
|
def unshift token
|
82
94
|
@my_stack.unshift token
|
83
95
|
end
|
96
|
+
|
97
|
+
def reset
|
98
|
+
@parser.reset
|
99
|
+
end
|
84
100
|
end
|
85
101
|
|
86
102
|
# A parsing event. The contents of the event are accessed as an +Array?,
|
@@ -22,6 +22,18 @@ module REXML
|
|
22
22
|
@parser.source
|
23
23
|
end
|
24
24
|
|
25
|
+
def entity_expansion_count
|
26
|
+
@parser.entity_expansion_count
|
27
|
+
end
|
28
|
+
|
29
|
+
def entity_expansion_limit=( limit )
|
30
|
+
@parser.entity_expansion_limit = limit
|
31
|
+
end
|
32
|
+
|
33
|
+
def entity_expansion_text_limit=( limit )
|
34
|
+
@parser.entity_expansion_text_limit = limit
|
35
|
+
end
|
36
|
+
|
25
37
|
def add_listener( listener )
|
26
38
|
@parser.add_listener( listener )
|
27
39
|
end
|
@@ -157,25 +169,8 @@ module REXML
|
|
157
169
|
end
|
158
170
|
end
|
159
171
|
when :text
|
160
|
-
|
161
|
-
|
162
|
-
copy = event[1].clone
|
163
|
-
|
164
|
-
esub = proc { |match|
|
165
|
-
if @entities.has_key?($1)
|
166
|
-
@entities[$1].gsub(Text::REFERENCE, &esub)
|
167
|
-
else
|
168
|
-
match
|
169
|
-
end
|
170
|
-
}
|
171
|
-
|
172
|
-
copy.gsub!( Text::REFERENCE, &esub )
|
173
|
-
copy.gsub!( Text::NUMERICENTITY ) {|m|
|
174
|
-
m=$1
|
175
|
-
m = "0#{m}" if m[0] == ?x
|
176
|
-
[Integer(m)].pack('U*')
|
177
|
-
}
|
178
|
-
handle( :characters, copy )
|
172
|
+
unnormalized = @parser.unnormalize( event[1], @entities )
|
173
|
+
handle( :characters, unnormalized )
|
179
174
|
when :entitydecl
|
180
175
|
handle_entitydecl( event )
|
181
176
|
when :processing_instruction, :comment, :attlistdecl,
|
@@ -264,6 +259,8 @@ module REXML
|
|
264
259
|
end
|
265
260
|
|
266
261
|
def get_namespace( prefix )
|
262
|
+
return nil if @namespace_stack.empty?
|
263
|
+
|
267
264
|
uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
|
268
265
|
(@namespace_stack.find { |ns| not ns[nil].nil? })
|
269
266
|
uris[-1][prefix] unless uris.nil? or 0 == uris.size
|
@@ -7,37 +7,42 @@ module REXML
|
|
7
7
|
def initialize source, listener
|
8
8
|
@listener = listener
|
9
9
|
@parser = BaseParser.new( source )
|
10
|
-
@tag_stack = []
|
10
|
+
@entities = {}
|
11
11
|
end
|
12
12
|
|
13
13
|
def add_listener( listener )
|
14
14
|
@parser.add_listener( listener )
|
15
15
|
end
|
16
16
|
|
17
|
+
def entity_expansion_count
|
18
|
+
@parser.entity_expansion_count
|
19
|
+
end
|
20
|
+
|
21
|
+
def entity_expansion_limit=( limit )
|
22
|
+
@parser.entity_expansion_limit = limit
|
23
|
+
end
|
24
|
+
|
25
|
+
def entity_expansion_text_limit=( limit )
|
26
|
+
@parser.entity_expansion_text_limit = limit
|
27
|
+
end
|
28
|
+
|
17
29
|
def parse
|
18
30
|
# entity string
|
19
31
|
while true
|
20
32
|
event = @parser.pull
|
21
33
|
case event[0]
|
22
34
|
when :end_document
|
23
|
-
unless @tag_stack.empty?
|
24
|
-
tag_path = "/" + @tag_stack.join("/")
|
25
|
-
raise ParseException.new("Missing end tag for '#{tag_path}'",
|
26
|
-
@parser.source)
|
27
|
-
end
|
28
35
|
return
|
29
36
|
when :start_element
|
30
|
-
@tag_stack << event[1]
|
31
37
|
attrs = event[2].each do |n, v|
|
32
38
|
event[2][n] = @parser.unnormalize( v )
|
33
39
|
end
|
34
40
|
@listener.tag_start( event[1], attrs )
|
35
41
|
when :end_element
|
36
42
|
@listener.tag_end( event[1] )
|
37
|
-
@tag_stack.pop
|
38
43
|
when :text
|
39
|
-
|
40
|
-
@listener.text( normalized )
|
44
|
+
unnormalized = @parser.unnormalize( event[1], @entities )
|
45
|
+
@listener.text( unnormalized )
|
41
46
|
when :processing_instruction
|
42
47
|
@listener.instruction( *event[1,2] )
|
43
48
|
when :start_doctype
|
@@ -48,6 +53,7 @@ module REXML
|
|
48
53
|
when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
|
49
54
|
@listener.send( event[0].to_s, *event[1..-1] )
|
50
55
|
when :entitydecl, :notationdecl
|
56
|
+
@entities[ event[1] ] = event[2] if event.size == 3
|
51
57
|
@listener.send( event[0].to_s, event[1..-1] )
|
52
58
|
when :externalentity
|
53
59
|
entity_reference = event[1]
|
@@ -15,8 +15,6 @@ module REXML
|
|
15
15
|
end
|
16
16
|
|
17
17
|
def parse
|
18
|
-
tag_stack = []
|
19
|
-
in_doctype = false
|
20
18
|
entities = nil
|
21
19
|
begin
|
22
20
|
while true
|
@@ -24,32 +22,24 @@ module REXML
|
|
24
22
|
#STDERR.puts "TREEPARSER GOT #{event.inspect}"
|
25
23
|
case event[0]
|
26
24
|
when :end_document
|
27
|
-
unless tag_stack.empty?
|
28
|
-
raise ParseException.new("No close tag for #{@build_context.xpath}",
|
29
|
-
@parser.source, @parser)
|
30
|
-
end
|
31
25
|
return
|
32
26
|
when :start_element
|
33
|
-
tag_stack.push(event[1])
|
34
27
|
el = @build_context = @build_context.add_element( event[1] )
|
35
28
|
event[2].each do |key, value|
|
36
29
|
el.attributes[key]=Attribute.new(key,value,self)
|
37
30
|
end
|
38
31
|
when :end_element
|
39
|
-
tag_stack.pop
|
40
32
|
@build_context = @build_context.parent
|
41
33
|
when :text
|
42
|
-
if not in_doctype
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
@build_context.add(
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
)
|
52
|
-
end
|
34
|
+
if @build_context[-1].instance_of? Text
|
35
|
+
@build_context[-1] << event[1]
|
36
|
+
else
|
37
|
+
@build_context.add(
|
38
|
+
Text.new(event[1], @build_context.whitespace, nil, true)
|
39
|
+
) unless (
|
40
|
+
@build_context.ignore_whitespace_nodes and
|
41
|
+
event[1].strip.size==0
|
42
|
+
)
|
53
43
|
end
|
54
44
|
when :comment
|
55
45
|
c = Comment.new( event[1] )
|
@@ -60,14 +50,12 @@ module REXML
|
|
60
50
|
when :processing_instruction
|
61
51
|
@build_context.add( Instruction.new( event[1], event[2] ) )
|
62
52
|
when :end_doctype
|
63
|
-
in_doctype = false
|
64
53
|
entities.each { |k,v| entities[k] = @build_context.entities[k].value }
|
65
54
|
@build_context = @build_context.parent
|
66
55
|
when :start_doctype
|
67
56
|
doctype = DocType.new( event[1..-1], @build_context )
|
68
57
|
@build_context = doctype
|
69
58
|
entities = {}
|
70
|
-
in_doctype = true
|
71
59
|
when :attlistdecl
|
72
60
|
n = AttlistDecl.new( event[1..-1] )
|
73
61
|
@build_context.add( n )
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -1,8 +1,39 @@
|
|
1
1
|
# coding: US-ASCII
|
2
2
|
# frozen_string_literal: false
|
3
|
+
|
4
|
+
require "stringio"
|
5
|
+
require "strscan"
|
6
|
+
|
3
7
|
require_relative 'encoding'
|
4
8
|
|
5
9
|
module REXML
|
10
|
+
if StringScanner::Version < "1.0.0"
|
11
|
+
module StringScannerCheckScanString
|
12
|
+
refine StringScanner do
|
13
|
+
def check(pattern)
|
14
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
15
|
+
super(pattern)
|
16
|
+
end
|
17
|
+
|
18
|
+
def scan(pattern)
|
19
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
20
|
+
super(pattern)
|
21
|
+
end
|
22
|
+
|
23
|
+
def match?(pattern)
|
24
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
25
|
+
super(pattern)
|
26
|
+
end
|
27
|
+
|
28
|
+
def skip(pattern)
|
29
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
30
|
+
super(pattern)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
using StringScannerCheckScanString
|
35
|
+
end
|
36
|
+
|
6
37
|
# Generates Source-s. USE THIS CLASS.
|
7
38
|
class SourceFactory
|
8
39
|
# Generates a Source object
|
@@ -15,7 +46,6 @@ module REXML
|
|
15
46
|
arg.respond_to? :eof?
|
16
47
|
IOSource.new(arg)
|
17
48
|
elsif arg.respond_to? :to_str
|
18
|
-
require 'stringio'
|
19
49
|
IOSource.new(StringIO.new(arg))
|
20
50
|
elsif arg.kind_of? Source
|
21
51
|
arg
|
@@ -30,26 +60,56 @@ module REXML
|
|
30
60
|
# objects and provides consumption of text
|
31
61
|
class Source
|
32
62
|
include Encoding
|
33
|
-
# The current buffer (what we're going to read next)
|
34
|
-
attr_reader :buffer
|
35
63
|
# The line number of the last consumed text
|
36
64
|
attr_reader :line
|
37
65
|
attr_reader :encoding
|
38
66
|
|
67
|
+
module Private
|
68
|
+
SCANNER_RESET_SIZE = 100000
|
69
|
+
PRE_DEFINED_TERM_PATTERNS = {}
|
70
|
+
pre_defined_terms = ["'", '"', "<"]
|
71
|
+
if StringScanner::Version < "3.1.1"
|
72
|
+
pre_defined_terms.each do |term|
|
73
|
+
PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
|
74
|
+
end
|
75
|
+
else
|
76
|
+
pre_defined_terms.each do |term|
|
77
|
+
PRE_DEFINED_TERM_PATTERNS[term] = term
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
private_constant :Private
|
82
|
+
|
39
83
|
# Constructor
|
40
84
|
# @param arg must be a String, and should be a valid XML document
|
41
85
|
# @param encoding if non-null, sets the encoding of the source to this
|
42
86
|
# value, overriding all encoding detection
|
43
87
|
def initialize(arg, encoding=nil)
|
44
|
-
@orig = @buffer = arg
|
88
|
+
@orig = arg
|
89
|
+
@scanner = StringScanner.new(@orig)
|
45
90
|
if encoding
|
46
91
|
self.encoding = encoding
|
47
92
|
else
|
48
93
|
detect_encoding
|
49
94
|
end
|
50
95
|
@line = 0
|
96
|
+
@encoded_terms = {}
|
97
|
+
end
|
98
|
+
|
99
|
+
# The current buffer (what we're going to read next)
|
100
|
+
def buffer
|
101
|
+
@scanner.rest
|
51
102
|
end
|
52
103
|
|
104
|
+
def drop_parsed_content
|
105
|
+
if @scanner.pos > Private::SCANNER_RESET_SIZE
|
106
|
+
@scanner.string = @scanner.rest
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
def buffer_encoding=(encoding)
|
111
|
+
@scanner.string.force_encoding(encoding)
|
112
|
+
end
|
53
113
|
|
54
114
|
# Inherited from Encoding
|
55
115
|
# Overridden to support optimized en/decoding
|
@@ -58,98 +118,94 @@ module REXML
|
|
58
118
|
encoding_updated
|
59
119
|
end
|
60
120
|
|
61
|
-
|
62
|
-
# usual scan() method. For one thing, the pattern argument has some
|
63
|
-
# requirements; for another, the source can be consumed. You can easily
|
64
|
-
# confuse this method. Originally, the patterns were easier
|
65
|
-
# to construct and this method more robust, because this method
|
66
|
-
# generated search regexps on the fly; however, this was
|
67
|
-
# computationally expensive and slowed down the entire REXML package
|
68
|
-
# considerably, since this is by far the most commonly called method.
|
69
|
-
# @param pattern must be a Regexp, and must be in the form of
|
70
|
-
# /^\s*(#{your pattern, with no groups})(.*)/. The first group
|
71
|
-
# will be returned; the second group is used if the consume flag is
|
72
|
-
# set.
|
73
|
-
# @param consume if true, the pattern returned will be consumed, leaving
|
74
|
-
# everything after it in the Source.
|
75
|
-
# @return the pattern, if found, or nil if the Source is empty or the
|
76
|
-
# pattern is not found.
|
77
|
-
def scan(pattern, cons=false)
|
78
|
-
return nil if @buffer.nil?
|
79
|
-
rv = @buffer.scan(pattern)
|
80
|
-
@buffer = $' if cons and rv.size>0
|
81
|
-
rv
|
121
|
+
def read(term = nil)
|
82
122
|
end
|
83
123
|
|
84
|
-
def
|
124
|
+
def read_until(term)
|
125
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
126
|
+
data = @scanner.scan_until(pattern)
|
127
|
+
unless data
|
128
|
+
data = @scanner.rest
|
129
|
+
@scanner.pos = @scanner.string.bytesize
|
130
|
+
end
|
131
|
+
data
|
85
132
|
end
|
86
133
|
|
87
|
-
def
|
88
|
-
@buffer = $' if pattern.match( @buffer )
|
134
|
+
def ensure_buffer
|
89
135
|
end
|
90
136
|
|
91
|
-
def
|
92
|
-
|
137
|
+
def match(pattern, cons=false)
|
138
|
+
if cons
|
139
|
+
@scanner.scan(pattern).nil? ? nil : @scanner
|
140
|
+
else
|
141
|
+
@scanner.check(pattern).nil? ? nil : @scanner
|
142
|
+
end
|
93
143
|
end
|
94
144
|
|
95
|
-
def
|
96
|
-
|
97
|
-
|
98
|
-
|
145
|
+
def match?(pattern, cons=false)
|
146
|
+
if cons
|
147
|
+
!@scanner.skip(pattern).nil?
|
148
|
+
else
|
149
|
+
!@scanner.match?(pattern).nil?
|
150
|
+
end
|
99
151
|
end
|
100
152
|
|
101
|
-
def
|
102
|
-
|
103
|
-
@buffer = $' if cons and md
|
104
|
-
return md
|
153
|
+
def position
|
154
|
+
@scanner.pos
|
105
155
|
end
|
106
156
|
|
107
|
-
|
108
|
-
|
109
|
-
@buffer == ""
|
157
|
+
def position=(pos)
|
158
|
+
@scanner.pos = pos
|
110
159
|
end
|
111
160
|
|
112
|
-
def position
|
113
|
-
@orig.index( @buffer )
|
161
|
+
def peek_byte
|
162
|
+
@scanner.peek_byte
|
163
|
+
end
|
164
|
+
|
165
|
+
def scan_byte
|
166
|
+
@scanner.scan_byte
|
167
|
+
end
|
168
|
+
|
169
|
+
# @return true if the Source is exhausted
|
170
|
+
def empty?
|
171
|
+
@scanner.eos?
|
114
172
|
end
|
115
173
|
|
116
174
|
# @return the current line in the source
|
117
175
|
def current_line
|
118
176
|
lines = @orig.split
|
119
|
-
res = lines.grep @buffer[0..30]
|
177
|
+
res = lines.grep @scanner.rest[0..30]
|
120
178
|
res = res[-1] if res.kind_of? Array
|
121
179
|
lines.index( res ) if res
|
122
180
|
end
|
123
181
|
|
124
182
|
private
|
183
|
+
|
125
184
|
def detect_encoding
|
126
|
-
|
185
|
+
scanner_encoding = @scanner.rest.encoding
|
127
186
|
detected_encoding = "UTF-8"
|
128
187
|
begin
|
129
|
-
@buffer.force_encoding("ASCII-8BIT")
|
130
|
-
if @buffer[0, 2] == "\xfe\xff"
|
131
|
-
@buffer[0, 2] = ""
|
188
|
+
@scanner.string.force_encoding("ASCII-8BIT")
|
189
|
+
if @scanner.scan(/\xfe\xff/n)
|
132
190
|
detected_encoding = "UTF-16BE"
|
133
|
-
elsif @buffer[0, 2] == "\xff\xfe"
|
134
|
-
@buffer[0, 2] = ""
|
191
|
+
elsif @scanner.scan(/\xff\xfe/n)
|
135
192
|
detected_encoding = "UTF-16LE"
|
136
|
-
elsif @buffer[0, 3] == "\xef\xbb\xbf"
|
137
|
-
@buffer[0, 3] = ""
|
193
|
+
elsif @scanner.scan(/\xef\xbb\xbf/n)
|
138
194
|
detected_encoding = "UTF-8"
|
139
195
|
end
|
140
196
|
ensure
|
141
|
-
@buffer.force_encoding(buffer_encoding)
|
197
|
+
@scanner.string.force_encoding(scanner_encoding)
|
142
198
|
end
|
143
199
|
self.encoding = detected_encoding
|
144
200
|
end
|
145
201
|
|
146
202
|
def encoding_updated
|
147
203
|
if @encoding != 'UTF-8'
|
148
|
-
@buffer = decode(@buffer)
|
204
|
+
@scanner.string = decode(@scanner.rest)
|
149
205
|
@to_utf = true
|
150
206
|
else
|
151
207
|
@to_utf = false
|
152
|
-
@buffer.force_encoding ::Encoding::UTF_8
|
208
|
+
@scanner.string.force_encoding(::Encoding::UTF_8)
|
153
209
|
end
|
154
210
|
end
|
155
211
|
end
|
@@ -172,7 +228,7 @@ module REXML
|
|
172
228
|
end
|
173
229
|
|
174
230
|
if !@to_utf and
|
175
|
-
@buffer.respond_to?(:force_encoding) and
|
231
|
+
@orig.respond_to?(:force_encoding) and
|
176
232
|
@source.respond_to?(:external_encoding) and
|
177
233
|
@source.external_encoding != ::Encoding::UTF_8
|
178
234
|
@force_utf8 = true
|
@@ -181,63 +237,87 @@ module REXML
|
|
181
237
|
end
|
182
238
|
end
|
183
239
|
|
184
|
-
def
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
rescue Iconv::IllegalSequence
|
196
|
-
raise
|
197
|
-
rescue
|
198
|
-
@source = nil
|
240
|
+
def read(term = nil, min_bytes = 1)
|
241
|
+
term = encode(term) if term
|
242
|
+
begin
|
243
|
+
str = readline(term)
|
244
|
+
@scanner << str
|
245
|
+
read_bytes = str.bytesize
|
246
|
+
begin
|
247
|
+
while read_bytes < min_bytes
|
248
|
+
str = readline(term)
|
249
|
+
@scanner << str
|
250
|
+
read_bytes += str.bytesize
|
199
251
|
end
|
252
|
+
rescue IOError
|
200
253
|
end
|
201
|
-
|
254
|
+
true
|
255
|
+
rescue Exception, NameError
|
256
|
+
@source = nil
|
257
|
+
false
|
202
258
|
end
|
203
|
-
rv.taint if RUBY_VERSION < '2.7'
|
204
|
-
rv
|
205
259
|
end
|
206
260
|
|
207
|
-
def
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
@source
|
261
|
+
def read_until(term)
|
262
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
263
|
+
term = @encoded_terms[term] ||= encode(term)
|
264
|
+
until str = @scanner.scan_until(pattern)
|
265
|
+
break if @source.nil?
|
266
|
+
break if @source.eof?
|
267
|
+
@scanner << readline(term)
|
268
|
+
end
|
269
|
+
if str
|
270
|
+
read if @scanner.eos? and !@source.eof?
|
271
|
+
str
|
272
|
+
else
|
273
|
+
rest = @scanner.rest
|
274
|
+
@scanner.pos = @scanner.string.bytesize
|
275
|
+
rest
|
212
276
|
end
|
213
277
|
end
|
214
278
|
|
215
|
-
def
|
216
|
-
|
279
|
+
def ensure_buffer
|
280
|
+
read if @scanner.eos? && @source
|
217
281
|
end
|
218
282
|
|
219
283
|
def match( pattern, cons=false )
|
220
|
-
|
221
|
-
|
222
|
-
while
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
rescue
|
228
|
-
@source = nil
|
284
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
285
|
+
min_bytes = 1
|
286
|
+
while true
|
287
|
+
if cons
|
288
|
+
md = @scanner.scan(pattern)
|
289
|
+
else
|
290
|
+
md = @scanner.check(pattern)
|
229
291
|
end
|
292
|
+
break if md
|
293
|
+
return nil if pattern.is_a?(String)
|
294
|
+
return nil if @source.nil?
|
295
|
+
return nil unless read(nil, min_bytes)
|
296
|
+
min_bytes *= 2
|
230
297
|
end
|
231
|
-
|
232
|
-
|
298
|
+
|
299
|
+
md.nil? ? nil : @scanner
|
233
300
|
end
|
234
301
|
|
235
|
-
def
|
236
|
-
|
302
|
+
def match?( pattern, cons=false )
|
303
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
304
|
+
min_bytes = 1
|
305
|
+
while true
|
306
|
+
if cons
|
307
|
+
n_matched_bytes = @scanner.skip(pattern)
|
308
|
+
else
|
309
|
+
n_matched_bytes = @scanner.match?(pattern)
|
310
|
+
end
|
311
|
+
return true if n_matched_bytes
|
312
|
+
return false if pattern.is_a?(String)
|
313
|
+
return false if @source.nil?
|
314
|
+
return false unless read(nil, min_bytes)
|
315
|
+
min_bytes *= 2
|
316
|
+
end
|
237
317
|
end
|
238
318
|
|
239
|
-
def
|
240
|
-
@
|
319
|
+
def empty?
|
320
|
+
super and ( @source.nil? || @source.eof? )
|
241
321
|
end
|
242
322
|
|
243
323
|
# @return the current line in the source
|
@@ -255,7 +335,7 @@ module REXML
|
|
255
335
|
rescue
|
256
336
|
end
|
257
337
|
@er_source.seek(pos)
|
258
|
-
rescue IOError
|
338
|
+
rescue IOError, SystemCallError
|
259
339
|
pos = -1
|
260
340
|
line = -1
|
261
341
|
end
|
@@ -263,15 +343,20 @@ module REXML
|
|
263
343
|
end
|
264
344
|
|
265
345
|
private
|
266
|
-
def readline
|
267
|
-
str = @source.readline(@line_break)
|
346
|
+
def readline(term = nil)
|
268
347
|
if @pending_buffer
|
348
|
+
begin
|
349
|
+
str = @source.readline(term || @line_break)
|
350
|
+
rescue IOError
|
351
|
+
end
|
269
352
|
if str.nil?
|
270
353
|
str = @pending_buffer
|
271
354
|
else
|
272
355
|
str = @pending_buffer + str
|
273
356
|
end
|
274
357
|
@pending_buffer = nil
|
358
|
+
else
|
359
|
+
str = @source.readline(term || @line_break)
|
275
360
|
end
|
276
361
|
return nil if str.nil?
|
277
362
|
|
@@ -290,7 +375,7 @@ module REXML
|
|
290
375
|
@source.set_encoding(@encoding, @encoding)
|
291
376
|
end
|
292
377
|
@line_break = encode(">")
|
293
|
-
@pending_buffer, @buffer = @buffer, ""
|
378
|
+
@pending_buffer, @scanner.string = @scanner.rest, ""
|
294
379
|
@pending_buffer.force_encoding(@encoding)
|
295
380
|
super
|
296
381
|
end
|