rexml 3.2.6 → 3.3.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +151 -0
- data/lib/rexml/element.rb +2 -15
- data/lib/rexml/functions.rb +1 -2
- data/lib/rexml/node.rb +8 -4
- data/lib/rexml/parseexception.rb +1 -0
- data/lib/rexml/parsers/baseparser.rb +281 -240
- data/lib/rexml/parsers/treeparser.rb +9 -14
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +120 -100
- data/lib/rexml/xpath_parser.rb +7 -3
- metadata +7 -37
@@ -16,7 +16,6 @@ module REXML
|
|
16
16
|
|
17
17
|
def parse
|
18
18
|
tag_stack = []
|
19
|
-
in_doctype = false
|
20
19
|
entities = nil
|
21
20
|
begin
|
22
21
|
while true
|
@@ -39,17 +38,15 @@ module REXML
|
|
39
38
|
tag_stack.pop
|
40
39
|
@build_context = @build_context.parent
|
41
40
|
when :text
|
42
|
-
if
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
@build_context.
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
)
|
52
|
-
end
|
41
|
+
if @build_context[-1].instance_of? Text
|
42
|
+
@build_context[-1] << event[1]
|
43
|
+
else
|
44
|
+
@build_context.add(
|
45
|
+
Text.new(event[1], @build_context.whitespace, nil, true)
|
46
|
+
) unless (
|
47
|
+
@build_context.ignore_whitespace_nodes and
|
48
|
+
event[1].strip.size==0
|
49
|
+
)
|
53
50
|
end
|
54
51
|
when :comment
|
55
52
|
c = Comment.new( event[1] )
|
@@ -60,14 +57,12 @@ module REXML
|
|
60
57
|
when :processing_instruction
|
61
58
|
@build_context.add( Instruction.new( event[1], event[2] ) )
|
62
59
|
when :end_doctype
|
63
|
-
in_doctype = false
|
64
60
|
entities.each { |k,v| entities[k] = @build_context.entities[k].value }
|
65
61
|
@build_context = @build_context.parent
|
66
62
|
when :start_doctype
|
67
63
|
doctype = DocType.new( event[1..-1], @build_context )
|
68
64
|
@build_context = doctype
|
69
65
|
entities = {}
|
70
|
-
in_doctype = true
|
71
66
|
when :attlistdecl
|
72
67
|
n = AttlistDecl.new( event[1..-1] )
|
73
68
|
@build_context.add( n )
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -1,8 +1,28 @@
|
|
1
1
|
# coding: US-ASCII
|
2
2
|
# frozen_string_literal: false
|
3
|
+
|
4
|
+
require "strscan"
|
5
|
+
|
3
6
|
require_relative 'encoding'
|
4
7
|
|
5
8
|
module REXML
|
9
|
+
if StringScanner::Version < "1.0.0"
|
10
|
+
module StringScannerCheckScanString
|
11
|
+
refine StringScanner do
|
12
|
+
def check(pattern)
|
13
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
14
|
+
super(pattern)
|
15
|
+
end
|
16
|
+
|
17
|
+
def scan(pattern)
|
18
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
19
|
+
super(pattern)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
using StringScannerCheckScanString
|
24
|
+
end
|
25
|
+
|
6
26
|
# Generates Source-s. USE THIS CLASS.
|
7
27
|
class SourceFactory
|
8
28
|
# Generates a Source object
|
@@ -30,18 +50,27 @@ module REXML
|
|
30
50
|
# objects and provides consumption of text
|
31
51
|
class Source
|
32
52
|
include Encoding
|
33
|
-
# The current buffer (what we're going to read next)
|
34
|
-
attr_reader :buffer
|
35
53
|
# The line number of the last consumed text
|
36
54
|
attr_reader :line
|
37
55
|
attr_reader :encoding
|
38
56
|
|
57
|
+
module Private
|
58
|
+
SCANNER_RESET_SIZE = 100000
|
59
|
+
PRE_DEFINED_TERM_PATTERNS = {}
|
60
|
+
pre_defined_terms = ["'", '"', "<"]
|
61
|
+
pre_defined_terms.each do |term|
|
62
|
+
PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
|
63
|
+
end
|
64
|
+
end
|
65
|
+
private_constant :Private
|
66
|
+
|
39
67
|
# Constructor
|
40
68
|
# @param arg must be a String, and should be a valid XML document
|
41
69
|
# @param encoding if non-null, sets the encoding of the source to this
|
42
70
|
# value, overriding all encoding detection
|
43
71
|
def initialize(arg, encoding=nil)
|
44
|
-
@orig =
|
72
|
+
@orig = arg
|
73
|
+
@scanner = StringScanner.new(@orig)
|
45
74
|
if encoding
|
46
75
|
self.encoding = encoding
|
47
76
|
else
|
@@ -50,6 +79,20 @@ module REXML
|
|
50
79
|
@line = 0
|
51
80
|
end
|
52
81
|
|
82
|
+
# The current buffer (what we're going to read next)
|
83
|
+
def buffer
|
84
|
+
@scanner.rest
|
85
|
+
end
|
86
|
+
|
87
|
+
def drop_parsed_content
|
88
|
+
if @scanner.pos > Private::SCANNER_RESET_SIZE
|
89
|
+
@scanner.string = @scanner.rest
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def buffer_encoding=(encoding)
|
94
|
+
@scanner.string.force_encoding(encoding)
|
95
|
+
end
|
53
96
|
|
54
97
|
# Inherited from Encoding
|
55
98
|
# Overridden to support optimized en/decoding
|
@@ -58,98 +101,78 @@ module REXML
|
|
58
101
|
encoding_updated
|
59
102
|
end
|
60
103
|
|
61
|
-
|
62
|
-
# usual scan() method. For one thing, the pattern argument has some
|
63
|
-
# requirements; for another, the source can be consumed. You can easily
|
64
|
-
# confuse this method. Originally, the patterns were easier
|
65
|
-
# to construct and this method more robust, because this method
|
66
|
-
# generated search regexps on the fly; however, this was
|
67
|
-
# computationally expensive and slowed down the entire REXML package
|
68
|
-
# considerably, since this is by far the most commonly called method.
|
69
|
-
# @param pattern must be a Regexp, and must be in the form of
|
70
|
-
# /^\s*(#{your pattern, with no groups})(.*)/. The first group
|
71
|
-
# will be returned; the second group is used if the consume flag is
|
72
|
-
# set.
|
73
|
-
# @param consume if true, the pattern returned will be consumed, leaving
|
74
|
-
# everything after it in the Source.
|
75
|
-
# @return the pattern, if found, or nil if the Source is empty or the
|
76
|
-
# pattern is not found.
|
77
|
-
def scan(pattern, cons=false)
|
78
|
-
return nil if @buffer.nil?
|
79
|
-
rv = @buffer.scan(pattern)
|
80
|
-
@buffer = $' if cons and rv.size>0
|
81
|
-
rv
|
104
|
+
def read(term = nil)
|
82
105
|
end
|
83
106
|
|
84
|
-
def
|
107
|
+
def read_until(term)
|
108
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
109
|
+
data = @scanner.scan_until(pattern)
|
110
|
+
unless data
|
111
|
+
data = @scanner.rest
|
112
|
+
@scanner.pos = @scanner.string.bytesize
|
113
|
+
end
|
114
|
+
data
|
85
115
|
end
|
86
116
|
|
87
|
-
def
|
88
|
-
@buffer = $' if pattern.match( @buffer )
|
117
|
+
def ensure_buffer
|
89
118
|
end
|
90
119
|
|
91
|
-
def
|
92
|
-
|
120
|
+
def match(pattern, cons=false)
|
121
|
+
if cons
|
122
|
+
@scanner.scan(pattern).nil? ? nil : @scanner
|
123
|
+
else
|
124
|
+
@scanner.check(pattern).nil? ? nil : @scanner
|
125
|
+
end
|
93
126
|
end
|
94
127
|
|
95
|
-
def
|
96
|
-
|
97
|
-
@buffer = $'
|
98
|
-
return md
|
128
|
+
def position
|
129
|
+
@scanner.pos
|
99
130
|
end
|
100
131
|
|
101
|
-
def
|
102
|
-
|
103
|
-
@buffer = $' if cons and md
|
104
|
-
return md
|
132
|
+
def position=(pos)
|
133
|
+
@scanner.pos = pos
|
105
134
|
end
|
106
135
|
|
107
136
|
# @return true if the Source is exhausted
|
108
137
|
def empty?
|
109
|
-
@
|
110
|
-
end
|
111
|
-
|
112
|
-
def position
|
113
|
-
@orig.index( @buffer )
|
138
|
+
@scanner.eos?
|
114
139
|
end
|
115
140
|
|
116
141
|
# @return the current line in the source
|
117
142
|
def current_line
|
118
143
|
lines = @orig.split
|
119
|
-
res = lines.grep @
|
144
|
+
res = lines.grep @scanner.rest[0..30]
|
120
145
|
res = res[-1] if res.kind_of? Array
|
121
146
|
lines.index( res ) if res
|
122
147
|
end
|
123
148
|
|
124
149
|
private
|
150
|
+
|
125
151
|
def detect_encoding
|
126
|
-
|
152
|
+
scanner_encoding = @scanner.rest.encoding
|
127
153
|
detected_encoding = "UTF-8"
|
128
154
|
begin
|
129
|
-
@
|
130
|
-
if @
|
131
|
-
@buffer[0, 2] = ""
|
155
|
+
@scanner.string.force_encoding("ASCII-8BIT")
|
156
|
+
if @scanner.scan(/\xfe\xff/n)
|
132
157
|
detected_encoding = "UTF-16BE"
|
133
|
-
elsif @
|
134
|
-
@buffer[0, 2] = ""
|
158
|
+
elsif @scanner.scan(/\xff\xfe/n)
|
135
159
|
detected_encoding = "UTF-16LE"
|
136
|
-
elsif @
|
137
|
-
@buffer[0, 3] = ""
|
160
|
+
elsif @scanner.scan(/\xef\xbb\xbf/n)
|
138
161
|
detected_encoding = "UTF-8"
|
139
162
|
end
|
140
163
|
ensure
|
141
|
-
@
|
164
|
+
@scanner.string.force_encoding(scanner_encoding)
|
142
165
|
end
|
143
166
|
self.encoding = detected_encoding
|
144
167
|
end
|
145
168
|
|
146
169
|
def encoding_updated
|
147
170
|
if @encoding != 'UTF-8'
|
148
|
-
@
|
171
|
+
@scanner.string = decode(@scanner.rest)
|
149
172
|
@to_utf = true
|
150
173
|
else
|
151
174
|
@to_utf = false
|
152
|
-
@
|
175
|
+
@scanner.string.force_encoding(::Encoding::UTF_8)
|
153
176
|
end
|
154
177
|
end
|
155
178
|
end
|
@@ -172,7 +195,7 @@ module REXML
|
|
172
195
|
end
|
173
196
|
|
174
197
|
if !@to_utf and
|
175
|
-
@
|
198
|
+
@orig.respond_to?(:force_encoding) and
|
176
199
|
@source.respond_to?(:external_encoding) and
|
177
200
|
@source.external_encoding != ::Encoding::UTF_8
|
178
201
|
@force_utf8 = true
|
@@ -181,65 +204,62 @@ module REXML
|
|
181
204
|
end
|
182
205
|
end
|
183
206
|
|
184
|
-
def
|
185
|
-
|
186
|
-
# You'll notice that this next section is very similar to the same
|
187
|
-
# section in match(), but just a liiittle different. This is
|
188
|
-
# because it is a touch faster to do it this way with scan()
|
189
|
-
# than the way match() does it; enough faster to warrant duplicating
|
190
|
-
# some code
|
191
|
-
if rv.size == 0
|
192
|
-
until @buffer =~ pattern or @source.nil?
|
193
|
-
begin
|
194
|
-
@buffer << readline
|
195
|
-
rescue Iconv::IllegalSequence
|
196
|
-
raise
|
197
|
-
rescue
|
198
|
-
@source = nil
|
199
|
-
end
|
200
|
-
end
|
201
|
-
rv = super
|
202
|
-
end
|
203
|
-
rv.taint if RUBY_VERSION < '2.7'
|
204
|
-
rv
|
205
|
-
end
|
206
|
-
|
207
|
-
def read
|
207
|
+
def read(term = nil)
|
208
|
+
term = encode(term) if term
|
208
209
|
begin
|
209
|
-
@
|
210
|
+
@scanner << readline(term)
|
211
|
+
true
|
210
212
|
rescue Exception, NameError
|
211
213
|
@source = nil
|
214
|
+
false
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
def read_until(term)
|
219
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
220
|
+
term = encode(term)
|
221
|
+
until str = @scanner.scan_until(pattern)
|
222
|
+
break if @source.nil?
|
223
|
+
break if @source.eof?
|
224
|
+
@scanner << readline(term)
|
225
|
+
end
|
226
|
+
if str
|
227
|
+
read if @scanner.eos? and !@source.eof?
|
228
|
+
str
|
229
|
+
else
|
230
|
+
rest = @scanner.rest
|
231
|
+
@scanner.pos = @scanner.string.bytesize
|
232
|
+
rest
|
212
233
|
end
|
213
234
|
end
|
214
235
|
|
215
|
-
def
|
216
|
-
|
236
|
+
def ensure_buffer
|
237
|
+
read if @scanner.eos? && @source
|
217
238
|
end
|
218
239
|
|
240
|
+
# Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
|
241
|
+
# - ">"
|
242
|
+
# - "XXX>" (X is any string excluding '>')
|
219
243
|
def match( pattern, cons=false )
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
rv = pattern.match(@buffer)
|
226
|
-
@buffer = $' if cons and rv
|
227
|
-
rescue
|
228
|
-
@source = nil
|
244
|
+
while true
|
245
|
+
if cons
|
246
|
+
md = @scanner.scan(pattern)
|
247
|
+
else
|
248
|
+
md = @scanner.check(pattern)
|
229
249
|
end
|
250
|
+
break if md
|
251
|
+
return nil if pattern.is_a?(String)
|
252
|
+
return nil if @source.nil?
|
253
|
+
return nil unless read
|
230
254
|
end
|
231
|
-
|
232
|
-
|
255
|
+
|
256
|
+
md.nil? ? nil : @scanner
|
233
257
|
end
|
234
258
|
|
235
259
|
def empty?
|
236
260
|
super and ( @source.nil? || @source.eof? )
|
237
261
|
end
|
238
262
|
|
239
|
-
def position
|
240
|
-
@er_source.pos rescue 0
|
241
|
-
end
|
242
|
-
|
243
263
|
# @return the current line in the source
|
244
264
|
def current_line
|
245
265
|
begin
|
@@ -263,8 +283,8 @@ module REXML
|
|
263
283
|
end
|
264
284
|
|
265
285
|
private
|
266
|
-
def readline
|
267
|
-
str = @source.readline(@line_break)
|
286
|
+
def readline(term = nil)
|
287
|
+
str = @source.readline(term || @line_break)
|
268
288
|
if @pending_buffer
|
269
289
|
if str.nil?
|
270
290
|
str = @pending_buffer
|
@@ -290,7 +310,7 @@ module REXML
|
|
290
310
|
@source.set_encoding(@encoding, @encoding)
|
291
311
|
end
|
292
312
|
@line_break = encode(">")
|
293
|
-
@pending_buffer, @
|
313
|
+
@pending_buffer, @scanner.string = @scanner.rest, ""
|
294
314
|
@pending_buffer.force_encoding(@encoding)
|
295
315
|
super
|
296
316
|
end
|
data/lib/rexml/xpath_parser.rb
CHANGED
@@ -590,6 +590,7 @@ module REXML
|
|
590
590
|
|
591
591
|
def evaluate_predicate(expression, nodesets)
|
592
592
|
enter(:predicate, expression, nodesets) if @debug
|
593
|
+
new_nodeset_count = 0
|
593
594
|
new_nodesets = nodesets.collect do |nodeset|
|
594
595
|
new_nodeset = []
|
595
596
|
subcontext = { :size => nodeset.size }
|
@@ -606,17 +607,20 @@ module REXML
|
|
606
607
|
result = result[0] if result.kind_of? Array and result.length == 1
|
607
608
|
if result.kind_of? Numeric
|
608
609
|
if result == node.position
|
609
|
-
|
610
|
+
new_nodeset_count += 1
|
611
|
+
new_nodeset << XPathNode.new(node, position: new_nodeset_count)
|
610
612
|
end
|
611
613
|
elsif result.instance_of? Array
|
612
614
|
if result.size > 0 and result.inject(false) {|k,s| s or k}
|
613
615
|
if result.size > 0
|
614
|
-
|
616
|
+
new_nodeset_count += 1
|
617
|
+
new_nodeset << XPathNode.new(node, position: new_nodeset_count)
|
615
618
|
end
|
616
619
|
end
|
617
620
|
else
|
618
621
|
if result
|
619
|
-
|
622
|
+
new_nodeset_count += 1
|
623
|
+
new_nodeset << XPathNode.new(node, position: new_nodeset_count)
|
620
624
|
end
|
621
625
|
end
|
622
626
|
end
|
metadata
CHANGED
@@ -1,51 +1,22 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 2024-06-25 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
13
|
+
name: strscan
|
15
14
|
requirement: !ruby/object:Gem::Requirement
|
16
15
|
requirements:
|
17
16
|
- - ">="
|
18
17
|
- !ruby/object:Gem::Version
|
19
18
|
version: '0'
|
20
|
-
type: :
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rake
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: test-unit
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :development
|
19
|
+
type: :runtime
|
49
20
|
prerelease: false
|
50
21
|
version_requirements: !ruby/object:Gem::Requirement
|
51
22
|
requirements:
|
@@ -144,8 +115,8 @@ files:
|
|
144
115
|
homepage: https://github.com/ruby/rexml
|
145
116
|
licenses:
|
146
117
|
- BSD-2-Clause
|
147
|
-
metadata:
|
148
|
-
|
118
|
+
metadata:
|
119
|
+
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.1
|
149
120
|
rdoc_options:
|
150
121
|
- "--main"
|
151
122
|
- README.md
|
@@ -162,8 +133,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
162
133
|
- !ruby/object:Gem::Version
|
163
134
|
version: '0'
|
164
135
|
requirements: []
|
165
|
-
rubygems_version: 3.
|
166
|
-
signing_key:
|
136
|
+
rubygems_version: 3.6.0.dev
|
167
137
|
specification_version: 4
|
168
138
|
summary: An XML toolkit for Ruby
|
169
139
|
test_files: []
|