rexml 3.2.5 → 3.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +251 -2
- data/README.md +10 -1
- data/doc/rexml/tasks/rdoc/element.rdoc +2 -2
- data/doc/rexml/tutorial.rdoc +1358 -0
- data/lib/rexml/attribute.rb +14 -9
- data/lib/rexml/document.rb +1 -1
- data/lib/rexml/element.rb +5 -18
- data/lib/rexml/entity.rb +25 -15
- data/lib/rexml/formatters/pretty.rb +2 -2
- data/lib/rexml/functions.rb +1 -2
- data/lib/rexml/namespace.rb +8 -4
- data/lib/rexml/node.rb +8 -4
- data/lib/rexml/parseexception.rb +1 -0
- data/lib/rexml/parsers/baseparser.rb +281 -240
- data/lib/rexml/parsers/treeparser.rb +9 -14
- data/lib/rexml/parsers/xpathparser.rb +136 -86
- data/lib/rexml/rexml.rb +3 -1
- data/lib/rexml/source.rb +120 -100
- data/lib/rexml/text.rb +6 -4
- data/lib/rexml/xpath_parser.rb +7 -3
- metadata +11 -39
data/lib/rexml/source.rb
CHANGED
@@ -1,8 +1,28 @@
|
|
1
1
|
# coding: US-ASCII
|
2
2
|
# frozen_string_literal: false
|
3
|
+
|
4
|
+
require "strscan"
|
5
|
+
|
3
6
|
require_relative 'encoding'
|
4
7
|
|
5
8
|
module REXML
|
9
|
+
if StringScanner::Version < "1.0.0"
|
10
|
+
module StringScannerCheckScanString
|
11
|
+
refine StringScanner do
|
12
|
+
def check(pattern)
|
13
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
14
|
+
super(pattern)
|
15
|
+
end
|
16
|
+
|
17
|
+
def scan(pattern)
|
18
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
19
|
+
super(pattern)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
using StringScannerCheckScanString
|
24
|
+
end
|
25
|
+
|
6
26
|
# Generates Source-s. USE THIS CLASS.
|
7
27
|
class SourceFactory
|
8
28
|
# Generates a Source object
|
@@ -30,18 +50,27 @@ module REXML
|
|
30
50
|
# objects and provides consumption of text
|
31
51
|
class Source
|
32
52
|
include Encoding
|
33
|
-
# The current buffer (what we're going to read next)
|
34
|
-
attr_reader :buffer
|
35
53
|
# The line number of the last consumed text
|
36
54
|
attr_reader :line
|
37
55
|
attr_reader :encoding
|
38
56
|
|
57
|
+
module Private
|
58
|
+
SCANNER_RESET_SIZE = 100000
|
59
|
+
PRE_DEFINED_TERM_PATTERNS = {}
|
60
|
+
pre_defined_terms = ["'", '"', "<"]
|
61
|
+
pre_defined_terms.each do |term|
|
62
|
+
PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
|
63
|
+
end
|
64
|
+
end
|
65
|
+
private_constant :Private
|
66
|
+
|
39
67
|
# Constructor
|
40
68
|
# @param arg must be a String, and should be a valid XML document
|
41
69
|
# @param encoding if non-null, sets the encoding of the source to this
|
42
70
|
# value, overriding all encoding detection
|
43
71
|
def initialize(arg, encoding=nil)
|
44
|
-
@orig = @buffer = arg
|
72
|
+
@orig = arg
|
73
|
+
@scanner = StringScanner.new(@orig)
|
45
74
|
if encoding
|
46
75
|
self.encoding = encoding
|
47
76
|
else
|
@@ -50,6 +79,20 @@ module REXML
|
|
50
79
|
@line = 0
|
51
80
|
end
|
52
81
|
|
82
|
+
# The current buffer (what we're going to read next)
|
83
|
+
def buffer
|
84
|
+
@scanner.rest
|
85
|
+
end
|
86
|
+
|
87
|
+
def drop_parsed_content
|
88
|
+
if @scanner.pos > Private::SCANNER_RESET_SIZE
|
89
|
+
@scanner.string = @scanner.rest
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def buffer_encoding=(encoding)
|
94
|
+
@scanner.string.force_encoding(encoding)
|
95
|
+
end
|
53
96
|
|
54
97
|
# Inherited from Encoding
|
55
98
|
# Overridden to support optimized en/decoding
|
@@ -58,98 +101,78 @@ module REXML
|
|
58
101
|
encoding_updated
|
59
102
|
end
|
60
103
|
|
61
|
-
|
62
|
-
# usual scan() method. For one thing, the pattern argument has some
|
63
|
-
# requirements; for another, the source can be consumed. You can easily
|
64
|
-
# confuse this method. Originally, the patterns were easier
|
65
|
-
# to construct and this method more robust, because this method
|
66
|
-
# generated search regexps on the fly; however, this was
|
67
|
-
# computationally expensive and slowed down the entire REXML package
|
68
|
-
# considerably, since this is by far the most commonly called method.
|
69
|
-
# @param pattern must be a Regexp, and must be in the form of
|
70
|
-
# /^\s*(#{your pattern, with no groups})(.*)/. The first group
|
71
|
-
# will be returned; the second group is used if the consume flag is
|
72
|
-
# set.
|
73
|
-
# @param consume if true, the pattern returned will be consumed, leaving
|
74
|
-
# everything after it in the Source.
|
75
|
-
# @return the pattern, if found, or nil if the Source is empty or the
|
76
|
-
# pattern is not found.
|
77
|
-
def scan(pattern, cons=false)
|
78
|
-
return nil if @buffer.nil?
|
79
|
-
rv = @buffer.scan(pattern)
|
80
|
-
@buffer = $' if cons and rv.size>0
|
81
|
-
rv
|
104
|
+
def read(term = nil)
|
82
105
|
end
|
83
106
|
|
84
|
-
def
|
107
|
+
def read_until(term)
|
108
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
109
|
+
data = @scanner.scan_until(pattern)
|
110
|
+
unless data
|
111
|
+
data = @scanner.rest
|
112
|
+
@scanner.pos = @scanner.string.bytesize
|
113
|
+
end
|
114
|
+
data
|
85
115
|
end
|
86
116
|
|
87
|
-
def
|
88
|
-
@buffer = $' if pattern.match( @buffer )
|
117
|
+
def ensure_buffer
|
89
118
|
end
|
90
119
|
|
91
|
-
def
|
92
|
-
|
120
|
+
def match(pattern, cons=false)
|
121
|
+
if cons
|
122
|
+
@scanner.scan(pattern).nil? ? nil : @scanner
|
123
|
+
else
|
124
|
+
@scanner.check(pattern).nil? ? nil : @scanner
|
125
|
+
end
|
93
126
|
end
|
94
127
|
|
95
|
-
def
|
96
|
-
|
97
|
-
@buffer = $'
|
98
|
-
return md
|
128
|
+
def position
|
129
|
+
@scanner.pos
|
99
130
|
end
|
100
131
|
|
101
|
-
def
|
102
|
-
|
103
|
-
@buffer = $' if cons and md
|
104
|
-
return md
|
132
|
+
def position=(pos)
|
133
|
+
@scanner.pos = pos
|
105
134
|
end
|
106
135
|
|
107
136
|
# @return true if the Source is exhausted
|
108
137
|
def empty?
|
109
|
-
@
|
110
|
-
end
|
111
|
-
|
112
|
-
def position
|
113
|
-
@orig.index( @buffer )
|
138
|
+
@scanner.eos?
|
114
139
|
end
|
115
140
|
|
116
141
|
# @return the current line in the source
|
117
142
|
def current_line
|
118
143
|
lines = @orig.split
|
119
|
-
res = lines.grep @buffer[0..30]
|
144
|
+
res = lines.grep @scanner.rest[0..30]
|
120
145
|
res = res[-1] if res.kind_of? Array
|
121
146
|
lines.index( res ) if res
|
122
147
|
end
|
123
148
|
|
124
149
|
private
|
150
|
+
|
125
151
|
def detect_encoding
|
126
|
-
|
152
|
+
scanner_encoding = @scanner.rest.encoding
|
127
153
|
detected_encoding = "UTF-8"
|
128
154
|
begin
|
129
|
-
@
|
130
|
-
if @
|
131
|
-
@buffer[0, 2] = ""
|
155
|
+
@scanner.string.force_encoding("ASCII-8BIT")
|
156
|
+
if @scanner.scan(/\xfe\xff/n)
|
132
157
|
detected_encoding = "UTF-16BE"
|
133
|
-
elsif @
|
134
|
-
@buffer[0, 2] = ""
|
158
|
+
elsif @scanner.scan(/\xff\xfe/n)
|
135
159
|
detected_encoding = "UTF-16LE"
|
136
|
-
elsif @
|
137
|
-
@buffer[0, 3] = ""
|
160
|
+
elsif @scanner.scan(/\xef\xbb\xbf/n)
|
138
161
|
detected_encoding = "UTF-8"
|
139
162
|
end
|
140
163
|
ensure
|
141
|
-
@
|
164
|
+
@scanner.string.force_encoding(scanner_encoding)
|
142
165
|
end
|
143
166
|
self.encoding = detected_encoding
|
144
167
|
end
|
145
168
|
|
146
169
|
def encoding_updated
|
147
170
|
if @encoding != 'UTF-8'
|
148
|
-
@
|
171
|
+
@scanner.string = decode(@scanner.rest)
|
149
172
|
@to_utf = true
|
150
173
|
else
|
151
174
|
@to_utf = false
|
152
|
-
@
|
175
|
+
@scanner.string.force_encoding(::Encoding::UTF_8)
|
153
176
|
end
|
154
177
|
end
|
155
178
|
end
|
@@ -172,7 +195,7 @@ module REXML
|
|
172
195
|
end
|
173
196
|
|
174
197
|
if !@to_utf and
|
175
|
-
@buffer.respond_to?(:force_encoding) and
|
198
|
+
@orig.respond_to?(:force_encoding) and
|
176
199
|
@source.respond_to?(:external_encoding) and
|
177
200
|
@source.external_encoding != ::Encoding::UTF_8
|
178
201
|
@force_utf8 = true
|
@@ -181,65 +204,62 @@ module REXML
|
|
181
204
|
end
|
182
205
|
end
|
183
206
|
|
184
|
-
def
|
185
|
-
|
186
|
-
# You'll notice that this next section is very similar to the same
|
187
|
-
# section in match(), but just a liiittle different. This is
|
188
|
-
# because it is a touch faster to do it this way with scan()
|
189
|
-
# than the way match() does it; enough faster to warrant duplicating
|
190
|
-
# some code
|
191
|
-
if rv.size == 0
|
192
|
-
until @buffer =~ pattern or @source.nil?
|
193
|
-
begin
|
194
|
-
@buffer << readline
|
195
|
-
rescue Iconv::IllegalSequence
|
196
|
-
raise
|
197
|
-
rescue
|
198
|
-
@source = nil
|
199
|
-
end
|
200
|
-
end
|
201
|
-
rv = super
|
202
|
-
end
|
203
|
-
rv.taint if RUBY_VERSION < '2.7'
|
204
|
-
rv
|
205
|
-
end
|
206
|
-
|
207
|
-
def read
|
207
|
+
def read(term = nil)
|
208
|
+
term = encode(term) if term
|
208
209
|
begin
|
209
|
-
@buffer << readline
|
210
|
+
@scanner << readline(term)
|
211
|
+
true
|
210
212
|
rescue Exception, NameError
|
211
213
|
@source = nil
|
214
|
+
false
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
def read_until(term)
|
219
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
220
|
+
term = encode(term)
|
221
|
+
until str = @scanner.scan_until(pattern)
|
222
|
+
break if @source.nil?
|
223
|
+
break if @source.eof?
|
224
|
+
@scanner << readline(term)
|
225
|
+
end
|
226
|
+
if str
|
227
|
+
read if @scanner.eos? and !@source.eof?
|
228
|
+
str
|
229
|
+
else
|
230
|
+
rest = @scanner.rest
|
231
|
+
@scanner.pos = @scanner.string.bytesize
|
232
|
+
rest
|
212
233
|
end
|
213
234
|
end
|
214
235
|
|
215
|
-
def
|
216
|
-
|
236
|
+
def ensure_buffer
|
237
|
+
read if @scanner.eos? && @source
|
217
238
|
end
|
218
239
|
|
240
|
+
# Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
|
241
|
+
# - ">"
|
242
|
+
# - "XXX>" (X is any string excluding '>')
|
219
243
|
def match( pattern, cons=false )
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
rv = pattern.match(@buffer)
|
226
|
-
@buffer = $' if cons and rv
|
227
|
-
rescue
|
228
|
-
@source = nil
|
244
|
+
while true
|
245
|
+
if cons
|
246
|
+
md = @scanner.scan(pattern)
|
247
|
+
else
|
248
|
+
md = @scanner.check(pattern)
|
229
249
|
end
|
250
|
+
break if md
|
251
|
+
return nil if pattern.is_a?(String)
|
252
|
+
return nil if @source.nil?
|
253
|
+
return nil unless read
|
230
254
|
end
|
231
|
-
|
232
|
-
|
255
|
+
|
256
|
+
md.nil? ? nil : @scanner
|
233
257
|
end
|
234
258
|
|
235
259
|
def empty?
|
236
260
|
super and ( @source.nil? || @source.eof? )
|
237
261
|
end
|
238
262
|
|
239
|
-
def position
|
240
|
-
@er_source.pos rescue 0
|
241
|
-
end
|
242
|
-
|
243
263
|
# @return the current line in the source
|
244
264
|
def current_line
|
245
265
|
begin
|
@@ -263,8 +283,8 @@ module REXML
|
|
263
283
|
end
|
264
284
|
|
265
285
|
private
|
266
|
-
def readline
|
267
|
-
str = @source.readline(@line_break)
|
286
|
+
def readline(term = nil)
|
287
|
+
str = @source.readline(term || @line_break)
|
268
288
|
if @pending_buffer
|
269
289
|
if str.nil?
|
270
290
|
str = @pending_buffer
|
@@ -290,7 +310,7 @@ module REXML
|
|
290
310
|
@source.set_encoding(@encoding, @encoding)
|
291
311
|
end
|
292
312
|
@line_break = encode(">")
|
293
|
-
@pending_buffer, @buffer = @buffer, ""
|
313
|
+
@pending_buffer, @scanner.string = @scanner.rest, ""
|
294
314
|
@pending_buffer.force_encoding(@encoding)
|
295
315
|
super
|
296
316
|
end
|
data/lib/rexml/text.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# frozen_string_literal: false
|
1
|
+
# frozen_string_literal: true
|
2
2
|
require_relative 'security'
|
3
3
|
require_relative 'entity'
|
4
4
|
require_relative 'doctype'
|
@@ -131,7 +131,7 @@ module REXML
|
|
131
131
|
def Text.check string, pattern, doctype
|
132
132
|
|
133
133
|
# illegal anywhere
|
134
|
-
if string !~ VALID_XML_CHARS
|
134
|
+
if !string.match?(VALID_XML_CHARS)
|
135
135
|
if String.method_defined? :encode
|
136
136
|
string.chars.each do |c|
|
137
137
|
case c.ord
|
@@ -371,7 +371,7 @@ module REXML
|
|
371
371
|
copy = input.to_s
|
372
372
|
# Doing it like this rather than in a loop improves the speed
|
373
373
|
#copy = copy.gsub( EREFERENCE, '&amp;' )
|
374
|
-
copy = copy.gsub( "&", "&amp;" )
|
374
|
+
copy = copy.gsub( "&", "&amp;" ) if copy.include?("&")
|
375
375
|
if doctype
|
376
376
|
# Replace all ampersands that aren't part of an entity
|
377
377
|
doctype.entities.each_value do |entity|
|
@@ -382,7 +382,9 @@ module REXML
|
|
382
382
|
else
|
383
383
|
# Replace all ampersands that aren't part of an entity
|
384
384
|
DocType::DEFAULT_ENTITIES.each_value do |entity|
|
385
|
-
|
385
|
+
if copy.include?(entity.value)
|
386
|
+
copy = copy.gsub(entity.value, "&#{entity.name};" )
|
387
|
+
end
|
386
388
|
end
|
387
389
|
end
|
388
390
|
copy
|
data/lib/rexml/xpath_parser.rb
CHANGED
@@ -590,6 +590,7 @@ module REXML
|
|
590
590
|
|
591
591
|
def evaluate_predicate(expression, nodesets)
|
592
592
|
enter(:predicate, expression, nodesets) if @debug
|
593
|
+
new_nodeset_count = 0
|
593
594
|
new_nodesets = nodesets.collect do |nodeset|
|
594
595
|
new_nodeset = []
|
595
596
|
subcontext = { :size => nodeset.size }
|
@@ -606,17 +607,20 @@ module REXML
|
|
606
607
|
result = result[0] if result.kind_of? Array and result.length == 1
|
607
608
|
if result.kind_of? Numeric
|
608
609
|
if result == node.position
|
609
|
-
|
610
|
+
new_nodeset_count += 1
|
611
|
+
new_nodeset << XPathNode.new(node, position: new_nodeset_count)
|
610
612
|
end
|
611
613
|
elsif result.instance_of? Array
|
612
614
|
if result.size > 0 and result.inject(false) {|k,s| s or k}
|
613
615
|
if result.size > 0
|
614
|
-
|
616
|
+
new_nodeset_count += 1
|
617
|
+
new_nodeset << XPathNode.new(node, position: new_nodeset_count)
|
615
618
|
end
|
616
619
|
end
|
617
620
|
else
|
618
621
|
if result
|
619
|
-
|
622
|
+
new_nodeset_count += 1
|
623
|
+
new_nodeset << XPathNode.new(node, position: new_nodeset_count)
|
620
624
|
end
|
621
625
|
end
|
622
626
|
end
|
metadata
CHANGED
@@ -1,51 +1,22 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.2.5
|
4
|
+
version: 3.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
|
-
|
9
|
-
bindir: exe
|
8
|
+
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 2024-06-25 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
13
|
+
name: strscan
|
15
14
|
requirement: !ruby/object:Gem::Requirement
|
16
15
|
requirements:
|
17
16
|
- - ">="
|
18
17
|
- !ruby/object:Gem::Version
|
19
18
|
version: '0'
|
20
|
-
type: :
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rake
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: test-unit
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :development
|
19
|
+
type: :runtime
|
49
20
|
prerelease: false
|
50
21
|
version_requirements: !ruby/object:Gem::Requirement
|
51
22
|
requirements:
|
@@ -73,6 +44,7 @@ extra_rdoc_files:
|
|
73
44
|
- doc/rexml/tasks/tocs/master_toc.rdoc
|
74
45
|
- doc/rexml/tasks/tocs/node_toc.rdoc
|
75
46
|
- doc/rexml/tasks/tocs/parent_toc.rdoc
|
47
|
+
- doc/rexml/tutorial.rdoc
|
76
48
|
files:
|
77
49
|
- LICENSE.txt
|
78
50
|
- NEWS.md
|
@@ -89,6 +61,7 @@ files:
|
|
89
61
|
- doc/rexml/tasks/tocs/master_toc.rdoc
|
90
62
|
- doc/rexml/tasks/tocs/node_toc.rdoc
|
91
63
|
- doc/rexml/tasks/tocs/parent_toc.rdoc
|
64
|
+
- doc/rexml/tutorial.rdoc
|
92
65
|
- lib/rexml.rb
|
93
66
|
- lib/rexml/attlistdecl.rb
|
94
67
|
- lib/rexml/attribute.rb
|
@@ -142,8 +115,8 @@ files:
|
|
142
115
|
homepage: https://github.com/ruby/rexml
|
143
116
|
licenses:
|
144
117
|
- BSD-2-Clause
|
145
|
-
metadata:
|
146
|
-
|
118
|
+
metadata:
|
119
|
+
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.1
|
147
120
|
rdoc_options:
|
148
121
|
- "--main"
|
149
122
|
- README.md
|
@@ -153,15 +126,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
153
126
|
requirements:
|
154
127
|
- - ">="
|
155
128
|
- !ruby/object:Gem::Version
|
156
|
-
version:
|
129
|
+
version: 2.5.0
|
157
130
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
158
131
|
requirements:
|
159
132
|
- - ">="
|
160
133
|
- !ruby/object:Gem::Version
|
161
134
|
version: '0'
|
162
135
|
requirements: []
|
163
|
-
rubygems_version: 3.
|
164
|
-
signing_key:
|
136
|
+
rubygems_version: 3.6.0.dev
|
165
137
|
specification_version: 4
|
166
138
|
summary: An XML toolkit for Ruby
|
167
139
|
test_files: []
|