rexml 3.2.7 → 3.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +278 -2
- data/lib/rexml/attribute.rb +3 -2
- data/lib/rexml/document.rb +5 -1
- data/lib/rexml/element.rb +16 -31
- data/lib/rexml/entity.rb +9 -48
- data/lib/rexml/formatters/pretty.rb +1 -1
- data/lib/rexml/node.rb +8 -4
- data/lib/rexml/parsers/baseparser.rb +224 -61
- data/lib/rexml/parsers/pullparser.rb +12 -0
- data/lib/rexml/parsers/sax2parser.rb +14 -19
- data/lib/rexml/parsers/streamparser.rb +16 -10
- data/lib/rexml/parsers/treeparser.rb +9 -21
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +72 -16
- data/lib/rexml/text.rb +39 -17
- metadata +5 -18
@@ -15,8 +15,6 @@ module REXML
|
|
15
15
|
end
|
16
16
|
|
17
17
|
def parse
|
18
|
-
tag_stack = []
|
19
|
-
in_doctype = false
|
20
18
|
entities = nil
|
21
19
|
begin
|
22
20
|
while true
|
@@ -24,32 +22,24 @@ module REXML
|
|
24
22
|
#STDERR.puts "TREEPARSER GOT #{event.inspect}"
|
25
23
|
case event[0]
|
26
24
|
when :end_document
|
27
|
-
unless tag_stack.empty?
|
28
|
-
raise ParseException.new("No close tag for #{@build_context.xpath}",
|
29
|
-
@parser.source, @parser)
|
30
|
-
end
|
31
25
|
return
|
32
26
|
when :start_element
|
33
|
-
tag_stack.push(event[1])
|
34
27
|
el = @build_context = @build_context.add_element( event[1] )
|
35
28
|
event[2].each do |key, value|
|
36
29
|
el.attributes[key]=Attribute.new(key,value,self)
|
37
30
|
end
|
38
31
|
when :end_element
|
39
|
-
tag_stack.pop
|
40
32
|
@build_context = @build_context.parent
|
41
33
|
when :text
|
42
|
-
if
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
@build_context.
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
)
|
52
|
-
end
|
34
|
+
if @build_context[-1].instance_of? Text
|
35
|
+
@build_context[-1] << event[1]
|
36
|
+
else
|
37
|
+
@build_context.add(
|
38
|
+
Text.new(event[1], @build_context.whitespace, nil, true)
|
39
|
+
) unless (
|
40
|
+
@build_context.ignore_whitespace_nodes and
|
41
|
+
event[1].strip.size==0
|
42
|
+
)
|
53
43
|
end
|
54
44
|
when :comment
|
55
45
|
c = Comment.new( event[1] )
|
@@ -60,14 +50,12 @@ module REXML
|
|
60
50
|
when :processing_instruction
|
61
51
|
@build_context.add( Instruction.new( event[1], event[2] ) )
|
62
52
|
when :end_doctype
|
63
|
-
in_doctype = false
|
64
53
|
entities.each { |k,v| entities[k] = @build_context.entities[k].value }
|
65
54
|
@build_context = @build_context.parent
|
66
55
|
when :start_doctype
|
67
56
|
doctype = DocType.new( event[1..-1], @build_context )
|
68
57
|
@build_context = doctype
|
69
58
|
entities = {}
|
70
|
-
in_doctype = true
|
71
59
|
when :attlistdecl
|
72
60
|
n = AttlistDecl.new( event[1..-1] )
|
73
61
|
@build_context.add( n )
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -1,8 +1,28 @@
|
|
1
1
|
# coding: US-ASCII
|
2
2
|
# frozen_string_literal: false
|
3
|
+
|
4
|
+
require "strscan"
|
5
|
+
|
3
6
|
require_relative 'encoding'
|
4
7
|
|
5
8
|
module REXML
|
9
|
+
if StringScanner::Version < "1.0.0"
|
10
|
+
module StringScannerCheckScanString
|
11
|
+
refine StringScanner do
|
12
|
+
def check(pattern)
|
13
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
14
|
+
super(pattern)
|
15
|
+
end
|
16
|
+
|
17
|
+
def scan(pattern)
|
18
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
19
|
+
super(pattern)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
using StringScannerCheckScanString
|
24
|
+
end
|
25
|
+
|
6
26
|
# Generates Source-s. USE THIS CLASS.
|
7
27
|
class SourceFactory
|
8
28
|
# Generates a Source object
|
@@ -34,6 +54,16 @@ module REXML
|
|
34
54
|
attr_reader :line
|
35
55
|
attr_reader :encoding
|
36
56
|
|
57
|
+
module Private
|
58
|
+
SCANNER_RESET_SIZE = 100000
|
59
|
+
PRE_DEFINED_TERM_PATTERNS = {}
|
60
|
+
pre_defined_terms = ["'", '"', "<"]
|
61
|
+
pre_defined_terms.each do |term|
|
62
|
+
PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
|
63
|
+
end
|
64
|
+
end
|
65
|
+
private_constant :Private
|
66
|
+
|
37
67
|
# Constructor
|
38
68
|
# @param arg must be a String, and should be a valid XML document
|
39
69
|
# @param encoding if non-null, sets the encoding of the source to this
|
@@ -54,6 +84,12 @@ module REXML
|
|
54
84
|
@scanner.rest
|
55
85
|
end
|
56
86
|
|
87
|
+
def drop_parsed_content
|
88
|
+
if @scanner.pos > Private::SCANNER_RESET_SIZE
|
89
|
+
@scanner.string = @scanner.rest
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
57
93
|
def buffer_encoding=(encoding)
|
58
94
|
@scanner.string.force_encoding(encoding)
|
59
95
|
end
|
@@ -69,7 +105,13 @@ module REXML
|
|
69
105
|
end
|
70
106
|
|
71
107
|
def read_until(term)
|
72
|
-
|
108
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
109
|
+
data = @scanner.scan_until(pattern)
|
110
|
+
unless data
|
111
|
+
data = @scanner.rest
|
112
|
+
@scanner.pos = @scanner.string.bytesize
|
113
|
+
end
|
114
|
+
data
|
73
115
|
end
|
74
116
|
|
75
117
|
def ensure_buffer
|
@@ -162,9 +204,20 @@ module REXML
|
|
162
204
|
end
|
163
205
|
end
|
164
206
|
|
165
|
-
def read(term = nil)
|
207
|
+
def read(term = nil, min_bytes = 1)
|
208
|
+
term = encode(term) if term
|
166
209
|
begin
|
167
|
-
|
210
|
+
str = readline(term)
|
211
|
+
@scanner << str
|
212
|
+
read_bytes = str.bytesize
|
213
|
+
begin
|
214
|
+
while read_bytes < min_bytes
|
215
|
+
str = readline(term)
|
216
|
+
@scanner << str
|
217
|
+
read_bytes += str.bytesize
|
218
|
+
end
|
219
|
+
rescue IOError
|
220
|
+
end
|
168
221
|
true
|
169
222
|
rescue Exception, NameError
|
170
223
|
@source = nil
|
@@ -173,17 +226,20 @@ module REXML
|
|
173
226
|
end
|
174
227
|
|
175
228
|
def read_until(term)
|
176
|
-
pattern = Regexp.
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
else
|
229
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
230
|
+
term = encode(term)
|
231
|
+
until str = @scanner.scan_until(pattern)
|
232
|
+
break if @source.nil?
|
233
|
+
break if @source.eof?
|
234
|
+
@scanner << readline(term)
|
235
|
+
end
|
236
|
+
if str
|
185
237
|
read if @scanner.eos? and !@source.eof?
|
186
238
|
str
|
239
|
+
else
|
240
|
+
rest = @scanner.rest
|
241
|
+
@scanner.pos = @scanner.string.bytesize
|
242
|
+
rest
|
187
243
|
end
|
188
244
|
end
|
189
245
|
|
@@ -191,10 +247,9 @@ module REXML
|
|
191
247
|
read if @scanner.eos? && @source
|
192
248
|
end
|
193
249
|
|
194
|
-
# Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
|
195
|
-
# - ">"
|
196
|
-
# - "XXX>" (X is any string excluding '>')
|
197
250
|
def match( pattern, cons=false )
|
251
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
252
|
+
min_bytes = 1
|
198
253
|
while true
|
199
254
|
if cons
|
200
255
|
md = @scanner.scan(pattern)
|
@@ -204,7 +259,8 @@ module REXML
|
|
204
259
|
break if md
|
205
260
|
return nil if pattern.is_a?(String)
|
206
261
|
return nil if @source.nil?
|
207
|
-
return nil unless read
|
262
|
+
return nil unless read(nil, min_bytes)
|
263
|
+
min_bytes *= 2
|
208
264
|
end
|
209
265
|
|
210
266
|
md.nil? ? nil : @scanner
|
data/lib/rexml/text.rb
CHANGED
@@ -151,25 +151,45 @@ module REXML
|
|
151
151
|
end
|
152
152
|
end
|
153
153
|
|
154
|
-
|
155
|
-
string.
|
156
|
-
if
|
157
|
-
raise "Illegal character #{
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
154
|
+
pos = 0
|
155
|
+
while (index = string.index(/<|&/, pos))
|
156
|
+
if string[index] == "<"
|
157
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
158
|
+
end
|
159
|
+
|
160
|
+
unless (end_index = string.index(/[^\s];/, index + 1))
|
161
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
162
|
+
end
|
163
|
+
|
164
|
+
value = string[(index + 1)..end_index]
|
165
|
+
if /\s/.match?(value)
|
166
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
167
|
+
end
|
168
|
+
|
169
|
+
if value[0] == "#"
|
170
|
+
character_reference = value[1..-1]
|
171
|
+
|
172
|
+
unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
|
173
|
+
if character_reference[0] == "x" || character_reference[-1] == "x"
|
174
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
162
175
|
else
|
163
|
-
raise "Illegal character #{
|
176
|
+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
|
164
177
|
end
|
165
|
-
# FIXME: below can't work but this needs API change.
|
166
|
-
# elsif @parent and $3 and !SUBSTITUTES.include?($1)
|
167
|
-
# if !doctype or !doctype.entities.has_key?($3)
|
168
|
-
# raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
|
169
|
-
# end
|
170
178
|
end
|
179
|
+
|
180
|
+
case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
|
181
|
+
when *VALID_CHAR
|
182
|
+
else
|
183
|
+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
|
184
|
+
end
|
185
|
+
elsif !(/\A#{Entity::NAME}\z/um.match?(value))
|
186
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
171
187
|
end
|
188
|
+
|
189
|
+
pos = end_index + 1
|
172
190
|
end
|
191
|
+
|
192
|
+
string
|
173
193
|
end
|
174
194
|
|
175
195
|
def node_type
|
@@ -248,7 +268,8 @@ module REXML
|
|
248
268
|
# u = Text.new( "sean russell", false, nil, true )
|
249
269
|
# u.value #-> "sean russell"
|
250
270
|
def value
|
251
|
-
@unnormalized ||= Text::unnormalize(
|
271
|
+
@unnormalized ||= Text::unnormalize(@string, doctype,
|
272
|
+
entity_expansion_text_limit: document&.entity_expansion_text_limit)
|
252
273
|
end
|
253
274
|
|
254
275
|
# Sets the contents of this text node. This expects the text to be
|
@@ -391,11 +412,12 @@ module REXML
|
|
391
412
|
end
|
392
413
|
|
393
414
|
# Unescapes all possible entities
|
394
|
-
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
|
415
|
+
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil, entity_expansion_text_limit: nil )
|
416
|
+
entity_expansion_text_limit ||= Security.entity_expansion_text_limit
|
395
417
|
sum = 0
|
396
418
|
string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
|
397
419
|
s = Text.expand($&, doctype, filter)
|
398
|
-
if sum + s.bytesize >
|
420
|
+
if sum + s.bytesize > entity_expansion_text_limit
|
399
421
|
raise "entity expansion has grown too large"
|
400
422
|
else
|
401
423
|
sum += s.bytesize
|
metadata
CHANGED
@@ -1,28 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.3.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date: 2024-
|
11
|
-
dependencies:
|
12
|
-
- !ruby/object:Gem::Dependency
|
13
|
-
name: strscan
|
14
|
-
requirement: !ruby/object:Gem::Requirement
|
15
|
-
requirements:
|
16
|
-
- - ">="
|
17
|
-
- !ruby/object:Gem::Version
|
18
|
-
version: 3.0.9
|
19
|
-
type: :runtime
|
20
|
-
prerelease: false
|
21
|
-
version_requirements: !ruby/object:Gem::Requirement
|
22
|
-
requirements:
|
23
|
-
- - ">="
|
24
|
-
- !ruby/object:Gem::Version
|
25
|
-
version: 3.0.9
|
10
|
+
date: 2024-09-04 00:00:00.000000000 Z
|
11
|
+
dependencies: []
|
26
12
|
description: An XML toolkit for Ruby
|
27
13
|
email:
|
28
14
|
- kou@cozmixng.org
|
@@ -115,7 +101,8 @@ files:
|
|
115
101
|
homepage: https://github.com/ruby/rexml
|
116
102
|
licenses:
|
117
103
|
- BSD-2-Clause
|
118
|
-
metadata:
|
104
|
+
metadata:
|
105
|
+
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.7
|
119
106
|
rdoc_options:
|
120
107
|
- "--main"
|
121
108
|
- README.md
|