rexml 3.2.8 → 3.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +306 -2
- data/lib/rexml/attribute.rb +3 -2
- data/lib/rexml/document.rb +5 -1
- data/lib/rexml/element.rb +16 -31
- data/lib/rexml/entity.rb +9 -48
- data/lib/rexml/formatters/pretty.rb +1 -1
- data/lib/rexml/node.rb +8 -4
- data/lib/rexml/parsers/baseparser.rb +233 -64
- data/lib/rexml/parsers/pullparser.rb +12 -0
- data/lib/rexml/parsers/sax2parser.rb +16 -19
- data/lib/rexml/parsers/streamparser.rb +16 -10
- data/lib/rexml/parsers/treeparser.rb +9 -21
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +79 -16
- data/lib/rexml/text.rb +39 -17
- metadata +5 -18
@@ -15,8 +15,6 @@ module REXML
|
|
15
15
|
end
|
16
16
|
|
17
17
|
def parse
|
18
|
-
tag_stack = []
|
19
|
-
in_doctype = false
|
20
18
|
entities = nil
|
21
19
|
begin
|
22
20
|
while true
|
@@ -24,32 +22,24 @@ module REXML
|
|
24
22
|
#STDERR.puts "TREEPARSER GOT #{event.inspect}"
|
25
23
|
case event[0]
|
26
24
|
when :end_document
|
27
|
-
unless tag_stack.empty?
|
28
|
-
raise ParseException.new("No close tag for #{@build_context.xpath}",
|
29
|
-
@parser.source, @parser)
|
30
|
-
end
|
31
25
|
return
|
32
26
|
when :start_element
|
33
|
-
tag_stack.push(event[1])
|
34
27
|
el = @build_context = @build_context.add_element( event[1] )
|
35
28
|
event[2].each do |key, value|
|
36
29
|
el.attributes[key]=Attribute.new(key,value,self)
|
37
30
|
end
|
38
31
|
when :end_element
|
39
|
-
tag_stack.pop
|
40
32
|
@build_context = @build_context.parent
|
41
33
|
when :text
|
42
|
-
if
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
@build_context.
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
)
|
52
|
-
end
|
34
|
+
if @build_context[-1].instance_of? Text
|
35
|
+
@build_context[-1] << event[1]
|
36
|
+
else
|
37
|
+
@build_context.add(
|
38
|
+
Text.new(event[1], @build_context.whitespace, nil, true)
|
39
|
+
) unless (
|
40
|
+
@build_context.ignore_whitespace_nodes and
|
41
|
+
event[1].strip.size==0
|
42
|
+
)
|
53
43
|
end
|
54
44
|
when :comment
|
55
45
|
c = Comment.new( event[1] )
|
@@ -60,14 +50,12 @@ module REXML
|
|
60
50
|
when :processing_instruction
|
61
51
|
@build_context.add( Instruction.new( event[1], event[2] ) )
|
62
52
|
when :end_doctype
|
63
|
-
in_doctype = false
|
64
53
|
entities.each { |k,v| entities[k] = @build_context.entities[k].value }
|
65
54
|
@build_context = @build_context.parent
|
66
55
|
when :start_doctype
|
67
56
|
doctype = DocType.new( event[1..-1], @build_context )
|
68
57
|
@build_context = doctype
|
69
58
|
entities = {}
|
70
|
-
in_doctype = true
|
71
59
|
when :attlistdecl
|
72
60
|
n = AttlistDecl.new( event[1..-1] )
|
73
61
|
@build_context.add( n )
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -1,8 +1,28 @@
|
|
1
1
|
# coding: US-ASCII
|
2
2
|
# frozen_string_literal: false
|
3
|
+
|
4
|
+
require "strscan"
|
5
|
+
|
3
6
|
require_relative 'encoding'
|
4
7
|
|
5
8
|
module REXML
|
9
|
+
if StringScanner::Version < "1.0.0"
|
10
|
+
module StringScannerCheckScanString
|
11
|
+
refine StringScanner do
|
12
|
+
def check(pattern)
|
13
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
14
|
+
super(pattern)
|
15
|
+
end
|
16
|
+
|
17
|
+
def scan(pattern)
|
18
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
19
|
+
super(pattern)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
using StringScannerCheckScanString
|
24
|
+
end
|
25
|
+
|
6
26
|
# Generates Source-s. USE THIS CLASS.
|
7
27
|
class SourceFactory
|
8
28
|
# Generates a Source object
|
@@ -34,6 +54,16 @@ module REXML
|
|
34
54
|
attr_reader :line
|
35
55
|
attr_reader :encoding
|
36
56
|
|
57
|
+
module Private
|
58
|
+
SCANNER_RESET_SIZE = 100000
|
59
|
+
PRE_DEFINED_TERM_PATTERNS = {}
|
60
|
+
pre_defined_terms = ["'", '"', "<"]
|
61
|
+
pre_defined_terms.each do |term|
|
62
|
+
PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
|
63
|
+
end
|
64
|
+
end
|
65
|
+
private_constant :Private
|
66
|
+
|
37
67
|
# Constructor
|
38
68
|
# @param arg must be a String, and should be a valid XML document
|
39
69
|
# @param encoding if non-null, sets the encoding of the source to this
|
@@ -47,6 +77,7 @@ module REXML
|
|
47
77
|
detect_encoding
|
48
78
|
end
|
49
79
|
@line = 0
|
80
|
+
@term_encord = {}
|
50
81
|
end
|
51
82
|
|
52
83
|
# The current buffer (what we're going to read next)
|
@@ -54,6 +85,12 @@ module REXML
|
|
54
85
|
@scanner.rest
|
55
86
|
end
|
56
87
|
|
88
|
+
def drop_parsed_content
|
89
|
+
if @scanner.pos > Private::SCANNER_RESET_SIZE
|
90
|
+
@scanner.string = @scanner.rest
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
57
94
|
def buffer_encoding=(encoding)
|
58
95
|
@scanner.string.force_encoding(encoding)
|
59
96
|
end
|
@@ -69,7 +106,13 @@ module REXML
|
|
69
106
|
end
|
70
107
|
|
71
108
|
def read_until(term)
|
72
|
-
|
109
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
110
|
+
data = @scanner.scan_until(pattern)
|
111
|
+
unless data
|
112
|
+
data = @scanner.rest
|
113
|
+
@scanner.pos = @scanner.string.bytesize
|
114
|
+
end
|
115
|
+
data
|
73
116
|
end
|
74
117
|
|
75
118
|
def ensure_buffer
|
@@ -162,9 +205,20 @@ module REXML
|
|
162
205
|
end
|
163
206
|
end
|
164
207
|
|
165
|
-
def read(term = nil)
|
208
|
+
def read(term = nil, min_bytes = 1)
|
209
|
+
term = encode(term) if term
|
166
210
|
begin
|
167
|
-
|
211
|
+
str = readline(term)
|
212
|
+
@scanner << str
|
213
|
+
read_bytes = str.bytesize
|
214
|
+
begin
|
215
|
+
while read_bytes < min_bytes
|
216
|
+
str = readline(term)
|
217
|
+
@scanner << str
|
218
|
+
read_bytes += str.bytesize
|
219
|
+
end
|
220
|
+
rescue IOError
|
221
|
+
end
|
168
222
|
true
|
169
223
|
rescue Exception, NameError
|
170
224
|
@source = nil
|
@@ -173,16 +227,20 @@ module REXML
|
|
173
227
|
end
|
174
228
|
|
175
229
|
def read_until(term)
|
176
|
-
pattern = Regexp.
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
230
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
231
|
+
term = @term_encord[term] ||= encode(term)
|
232
|
+
until str = @scanner.scan_until(pattern)
|
233
|
+
break if @source.nil?
|
234
|
+
break if @source.eof?
|
235
|
+
@scanner << readline(term)
|
236
|
+
end
|
237
|
+
if str
|
184
238
|
read if @scanner.eos? and !@source.eof?
|
185
239
|
str
|
240
|
+
else
|
241
|
+
rest = @scanner.rest
|
242
|
+
@scanner.pos = @scanner.string.bytesize
|
243
|
+
rest
|
186
244
|
end
|
187
245
|
end
|
188
246
|
|
@@ -190,10 +248,9 @@ module REXML
|
|
190
248
|
read if @scanner.eos? && @source
|
191
249
|
end
|
192
250
|
|
193
|
-
# Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
|
194
|
-
# - ">"
|
195
|
-
# - "XXX>" (X is any string excluding '>')
|
196
251
|
def match( pattern, cons=false )
|
252
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
253
|
+
min_bytes = 1
|
197
254
|
while true
|
198
255
|
if cons
|
199
256
|
md = @scanner.scan(pattern)
|
@@ -203,7 +260,8 @@ module REXML
|
|
203
260
|
break if md
|
204
261
|
return nil if pattern.is_a?(String)
|
205
262
|
return nil if @source.nil?
|
206
|
-
return nil unless read
|
263
|
+
return nil unless read(nil, min_bytes)
|
264
|
+
min_bytes *= 2
|
207
265
|
end
|
208
266
|
|
209
267
|
md.nil? ? nil : @scanner
|
@@ -237,14 +295,19 @@ module REXML
|
|
237
295
|
|
238
296
|
private
|
239
297
|
def readline(term = nil)
|
240
|
-
str = @source.readline(term || @line_break)
|
241
298
|
if @pending_buffer
|
299
|
+
begin
|
300
|
+
str = @source.readline(term || @line_break)
|
301
|
+
rescue IOError
|
302
|
+
end
|
242
303
|
if str.nil?
|
243
304
|
str = @pending_buffer
|
244
305
|
else
|
245
306
|
str = @pending_buffer + str
|
246
307
|
end
|
247
308
|
@pending_buffer = nil
|
309
|
+
else
|
310
|
+
str = @source.readline(term || @line_break)
|
248
311
|
end
|
249
312
|
return nil if str.nil?
|
250
313
|
|
data/lib/rexml/text.rb
CHANGED
@@ -151,25 +151,45 @@ module REXML
|
|
151
151
|
end
|
152
152
|
end
|
153
153
|
|
154
|
-
|
155
|
-
string.
|
156
|
-
if
|
157
|
-
raise "Illegal character #{
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
154
|
+
pos = 0
|
155
|
+
while (index = string.index(/<|&/, pos))
|
156
|
+
if string[index] == "<"
|
157
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
158
|
+
end
|
159
|
+
|
160
|
+
unless (end_index = string.index(/[^\s];/, index + 1))
|
161
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
162
|
+
end
|
163
|
+
|
164
|
+
value = string[(index + 1)..end_index]
|
165
|
+
if /\s/.match?(value)
|
166
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
167
|
+
end
|
168
|
+
|
169
|
+
if value[0] == "#"
|
170
|
+
character_reference = value[1..-1]
|
171
|
+
|
172
|
+
unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
|
173
|
+
if character_reference[0] == "x" || character_reference[-1] == "x"
|
174
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
162
175
|
else
|
163
|
-
raise "Illegal character #{
|
176
|
+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
|
164
177
|
end
|
165
|
-
# FIXME: below can't work but this needs API change.
|
166
|
-
# elsif @parent and $3 and !SUBSTITUTES.include?($1)
|
167
|
-
# if !doctype or !doctype.entities.has_key?($3)
|
168
|
-
# raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
|
169
|
-
# end
|
170
178
|
end
|
179
|
+
|
180
|
+
case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
|
181
|
+
when *VALID_CHAR
|
182
|
+
else
|
183
|
+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
|
184
|
+
end
|
185
|
+
elsif !(/\A#{Entity::NAME}\z/um.match?(value))
|
186
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
171
187
|
end
|
188
|
+
|
189
|
+
pos = end_index + 1
|
172
190
|
end
|
191
|
+
|
192
|
+
string
|
173
193
|
end
|
174
194
|
|
175
195
|
def node_type
|
@@ -248,7 +268,8 @@ module REXML
|
|
248
268
|
# u = Text.new( "sean russell", false, nil, true )
|
249
269
|
# u.value #-> "sean russell"
|
250
270
|
def value
|
251
|
-
@unnormalized ||= Text::unnormalize(
|
271
|
+
@unnormalized ||= Text::unnormalize(@string, doctype,
|
272
|
+
entity_expansion_text_limit: document&.entity_expansion_text_limit)
|
252
273
|
end
|
253
274
|
|
254
275
|
# Sets the contents of this text node. This expects the text to be
|
@@ -391,11 +412,12 @@ module REXML
|
|
391
412
|
end
|
392
413
|
|
393
414
|
# Unescapes all possible entities
|
394
|
-
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
|
415
|
+
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil, entity_expansion_text_limit: nil )
|
416
|
+
entity_expansion_text_limit ||= Security.entity_expansion_text_limit
|
395
417
|
sum = 0
|
396
418
|
string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
|
397
419
|
s = Text.expand($&, doctype, filter)
|
398
|
-
if sum + s.bytesize > Security.entity_expansion_text_limit
|
420
|
+
if sum + s.bytesize > entity_expansion_text_limit
|
399
421
|
raise "entity expansion has grown too large"
|
400
422
|
else
|
401
423
|
sum += s.bytesize
|
metadata
CHANGED
@@ -1,28 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.2.8
|
4
|
+
version: 3.3.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date: 2024-
|
11
|
-
dependencies:
|
12
|
-
- !ruby/object:Gem::Dependency
|
13
|
-
name: strscan
|
14
|
-
requirement: !ruby/object:Gem::Requirement
|
15
|
-
requirements:
|
16
|
-
- - ">="
|
17
|
-
- !ruby/object:Gem::Version
|
18
|
-
version: 3.0.9
|
19
|
-
type: :runtime
|
20
|
-
prerelease: false
|
21
|
-
version_requirements: !ruby/object:Gem::Requirement
|
22
|
-
requirements:
|
23
|
-
- - ">="
|
24
|
-
- !ruby/object:Gem::Version
|
25
|
-
version: 3.0.9
|
10
|
+
date: 2024-10-24 00:00:00.000000000 Z
|
11
|
+
dependencies: []
|
26
12
|
description: An XML toolkit for Ruby
|
27
13
|
email:
|
28
14
|
- kou@cozmixng.org
|
@@ -115,7 +101,8 @@ files:
|
|
115
101
|
homepage: https://github.com/ruby/rexml
|
116
102
|
licenses:
|
117
103
|
- BSD-2-Clause
|
118
|
-
metadata: {}
|
104
|
+
metadata:
|
105
|
+
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.9
|
119
106
|
rdoc_options:
|
120
107
|
- "--main"
|
121
108
|
- README.md
|