rexml 3.2.7 → 3.3.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +149 -2
- data/lib/rexml/element.rb +2 -15
- data/lib/rexml/formatters/pretty.rb +1 -1
- data/lib/rexml/node.rb +8 -4
- data/lib/rexml/parsers/baseparser.rb +97 -29
- data/lib/rexml/parsers/sax2parser.rb +2 -19
- data/lib/rexml/parsers/streamparser.rb +2 -2
- data/lib/rexml/parsers/treeparser.rb +9 -14
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +59 -13
- data/lib/rexml/text.rb +34 -14
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 70ccd1465a05dba3d53dcfc4a98e76dec865a4f6ac833b954aff4234bce6c255
|
4
|
+
data.tar.gz: 53f43fab8f531e0ba7461ce091e5eae6bec27b12e9139450c7b3e748b4eeacdc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b46818d79ae57075c4e0bd620802e82c6958dddc7da1b182504c3fdc16685c887ac0ddd6a4838a080483abba330839e9ef4b2db22cc81b9eae3eac71ac14c965
|
7
|
+
data.tar.gz: 1e5205905eb435c02038dd0539de22472f5364ffc47635f13a1752cb79a423dcca558fb47394ac5d624b358e779b07cbcafedfd06b99742026856f9988109976
|
data/NEWS.md
CHANGED
@@ -1,5 +1,145 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.3.2 - 2024-07-16 {#version-3-3-2}
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Improved parse performance.
|
8
|
+
* GH-160
|
9
|
+
* Patch by NAITOH Jun.
|
10
|
+
|
11
|
+
* Improved parse performance.
|
12
|
+
* GH-169
|
13
|
+
* GH-170
|
14
|
+
* GH-171
|
15
|
+
* GH-172
|
16
|
+
* GH-173
|
17
|
+
* GH-174
|
18
|
+
* Patch by Watson.
|
19
|
+
|
20
|
+
* Added support for raising a parse exception when an XML has extra
|
21
|
+
content after the root element.
|
22
|
+
* GH-161
|
23
|
+
* Patch by NAITOH Jun.
|
24
|
+
|
25
|
+
* Added support for raising a parse exception when an XML
|
26
|
+
declaration exists in wrong position.
|
27
|
+
* GH-162
|
28
|
+
* Patch by NAITOH Jun.
|
29
|
+
|
30
|
+
* Removed a needless space after the XML declaration in pretty print mode.
|
31
|
+
* GH-164
|
32
|
+
* Patch by NAITOH Jun.
|
33
|
+
|
34
|
+
* Stopped emitting `:text` events after the root element.
|
35
|
+
* GH-167
|
36
|
+
* Patch by NAITOH Jun.
|
37
|
+
|
38
|
+
### Fixes
|
39
|
+
|
40
|
+
* Fixed a bug that SAX2 parser doesn't expand predefined entities for
|
41
|
+
`characters` callback.
|
42
|
+
* GH-168
|
43
|
+
* Patch by NAITOH Jun.
|
44
|
+
|
45
|
+
### Thanks
|
46
|
+
|
47
|
+
* NAITOH Jun
|
48
|
+
|
49
|
+
* Watson
|
50
|
+
|
51
|
+
## 3.3.1 - 2024-06-25 {#version-3-3-1}
|
52
|
+
|
53
|
+
### Improvements
|
54
|
+
|
55
|
+
* Added support for detecting malformed top-level comments.
|
56
|
+
* GH-145
|
57
|
+
* Patch by Hiroya Fujinami.
|
58
|
+
|
59
|
+
* Improved `REXML::Element#attribute` performance.
|
60
|
+
* GH-146
|
61
|
+
* Patch by Hiroya Fujinami.
|
62
|
+
|
63
|
+
* Added support for detecting malformed `<!-->` comments.
|
64
|
+
* GH-147
|
65
|
+
* Patch by Hiroya Fujinami.
|
66
|
+
|
67
|
+
* Added support for detecting unclosed `DOCTYPE`.
|
68
|
+
* GH-152
|
69
|
+
* Patch by Hiroya Fujinami.
|
70
|
+
|
71
|
+
* Added `changelog_uri` metadata to gemspec.
|
72
|
+
* GH-156
|
73
|
+
* Patch by fynsta.
|
74
|
+
|
75
|
+
* Improved parse performance.
|
76
|
+
* GH-157
|
77
|
+
* GH-158
|
78
|
+
* Patch by NAITOH Jun.
|
79
|
+
|
80
|
+
### Fixes
|
81
|
+
|
82
|
+
* Fixed a bug that large XML can't be parsed.
|
83
|
+
* GH-154
|
84
|
+
* Patch by NAITOH Jun.
|
85
|
+
|
86
|
+
* Fixed a bug that private constants are visible.
|
87
|
+
* GH-155
|
88
|
+
* Patch by NAITOH Jun.
|
89
|
+
|
90
|
+
### Thanks
|
91
|
+
|
92
|
+
* Hiroya Fujinami
|
93
|
+
|
94
|
+
* NAITOH Jun
|
95
|
+
|
96
|
+
* fynsta
|
97
|
+
|
98
|
+
## 3.3.0 - 2024-06-11 {#version-3-3-0}
|
99
|
+
|
100
|
+
### Improvements
|
101
|
+
|
102
|
+
* Added support for strscan 0.7.0 installed with Ruby 2.6.
|
103
|
+
* GH-142
|
104
|
+
* Reported by Fernando Trigoso.
|
105
|
+
|
106
|
+
### Thanks
|
107
|
+
|
108
|
+
* Fernando Trigoso
|
109
|
+
|
110
|
+
## 3.2.9 - 2024-06-09 {#version-3-2-9}
|
111
|
+
|
112
|
+
### Improvements
|
113
|
+
|
114
|
+
* Added support for old strscan.
|
115
|
+
* GH-132
|
116
|
+
* Reported by Adam.
|
117
|
+
|
118
|
+
* Improved attribute value parse performance.
|
119
|
+
* GH-135
|
120
|
+
* Patch by NAITOH Jun.
|
121
|
+
|
122
|
+
* Improved `REXML::Node#each_recursive` performance.
|
123
|
+
* GH-134
|
124
|
+
* GH-139
|
125
|
+
* Patch by Hiroya Fujinami.
|
126
|
+
|
127
|
+
* Improved text parse performance.
|
128
|
+
* Reported by mprogrammer.
|
129
|
+
|
130
|
+
### Thanks
|
131
|
+
|
132
|
+
* Adam
|
133
|
+
* NAITOH Jun
|
134
|
+
* Hiroya Fujinami
|
135
|
+
* mprogrammer
|
136
|
+
|
137
|
+
## 3.2.8 - 2024-05-16 {#version-3-2-8}
|
138
|
+
|
139
|
+
### Fixes
|
140
|
+
|
141
|
+
* Suppressed a warning
|
142
|
+
|
3
143
|
## 3.2.7 - 2024-05-16 {#version-3-2-7}
|
4
144
|
|
5
145
|
### Improvements
|
@@ -24,7 +164,7 @@
|
|
24
164
|
|
25
165
|
* Improved parse performance when an attribute has many `<`s.
|
26
166
|
|
27
|
-
* GH-
|
167
|
+
* GH-126
|
28
168
|
|
29
169
|
### Fixes
|
30
170
|
|
@@ -52,7 +192,14 @@
|
|
52
192
|
* Reported by DuKewu.
|
53
193
|
* Patch by NAITOH Jun.
|
54
194
|
|
55
|
-
|
195
|
+
### Thanks
|
196
|
+
|
197
|
+
* NAITOH Jun
|
198
|
+
* flatisland
|
199
|
+
* jcavalieri
|
200
|
+
* DuKewu
|
201
|
+
|
202
|
+
## 3.2.6 - 2023-07-27 {#version-3-2-6}
|
56
203
|
|
57
204
|
### Improvements
|
58
205
|
|
data/lib/rexml/element.rb
CHANGED
@@ -7,14 +7,6 @@ require_relative "xpath"
|
|
7
7
|
require_relative "parseexception"
|
8
8
|
|
9
9
|
module REXML
|
10
|
-
# An implementation note about namespaces:
|
11
|
-
# As we parse, when we find namespaces we put them in a hash and assign
|
12
|
-
# them a unique ID. We then convert the namespace prefix for the node
|
13
|
-
# to the unique ID. This makes namespace lookup much faster for the
|
14
|
-
# cost of extra memory use. We save the namespace prefix for the
|
15
|
-
# context node and convert it back when we write it.
|
16
|
-
@@namespaces = {}
|
17
|
-
|
18
10
|
# An \REXML::Element object represents an XML element.
|
19
11
|
#
|
20
12
|
# An element:
|
@@ -1284,16 +1276,11 @@ module REXML
|
|
1284
1276
|
# document.root.attribute("x", "a") # => a:x='a:x'
|
1285
1277
|
#
|
1286
1278
|
def attribute( name, namespace=nil )
|
1287
|
-
prefix =
|
1288
|
-
if namespaces.respond_to? :key
|
1289
|
-
prefix = namespaces.key(namespace) if namespace
|
1290
|
-
else
|
1291
|
-
prefix = namespaces.index(namespace) if namespace
|
1292
|
-
end
|
1279
|
+
prefix = namespaces.key(namespace) if namespace
|
1293
1280
|
prefix = nil if prefix == 'xmlns'
|
1294
1281
|
|
1295
1282
|
ret_val =
|
1296
|
-
attributes.get_attribute(
|
1283
|
+
attributes.get_attribute( prefix ? "#{prefix}:#{name}" : name )
|
1297
1284
|
|
1298
1285
|
return ret_val unless ret_val.nil?
|
1299
1286
|
return nil if prefix.nil?
|
@@ -111,7 +111,7 @@ module REXML
|
|
111
111
|
# itself, then we don't need a carriage return... which makes this
|
112
112
|
# logic more complex.
|
113
113
|
node.children.each { |child|
|
114
|
-
next if child
|
114
|
+
next if child.instance_of?(Text)
|
115
115
|
unless child == node.children[0] or child.instance_of?(Text) or
|
116
116
|
(child == node.children[1] and !node.children[0].writethis)
|
117
117
|
output << "\n"
|
data/lib/rexml/node.rb
CHANGED
@@ -52,10 +52,14 @@ module REXML
|
|
52
52
|
|
53
53
|
# Visit all subnodes of +self+ recursively
|
54
54
|
def each_recursive(&block) # :yields: node
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
55
|
+
stack = []
|
56
|
+
each { |child| stack.unshift child if child.node_type == :element }
|
57
|
+
until stack.empty?
|
58
|
+
child = stack.pop
|
59
|
+
yield child
|
60
|
+
n = stack.size
|
61
|
+
child.each { |grandchild| stack.insert n, grandchild if grandchild.node_type == :element }
|
62
|
+
end
|
59
63
|
end
|
60
64
|
|
61
65
|
# Find (and return) first subnode (recursively) for which the block
|
@@ -7,6 +7,17 @@ require "strscan"
|
|
7
7
|
|
8
8
|
module REXML
|
9
9
|
module Parsers
|
10
|
+
if StringScanner::Version < "3.0.8"
|
11
|
+
module StringScannerCaptures
|
12
|
+
refine StringScanner do
|
13
|
+
def captures
|
14
|
+
values_at(*(1...size))
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
using StringScannerCaptures
|
19
|
+
end
|
20
|
+
|
10
21
|
# = Using the Pull Parser
|
11
22
|
# <em>This API is experimental, and subject to change.</em>
|
12
23
|
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
@@ -113,6 +124,14 @@ module REXML
|
|
113
124
|
}
|
114
125
|
|
115
126
|
module Private
|
127
|
+
# Terminal requires two or more letters.
|
128
|
+
INSTRUCTION_TERM = "?>"
|
129
|
+
COMMENT_TERM = "-->"
|
130
|
+
CDATA_TERM = "]]>"
|
131
|
+
DOCTYPE_TERM = "]>"
|
132
|
+
# Read to the end of DOCTYPE because there is no proper ENTITY termination
|
133
|
+
ENTITY_TERM = DOCTYPE_TERM
|
134
|
+
|
116
135
|
INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
|
117
136
|
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
118
137
|
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
@@ -121,13 +140,20 @@ module REXML
|
|
121
140
|
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
122
141
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
123
142
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
143
|
+
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
|
144
|
+
CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
|
145
|
+
DEFAULT_ENTITIES_PATTERNS = {}
|
146
|
+
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
|
147
|
+
default_entities.each do |term|
|
148
|
+
DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
|
149
|
+
end
|
124
150
|
end
|
125
151
|
private_constant :Private
|
126
|
-
include Private
|
127
152
|
|
128
153
|
def initialize( source )
|
129
154
|
self.stream = source
|
130
155
|
@listeners = []
|
156
|
+
@prefixes = Set.new
|
131
157
|
end
|
132
158
|
|
133
159
|
def add_listener( listener )
|
@@ -139,6 +165,7 @@ module REXML
|
|
139
165
|
def stream=( source )
|
140
166
|
@source = SourceFactory.create_from( source )
|
141
167
|
@closed = nil
|
168
|
+
@have_root = false
|
142
169
|
@document_status = nil
|
143
170
|
@tags = []
|
144
171
|
@stack = []
|
@@ -193,6 +220,8 @@ module REXML
|
|
193
220
|
|
194
221
|
# Returns the next event. This is a +PullEvent+ object.
|
195
222
|
def pull
|
223
|
+
@source.drop_parsed_content
|
224
|
+
|
196
225
|
pull_event.tap do |event|
|
197
226
|
@listeners.each do |listener|
|
198
227
|
listener.receive event
|
@@ -205,7 +234,12 @@ module REXML
|
|
205
234
|
x, @closed = @closed, nil
|
206
235
|
return [ :end_element, x ]
|
207
236
|
end
|
208
|
-
|
237
|
+
if empty?
|
238
|
+
if @document_status == :in_doctype
|
239
|
+
raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
|
240
|
+
end
|
241
|
+
return [ :end_document ]
|
242
|
+
end
|
209
243
|
return @stack.shift if @stack.size > 0
|
210
244
|
#STDERR.puts @source.encoding
|
211
245
|
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
|
@@ -217,7 +251,14 @@ module REXML
|
|
217
251
|
return process_instruction(start_position)
|
218
252
|
elsif @source.match("<!", true)
|
219
253
|
if @source.match("--", true)
|
220
|
-
|
254
|
+
md = @source.match(/(.*?)-->/um, true, term: Private::COMMENT_TERM)
|
255
|
+
if md.nil?
|
256
|
+
raise REXML::ParseException.new("Unclosed comment", @source)
|
257
|
+
end
|
258
|
+
if /--|-\z/.match?(md[1])
|
259
|
+
raise REXML::ParseException.new("Malformed comment", @source)
|
260
|
+
end
|
261
|
+
return [ :comment, md[1] ]
|
221
262
|
elsif @source.match("DOCTYPE", true)
|
222
263
|
base_error_message = "Malformed DOCTYPE"
|
223
264
|
unless @source.match(/\s+/um, true)
|
@@ -229,7 +270,7 @@ module REXML
|
|
229
270
|
@source.position = start_position
|
230
271
|
raise REXML::ParseException.new(message, @source)
|
231
272
|
end
|
232
|
-
@nsstack.unshift(
|
273
|
+
@nsstack.unshift(Set.new)
|
233
274
|
name = parse_name(base_error_message)
|
234
275
|
if @source.match(/\s*\[/um, true)
|
235
276
|
id = [nil, nil, nil]
|
@@ -277,7 +318,7 @@ module REXML
|
|
277
318
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
278
319
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
279
320
|
elsif @source.match("ENTITY", true)
|
280
|
-
match = [:entitydecl, *@source.match(ENTITYDECL_PATTERN, true).captures.compact]
|
321
|
+
match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true, term: Private::ENTITY_TERM).captures.compact]
|
281
322
|
ref = false
|
282
323
|
if match[1] == '%'
|
283
324
|
ref = true
|
@@ -303,13 +344,13 @@ module REXML
|
|
303
344
|
match << '%' if ref
|
304
345
|
return match
|
305
346
|
elsif @source.match("ATTLIST", true)
|
306
|
-
md = @source.match(ATTLISTDECL_END, true)
|
347
|
+
md = @source.match(Private::ATTLISTDECL_END, true)
|
307
348
|
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
308
349
|
element = md[1]
|
309
350
|
contents = md[0]
|
310
351
|
|
311
352
|
pairs = {}
|
312
|
-
values = md[0].scan( ATTDEF_RE )
|
353
|
+
values = md[0].strip.scan( ATTDEF_RE )
|
313
354
|
values.each do |attdef|
|
314
355
|
unless attdef[3] == "#IMPLIED"
|
315
356
|
attdef.compact!
|
@@ -342,19 +383,22 @@ module REXML
|
|
342
383
|
raise REXML::ParseException.new(message, @source)
|
343
384
|
end
|
344
385
|
return [:notationdecl, name, *id]
|
345
|
-
elsif md = @source.match(/--(.*?)-->/um, true)
|
386
|
+
elsif md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM)
|
346
387
|
case md[1]
|
347
388
|
when /--/, /-\z/
|
348
389
|
raise REXML::ParseException.new("Malformed comment", @source)
|
349
390
|
end
|
350
391
|
return [ :comment, md[1] ] if md
|
351
392
|
end
|
352
|
-
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
393
|
+
elsif match = @source.match(/(%.*?;)\s*/um, true, term: Private::DOCTYPE_TERM)
|
353
394
|
return [ :externalentity, match[1] ]
|
354
395
|
elsif @source.match(/\]\s*>/um, true)
|
355
396
|
@document_status = :after_doctype
|
356
397
|
return [ :end_doctype ]
|
357
398
|
end
|
399
|
+
if @document_status == :in_doctype
|
400
|
+
raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
|
401
|
+
end
|
358
402
|
end
|
359
403
|
if @document_status == :after_doctype
|
360
404
|
@source.match(/\s*/um, true)
|
@@ -362,10 +406,14 @@ module REXML
|
|
362
406
|
begin
|
363
407
|
start_position = @source.position
|
364
408
|
if @source.match("<", true)
|
409
|
+
# :text's read_until may remain only "<" in buffer. In the
|
410
|
+
# case, buffer is empty here. So we need to fill buffer
|
411
|
+
# here explicitly.
|
412
|
+
@source.ensure_buffer
|
365
413
|
if @source.match("/", true)
|
366
414
|
@nsstack.shift
|
367
415
|
last_tag = @tags.pop
|
368
|
-
md = @source.match(CLOSE_PATTERN, true)
|
416
|
+
md = @source.match(Private::CLOSE_PATTERN, true)
|
369
417
|
if md and !last_tag
|
370
418
|
message = "Unexpected top-level end tag (got '#{md[1]}')"
|
371
419
|
raise REXML::ParseException.new(message, @source)
|
@@ -382,16 +430,15 @@ module REXML
|
|
382
430
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
383
431
|
raise REXML::ParseException.new("Malformed node", @source) unless md
|
384
432
|
if md[0][0] == ?-
|
385
|
-
md = @source.match(/--(.*?)-->/um, true)
|
433
|
+
md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM)
|
386
434
|
|
387
|
-
|
388
|
-
when /--/, /-\z/
|
435
|
+
if md.nil? || /--|-\z/.match?(md[1])
|
389
436
|
raise REXML::ParseException.new("Malformed comment", @source)
|
390
437
|
end
|
391
438
|
|
392
|
-
return [ :comment, md[1] ]
|
439
|
+
return [ :comment, md[1] ]
|
393
440
|
else
|
394
|
-
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
|
441
|
+
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true, term: Private::CDATA_TERM)
|
395
442
|
return [ :cdata, md[1] ] if md
|
396
443
|
end
|
397
444
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
@@ -400,19 +447,19 @@ module REXML
|
|
400
447
|
return process_instruction(start_position)
|
401
448
|
else
|
402
449
|
# Get the next tag
|
403
|
-
md = @source.match(TAG_PATTERN, true)
|
450
|
+
md = @source.match(Private::TAG_PATTERN, true)
|
404
451
|
unless md
|
405
452
|
@source.position = start_position
|
406
453
|
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
407
454
|
end
|
408
455
|
tag = md[1]
|
409
456
|
@document_status = :in_element
|
410
|
-
prefixes
|
411
|
-
prefixes << md[2] if md[2]
|
457
|
+
@prefixes.clear
|
458
|
+
@prefixes << md[2] if md[2]
|
412
459
|
@nsstack.unshift(curr_ns=Set.new)
|
413
|
-
attributes, closed = parse_attributes(prefixes, curr_ns)
|
460
|
+
attributes, closed = parse_attributes(@prefixes, curr_ns)
|
414
461
|
# Verify that all of the prefixes have been defined
|
415
|
-
for prefix in prefixes
|
462
|
+
for prefix in @prefixes
|
416
463
|
unless @nsstack.find{|k| k.member?(prefix)}
|
417
464
|
raise UndefinedNamespaceException.new(prefix,@source,self)
|
418
465
|
end
|
@@ -422,13 +469,25 @@ module REXML
|
|
422
469
|
@closed = tag
|
423
470
|
@nsstack.shift
|
424
471
|
else
|
472
|
+
if @tags.empty? and @have_root
|
473
|
+
raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
|
474
|
+
end
|
425
475
|
@tags.push( tag )
|
426
476
|
end
|
477
|
+
@have_root = true
|
427
478
|
return [ :start_element, tag, attributes ]
|
428
479
|
end
|
429
480
|
else
|
430
|
-
|
431
|
-
text
|
481
|
+
text = @source.read_until("<")
|
482
|
+
if text.chomp!("<")
|
483
|
+
@source.position -= "<".bytesize
|
484
|
+
end
|
485
|
+
if @tags.empty? and @have_root
|
486
|
+
unless /\A\s*\z/.match?(text)
|
487
|
+
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
|
488
|
+
end
|
489
|
+
return pull_event
|
490
|
+
end
|
432
491
|
return [ :text, text ]
|
433
492
|
end
|
434
493
|
rescue REXML::UndefinedNamespaceException
|
@@ -471,10 +530,14 @@ module REXML
|
|
471
530
|
|
472
531
|
# Unescapes all possible entities
|
473
532
|
def unnormalize( string, entities=nil, filter=nil )
|
474
|
-
|
533
|
+
if string.include?("\r")
|
534
|
+
rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
|
535
|
+
else
|
536
|
+
rv = string.dup
|
537
|
+
end
|
475
538
|
matches = rv.scan( REFERENCE_RE )
|
476
539
|
return rv if matches.size == 0
|
477
|
-
rv.gsub!(
|
540
|
+
rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
478
541
|
m=$1
|
479
542
|
m = "0#{m}" if m[0] == ?x
|
480
543
|
[Integer(m)].pack('U*')
|
@@ -485,7 +548,7 @@ module REXML
|
|
485
548
|
unless filter and filter.include?(entity_reference)
|
486
549
|
entity_value = entity( entity_reference, entities )
|
487
550
|
if entity_value
|
488
|
-
re = /&#{entity_reference};/
|
551
|
+
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
|
489
552
|
rv.gsub!( re, entity_value )
|
490
553
|
else
|
491
554
|
er = DEFAULT_ENTITIES[entity_reference]
|
@@ -493,7 +556,7 @@ module REXML
|
|
493
556
|
end
|
494
557
|
end
|
495
558
|
end
|
496
|
-
rv.gsub!(
|
559
|
+
rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
|
497
560
|
end
|
498
561
|
rv
|
499
562
|
end
|
@@ -506,7 +569,7 @@ module REXML
|
|
506
569
|
end
|
507
570
|
|
508
571
|
def parse_name(base_error_message)
|
509
|
-
md = @source.match(NAME_PATTERN, true)
|
572
|
+
md = @source.match(Private::NAME_PATTERN, true)
|
510
573
|
unless md
|
511
574
|
if @source.match(/\s*\S/um)
|
512
575
|
message = "#{base_error_message}: invalid name"
|
@@ -585,13 +648,16 @@ module REXML
|
|
585
648
|
end
|
586
649
|
|
587
650
|
def process_instruction(start_position)
|
588
|
-
match_data = @source.match(INSTRUCTION_END, true)
|
651
|
+
match_data = @source.match(Private::INSTRUCTION_END, true, term: Private::INSTRUCTION_TERM)
|
589
652
|
unless match_data
|
590
653
|
message = "Invalid processing instruction node"
|
591
654
|
@source.position = start_position
|
592
655
|
raise REXML::ParseException.new(message, @source)
|
593
656
|
end
|
594
|
-
if
|
657
|
+
if match_data[1] == "xml"
|
658
|
+
if @document_status
|
659
|
+
raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
|
660
|
+
end
|
595
661
|
content = match_data[2]
|
596
662
|
version = VERSION.match(content)
|
597
663
|
version = version[1] unless version.nil?
|
@@ -633,8 +699,10 @@ module REXML
|
|
633
699
|
raise REXML::ParseException.new(message, @source)
|
634
700
|
end
|
635
701
|
quote = match[1]
|
702
|
+
start_position = @source.position
|
636
703
|
value = @source.read_until(quote)
|
637
704
|
unless value.chomp!(quote)
|
705
|
+
@source.position = start_position
|
638
706
|
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
639
707
|
raise REXML::ParseException.new(message, @source)
|
640
708
|
end
|
@@ -157,25 +157,8 @@ module REXML
|
|
157
157
|
end
|
158
158
|
end
|
159
159
|
when :text
|
160
|
-
|
161
|
-
|
162
|
-
copy = event[1].clone
|
163
|
-
|
164
|
-
esub = proc { |match|
|
165
|
-
if @entities.has_key?($1)
|
166
|
-
@entities[$1].gsub(Text::REFERENCE, &esub)
|
167
|
-
else
|
168
|
-
match
|
169
|
-
end
|
170
|
-
}
|
171
|
-
|
172
|
-
copy.gsub!( Text::REFERENCE, &esub )
|
173
|
-
copy.gsub!( Text::NUMERICENTITY ) {|m|
|
174
|
-
m=$1
|
175
|
-
m = "0#{m}" if m[0] == ?x
|
176
|
-
[Integer(m)].pack('U*')
|
177
|
-
}
|
178
|
-
handle( :characters, copy )
|
160
|
+
unnormalized = @parser.unnormalize( event[1], @entities )
|
161
|
+
handle( :characters, unnormalized )
|
179
162
|
when :entitydecl
|
180
163
|
handle_entitydecl( event )
|
181
164
|
when :processing_instruction, :comment, :attlistdecl,
|
@@ -36,8 +36,8 @@ module REXML
|
|
36
36
|
@listener.tag_end( event[1] )
|
37
37
|
@tag_stack.pop
|
38
38
|
when :text
|
39
|
-
|
40
|
-
@listener.text(
|
39
|
+
unnormalized = @parser.unnormalize( event[1] )
|
40
|
+
@listener.text( unnormalized )
|
41
41
|
when :processing_instruction
|
42
42
|
@listener.instruction( *event[1,2] )
|
43
43
|
when :start_doctype
|
@@ -16,7 +16,6 @@ module REXML
|
|
16
16
|
|
17
17
|
def parse
|
18
18
|
tag_stack = []
|
19
|
-
in_doctype = false
|
20
19
|
entities = nil
|
21
20
|
begin
|
22
21
|
while true
|
@@ -39,17 +38,15 @@ module REXML
|
|
39
38
|
tag_stack.pop
|
40
39
|
@build_context = @build_context.parent
|
41
40
|
when :text
|
42
|
-
if
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
@build_context.
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
)
|
52
|
-
end
|
41
|
+
if @build_context[-1].instance_of? Text
|
42
|
+
@build_context[-1] << event[1]
|
43
|
+
else
|
44
|
+
@build_context.add(
|
45
|
+
Text.new(event[1], @build_context.whitespace, nil, true)
|
46
|
+
) unless (
|
47
|
+
@build_context.ignore_whitespace_nodes and
|
48
|
+
event[1].strip.size==0
|
49
|
+
)
|
53
50
|
end
|
54
51
|
when :comment
|
55
52
|
c = Comment.new( event[1] )
|
@@ -60,14 +57,12 @@ module REXML
|
|
60
57
|
when :processing_instruction
|
61
58
|
@build_context.add( Instruction.new( event[1], event[2] ) )
|
62
59
|
when :end_doctype
|
63
|
-
in_doctype = false
|
64
60
|
entities.each { |k,v| entities[k] = @build_context.entities[k].value }
|
65
61
|
@build_context = @build_context.parent
|
66
62
|
when :start_doctype
|
67
63
|
doctype = DocType.new( event[1..-1], @build_context )
|
68
64
|
@build_context = doctype
|
69
65
|
entities = {}
|
70
|
-
in_doctype = true
|
71
66
|
when :attlistdecl
|
72
67
|
n = AttlistDecl.new( event[1..-1] )
|
73
68
|
@build_context.add( n )
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -1,8 +1,28 @@
|
|
1
1
|
# coding: US-ASCII
|
2
2
|
# frozen_string_literal: false
|
3
|
+
|
4
|
+
require "strscan"
|
5
|
+
|
3
6
|
require_relative 'encoding'
|
4
7
|
|
5
8
|
module REXML
|
9
|
+
if StringScanner::Version < "1.0.0"
|
10
|
+
module StringScannerCheckScanString
|
11
|
+
refine StringScanner do
|
12
|
+
def check(pattern)
|
13
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
14
|
+
super(pattern)
|
15
|
+
end
|
16
|
+
|
17
|
+
def scan(pattern)
|
18
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
19
|
+
super(pattern)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
using StringScannerCheckScanString
|
24
|
+
end
|
25
|
+
|
6
26
|
# Generates Source-s. USE THIS CLASS.
|
7
27
|
class SourceFactory
|
8
28
|
# Generates a Source object
|
@@ -34,6 +54,16 @@ module REXML
|
|
34
54
|
attr_reader :line
|
35
55
|
attr_reader :encoding
|
36
56
|
|
57
|
+
module Private
|
58
|
+
SCANNER_RESET_SIZE = 100000
|
59
|
+
PRE_DEFINED_TERM_PATTERNS = {}
|
60
|
+
pre_defined_terms = ["'", '"', "<"]
|
61
|
+
pre_defined_terms.each do |term|
|
62
|
+
PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
|
63
|
+
end
|
64
|
+
end
|
65
|
+
private_constant :Private
|
66
|
+
|
37
67
|
# Constructor
|
38
68
|
# @param arg must be a String, and should be a valid XML document
|
39
69
|
# @param encoding if non-null, sets the encoding of the source to this
|
@@ -54,6 +84,12 @@ module REXML
|
|
54
84
|
@scanner.rest
|
55
85
|
end
|
56
86
|
|
87
|
+
def drop_parsed_content
|
88
|
+
if @scanner.pos > Private::SCANNER_RESET_SIZE
|
89
|
+
@scanner.string = @scanner.rest
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
57
93
|
def buffer_encoding=(encoding)
|
58
94
|
@scanner.string.force_encoding(encoding)
|
59
95
|
end
|
@@ -69,13 +105,19 @@ module REXML
|
|
69
105
|
end
|
70
106
|
|
71
107
|
def read_until(term)
|
72
|
-
|
108
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
109
|
+
data = @scanner.scan_until(pattern)
|
110
|
+
unless data
|
111
|
+
data = @scanner.rest
|
112
|
+
@scanner.pos = @scanner.string.bytesize
|
113
|
+
end
|
114
|
+
data
|
73
115
|
end
|
74
116
|
|
75
117
|
def ensure_buffer
|
76
118
|
end
|
77
119
|
|
78
|
-
def match(pattern, cons=false)
|
120
|
+
def match(pattern, cons=false, term: nil)
|
79
121
|
if cons
|
80
122
|
@scanner.scan(pattern).nil? ? nil : @scanner
|
81
123
|
else
|
@@ -163,6 +205,7 @@ module REXML
|
|
163
205
|
end
|
164
206
|
|
165
207
|
def read(term = nil)
|
208
|
+
term = encode(term) if term
|
166
209
|
begin
|
167
210
|
@scanner << readline(term)
|
168
211
|
true
|
@@ -173,17 +216,20 @@ module REXML
|
|
173
216
|
end
|
174
217
|
|
175
218
|
def read_until(term)
|
176
|
-
pattern = Regexp.
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
else
|
219
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
220
|
+
term = encode(term)
|
221
|
+
until str = @scanner.scan_until(pattern)
|
222
|
+
break if @source.nil?
|
223
|
+
break if @source.eof?
|
224
|
+
@scanner << readline(term)
|
225
|
+
end
|
226
|
+
if str
|
185
227
|
read if @scanner.eos? and !@source.eof?
|
186
228
|
str
|
229
|
+
else
|
230
|
+
rest = @scanner.rest
|
231
|
+
@scanner.pos = @scanner.string.bytesize
|
232
|
+
rest
|
187
233
|
end
|
188
234
|
end
|
189
235
|
|
@@ -194,7 +240,7 @@ module REXML
|
|
194
240
|
# Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
|
195
241
|
# - ">"
|
196
242
|
# - "XXX>" (X is any string excluding '>')
|
197
|
-
def match( pattern, cons=false )
|
243
|
+
def match( pattern, cons=false, term: nil )
|
198
244
|
while true
|
199
245
|
if cons
|
200
246
|
md = @scanner.scan(pattern)
|
@@ -204,7 +250,7 @@ module REXML
|
|
204
250
|
break if md
|
205
251
|
return nil if pattern.is_a?(String)
|
206
252
|
return nil if @source.nil?
|
207
|
-
return nil unless read
|
253
|
+
return nil unless read(term)
|
208
254
|
end
|
209
255
|
|
210
256
|
md.nil? ? nil : @scanner
|
data/lib/rexml/text.rb
CHANGED
@@ -151,25 +151,45 @@ module REXML
|
|
151
151
|
end
|
152
152
|
end
|
153
153
|
|
154
|
-
|
155
|
-
string.
|
156
|
-
if
|
157
|
-
raise "Illegal character #{
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
154
|
+
pos = 0
|
155
|
+
while (index = string.index(/<|&/, pos))
|
156
|
+
if string[index] == "<"
|
157
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
158
|
+
end
|
159
|
+
|
160
|
+
unless (end_index = string.index(/[^\s];/, index + 1))
|
161
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
162
|
+
end
|
163
|
+
|
164
|
+
value = string[(index + 1)..end_index]
|
165
|
+
if /\s/.match?(value)
|
166
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
167
|
+
end
|
168
|
+
|
169
|
+
if value[0] == "#"
|
170
|
+
character_reference = value[1..-1]
|
171
|
+
|
172
|
+
unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
|
173
|
+
if character_reference[0] == "x" || character_reference[-1] == "x"
|
174
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
162
175
|
else
|
163
|
-
raise "Illegal character #{
|
176
|
+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
|
164
177
|
end
|
165
|
-
# FIXME: below can't work but this needs API change.
|
166
|
-
# elsif @parent and $3 and !SUBSTITUTES.include?($1)
|
167
|
-
# if !doctype or !doctype.entities.has_key?($3)
|
168
|
-
# raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
|
169
|
-
# end
|
170
178
|
end
|
179
|
+
|
180
|
+
case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
|
181
|
+
when *VALID_CHAR
|
182
|
+
else
|
183
|
+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
|
184
|
+
end
|
185
|
+
elsif !(/\A#{Entity::NAME}\z/um.match?(value))
|
186
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
171
187
|
end
|
188
|
+
|
189
|
+
pos = end_index + 1
|
172
190
|
end
|
191
|
+
|
192
|
+
string
|
173
193
|
end
|
174
194
|
|
175
195
|
def node_type
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.2
|
4
|
+
version: 3.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date: 2024-
|
10
|
+
date: 2024-07-16 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: strscan
|
@@ -15,14 +15,14 @@ dependencies:
|
|
15
15
|
requirements:
|
16
16
|
- - ">="
|
17
17
|
- !ruby/object:Gem::Version
|
18
|
-
version:
|
18
|
+
version: '0'
|
19
19
|
type: :runtime
|
20
20
|
prerelease: false
|
21
21
|
version_requirements: !ruby/object:Gem::Requirement
|
22
22
|
requirements:
|
23
23
|
- - ">="
|
24
24
|
- !ruby/object:Gem::Version
|
25
|
-
version:
|
25
|
+
version: '0'
|
26
26
|
description: An XML toolkit for Ruby
|
27
27
|
email:
|
28
28
|
- kou@cozmixng.org
|
@@ -115,7 +115,8 @@ files:
|
|
115
115
|
homepage: https://github.com/ruby/rexml
|
116
116
|
licenses:
|
117
117
|
- BSD-2-Clause
|
118
|
-
metadata:
|
118
|
+
metadata:
|
119
|
+
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.2
|
119
120
|
rdoc_options:
|
120
121
|
- "--main"
|
121
122
|
- README.md
|