rexml 3.2.6 → 3.2.7
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +53 -1
- data/lib/rexml/functions.rb +1 -2
- data/lib/rexml/parseexception.rb +1 -0
- data/lib/rexml/parsers/baseparser.rb +229 -230
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +73 -99
- data/lib/rexml/xpath_parser.rb +7 -3
- metadata +7 -38
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8c54a60c677a865a023fc0bf1fc403419b530cbc7b306bc7da18f1489e02cd79
|
4
|
+
data.tar.gz: 5dbbae05d90151d6d4ea9d8b5a4a3097e144ab79bb346c30e75c3d62cbc05dd7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5579b5fe5f6a5488d78d0ed19cdad1498aeb44bbe0b72dca9895391d1a3d1aaaed353fa14e7366d3c08ab6f723e4bb11d6cdb7a667fd310d5cdcec954bb0e77e
|
7
|
+
data.tar.gz: 2db805399a3cf3c6cf5bced1157e3c84539c5f3d12d806db951c5c3fd6aaadb86b3a4feaa0ea60a2771432009f873df3be3a688947156be9a63039a5f9bf449c
|
data/NEWS.md
CHANGED
@@ -1,6 +1,58 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
-
## 3.2.
|
3
|
+
## 3.2.7 - 2024-05-16 {#version-3-2-7}
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Improve parse performance by using `StringScanner`.
|
8
|
+
|
9
|
+
* GH-106
|
10
|
+
* GH-107
|
11
|
+
* GH-108
|
12
|
+
* GH-109
|
13
|
+
* GH-112
|
14
|
+
* GH-113
|
15
|
+
* GH-114
|
16
|
+
* GH-115
|
17
|
+
* GH-116
|
18
|
+
* GH-117
|
19
|
+
* GH-118
|
20
|
+
* GH-119
|
21
|
+
* GH-121
|
22
|
+
|
23
|
+
* Patch by NAITOH Jun.
|
24
|
+
|
25
|
+
* Improved parse performance when an attribute has many `<`s.
|
26
|
+
|
27
|
+
* GH-124
|
28
|
+
|
29
|
+
### Fixes
|
30
|
+
|
31
|
+
* XPath: Fixed a bug of `normalize_space(array)`.
|
32
|
+
|
33
|
+
* GH-110
|
34
|
+
* GH-111
|
35
|
+
|
36
|
+
* Patch by flatisland.
|
37
|
+
|
38
|
+
* XPath: Fixed a bug that wrong position is used with nested path.
|
39
|
+
|
40
|
+
* GH-110
|
41
|
+
* GH-122
|
42
|
+
|
43
|
+
* Reported by jcavalieri.
|
44
|
+
* Patch by NAITOH Jun.
|
45
|
+
|
46
|
+
* Fixed a bug that an exception message can't be generated for
|
47
|
+
invalid encoding XML.
|
48
|
+
|
49
|
+
* GH-29
|
50
|
+
* GH-123
|
51
|
+
|
52
|
+
* Reported by DuKewu.
|
53
|
+
* Patch by NAITOH Jun.
|
54
|
+
|
55
|
+
## 3.2.6 - 2023-07-27 {#version-3-2-6}
|
4
56
|
|
5
57
|
### Improvements
|
6
58
|
|
data/lib/rexml/functions.rb
CHANGED
@@ -262,11 +262,10 @@ module REXML
|
|
262
262
|
string(string).length
|
263
263
|
end
|
264
264
|
|
265
|
-
# UNTESTED
|
266
265
|
def Functions::normalize_space( string=nil )
|
267
266
|
string = string(@@context[:node]) if string.nil?
|
268
267
|
if string.kind_of? Array
|
269
|
-
string.collect{|x|
|
268
|
+
string.collect{|x| x.to_s.strip.gsub(/\s+/um, ' ') if x}
|
270
269
|
else
|
271
270
|
string.to_s.strip.gsub(/\s+/um, ' ')
|
272
271
|
end
|
data/lib/rexml/parseexception.rb
CHANGED
data/lib/rexml/parsers/baseparser.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# frozen_string_literal:
|
1
|
+
# frozen_string_literal: true
|
2
2
|
require_relative '../parseexception'
|
3
3
|
require_relative '../undefinednamespaceexception'
|
4
4
|
require_relative '../source'
|
@@ -96,7 +96,7 @@ module REXML
|
|
96
96
|
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
|
97
97
|
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
98
98
|
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
99
|
-
ENTITYDECL = /\s*(?:#{GEDECL})
|
99
|
+
ENTITYDECL = /\s*(?:#{GEDECL})|\s*(?:#{PEDECL})/um
|
100
100
|
|
101
101
|
NOTATIONDECL_START = /\A\s*<!NOTATION/um
|
102
102
|
EXTERNAL_ID_PUBLIC = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*/um
|
@@ -112,6 +112,19 @@ module REXML
|
|
112
112
|
"apos" => [/'/, "'", "'", /'/]
|
113
113
|
}
|
114
114
|
|
115
|
+
module Private
|
116
|
+
INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
|
117
|
+
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
118
|
+
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
119
|
+
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
120
|
+
NAME_PATTERN = /\s*#{NAME}/um
|
121
|
+
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
122
|
+
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
123
|
+
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
124
|
+
end
|
125
|
+
private_constant :Private
|
126
|
+
include Private
|
127
|
+
|
115
128
|
def initialize( source )
|
116
129
|
self.stream = source
|
117
130
|
@listeners = []
|
@@ -196,181 +209,180 @@ module REXML
|
|
196
209
|
return @stack.shift if @stack.size > 0
|
197
210
|
#STDERR.puts @source.encoding
|
198
211
|
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
|
212
|
+
|
213
|
+
@source.ensure_buffer
|
199
214
|
if @document_status == nil
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
|
217
|
-
encoding = "UTF-16"
|
218
|
-
end
|
219
|
-
standalone = STANDALONE.match(results)
|
220
|
-
standalone = standalone[1] unless standalone.nil?
|
221
|
-
return [ :xmldecl, version, encoding, standalone ]
|
222
|
-
when INSTRUCTION_START
|
223
|
-
return process_instruction
|
224
|
-
when DOCTYPE_START
|
225
|
-
base_error_message = "Malformed DOCTYPE"
|
226
|
-
@source.match(DOCTYPE_START, true)
|
227
|
-
@nsstack.unshift(curr_ns=Set.new)
|
228
|
-
name = parse_name(base_error_message)
|
229
|
-
if @source.match(/\A\s*\[/um, true)
|
230
|
-
id = [nil, nil, nil]
|
231
|
-
@document_status = :in_doctype
|
232
|
-
elsif @source.match(/\A\s*>/um, true)
|
233
|
-
id = [nil, nil, nil]
|
234
|
-
@document_status = :after_doctype
|
235
|
-
else
|
236
|
-
id = parse_id(base_error_message,
|
237
|
-
accept_external_id: true,
|
238
|
-
accept_public_id: false)
|
239
|
-
if id[0] == "SYSTEM"
|
240
|
-
# For backward compatibility
|
241
|
-
id[1], id[2] = id[2], nil
|
215
|
+
start_position = @source.position
|
216
|
+
if @source.match("<?", true)
|
217
|
+
return process_instruction(start_position)
|
218
|
+
elsif @source.match("<!", true)
|
219
|
+
if @source.match("--", true)
|
220
|
+
return [ :comment, @source.match(/(.*?)-->/um, true)[1] ]
|
221
|
+
elsif @source.match("DOCTYPE", true)
|
222
|
+
base_error_message = "Malformed DOCTYPE"
|
223
|
+
unless @source.match(/\s+/um, true)
|
224
|
+
if @source.match(">")
|
225
|
+
message = "#{base_error_message}: name is missing"
|
226
|
+
else
|
227
|
+
message = "#{base_error_message}: invalid name"
|
228
|
+
end
|
229
|
+
@source.position = start_position
|
230
|
+
raise REXML::ParseException.new(message, @source)
|
242
231
|
end
|
243
|
-
|
232
|
+
@nsstack.unshift(curr_ns=Set.new)
|
233
|
+
name = parse_name(base_error_message)
|
234
|
+
if @source.match(/\s*\[/um, true)
|
235
|
+
id = [nil, nil, nil]
|
244
236
|
@document_status = :in_doctype
|
245
|
-
elsif @source.match(/\
|
237
|
+
elsif @source.match(/\s*>/um, true)
|
238
|
+
id = [nil, nil, nil]
|
246
239
|
@document_status = :after_doctype
|
240
|
+
@source.ensure_buffer
|
247
241
|
else
|
248
|
-
|
249
|
-
|
242
|
+
id = parse_id(base_error_message,
|
243
|
+
accept_external_id: true,
|
244
|
+
accept_public_id: false)
|
245
|
+
if id[0] == "SYSTEM"
|
246
|
+
# For backward compatibility
|
247
|
+
id[1], id[2] = id[2], nil
|
248
|
+
end
|
249
|
+
if @source.match(/\s*\[/um, true)
|
250
|
+
@document_status = :in_doctype
|
251
|
+
elsif @source.match(/\s*>/um, true)
|
252
|
+
@document_status = :after_doctype
|
253
|
+
@source.ensure_buffer
|
254
|
+
else
|
255
|
+
message = "#{base_error_message}: garbage after external ID"
|
256
|
+
raise REXML::ParseException.new(message, @source)
|
257
|
+
end
|
250
258
|
end
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
@document_status = :after_doctype
|
261
|
-
if @source.encoding == "UTF-8"
|
262
|
-
@source.buffer.force_encoding(::Encoding::UTF_8)
|
259
|
+
args = [:start_doctype, name, *id]
|
260
|
+
if @document_status == :after_doctype
|
261
|
+
@source.match(/\s*/um, true)
|
262
|
+
@stack << [ :end_doctype ]
|
263
|
+
end
|
264
|
+
return args
|
265
|
+
else
|
266
|
+
message = "Invalid XML"
|
267
|
+
raise REXML::ParseException.new(message, @source)
|
263
268
|
end
|
264
269
|
end
|
265
270
|
end
|
266
271
|
if @document_status == :in_doctype
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
if match[1] == '%'
|
281
|
-
ref = true
|
282
|
-
match.delete_at 1
|
283
|
-
end
|
284
|
-
# Now we have to sort out what kind of entity reference this is
|
285
|
-
if match[2] == 'SYSTEM'
|
286
|
-
# External reference
|
287
|
-
match[3] = match[3][1..-2] # PUBID
|
288
|
-
match.delete_at(4) if match.size > 4 # Chop out NDATA decl
|
289
|
-
# match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
|
290
|
-
elsif match[2] == 'PUBLIC'
|
291
|
-
# External reference
|
292
|
-
match[3] = match[3][1..-2] # PUBID
|
293
|
-
match[4] = match[4][1..-2] # HREF
|
294
|
-
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
|
295
|
-
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
|
296
|
-
else
|
297
|
-
match[2] = match[2][1..-2]
|
298
|
-
match.pop if match.size == 4
|
299
|
-
# match is [ :entity, name, value ]
|
300
|
-
end
|
301
|
-
match << '%' if ref
|
302
|
-
return match
|
303
|
-
when ATTLISTDECL_START
|
304
|
-
md = @source.match( ATTLISTDECL_PATTERN, true )
|
305
|
-
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
306
|
-
element = md[1]
|
307
|
-
contents = md[0]
|
308
|
-
|
309
|
-
pairs = {}
|
310
|
-
values = md[0].scan( ATTDEF_RE )
|
311
|
-
values.each do |attdef|
|
312
|
-
unless attdef[3] == "#IMPLIED"
|
313
|
-
attdef.compact!
|
314
|
-
val = attdef[3]
|
315
|
-
val = attdef[4] if val == "#FIXED "
|
316
|
-
pairs[attdef[0]] = val
|
317
|
-
if attdef[0] =~ /^xmlns:(.*)/
|
318
|
-
@nsstack[0] << $1
|
319
|
-
end
|
272
|
+
@source.match(/\s*/um, true) # skip spaces
|
273
|
+
start_position = @source.position
|
274
|
+
if @source.match("<!", true)
|
275
|
+
if @source.match("ELEMENT", true)
|
276
|
+
md = @source.match(/(.*?)>/um, true)
|
277
|
+
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
278
|
+
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
279
|
+
elsif @source.match("ENTITY", true)
|
280
|
+
match = [:entitydecl, *@source.match(ENTITYDECL_PATTERN, true).captures.compact]
|
281
|
+
ref = false
|
282
|
+
if match[1] == '%'
|
283
|
+
ref = true
|
284
|
+
match.delete_at 1
|
320
285
|
end
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
286
|
+
# Now we have to sort out what kind of entity reference this is
|
287
|
+
if match[2] == 'SYSTEM'
|
288
|
+
# External reference
|
289
|
+
match[3] = match[3][1..-2] # PUBID
|
290
|
+
match.delete_at(4) if match.size > 4 # Chop out NDATA decl
|
291
|
+
# match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
|
292
|
+
elsif match[2] == 'PUBLIC'
|
293
|
+
# External reference
|
294
|
+
match[3] = match[3][1..-2] # PUBID
|
295
|
+
match[4] = match[4][1..-2] # HREF
|
296
|
+
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
|
297
|
+
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
|
328
298
|
else
|
329
|
-
|
299
|
+
match[2] = match[2][1..-2]
|
300
|
+
match.pop if match.size == 4
|
301
|
+
# match is [ :entity, name, value ]
|
330
302
|
end
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
303
|
+
match << '%' if ref
|
304
|
+
return match
|
305
|
+
elsif @source.match("ATTLIST", true)
|
306
|
+
md = @source.match(ATTLISTDECL_END, true)
|
307
|
+
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
308
|
+
element = md[1]
|
309
|
+
contents = md[0]
|
310
|
+
|
311
|
+
pairs = {}
|
312
|
+
values = md[0].scan( ATTDEF_RE )
|
313
|
+
values.each do |attdef|
|
314
|
+
unless attdef[3] == "#IMPLIED"
|
315
|
+
attdef.compact!
|
316
|
+
val = attdef[3]
|
317
|
+
val = attdef[4] if val == "#FIXED "
|
318
|
+
pairs[attdef[0]] = val
|
319
|
+
if attdef[0] =~ /^xmlns:(.*)/
|
320
|
+
@nsstack[0] << $1
|
321
|
+
end
|
322
|
+
end
|
323
|
+
end
|
324
|
+
return [ :attlistdecl, element, pairs, contents ]
|
325
|
+
elsif @source.match("NOTATION", true)
|
326
|
+
base_error_message = "Malformed notation declaration"
|
327
|
+
unless @source.match(/\s+/um, true)
|
328
|
+
if @source.match(">")
|
329
|
+
message = "#{base_error_message}: name is missing"
|
330
|
+
else
|
331
|
+
message = "#{base_error_message}: invalid name"
|
332
|
+
end
|
333
|
+
@source.position = start_position
|
334
|
+
raise REXML::ParseException.new(message, @source)
|
335
|
+
end
|
336
|
+
name = parse_name(base_error_message)
|
337
|
+
id = parse_id(base_error_message,
|
338
|
+
accept_external_id: true,
|
339
|
+
accept_public_id: true)
|
340
|
+
unless @source.match(/\s*>/um, true)
|
341
|
+
message = "#{base_error_message}: garbage before end >"
|
342
|
+
raise REXML::ParseException.new(message, @source)
|
343
|
+
end
|
344
|
+
return [:notationdecl, name, *id]
|
345
|
+
elsif md = @source.match(/--(.*?)-->/um, true)
|
346
|
+
case md[1]
|
347
|
+
when /--/, /-\z/
|
348
|
+
raise REXML::ParseException.new("Malformed comment", @source)
|
349
|
+
end
|
350
|
+
return [ :comment, md[1] ] if md
|
340
351
|
end
|
341
|
-
|
342
|
-
|
352
|
+
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
353
|
+
return [ :externalentity, match[1] ]
|
354
|
+
elsif @source.match(/\]\s*>/um, true)
|
343
355
|
@document_status = :after_doctype
|
344
|
-
@source.match( DOCTYPE_END, true )
|
345
356
|
return [ :end_doctype ]
|
346
357
|
end
|
347
358
|
end
|
348
359
|
if @document_status == :after_doctype
|
349
|
-
@source.match(/\
|
360
|
+
@source.match(/\s*/um, true)
|
350
361
|
end
|
351
362
|
begin
|
352
|
-
|
353
|
-
if @source.
|
354
|
-
if @source.
|
363
|
+
start_position = @source.position
|
364
|
+
if @source.match("<", true)
|
365
|
+
if @source.match("/", true)
|
355
366
|
@nsstack.shift
|
356
367
|
last_tag = @tags.pop
|
357
|
-
md = @source.match(
|
368
|
+
md = @source.match(CLOSE_PATTERN, true)
|
358
369
|
if md and !last_tag
|
359
370
|
message = "Unexpected top-level end tag (got '#{md[1]}')"
|
360
371
|
raise REXML::ParseException.new(message, @source)
|
361
372
|
end
|
362
373
|
if md.nil? or last_tag != md[1]
|
363
374
|
message = "Missing end tag for '#{last_tag}'"
|
364
|
-
message
|
375
|
+
message += " (got '#{md[1]}')" if md
|
376
|
+
@source.position = start_position if md.nil?
|
365
377
|
raise REXML::ParseException.new(message, @source)
|
366
378
|
end
|
367
379
|
return [ :end_element, last_tag ]
|
368
|
-
elsif @source.
|
369
|
-
md = @source.match(
|
380
|
+
elsif @source.match("!", true)
|
381
|
+
md = @source.match(/([^>]*>)/um)
|
370
382
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
371
383
|
raise REXML::ParseException.new("Malformed node", @source) unless md
|
372
|
-
if md[0][
|
373
|
-
md = @source.match(
|
384
|
+
if md[0][0] == ?-
|
385
|
+
md = @source.match(/--(.*?)-->/um, true)
|
374
386
|
|
375
387
|
case md[1]
|
376
388
|
when /--/, /-\z/
|
@@ -379,19 +391,21 @@ module REXML
|
|
379
391
|
|
380
392
|
return [ :comment, md[1] ] if md
|
381
393
|
else
|
382
|
-
md = @source.match(
|
394
|
+
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
|
383
395
|
return [ :cdata, md[1] ] if md
|
384
396
|
end
|
385
397
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
386
398
|
"in the doctype declaration.", @source)
|
387
|
-
elsif @source.
|
388
|
-
return process_instruction
|
399
|
+
elsif @source.match("?", true)
|
400
|
+
return process_instruction(start_position)
|
389
401
|
else
|
390
402
|
# Get the next tag
|
391
|
-
md = @source.match(
|
403
|
+
md = @source.match(TAG_PATTERN, true)
|
392
404
|
unless md
|
405
|
+
@source.position = start_position
|
393
406
|
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
394
407
|
end
|
408
|
+
tag = md[1]
|
395
409
|
@document_status = :in_element
|
396
410
|
prefixes = Set.new
|
397
411
|
prefixes << md[2] if md[2]
|
@@ -405,23 +419,17 @@ module REXML
|
|
405
419
|
end
|
406
420
|
|
407
421
|
if closed
|
408
|
-
@closed =
|
422
|
+
@closed = tag
|
409
423
|
@nsstack.shift
|
410
424
|
else
|
411
|
-
@tags.push(
|
425
|
+
@tags.push( tag )
|
412
426
|
end
|
413
|
-
return [ :start_element,
|
427
|
+
return [ :start_element, tag, attributes ]
|
414
428
|
end
|
415
429
|
else
|
416
|
-
md = @source.match(
|
417
|
-
|
418
|
-
|
419
|
-
end
|
420
|
-
#STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0
|
421
|
-
#return [ :text, "" ] if md[0].length == 0
|
422
|
-
# unnormalized = Text::unnormalize( md[1], self )
|
423
|
-
# return PullEvent.new( :text, md[1], unnormalized )
|
424
|
-
return [ :text, md[1] ]
|
430
|
+
md = @source.match(/([^<]*)/um, true)
|
431
|
+
text = md[1]
|
432
|
+
return [ :text, text ]
|
425
433
|
end
|
426
434
|
rescue REXML::UndefinedNamespaceException
|
427
435
|
raise
|
@@ -463,8 +471,7 @@ module REXML
|
|
463
471
|
|
464
472
|
# Unescapes all possible entities
|
465
473
|
def unnormalize( string, entities=nil, filter=nil )
|
466
|
-
rv = string.
|
467
|
-
rv.gsub!( /\r\n?/, "\n" )
|
474
|
+
rv = string.gsub( /\r\n?/, "\n" )
|
468
475
|
matches = rv.scan( REFERENCE_RE )
|
469
476
|
return rv if matches.size == 0
|
470
477
|
rv.gsub!( /�*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
|
@@ -499,9 +506,9 @@ module REXML
|
|
499
506
|
end
|
500
507
|
|
501
508
|
def parse_name(base_error_message)
|
502
|
-
md = @source.match(
|
509
|
+
md = @source.match(NAME_PATTERN, true)
|
503
510
|
unless md
|
504
|
-
if @source.match(/\
|
511
|
+
if @source.match(/\s*\S/um)
|
505
512
|
message = "#{base_error_message}: invalid name"
|
506
513
|
else
|
507
514
|
message = "#{base_error_message}: name is missing"
|
@@ -577,97 +584,89 @@ module REXML
|
|
577
584
|
end
|
578
585
|
end
|
579
586
|
|
580
|
-
def process_instruction
|
581
|
-
match_data = @source.match(
|
587
|
+
def process_instruction(start_position)
|
588
|
+
match_data = @source.match(INSTRUCTION_END, true)
|
582
589
|
unless match_data
|
583
590
|
message = "Invalid processing instruction node"
|
591
|
+
@source.position = start_position
|
584
592
|
raise REXML::ParseException.new(message, @source)
|
585
593
|
end
|
594
|
+
if @document_status.nil? and match_data[1] == "xml"
|
595
|
+
content = match_data[2]
|
596
|
+
version = VERSION.match(content)
|
597
|
+
version = version[1] unless version.nil?
|
598
|
+
encoding = ENCODING.match(content)
|
599
|
+
encoding = encoding[1] unless encoding.nil?
|
600
|
+
if need_source_encoding_update?(encoding)
|
601
|
+
@source.encoding = encoding
|
602
|
+
end
|
603
|
+
if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
|
604
|
+
encoding = "UTF-16"
|
605
|
+
end
|
606
|
+
standalone = STANDALONE.match(content)
|
607
|
+
standalone = standalone[1] unless standalone.nil?
|
608
|
+
return [ :xmldecl, version, encoding, standalone ]
|
609
|
+
end
|
586
610
|
[:processing_instruction, match_data[1], match_data[2]]
|
587
611
|
end
|
588
612
|
|
589
613
|
def parse_attributes(prefixes, curr_ns)
|
590
614
|
attributes = {}
|
591
615
|
closed = false
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
until scanner.eos?
|
605
|
-
if scanner.scan(/\s+/)
|
606
|
-
break if scanner.eos?
|
607
|
-
end
|
608
|
-
|
609
|
-
pos = scanner.pos
|
610
|
-
loop do
|
611
|
-
break if scanner.scan(ATTRIBUTE_PATTERN)
|
612
|
-
unless scanner.scan(QNAME)
|
613
|
-
message = "Invalid attribute name: <#{scanner.rest}>"
|
614
|
-
raise REXML::ParseException.new(message, @source)
|
615
|
-
end
|
616
|
-
name = scanner[0]
|
617
|
-
unless scanner.scan(/\s*=\s*/um)
|
616
|
+
while true
|
617
|
+
if @source.match(">", true)
|
618
|
+
return attributes, closed
|
619
|
+
elsif @source.match("/>", true)
|
620
|
+
closed = true
|
621
|
+
return attributes, closed
|
622
|
+
elsif match = @source.match(QNAME, true)
|
623
|
+
name = match[1]
|
624
|
+
prefix = match[2]
|
625
|
+
local_part = match[3]
|
626
|
+
|
627
|
+
unless @source.match(/\s*=\s*/um, true)
|
618
628
|
message = "Missing attribute equal: <#{name}>"
|
619
629
|
raise REXML::ParseException.new(message, @source)
|
620
630
|
end
|
621
|
-
|
622
|
-
unless quote
|
631
|
+
unless match = @source.match(/(['"])/, true)
|
623
632
|
message = "Missing attribute value start quote: <#{name}>"
|
624
633
|
raise REXML::ParseException.new(message, @source)
|
625
634
|
end
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
scanner << ">"
|
631
|
-
scanner << match_data[1]
|
632
|
-
scanner.pos = pos
|
633
|
-
closed = !match_data[2].nil?
|
634
|
-
next
|
635
|
-
end
|
636
|
-
message =
|
637
|
-
"Missing attribute value end quote: <#{name}>: <#{quote}>"
|
635
|
+
quote = match[1]
|
636
|
+
value = @source.read_until(quote)
|
637
|
+
unless value.chomp!(quote)
|
638
|
+
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
638
639
|
raise REXML::ParseException.new(message, @source)
|
639
640
|
end
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
msg = "The '
|
641
|
+
@source.match(/\s*/um, true)
|
642
|
+
if prefix == "xmlns"
|
643
|
+
if local_part == "xml"
|
644
|
+
if value != "http://www.w3.org/XML/1998/namespace"
|
645
|
+
msg = "The 'xml' prefix must not be bound to any other namespace "+
|
646
|
+
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
647
|
+
raise REXML::ParseException.new( msg, @source, self )
|
648
|
+
end
|
649
|
+
elsif local_part == "xmlns"
|
650
|
+
msg = "The 'xmlns' prefix must not be declared "+
|
650
651
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
651
|
-
raise REXML::ParseException.new( msg, @source, self
|
652
|
+
raise REXML::ParseException.new( msg, @source, self)
|
652
653
|
end
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
raise REXML::ParseException.new( msg, @source, self)
|
654
|
+
curr_ns << local_part
|
655
|
+
elsif prefix
|
656
|
+
prefixes << prefix unless prefix == "xml"
|
657
657
|
end
|
658
|
-
curr_ns << local_part
|
659
|
-
elsif prefix
|
660
|
-
prefixes << prefix unless prefix == "xml"
|
661
|
-
end
|
662
658
|
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
659
|
+
if attributes[name]
|
660
|
+
msg = "Duplicate attribute #{name.inspect}"
|
661
|
+
raise REXML::ParseException.new(msg, @source, self)
|
662
|
+
end
|
667
663
|
|
668
|
-
|
664
|
+
attributes[name] = value
|
665
|
+
else
|
666
|
+
message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
|
667
|
+
raise REXML::ParseException.new(message, @source)
|
668
|
+
end
|
669
669
|
end
|
670
|
-
return attributes, closed
|
671
670
|
end
|
672
671
|
end
|
673
672
|
end
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -30,8 +30,6 @@ module REXML
|
|
30
30
|
# objects and provides consumption of text
|
31
31
|
class Source
|
32
32
|
include Encoding
|
33
|
-
# The current buffer (what we're going to read next)
|
34
|
-
attr_reader :buffer
|
35
33
|
# The line number of the last consumed text
|
36
34
|
attr_reader :line
|
37
35
|
attr_reader :encoding
|
@@ -41,7 +39,8 @@ module REXML
|
|
41
39
|
# @param encoding if non-null, sets the encoding of the source to this
|
42
40
|
# value, overriding all encoding detection
|
43
41
|
def initialize(arg, encoding=nil)
|
44
|
-
@orig =
|
42
|
+
@orig = arg
|
43
|
+
@scanner = StringScanner.new(@orig)
|
45
44
|
if encoding
|
46
45
|
self.encoding = encoding
|
47
46
|
else
|
@@ -50,6 +49,14 @@ module REXML
|
|
50
49
|
@line = 0
|
51
50
|
end
|
52
51
|
|
52
|
+
# The current buffer (what we're going to read next)
|
53
|
+
def buffer
|
54
|
+
@scanner.rest
|
55
|
+
end
|
56
|
+
|
57
|
+
def buffer_encoding=(encoding)
|
58
|
+
@scanner.string.force_encoding(encoding)
|
59
|
+
end
|
53
60
|
|
54
61
|
# Inherited from Encoding
|
55
62
|
# Overridden to support optimized en/decoding
|
@@ -58,98 +65,72 @@ module REXML
|
|
58
65
|
encoding_updated
|
59
66
|
end
|
60
67
|
|
61
|
-
|
62
|
-
# usual scan() method. For one thing, the pattern argument has some
|
63
|
-
# requirements; for another, the source can be consumed. You can easily
|
64
|
-
# confuse this method. Originally, the patterns were easier
|
65
|
-
# to construct and this method more robust, because this method
|
66
|
-
# generated search regexps on the fly; however, this was
|
67
|
-
# computationally expensive and slowed down the entire REXML package
|
68
|
-
# considerably, since this is by far the most commonly called method.
|
69
|
-
# @param pattern must be a Regexp, and must be in the form of
|
70
|
-
# /^\s*(#{your pattern, with no groups})(.*)/. The first group
|
71
|
-
# will be returned; the second group is used if the consume flag is
|
72
|
-
# set.
|
73
|
-
# @param consume if true, the pattern returned will be consumed, leaving
|
74
|
-
# everything after it in the Source.
|
75
|
-
# @return the pattern, if found, or nil if the Source is empty or the
|
76
|
-
# pattern is not found.
|
77
|
-
def scan(pattern, cons=false)
|
78
|
-
return nil if @buffer.nil?
|
79
|
-
rv = @buffer.scan(pattern)
|
80
|
-
@buffer = $' if cons and rv.size>0
|
81
|
-
rv
|
68
|
+
def read(term = nil)
|
82
69
|
end
|
83
70
|
|
84
|
-
def
|
71
|
+
def read_until(term)
|
72
|
+
@scanner.scan_until(Regexp.union(term)) or @scanner.rest
|
85
73
|
end
|
86
74
|
|
87
|
-
def
|
88
|
-
@buffer = $' if pattern.match( @buffer )
|
75
|
+
def ensure_buffer
|
89
76
|
end
|
90
77
|
|
91
|
-
def
|
92
|
-
|
78
|
+
def match(pattern, cons=false)
|
79
|
+
if cons
|
80
|
+
@scanner.scan(pattern).nil? ? nil : @scanner
|
81
|
+
else
|
82
|
+
@scanner.check(pattern).nil? ? nil : @scanner
|
83
|
+
end
|
93
84
|
end
|
94
85
|
|
95
|
-
def
|
96
|
-
|
97
|
-
@buffer = $'
|
98
|
-
return md
|
86
|
+
def position
|
87
|
+
@scanner.pos
|
99
88
|
end
|
100
89
|
|
101
|
-
def
|
102
|
-
|
103
|
-
@buffer = $' if cons and md
|
104
|
-
return md
|
90
|
+
def position=(pos)
|
91
|
+
@scanner.pos = pos
|
105
92
|
end
|
106
93
|
|
107
94
|
# @return true if the Source is exhausted
|
108
95
|
def empty?
|
109
|
-
@
|
110
|
-
end
|
111
|
-
|
112
|
-
def position
|
113
|
-
@orig.index( @buffer )
|
96
|
+
@scanner.eos?
|
114
97
|
end
|
115
98
|
|
116
99
|
# @return the current line in the source
|
117
100
|
def current_line
|
118
101
|
lines = @orig.split
|
119
|
-
res = lines.grep @
|
102
|
+
res = lines.grep @scanner.rest[0..30]
|
120
103
|
res = res[-1] if res.kind_of? Array
|
121
104
|
lines.index( res ) if res
|
122
105
|
end
|
123
106
|
|
124
107
|
private
|
108
|
+
|
125
109
|
def detect_encoding
|
126
|
-
|
110
|
+
scanner_encoding = @scanner.rest.encoding
|
127
111
|
detected_encoding = "UTF-8"
|
128
112
|
begin
|
129
|
-
@
|
130
|
-
if @
|
131
|
-
@buffer[0, 2] = ""
|
113
|
+
@scanner.string.force_encoding("ASCII-8BIT")
|
114
|
+
if @scanner.scan(/\xfe\xff/n)
|
132
115
|
detected_encoding = "UTF-16BE"
|
133
|
-
elsif @
|
134
|
-
@buffer[0, 2] = ""
|
116
|
+
elsif @scanner.scan(/\xff\xfe/n)
|
135
117
|
detected_encoding = "UTF-16LE"
|
136
|
-
elsif @
|
137
|
-
@buffer[0, 3] = ""
|
118
|
+
elsif @scanner.scan(/\xef\xbb\xbf/n)
|
138
119
|
detected_encoding = "UTF-8"
|
139
120
|
end
|
140
121
|
ensure
|
141
|
-
@
|
122
|
+
@scanner.string.force_encoding(scanner_encoding)
|
142
123
|
end
|
143
124
|
self.encoding = detected_encoding
|
144
125
|
end
|
145
126
|
|
146
127
|
def encoding_updated
|
147
128
|
if @encoding != 'UTF-8'
|
148
|
-
@
|
129
|
+
@scanner.string = decode(@scanner.rest)
|
149
130
|
@to_utf = true
|
150
131
|
else
|
151
132
|
@to_utf = false
|
152
|
-
@
|
133
|
+
@scanner.string.force_encoding(::Encoding::UTF_8)
|
153
134
|
end
|
154
135
|
end
|
155
136
|
end
|
@@ -172,7 +153,7 @@ module REXML
|
|
172
153
|
end
|
173
154
|
|
174
155
|
if !@to_utf and
|
175
|
-
@
|
156
|
+
@orig.respond_to?(:force_encoding) and
|
176
157
|
@source.respond_to?(:external_encoding) and
|
177
158
|
@source.external_encoding != ::Encoding::UTF_8
|
178
159
|
@force_utf8 = true
|
@@ -181,65 +162,58 @@ module REXML
|
|
181
162
|
end
|
182
163
|
end
|
183
164
|
|
184
|
-
def
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
if rv.size == 0
|
192
|
-
until @buffer =~ pattern or @source.nil?
|
193
|
-
begin
|
194
|
-
@buffer << readline
|
195
|
-
rescue Iconv::IllegalSequence
|
196
|
-
raise
|
197
|
-
rescue
|
198
|
-
@source = nil
|
199
|
-
end
|
200
|
-
end
|
201
|
-
rv = super
|
165
|
+
def read(term = nil)
|
166
|
+
begin
|
167
|
+
@scanner << readline(term)
|
168
|
+
true
|
169
|
+
rescue Exception, NameError
|
170
|
+
@source = nil
|
171
|
+
false
|
202
172
|
end
|
203
|
-
rv.taint if RUBY_VERSION < '2.7'
|
204
|
-
rv
|
205
173
|
end
|
206
174
|
|
207
|
-
def
|
175
|
+
def read_until(term)
|
176
|
+
pattern = Regexp.union(term)
|
177
|
+
data = []
|
208
178
|
begin
|
209
|
-
|
210
|
-
|
211
|
-
|
179
|
+
until str = @scanner.scan_until(pattern)
|
180
|
+
@scanner << readline(term)
|
181
|
+
end
|
182
|
+
rescue EOFError
|
183
|
+
@scanner.rest
|
184
|
+
else
|
185
|
+
read if @scanner.eos? and !@source.eof?
|
186
|
+
str
|
212
187
|
end
|
213
188
|
end
|
214
189
|
|
215
|
-
def
|
216
|
-
|
190
|
+
def ensure_buffer
|
191
|
+
read if @scanner.eos? && @source
|
217
192
|
end
|
218
193
|
|
194
|
+
# Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
|
195
|
+
# - ">"
|
196
|
+
# - "XXX>" (X is any string excluding '>')
|
219
197
|
def match( pattern, cons=false )
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
rv = pattern.match(@buffer)
|
226
|
-
@buffer = $' if cons and rv
|
227
|
-
rescue
|
228
|
-
@source = nil
|
198
|
+
while true
|
199
|
+
if cons
|
200
|
+
md = @scanner.scan(pattern)
|
201
|
+
else
|
202
|
+
md = @scanner.check(pattern)
|
229
203
|
end
|
204
|
+
break if md
|
205
|
+
return nil if pattern.is_a?(String)
|
206
|
+
return nil if @source.nil?
|
207
|
+
return nil unless read
|
230
208
|
end
|
231
|
-
|
232
|
-
|
209
|
+
|
210
|
+
md.nil? ? nil : @scanner
|
233
211
|
end
|
234
212
|
|
235
213
|
def empty?
|
236
214
|
super and ( @source.nil? || @source.eof? )
|
237
215
|
end
|
238
216
|
|
239
|
-
def position
|
240
|
-
@er_source.pos rescue 0
|
241
|
-
end
|
242
|
-
|
243
217
|
# @return the current line in the source
|
244
218
|
def current_line
|
245
219
|
begin
|
@@ -263,8 +237,8 @@ module REXML
|
|
263
237
|
end
|
264
238
|
|
265
239
|
private
|
266
|
-
def readline
|
267
|
-
str = @source.readline(@line_break)
|
240
|
+
def readline(term = nil)
|
241
|
+
str = @source.readline(term || @line_break)
|
268
242
|
if @pending_buffer
|
269
243
|
if str.nil?
|
270
244
|
str = @pending_buffer
|
@@ -290,7 +264,7 @@ module REXML
|
|
290
264
|
@source.set_encoding(@encoding, @encoding)
|
291
265
|
end
|
292
266
|
@line_break = encode(">")
|
293
|
-
@pending_buffer, @
|
267
|
+
@pending_buffer, @scanner.string = @scanner.rest, ""
|
294
268
|
@pending_buffer.force_encoding(@encoding)
|
295
269
|
super
|
296
270
|
end
|
data/lib/rexml/xpath_parser.rb
CHANGED
@@ -590,6 +590,7 @@ module REXML
|
|
590
590
|
|
591
591
|
def evaluate_predicate(expression, nodesets)
|
592
592
|
enter(:predicate, expression, nodesets) if @debug
|
593
|
+
new_nodeset_count = 0
|
593
594
|
new_nodesets = nodesets.collect do |nodeset|
|
594
595
|
new_nodeset = []
|
595
596
|
subcontext = { :size => nodeset.size }
|
@@ -606,17 +607,20 @@ module REXML
|
|
606
607
|
result = result[0] if result.kind_of? Array and result.length == 1
|
607
608
|
if result.kind_of? Numeric
|
608
609
|
if result == node.position
|
609
|
-
|
610
|
+
new_nodeset_count += 1
|
611
|
+
new_nodeset << XPathNode.new(node, position: new_nodeset_count)
|
610
612
|
end
|
611
613
|
elsif result.instance_of? Array
|
612
614
|
if result.size > 0 and result.inject(false) {|k,s| s or k}
|
613
615
|
if result.size > 0
|
614
|
-
|
616
|
+
new_nodeset_count += 1
|
617
|
+
new_nodeset << XPathNode.new(node, position: new_nodeset_count)
|
615
618
|
end
|
616
619
|
end
|
617
620
|
else
|
618
621
|
if result
|
619
|
-
|
622
|
+
new_nodeset_count += 1
|
623
|
+
new_nodeset << XPathNode.new(node, position: new_nodeset_count)
|
620
624
|
end
|
621
625
|
end
|
622
626
|
end
|
metadata
CHANGED
@@ -1,57 +1,28 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.2.
|
4
|
+
version: 3.2.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 2024-05-16 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
13
|
+
name: strscan
|
15
14
|
requirement: !ruby/object:Gem::Requirement
|
16
15
|
requirements:
|
17
16
|
- - ">="
|
18
17
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
20
|
-
type: :
|
18
|
+
version: 3.0.9
|
19
|
+
type: :runtime
|
21
20
|
prerelease: false
|
22
21
|
version_requirements: !ruby/object:Gem::Requirement
|
23
22
|
requirements:
|
24
23
|
- - ">="
|
25
24
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rake
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: test-unit
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
25
|
+
version: 3.0.9
|
55
26
|
description: An XML toolkit for Ruby
|
56
27
|
email:
|
57
28
|
- kou@cozmixng.org
|
@@ -145,7 +116,6 @@ homepage: https://github.com/ruby/rexml
|
|
145
116
|
licenses:
|
146
117
|
- BSD-2-Clause
|
147
118
|
metadata: {}
|
148
|
-
post_install_message:
|
149
119
|
rdoc_options:
|
150
120
|
- "--main"
|
151
121
|
- README.md
|
@@ -162,8 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
162
132
|
- !ruby/object:Gem::Version
|
163
133
|
version: '0'
|
164
134
|
requirements: []
|
165
|
-
rubygems_version: 3.
|
166
|
-
signing_key:
|
135
|
+
rubygems_version: 3.6.0.dev
|
167
136
|
specification_version: 4
|
168
137
|
summary: An XML toolkit for Ruby
|
169
138
|
test_files: []
|