rexml 3.1.9 → 3.1.9.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +6 -0
- data/lib/rexml/doctype.rb +52 -19
- data/lib/rexml/parsers/baseparser.rb +139 -39
- data/lib/rexml/rexml.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: be344b60e7ffcb223b739ee96a7a98008be5177ea49c4bd2de82109438525a65
|
4
|
+
data.tar.gz: 57a4ed3ee747d87bdd010ce03e57a24101826b4103d3faf1e828719600d3060a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 874ac6300ee76cdd4d713497fa63e39aac5969b2a1f271d6cdda988db22ebffa291044eac0a2f6debae0202406fae6e6ba0fef4e5cae33159b13a478951158e0
|
7
|
+
data.tar.gz: 92d623b9da84327b1bde85d2564b4c1bb5c01aa431f6ec225d8fd9455a9398976b63ecfbb991ab6742e203b5f8db340393ea5cdd1c1c83aaab6d265645fe4bfe
|
data/NEWS.md
CHANGED
data/lib/rexml/doctype.rb
CHANGED
@@ -7,6 +7,44 @@ require_relative 'attlistdecl'
|
|
7
7
|
require_relative 'xmltokens'
|
8
8
|
|
9
9
|
module REXML
|
10
|
+
class ReferenceWriter
|
11
|
+
def initialize(id_type,
|
12
|
+
public_id_literal,
|
13
|
+
system_literal,
|
14
|
+
context=nil)
|
15
|
+
@id_type = id_type
|
16
|
+
@public_id_literal = public_id_literal
|
17
|
+
@system_literal = system_literal
|
18
|
+
if context and context[:prologue_quote] == :apostrophe
|
19
|
+
@default_quote = "'"
|
20
|
+
else
|
21
|
+
@default_quote = "\""
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def write(output)
|
26
|
+
output << " #{@id_type}"
|
27
|
+
if @public_id_literal
|
28
|
+
if @public_id_literal.include?("'")
|
29
|
+
quote = "\""
|
30
|
+
else
|
31
|
+
quote = @default_quote
|
32
|
+
end
|
33
|
+
output << " #{quote}#{@public_id_literal}#{quote}"
|
34
|
+
end
|
35
|
+
if @system_literal
|
36
|
+
if @system_literal.include?("'")
|
37
|
+
quote = "\""
|
38
|
+
elsif @system_literal.include?("\"")
|
39
|
+
quote = "'"
|
40
|
+
else
|
41
|
+
quote = @default_quote
|
42
|
+
end
|
43
|
+
output << " #{quote}#{@system_literal}#{quote}"
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
10
48
|
# Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE
|
11
49
|
# ... >. DOCTYPES can be used to declare the DTD of a document, as well as
|
12
50
|
# being used to declare entities used in the document.
|
@@ -50,6 +88,8 @@ module REXML
|
|
50
88
|
super( parent )
|
51
89
|
@name = first.name
|
52
90
|
@external_id = first.external_id
|
91
|
+
@long_name = first.instance_variable_get(:@long_name)
|
92
|
+
@uri = first.instance_variable_get(:@uri)
|
53
93
|
elsif first.kind_of? Array
|
54
94
|
super( parent )
|
55
95
|
@name = first[0]
|
@@ -108,19 +148,17 @@ module REXML
|
|
108
148
|
# Ignored
|
109
149
|
def write( output, indent=0, transitive=false, ie_hack=false )
|
110
150
|
f = REXML::Formatters::Default.new
|
111
|
-
c = context
|
112
|
-
if c and c[:prologue_quote] == :apostrophe
|
113
|
-
quote = "'"
|
114
|
-
else
|
115
|
-
quote = "\""
|
116
|
-
end
|
117
151
|
indent( output, indent )
|
118
152
|
output << START
|
119
153
|
output << ' '
|
120
154
|
output << @name
|
121
|
-
|
122
|
-
|
123
|
-
|
155
|
+
if @external_id
|
156
|
+
reference_writer = ReferenceWriter.new(@external_id,
|
157
|
+
@long_name,
|
158
|
+
@uri,
|
159
|
+
context)
|
160
|
+
reference_writer.write(output)
|
161
|
+
end
|
124
162
|
unless @children.empty?
|
125
163
|
output << ' ['
|
126
164
|
@children.each { |child|
|
@@ -259,16 +297,11 @@ module REXML
|
|
259
297
|
end
|
260
298
|
|
261
299
|
def to_s
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
quote = "\""
|
268
|
-
end
|
269
|
-
notation = "<!NOTATION #{@name} #{@middle}"
|
270
|
-
notation << " #{quote}#{@public}#{quote}" if @public
|
271
|
-
notation << " #{quote}#{@system}#{quote}" if @system
|
300
|
+
context = nil
|
301
|
+
context = parent.context if parent
|
302
|
+
notation = "<!NOTATION #{@name}"
|
303
|
+
reference_writer = ReferenceWriter.new(@middle, @public, @system, context)
|
304
|
+
reference_writer.write(notation)
|
272
305
|
notation << ">"
|
273
306
|
notation
|
274
307
|
end
|
@@ -50,7 +50,6 @@ module REXML
|
|
50
50
|
|
51
51
|
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
|
52
52
|
DOCTYPE_END = /\A\s*\]\s*>/um
|
53
|
-
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
|
54
53
|
ATTRIBUTE_PATTERN = /\s*(#{QNAME_STR})\s*=\s*(["'])(.*?)\4/um
|
55
54
|
COMMENT_START = /\A<!--/u
|
56
55
|
COMMENT_PATTERN = /<!--(.*?)-->/um
|
@@ -61,15 +60,14 @@ module REXML
|
|
61
60
|
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
|
62
61
|
INSTRUCTION_START = /\A<\?/u
|
63
62
|
INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um
|
64
|
-
TAG_MATCH =
|
65
|
-
CLOSE_MATCH =
|
63
|
+
TAG_MATCH = /\A<((?>#{QNAME_STR}))/um
|
64
|
+
CLOSE_MATCH = /\A\s*<\/(#{QNAME_STR})\s*>/um
|
66
65
|
|
67
66
|
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
|
68
67
|
ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
|
69
68
|
STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
|
70
69
|
|
71
70
|
ENTITY_START = /\A\s*<!ENTITY/
|
72
|
-
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
|
73
71
|
ELEMENTDECL_START = /\A\s*<!ELEMENT/um
|
74
72
|
ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
|
75
73
|
SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
|
@@ -83,9 +81,6 @@ module REXML
|
|
83
81
|
ATTDEF_RE = /#{ATTDEF}/
|
84
82
|
ATTLISTDECL_START = /\A\s*<!ATTLIST/um
|
85
83
|
ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
86
|
-
NOTATIONDECL_START = /\A\s*<!NOTATION/um
|
87
|
-
PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
|
88
|
-
SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
|
89
84
|
|
90
85
|
TEXT_PATTERN = /\A([^<]*)/um
|
91
86
|
|
@@ -103,6 +98,11 @@ module REXML
|
|
103
98
|
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
104
99
|
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
|
105
100
|
|
101
|
+
NOTATIONDECL_START = /\A\s*<!NOTATION/um
|
102
|
+
EXTERNAL_ID_PUBLIC = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*/um
|
103
|
+
EXTERNAL_ID_SYSTEM = /\A\s*SYSTEM\s+#{SYSTEMLITERAL}\s*/um
|
104
|
+
PUBLIC_ID = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s*/um
|
105
|
+
|
106
106
|
EREFERENCE = /&(?!#{NAME};)/
|
107
107
|
|
108
108
|
DEFAULT_ENTITIES = {
|
@@ -195,11 +195,9 @@ module REXML
|
|
195
195
|
return [ :end_document ] if empty?
|
196
196
|
return @stack.shift if @stack.size > 0
|
197
197
|
#STDERR.puts @source.encoding
|
198
|
-
@source.read if @source.buffer.size<2
|
199
198
|
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
|
200
199
|
if @document_status == nil
|
201
|
-
|
202
|
-
word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
|
200
|
+
word = @source.match( /\A((?:\s+)|(?:<[^>]*>))/um )
|
203
201
|
word = word[1] unless word.nil?
|
204
202
|
#STDERR.puts "WORD = #{word.inspect}"
|
205
203
|
case word
|
@@ -224,38 +222,49 @@ module REXML
|
|
224
222
|
when INSTRUCTION_START
|
225
223
|
return process_instruction
|
226
224
|
when DOCTYPE_START
|
227
|
-
|
225
|
+
base_error_message = "Malformed DOCTYPE"
|
226
|
+
@source.match(DOCTYPE_START, true)
|
228
227
|
@nsstack.unshift(curr_ns=Set.new)
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
long_name = $4.nil? ? nil : $4.strip
|
236
|
-
uri = $6.nil? ? nil : $6.strip
|
237
|
-
args = [ :start_doctype, name, pub_sys, long_name, uri ]
|
238
|
-
if close == ">"
|
228
|
+
name = parse_name(base_error_message)
|
229
|
+
if @source.match(/\A\s*\[/um, true)
|
230
|
+
id = [nil, nil, nil]
|
231
|
+
@document_status = :in_doctype
|
232
|
+
elsif @source.match(/\A\s*>/um, true)
|
233
|
+
id = [nil, nil, nil]
|
239
234
|
@document_status = :after_doctype
|
240
|
-
@source.read if @source.buffer.size<2
|
241
|
-
md = @source.match(/^\s*/um, true)
|
242
|
-
@stack << [ :end_doctype ]
|
243
235
|
else
|
244
|
-
|
236
|
+
id = parse_id(base_error_message,
|
237
|
+
accept_external_id: true,
|
238
|
+
accept_public_id: false)
|
239
|
+
if id[0] == "SYSTEM"
|
240
|
+
# For backward compatibility
|
241
|
+
id[1], id[2] = id[2], nil
|
242
|
+
end
|
243
|
+
if @source.match(/\A\s*\[/um, true)
|
244
|
+
@document_status = :in_doctype
|
245
|
+
elsif @source.match(/\A\s*>/um, true)
|
246
|
+
@document_status = :after_doctype
|
247
|
+
else
|
248
|
+
message = "#{base_error_message}: garbage after external ID"
|
249
|
+
raise REXML::ParseException.new(message, @source)
|
250
|
+
end
|
251
|
+
end
|
252
|
+
args = [:start_doctype, name, *id]
|
253
|
+
if @document_status == :after_doctype
|
254
|
+
@source.match(/\A\s*/um, true)
|
255
|
+
@stack << [ :end_doctype ]
|
245
256
|
end
|
246
257
|
return args
|
247
|
-
when
|
258
|
+
when /\A\s+/
|
248
259
|
else
|
249
260
|
@document_status = :after_doctype
|
250
|
-
@source.read if @source.buffer.size<2
|
251
|
-
md = @source.match(/\s*/um, true)
|
252
261
|
if @source.encoding == "UTF-8"
|
253
262
|
@source.buffer.force_encoding(::Encoding::UTF_8)
|
254
263
|
end
|
255
264
|
end
|
256
265
|
end
|
257
266
|
if @document_status == :in_doctype
|
258
|
-
md = @source.match(/\s*(.*?>)/um)
|
267
|
+
md = @source.match(/\A\s*(.*?>)/um)
|
259
268
|
case md[1]
|
260
269
|
when SYSTEMENTITY
|
261
270
|
match = @source.match( SYSTEMENTITY, true )[1]
|
@@ -312,24 +321,35 @@ module REXML
|
|
312
321
|
end
|
313
322
|
return [ :attlistdecl, element, pairs, contents ]
|
314
323
|
when NOTATIONDECL_START
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
|
324
|
+
base_error_message = "Malformed notation declaration"
|
325
|
+
unless @source.match(/\A\s*<!NOTATION\s+/um, true)
|
326
|
+
if @source.match(/\A\s*<!NOTATION\s*>/um)
|
327
|
+
message = "#{base_error_message}: name is missing"
|
328
|
+
else
|
329
|
+
message = "#{base_error_message}: invalid declaration name"
|
330
|
+
end
|
331
|
+
raise REXML::ParseException.new(message, @source)
|
324
332
|
end
|
325
|
-
|
333
|
+
name = parse_name(base_error_message)
|
334
|
+
id = parse_id(base_error_message,
|
335
|
+
accept_external_id: true,
|
336
|
+
accept_public_id: true)
|
337
|
+
unless @source.match(/\A\s*>/um, true)
|
338
|
+
message = "#{base_error_message}: garbage before end >"
|
339
|
+
raise REXML::ParseException.new(message, @source)
|
340
|
+
end
|
341
|
+
return [:notationdecl, name, *id]
|
326
342
|
when DOCTYPE_END
|
327
343
|
@document_status = :after_doctype
|
328
344
|
@source.match( DOCTYPE_END, true )
|
329
345
|
return [ :end_doctype ]
|
330
346
|
end
|
331
347
|
end
|
348
|
+
if @document_status == :after_doctype
|
349
|
+
@source.match(/\A\s*/um, true)
|
350
|
+
end
|
332
351
|
begin
|
352
|
+
@source.read if @source.buffer.size<2
|
333
353
|
if @source.buffer[0] == ?<
|
334
354
|
if @source.buffer[1] == ?/
|
335
355
|
@nsstack.shift
|
@@ -368,6 +388,7 @@ module REXML
|
|
368
388
|
unless md
|
369
389
|
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
370
390
|
end
|
391
|
+
@document_status = :in_element
|
371
392
|
prefixes = Set.new
|
372
393
|
prefixes << md[2] if md[2]
|
373
394
|
@nsstack.unshift(curr_ns=Set.new)
|
@@ -473,6 +494,85 @@ module REXML
|
|
473
494
|
true
|
474
495
|
end
|
475
496
|
|
497
|
+
def parse_name(base_error_message)
|
498
|
+
md = @source.match(/\A\s*#{NAME}/um, true)
|
499
|
+
unless md
|
500
|
+
if @source.match(/\A\s*\S/um)
|
501
|
+
message = "#{base_error_message}: invalid name"
|
502
|
+
else
|
503
|
+
message = "#{base_error_message}: name is missing"
|
504
|
+
end
|
505
|
+
raise REXML::ParseException.new(message, @source)
|
506
|
+
end
|
507
|
+
md[1]
|
508
|
+
end
|
509
|
+
|
510
|
+
def parse_id(base_error_message,
|
511
|
+
accept_external_id:,
|
512
|
+
accept_public_id:)
|
513
|
+
if accept_external_id and (md = @source.match(EXTERNAL_ID_PUBLIC, true))
|
514
|
+
pubid = system = nil
|
515
|
+
pubid_literal = md[1]
|
516
|
+
pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
|
517
|
+
system_literal = md[2]
|
518
|
+
system = system_literal[1..-2] if system_literal # Remove quote
|
519
|
+
["PUBLIC", pubid, system]
|
520
|
+
elsif accept_public_id and (md = @source.match(PUBLIC_ID, true))
|
521
|
+
pubid = system = nil
|
522
|
+
pubid_literal = md[1]
|
523
|
+
pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
|
524
|
+
["PUBLIC", pubid, nil]
|
525
|
+
elsif accept_external_id and (md = @source.match(EXTERNAL_ID_SYSTEM, true))
|
526
|
+
system = nil
|
527
|
+
system_literal = md[1]
|
528
|
+
system = system_literal[1..-2] if system_literal # Remove quote
|
529
|
+
["SYSTEM", nil, system]
|
530
|
+
else
|
531
|
+
details = parse_id_invalid_details(accept_external_id: accept_external_id,
|
532
|
+
accept_public_id: accept_public_id)
|
533
|
+
message = "#{base_error_message}: #{details}"
|
534
|
+
raise REXML::ParseException.new(message, @source)
|
535
|
+
end
|
536
|
+
end
|
537
|
+
|
538
|
+
def parse_id_invalid_details(accept_external_id:,
|
539
|
+
accept_public_id:)
|
540
|
+
public = /\A\s*PUBLIC/um
|
541
|
+
system = /\A\s*SYSTEM/um
|
542
|
+
if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
|
543
|
+
if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
544
|
+
return "public ID literal is missing"
|
545
|
+
end
|
546
|
+
unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
|
547
|
+
return "invalid public ID literal"
|
548
|
+
end
|
549
|
+
if accept_public_id
|
550
|
+
if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
551
|
+
return "system ID literal is missing"
|
552
|
+
end
|
553
|
+
unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
554
|
+
return "invalid system literal"
|
555
|
+
end
|
556
|
+
"garbage after system literal"
|
557
|
+
else
|
558
|
+
"garbage after public ID literal"
|
559
|
+
end
|
560
|
+
elsif accept_external_id and @source.match(/#{system}/um)
|
561
|
+
if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
562
|
+
return "system literal is missing"
|
563
|
+
end
|
564
|
+
unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
|
565
|
+
return "invalid system literal"
|
566
|
+
end
|
567
|
+
"garbage after system literal"
|
568
|
+
else
|
569
|
+
unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
570
|
+
return "invalid ID type"
|
571
|
+
end
|
572
|
+
"ID type is missing"
|
573
|
+
end
|
574
|
+
end
|
575
|
+
|
476
576
|
def process_instruction
|
477
577
|
match_data = @source.match(INSTRUCTION_PATTERN, true)
|
478
578
|
unless match_data
|
data/lib/rexml/rexml.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.9
|
4
|
+
version: 3.1.9.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-09-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -122,7 +122,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
122
122
|
- !ruby/object:Gem::Version
|
123
123
|
version: '0'
|
124
124
|
requirements: []
|
125
|
-
rubygems_version: 3.0.
|
125
|
+
rubygems_version: 3.3.0.dev
|
126
126
|
signing_key:
|
127
127
|
specification_version: 4
|
128
128
|
summary: An XML toolkit for Ruby
|