rexml 3.2.4 → 3.2.6
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +135 -0
- data/README.md +11 -14
- data/doc/rexml/context.rdoc +143 -0
- data/doc/rexml/tasks/rdoc/child.rdoc +87 -0
- data/doc/rexml/tasks/rdoc/document.rdoc +276 -0
- data/doc/rexml/tasks/rdoc/element.rdoc +602 -0
- data/doc/rexml/tasks/rdoc/node.rdoc +97 -0
- data/doc/rexml/tasks/rdoc/parent.rdoc +267 -0
- data/doc/rexml/tasks/tocs/child_toc.rdoc +12 -0
- data/doc/rexml/tasks/tocs/document_toc.rdoc +30 -0
- data/doc/rexml/tasks/tocs/element_toc.rdoc +55 -0
- data/doc/rexml/tasks/tocs/master_toc.rdoc +135 -0
- data/doc/rexml/tasks/tocs/node_toc.rdoc +16 -0
- data/doc/rexml/tasks/tocs/parent_toc.rdoc +25 -0
- data/doc/rexml/tutorial.rdoc +1358 -0
- data/lib/rexml/attribute.rb +14 -9
- data/lib/rexml/doctype.rb +55 -31
- data/lib/rexml/document.rb +194 -34
- data/lib/rexml/element.rb +1786 -456
- data/lib/rexml/entity.rb +25 -15
- data/lib/rexml/formatters/pretty.rb +2 -2
- data/lib/rexml/light/node.rb +0 -8
- data/lib/rexml/namespace.rb +8 -4
- data/lib/rexml/parsers/baseparser.rb +139 -39
- data/lib/rexml/parsers/xpathparser.rb +161 -97
- data/lib/rexml/rexml.rb +29 -22
- data/lib/rexml/text.rb +6 -4
- data/lib/rexml/xpath_parser.rb +36 -30
- data/lib/rexml.rb +3 -0
- metadata +56 -16
- data/.gitignore +0 -9
- data/.travis.yml +0 -24
- data/Gemfile +0 -6
- data/Rakefile +0 -8
- data/rexml.gemspec +0 -84
data/lib/rexml/entity.rb
CHANGED
@@ -132,24 +132,34 @@ module REXML
|
|
132
132
|
# then:
|
133
133
|
# doctype.entity('yada').value #-> "nanoo bar nanoo"
|
134
134
|
def value
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
135
|
+
@resolved_value ||= resolve_value
|
136
|
+
end
|
137
|
+
|
138
|
+
def parent=(other)
|
139
|
+
@resolved_value = nil
|
140
|
+
super
|
141
|
+
end
|
142
|
+
|
143
|
+
private
|
144
|
+
def resolve_value
|
145
|
+
return nil if @value.nil?
|
146
|
+
return @value unless @value.match?(PEREFERENCE_RE)
|
147
|
+
|
148
|
+
matches = @value.scan(PEREFERENCE_RE)
|
149
|
+
rv = @value.clone
|
150
|
+
if @parent
|
151
|
+
sum = 0
|
152
|
+
matches.each do |entity_reference|
|
153
|
+
entity_value = @parent.entity( entity_reference[0] )
|
154
|
+
if sum + entity_value.bytesize > Security.entity_expansion_text_limit
|
155
|
+
raise "entity expansion has grown too large"
|
156
|
+
else
|
157
|
+
sum += entity_value.bytesize
|
148
158
|
end
|
159
|
+
rv.gsub!( /%#{entity_reference.join};/um, entity_value )
|
149
160
|
end
|
150
|
-
return rv
|
151
161
|
end
|
152
|
-
|
162
|
+
rv
|
153
163
|
end
|
154
164
|
end
|
155
165
|
|
data/lib/rexml/formatters/pretty.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# frozen_string_literal: false
|
1
|
+
# frozen_string_literal: true
|
2
2
|
require_relative 'default'
|
3
3
|
|
4
4
|
module REXML
|
@@ -58,7 +58,7 @@ module REXML
|
|
58
58
|
skip = false
|
59
59
|
if compact
|
60
60
|
if node.children.inject(true) {|s,c| s & c.kind_of?(Text)}
|
61
|
-
string = ""
|
61
|
+
string = +""
|
62
62
|
old_level = @level
|
63
63
|
@level = 0
|
64
64
|
node.children.each { |child| write( child, string ) }
|
data/lib/rexml/light/node.rb
CHANGED
@@ -1,14 +1,6 @@
|
|
1
1
|
# frozen_string_literal: false
|
2
2
|
require_relative '../xmltokens'
|
3
3
|
|
4
|
-
# [ :element, parent, name, attributes, children* ]
|
5
|
-
# a = Node.new
|
6
|
-
# a << "B" # => <a>B</a>
|
7
|
-
# a.b # => <a>B<b/></a>
|
8
|
-
# a.b[1] # => <a>B<b/><b/><a>
|
9
|
-
# a.b[1]["x"] = "y" # => <a>B<b/><b x="y"/></a>
|
10
|
-
# a.b[0].c # => <a>B<b><c/></b><b x="y"/></a>
|
11
|
-
# a.b.c << "D" # => <a>B<b><c>D</c></b><b x="y"/></a>
|
12
4
|
module REXML
|
13
5
|
module Light
|
14
6
|
# Represents a tagged XML element. Elements are characterized by
|
data/lib/rexml/namespace.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# frozen_string_literal: false
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative 'xmltokens'
|
4
4
|
|
@@ -10,13 +10,17 @@ module REXML
|
|
10
10
|
# The expanded name of the object, valid if name is set
|
11
11
|
attr_accessor :prefix
|
12
12
|
include XMLTokens
|
13
|
+
NAME_WITHOUT_NAMESPACE = /\A#{NCNAME_STR}\z/
|
13
14
|
NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u
|
14
15
|
|
15
16
|
# Sets the name and the expanded name
|
16
17
|
def name=( name )
|
17
18
|
@expanded_name = name
|
18
|
-
|
19
|
-
|
19
|
+
if name.match?(NAME_WITHOUT_NAMESPACE)
|
20
|
+
@prefix = ""
|
21
|
+
@namespace = ""
|
22
|
+
@name = name
|
23
|
+
elsif name =~ NAMESPLIT
|
20
24
|
if $1
|
21
25
|
@prefix = $1
|
22
26
|
else
|
@@ -24,7 +28,7 @@ module REXML
|
|
24
28
|
@namespace = ""
|
25
29
|
end
|
26
30
|
@name = $2
|
27
|
-
|
31
|
+
elsif name == ""
|
28
32
|
@prefix = nil
|
29
33
|
@namespace = nil
|
30
34
|
@name = nil
|
data/lib/rexml/parsers/baseparser.rb
CHANGED
@@ -50,7 +50,6 @@ module REXML
|
|
50
50
|
|
51
51
|
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
|
52
52
|
DOCTYPE_END = /\A\s*\]\s*>/um
|
53
|
-
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
|
54
53
|
ATTRIBUTE_PATTERN = /\s*(#{QNAME_STR})\s*=\s*(["'])(.*?)\4/um
|
55
54
|
COMMENT_START = /\A<!--/u
|
56
55
|
COMMENT_PATTERN = /<!--(.*?)-->/um
|
@@ -61,15 +60,14 @@ module REXML
|
|
61
60
|
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
|
62
61
|
INSTRUCTION_START = /\A<\?/u
|
63
62
|
INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um
|
64
|
-
TAG_MATCH =
|
65
|
-
CLOSE_MATCH =
|
63
|
+
TAG_MATCH = /\A<((?>#{QNAME_STR}))/um
|
64
|
+
CLOSE_MATCH = /\A\s*<\/(#{QNAME_STR})\s*>/um
|
66
65
|
|
67
66
|
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
|
68
67
|
ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
|
69
68
|
STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
|
70
69
|
|
71
70
|
ENTITY_START = /\A\s*<!ENTITY/
|
72
|
-
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
|
73
71
|
ELEMENTDECL_START = /\A\s*<!ELEMENT/um
|
74
72
|
ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
|
75
73
|
SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
|
@@ -83,9 +81,6 @@ module REXML
|
|
83
81
|
ATTDEF_RE = /#{ATTDEF}/
|
84
82
|
ATTLISTDECL_START = /\A\s*<!ATTLIST/um
|
85
83
|
ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
86
|
-
NOTATIONDECL_START = /\A\s*<!NOTATION/um
|
87
|
-
PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
|
88
|
-
SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
|
89
84
|
|
90
85
|
TEXT_PATTERN = /\A([^<]*)/um
|
91
86
|
|
@@ -103,6 +98,11 @@ module REXML
|
|
103
98
|
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
104
99
|
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
|
105
100
|
|
101
|
+
NOTATIONDECL_START = /\A\s*<!NOTATION/um
|
102
|
+
EXTERNAL_ID_PUBLIC = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*/um
|
103
|
+
EXTERNAL_ID_SYSTEM = /\A\s*SYSTEM\s+#{SYSTEMLITERAL}\s*/um
|
104
|
+
PUBLIC_ID = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s*/um
|
105
|
+
|
106
106
|
EREFERENCE = /&(?!#{NAME};)/
|
107
107
|
|
108
108
|
DEFAULT_ENTITIES = {
|
@@ -195,11 +195,9 @@ module REXML
|
|
195
195
|
return [ :end_document ] if empty?
|
196
196
|
return @stack.shift if @stack.size > 0
|
197
197
|
#STDERR.puts @source.encoding
|
198
|
-
@source.read if @source.buffer.size<2
|
199
198
|
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
|
200
199
|
if @document_status == nil
|
201
|
-
|
202
|
-
word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
|
200
|
+
word = @source.match( /\A((?:\s+)|(?:<[^>]*>))/um )
|
203
201
|
word = word[1] unless word.nil?
|
204
202
|
#STDERR.puts "WORD = #{word.inspect}"
|
205
203
|
case word
|
@@ -224,38 +222,49 @@ module REXML
|
|
224
222
|
when INSTRUCTION_START
|
225
223
|
return process_instruction
|
226
224
|
when DOCTYPE_START
|
227
|
-
|
225
|
+
base_error_message = "Malformed DOCTYPE"
|
226
|
+
@source.match(DOCTYPE_START, true)
|
228
227
|
@nsstack.unshift(curr_ns=Set.new)
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
long_name = $4.nil? ? nil : $4.strip
|
236
|
-
uri = $6.nil? ? nil : $6.strip
|
237
|
-
args = [ :start_doctype, name, pub_sys, long_name, uri ]
|
238
|
-
if close == ">"
|
228
|
+
name = parse_name(base_error_message)
|
229
|
+
if @source.match(/\A\s*\[/um, true)
|
230
|
+
id = [nil, nil, nil]
|
231
|
+
@document_status = :in_doctype
|
232
|
+
elsif @source.match(/\A\s*>/um, true)
|
233
|
+
id = [nil, nil, nil]
|
239
234
|
@document_status = :after_doctype
|
240
|
-
@source.read if @source.buffer.size<2
|
241
|
-
md = @source.match(/^\s*/um, true)
|
242
|
-
@stack << [ :end_doctype ]
|
243
235
|
else
|
244
|
-
|
236
|
+
id = parse_id(base_error_message,
|
237
|
+
accept_external_id: true,
|
238
|
+
accept_public_id: false)
|
239
|
+
if id[0] == "SYSTEM"
|
240
|
+
# For backward compatibility
|
241
|
+
id[1], id[2] = id[2], nil
|
242
|
+
end
|
243
|
+
if @source.match(/\A\s*\[/um, true)
|
244
|
+
@document_status = :in_doctype
|
245
|
+
elsif @source.match(/\A\s*>/um, true)
|
246
|
+
@document_status = :after_doctype
|
247
|
+
else
|
248
|
+
message = "#{base_error_message}: garbage after external ID"
|
249
|
+
raise REXML::ParseException.new(message, @source)
|
250
|
+
end
|
251
|
+
end
|
252
|
+
args = [:start_doctype, name, *id]
|
253
|
+
if @document_status == :after_doctype
|
254
|
+
@source.match(/\A\s*/um, true)
|
255
|
+
@stack << [ :end_doctype ]
|
245
256
|
end
|
246
257
|
return args
|
247
|
-
when
|
258
|
+
when /\A\s+/
|
248
259
|
else
|
249
260
|
@document_status = :after_doctype
|
250
|
-
@source.read if @source.buffer.size<2
|
251
|
-
md = @source.match(/\s*/um, true)
|
252
261
|
if @source.encoding == "UTF-8"
|
253
262
|
@source.buffer.force_encoding(::Encoding::UTF_8)
|
254
263
|
end
|
255
264
|
end
|
256
265
|
end
|
257
266
|
if @document_status == :in_doctype
|
258
|
-
md = @source.match(/\s*(.*?>)/um)
|
267
|
+
md = @source.match(/\A\s*(.*?>)/um)
|
259
268
|
case md[1]
|
260
269
|
when SYSTEMENTITY
|
261
270
|
match = @source.match( SYSTEMENTITY, true )[1]
|
@@ -312,24 +321,35 @@ module REXML
|
|
312
321
|
end
|
313
322
|
return [ :attlistdecl, element, pairs, contents ]
|
314
323
|
when NOTATIONDECL_START
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
|
324
|
+
base_error_message = "Malformed notation declaration"
|
325
|
+
unless @source.match(/\A\s*<!NOTATION\s+/um, true)
|
326
|
+
if @source.match(/\A\s*<!NOTATION\s*>/um)
|
327
|
+
message = "#{base_error_message}: name is missing"
|
328
|
+
else
|
329
|
+
message = "#{base_error_message}: invalid declaration name"
|
330
|
+
end
|
331
|
+
raise REXML::ParseException.new(message, @source)
|
324
332
|
end
|
325
|
-
|
333
|
+
name = parse_name(base_error_message)
|
334
|
+
id = parse_id(base_error_message,
|
335
|
+
accept_external_id: true,
|
336
|
+
accept_public_id: true)
|
337
|
+
unless @source.match(/\A\s*>/um, true)
|
338
|
+
message = "#{base_error_message}: garbage before end >"
|
339
|
+
raise REXML::ParseException.new(message, @source)
|
340
|
+
end
|
341
|
+
return [:notationdecl, name, *id]
|
326
342
|
when DOCTYPE_END
|
327
343
|
@document_status = :after_doctype
|
328
344
|
@source.match( DOCTYPE_END, true )
|
329
345
|
return [ :end_doctype ]
|
330
346
|
end
|
331
347
|
end
|
348
|
+
if @document_status == :after_doctype
|
349
|
+
@source.match(/\A\s*/um, true)
|
350
|
+
end
|
332
351
|
begin
|
352
|
+
@source.read if @source.buffer.size<2
|
333
353
|
if @source.buffer[0] == ?<
|
334
354
|
if @source.buffer[1] == ?/
|
335
355
|
@nsstack.shift
|
@@ -372,6 +392,7 @@ module REXML
|
|
372
392
|
unless md
|
373
393
|
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
374
394
|
end
|
395
|
+
@document_status = :in_element
|
375
396
|
prefixes = Set.new
|
376
397
|
prefixes << md[2] if md[2]
|
377
398
|
@nsstack.unshift(curr_ns=Set.new)
|
@@ -477,6 +498,85 @@ module REXML
|
|
477
498
|
true
|
478
499
|
end
|
479
500
|
|
501
|
+
def parse_name(base_error_message)
|
502
|
+
md = @source.match(/\A\s*#{NAME}/um, true)
|
503
|
+
unless md
|
504
|
+
if @source.match(/\A\s*\S/um)
|
505
|
+
message = "#{base_error_message}: invalid name"
|
506
|
+
else
|
507
|
+
message = "#{base_error_message}: name is missing"
|
508
|
+
end
|
509
|
+
raise REXML::ParseException.new(message, @source)
|
510
|
+
end
|
511
|
+
md[1]
|
512
|
+
end
|
513
|
+
|
514
|
+
def parse_id(base_error_message,
|
515
|
+
accept_external_id:,
|
516
|
+
accept_public_id:)
|
517
|
+
if accept_external_id and (md = @source.match(EXTERNAL_ID_PUBLIC, true))
|
518
|
+
pubid = system = nil
|
519
|
+
pubid_literal = md[1]
|
520
|
+
pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
|
521
|
+
system_literal = md[2]
|
522
|
+
system = system_literal[1..-2] if system_literal # Remove quote
|
523
|
+
["PUBLIC", pubid, system]
|
524
|
+
elsif accept_public_id and (md = @source.match(PUBLIC_ID, true))
|
525
|
+
pubid = system = nil
|
526
|
+
pubid_literal = md[1]
|
527
|
+
pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
|
528
|
+
["PUBLIC", pubid, nil]
|
529
|
+
elsif accept_external_id and (md = @source.match(EXTERNAL_ID_SYSTEM, true))
|
530
|
+
system = nil
|
531
|
+
system_literal = md[1]
|
532
|
+
system = system_literal[1..-2] if system_literal # Remove quote
|
533
|
+
["SYSTEM", nil, system]
|
534
|
+
else
|
535
|
+
details = parse_id_invalid_details(accept_external_id: accept_external_id,
|
536
|
+
accept_public_id: accept_public_id)
|
537
|
+
message = "#{base_error_message}: #{details}"
|
538
|
+
raise REXML::ParseException.new(message, @source)
|
539
|
+
end
|
540
|
+
end
|
541
|
+
|
542
|
+
def parse_id_invalid_details(accept_external_id:,
|
543
|
+
accept_public_id:)
|
544
|
+
public = /\A\s*PUBLIC/um
|
545
|
+
system = /\A\s*SYSTEM/um
|
546
|
+
if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
|
547
|
+
if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
548
|
+
return "public ID literal is missing"
|
549
|
+
end
|
550
|
+
unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
|
551
|
+
return "invalid public ID literal"
|
552
|
+
end
|
553
|
+
if accept_public_id
|
554
|
+
if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
555
|
+
return "system ID literal is missing"
|
556
|
+
end
|
557
|
+
unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
558
|
+
return "invalid system literal"
|
559
|
+
end
|
560
|
+
"garbage after system literal"
|
561
|
+
else
|
562
|
+
"garbage after public ID literal"
|
563
|
+
end
|
564
|
+
elsif accept_external_id and @source.match(/#{system}/um)
|
565
|
+
if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
566
|
+
return "system literal is missing"
|
567
|
+
end
|
568
|
+
unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
|
569
|
+
return "invalid system literal"
|
570
|
+
end
|
571
|
+
"garbage after system literal"
|
572
|
+
else
|
573
|
+
unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
574
|
+
return "invalid ID type"
|
575
|
+
end
|
576
|
+
"ID type is missing"
|
577
|
+
end
|
578
|
+
end
|
579
|
+
|
480
580
|
def process_instruction
|
481
581
|
match_data = @source.match(INSTRUCTION_PATTERN, true)
|
482
582
|
unless match_data
|