rexml 3.2.3 → 3.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +219 -0
- data/README.md +11 -14
- data/doc/rexml/context.rdoc +143 -0
- data/doc/rexml/tasks/rdoc/child.rdoc +87 -0
- data/doc/rexml/tasks/rdoc/document.rdoc +276 -0
- data/doc/rexml/tasks/rdoc/element.rdoc +602 -0
- data/doc/rexml/tasks/rdoc/node.rdoc +97 -0
- data/doc/rexml/tasks/rdoc/parent.rdoc +267 -0
- data/doc/rexml/tasks/tocs/child_toc.rdoc +12 -0
- data/doc/rexml/tasks/tocs/document_toc.rdoc +30 -0
- data/doc/rexml/tasks/tocs/element_toc.rdoc +55 -0
- data/doc/rexml/tasks/tocs/master_toc.rdoc +135 -0
- data/doc/rexml/tasks/tocs/node_toc.rdoc +16 -0
- data/doc/rexml/tasks/tocs/parent_toc.rdoc +25 -0
- data/doc/rexml/tutorial.rdoc +1358 -0
- data/lib/rexml/attribute.rb +14 -9
- data/lib/rexml/doctype.rb +55 -31
- data/lib/rexml/document.rb +194 -34
- data/lib/rexml/element.rb +1786 -456
- data/lib/rexml/entity.rb +26 -16
- data/lib/rexml/formatters/pretty.rb +2 -2
- data/lib/rexml/functions.rb +1 -2
- data/lib/rexml/light/node.rb +0 -8
- data/lib/rexml/namespace.rb +8 -4
- data/lib/rexml/parseexception.rb +1 -0
- data/lib/rexml/parsers/baseparser.rb +321 -222
- data/lib/rexml/parsers/xpathparser.rb +161 -97
- data/lib/rexml/rexml.rb +29 -22
- data/lib/rexml/source.rb +72 -99
- data/lib/rexml/text.rb +7 -5
- data/lib/rexml/xpath_parser.rb +43 -33
- data/lib/rexml.rb +3 -0
- metadata +43 -34
- data/.gitignore +0 -9
- data/.travis.yml +0 -24
- data/Gemfile +0 -6
- data/Rakefile +0 -8
- data/rexml.gemspec +0 -84
@@ -1,4 +1,4 @@
|
|
1
|
-
# frozen_string_literal:
|
1
|
+
# frozen_string_literal: true
|
2
2
|
require_relative '../parseexception'
|
3
3
|
require_relative '../undefinednamespaceexception'
|
4
4
|
require_relative '../source'
|
@@ -50,7 +50,6 @@ module REXML
|
|
50
50
|
|
51
51
|
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
|
52
52
|
DOCTYPE_END = /\A\s*\]\s*>/um
|
53
|
-
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
|
54
53
|
ATTRIBUTE_PATTERN = /\s*(#{QNAME_STR})\s*=\s*(["'])(.*?)\4/um
|
55
54
|
COMMENT_START = /\A<!--/u
|
56
55
|
COMMENT_PATTERN = /<!--(.*?)-->/um
|
@@ -61,15 +60,14 @@ module REXML
|
|
61
60
|
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
|
62
61
|
INSTRUCTION_START = /\A<\?/u
|
63
62
|
INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um
|
64
|
-
TAG_MATCH =
|
65
|
-
CLOSE_MATCH =
|
63
|
+
TAG_MATCH = /\A<((?>#{QNAME_STR}))/um
|
64
|
+
CLOSE_MATCH = /\A\s*<\/(#{QNAME_STR})\s*>/um
|
66
65
|
|
67
66
|
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
|
68
67
|
ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
|
69
68
|
STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
|
70
69
|
|
71
70
|
ENTITY_START = /\A\s*<!ENTITY/
|
72
|
-
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
|
73
71
|
ELEMENTDECL_START = /\A\s*<!ELEMENT/um
|
74
72
|
ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
|
75
73
|
SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
|
@@ -83,9 +81,6 @@ module REXML
|
|
83
81
|
ATTDEF_RE = /#{ATTDEF}/
|
84
82
|
ATTLISTDECL_START = /\A\s*<!ATTLIST/um
|
85
83
|
ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
86
|
-
NOTATIONDECL_START = /\A\s*<!NOTATION/um
|
87
|
-
PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
|
88
|
-
SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
|
89
84
|
|
90
85
|
TEXT_PATTERN = /\A([^<]*)/um
|
91
86
|
|
@@ -101,7 +96,12 @@ module REXML
|
|
101
96
|
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
|
102
97
|
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
103
98
|
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
104
|
-
ENTITYDECL = /\s*(?:#{GEDECL})
|
99
|
+
ENTITYDECL = /\s*(?:#{GEDECL})|\s*(?:#{PEDECL})/um
|
100
|
+
|
101
|
+
NOTATIONDECL_START = /\A\s*<!NOTATION/um
|
102
|
+
EXTERNAL_ID_PUBLIC = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*/um
|
103
|
+
EXTERNAL_ID_SYSTEM = /\A\s*SYSTEM\s+#{SYSTEMLITERAL}\s*/um
|
104
|
+
PUBLIC_ID = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s*/um
|
105
105
|
|
106
106
|
EREFERENCE = /&(?!#{NAME};)/
|
107
107
|
|
@@ -112,6 +112,19 @@ module REXML
|
|
112
112
|
"apos" => [/'/, "'", "'", /'/]
|
113
113
|
}
|
114
114
|
|
115
|
+
module Private
|
116
|
+
INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
|
117
|
+
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
118
|
+
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
119
|
+
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
120
|
+
NAME_PATTERN = /\s*#{NAME}/um
|
121
|
+
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
122
|
+
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
123
|
+
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
124
|
+
end
|
125
|
+
private_constant :Private
|
126
|
+
include Private
|
127
|
+
|
115
128
|
def initialize( source )
|
116
129
|
self.stream = source
|
117
130
|
@listeners = []
|
@@ -195,162 +208,181 @@ module REXML
|
|
195
208
|
return [ :end_document ] if empty?
|
196
209
|
return @stack.shift if @stack.size > 0
|
197
210
|
#STDERR.puts @source.encoding
|
198
|
-
@source.read if @source.buffer.size<2
|
199
211
|
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
|
212
|
+
|
213
|
+
@source.ensure_buffer
|
200
214
|
if @document_status == nil
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
215
|
+
start_position = @source.position
|
216
|
+
if @source.match("<?", true)
|
217
|
+
return process_instruction(start_position)
|
218
|
+
elsif @source.match("<!", true)
|
219
|
+
if @source.match("--", true)
|
220
|
+
return [ :comment, @source.match(/(.*?)-->/um, true)[1] ]
|
221
|
+
elsif @source.match("DOCTYPE", true)
|
222
|
+
base_error_message = "Malformed DOCTYPE"
|
223
|
+
unless @source.match(/\s+/um, true)
|
224
|
+
if @source.match(">")
|
225
|
+
message = "#{base_error_message}: name is missing"
|
226
|
+
else
|
227
|
+
message = "#{base_error_message}: invalid name"
|
228
|
+
end
|
229
|
+
@source.position = start_position
|
230
|
+
raise REXML::ParseException.new(message, @source)
|
231
|
+
end
|
232
|
+
@nsstack.unshift(curr_ns=Set.new)
|
233
|
+
name = parse_name(base_error_message)
|
234
|
+
if @source.match(/\s*\[/um, true)
|
235
|
+
id = [nil, nil, nil]
|
236
|
+
@document_status = :in_doctype
|
237
|
+
elsif @source.match(/\s*>/um, true)
|
238
|
+
id = [nil, nil, nil]
|
239
|
+
@document_status = :after_doctype
|
240
|
+
@source.ensure_buffer
|
241
|
+
else
|
242
|
+
id = parse_id(base_error_message,
|
243
|
+
accept_external_id: true,
|
244
|
+
accept_public_id: false)
|
245
|
+
if id[0] == "SYSTEM"
|
246
|
+
# For backward compatibility
|
247
|
+
id[1], id[2] = id[2], nil
|
248
|
+
end
|
249
|
+
if @source.match(/\s*\[/um, true)
|
250
|
+
@document_status = :in_doctype
|
251
|
+
elsif @source.match(/\s*>/um, true)
|
252
|
+
@document_status = :after_doctype
|
253
|
+
@source.ensure_buffer
|
254
|
+
else
|
255
|
+
message = "#{base_error_message}: garbage after external ID"
|
256
|
+
raise REXML::ParseException.new(message, @source)
|
257
|
+
end
|
258
|
+
end
|
259
|
+
args = [:start_doctype, name, *id]
|
260
|
+
if @document_status == :after_doctype
|
261
|
+
@source.match(/\s*/um, true)
|
262
|
+
@stack << [ :end_doctype ]
|
263
|
+
end
|
264
|
+
return args
|
243
265
|
else
|
244
|
-
|
245
|
-
|
246
|
-
return args
|
247
|
-
when /^\s+/
|
248
|
-
else
|
249
|
-
@document_status = :after_doctype
|
250
|
-
@source.read if @source.buffer.size<2
|
251
|
-
md = @source.match(/\s*/um, true)
|
252
|
-
if @source.encoding == "UTF-8"
|
253
|
-
@source.buffer.force_encoding(::Encoding::UTF_8)
|
266
|
+
message = "Invalid XML"
|
267
|
+
raise REXML::ParseException.new(message, @source)
|
254
268
|
end
|
255
269
|
end
|
256
270
|
end
|
257
271
|
if @document_status == :in_doctype
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
match
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
match
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
if attdef[0] =~ /^xmlns:(.*)/
|
309
|
-
@nsstack[0] << $1
|
272
|
+
@source.match(/\s*/um, true) # skip spaces
|
273
|
+
start_position = @source.position
|
274
|
+
if @source.match("<!", true)
|
275
|
+
if @source.match("ELEMENT", true)
|
276
|
+
md = @source.match(/(.*?)>/um, true)
|
277
|
+
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
278
|
+
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
279
|
+
elsif @source.match("ENTITY", true)
|
280
|
+
match = [:entitydecl, *@source.match(ENTITYDECL_PATTERN, true).captures.compact]
|
281
|
+
ref = false
|
282
|
+
if match[1] == '%'
|
283
|
+
ref = true
|
284
|
+
match.delete_at 1
|
285
|
+
end
|
286
|
+
# Now we have to sort out what kind of entity reference this is
|
287
|
+
if match[2] == 'SYSTEM'
|
288
|
+
# External reference
|
289
|
+
match[3] = match[3][1..-2] # PUBID
|
290
|
+
match.delete_at(4) if match.size > 4 # Chop out NDATA decl
|
291
|
+
# match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
|
292
|
+
elsif match[2] == 'PUBLIC'
|
293
|
+
# External reference
|
294
|
+
match[3] = match[3][1..-2] # PUBID
|
295
|
+
match[4] = match[4][1..-2] # HREF
|
296
|
+
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
|
297
|
+
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
|
298
|
+
else
|
299
|
+
match[2] = match[2][1..-2]
|
300
|
+
match.pop if match.size == 4
|
301
|
+
# match is [ :entity, name, value ]
|
302
|
+
end
|
303
|
+
match << '%' if ref
|
304
|
+
return match
|
305
|
+
elsif @source.match("ATTLIST", true)
|
306
|
+
md = @source.match(ATTLISTDECL_END, true)
|
307
|
+
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
308
|
+
element = md[1]
|
309
|
+
contents = md[0]
|
310
|
+
|
311
|
+
pairs = {}
|
312
|
+
values = md[0].scan( ATTDEF_RE )
|
313
|
+
values.each do |attdef|
|
314
|
+
unless attdef[3] == "#IMPLIED"
|
315
|
+
attdef.compact!
|
316
|
+
val = attdef[3]
|
317
|
+
val = attdef[4] if val == "#FIXED "
|
318
|
+
pairs[attdef[0]] = val
|
319
|
+
if attdef[0] =~ /^xmlns:(.*)/
|
320
|
+
@nsstack[0] << $1
|
321
|
+
end
|
310
322
|
end
|
311
323
|
end
|
324
|
+
return [ :attlistdecl, element, pairs, contents ]
|
325
|
+
elsif @source.match("NOTATION", true)
|
326
|
+
base_error_message = "Malformed notation declaration"
|
327
|
+
unless @source.match(/\s+/um, true)
|
328
|
+
if @source.match(">")
|
329
|
+
message = "#{base_error_message}: name is missing"
|
330
|
+
else
|
331
|
+
message = "#{base_error_message}: invalid name"
|
332
|
+
end
|
333
|
+
@source.position = start_position
|
334
|
+
raise REXML::ParseException.new(message, @source)
|
335
|
+
end
|
336
|
+
name = parse_name(base_error_message)
|
337
|
+
id = parse_id(base_error_message,
|
338
|
+
accept_external_id: true,
|
339
|
+
accept_public_id: true)
|
340
|
+
unless @source.match(/\s*>/um, true)
|
341
|
+
message = "#{base_error_message}: garbage before end >"
|
342
|
+
raise REXML::ParseException.new(message, @source)
|
343
|
+
end
|
344
|
+
return [:notationdecl, name, *id]
|
345
|
+
elsif md = @source.match(/--(.*?)-->/um, true)
|
346
|
+
case md[1]
|
347
|
+
when /--/, /-\z/
|
348
|
+
raise REXML::ParseException.new("Malformed comment", @source)
|
349
|
+
end
|
350
|
+
return [ :comment, md[1] ] if md
|
312
351
|
end
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
if @source.match( PUBLIC )
|
317
|
-
md = @source.match( PUBLIC, true )
|
318
|
-
vals = [md[1],md[2],md[4],md[6]]
|
319
|
-
elsif @source.match( SYSTEM )
|
320
|
-
md = @source.match( SYSTEM, true )
|
321
|
-
vals = [md[1],md[2],nil,md[4]]
|
322
|
-
else
|
323
|
-
raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
|
324
|
-
end
|
325
|
-
return [ :notationdecl, *vals ]
|
326
|
-
when DOCTYPE_END
|
352
|
+
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
353
|
+
return [ :externalentity, match[1] ]
|
354
|
+
elsif @source.match(/\]\s*>/um, true)
|
327
355
|
@document_status = :after_doctype
|
328
|
-
@source.match( DOCTYPE_END, true )
|
329
356
|
return [ :end_doctype ]
|
330
357
|
end
|
331
358
|
end
|
359
|
+
if @document_status == :after_doctype
|
360
|
+
@source.match(/\s*/um, true)
|
361
|
+
end
|
332
362
|
begin
|
333
|
-
|
334
|
-
|
363
|
+
start_position = @source.position
|
364
|
+
if @source.match("<", true)
|
365
|
+
if @source.match("/", true)
|
335
366
|
@nsstack.shift
|
336
367
|
last_tag = @tags.pop
|
337
|
-
md = @source.match(
|
368
|
+
md = @source.match(CLOSE_PATTERN, true)
|
338
369
|
if md and !last_tag
|
339
370
|
message = "Unexpected top-level end tag (got '#{md[1]}')"
|
340
371
|
raise REXML::ParseException.new(message, @source)
|
341
372
|
end
|
342
373
|
if md.nil? or last_tag != md[1]
|
343
374
|
message = "Missing end tag for '#{last_tag}'"
|
344
|
-
message
|
375
|
+
message += " (got '#{md[1]}')" if md
|
376
|
+
@source.position = start_position if md.nil?
|
345
377
|
raise REXML::ParseException.new(message, @source)
|
346
378
|
end
|
347
379
|
return [ :end_element, last_tag ]
|
348
|
-
elsif @source.
|
349
|
-
md = @source.match(
|
380
|
+
elsif @source.match("!", true)
|
381
|
+
md = @source.match(/([^>]*>)/um)
|
350
382
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
351
383
|
raise REXML::ParseException.new("Malformed node", @source) unless md
|
352
|
-
if md[0][
|
353
|
-
md = @source.match(
|
384
|
+
if md[0][0] == ?-
|
385
|
+
md = @source.match(/--(.*?)-->/um, true)
|
354
386
|
|
355
387
|
case md[1]
|
356
388
|
when /--/, /-\z/
|
@@ -359,19 +391,22 @@ module REXML
|
|
359
391
|
|
360
392
|
return [ :comment, md[1] ] if md
|
361
393
|
else
|
362
|
-
md = @source.match(
|
394
|
+
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
|
363
395
|
return [ :cdata, md[1] ] if md
|
364
396
|
end
|
365
397
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
366
398
|
"in the doctype declaration.", @source)
|
367
|
-
elsif @source.
|
368
|
-
return process_instruction
|
399
|
+
elsif @source.match("?", true)
|
400
|
+
return process_instruction(start_position)
|
369
401
|
else
|
370
402
|
# Get the next tag
|
371
|
-
md = @source.match(
|
403
|
+
md = @source.match(TAG_PATTERN, true)
|
372
404
|
unless md
|
405
|
+
@source.position = start_position
|
373
406
|
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
374
407
|
end
|
408
|
+
tag = md[1]
|
409
|
+
@document_status = :in_element
|
375
410
|
prefixes = Set.new
|
376
411
|
prefixes << md[2] if md[2]
|
377
412
|
@nsstack.unshift(curr_ns=Set.new)
|
@@ -384,23 +419,17 @@ module REXML
|
|
384
419
|
end
|
385
420
|
|
386
421
|
if closed
|
387
|
-
@closed =
|
422
|
+
@closed = tag
|
388
423
|
@nsstack.shift
|
389
424
|
else
|
390
|
-
@tags.push(
|
425
|
+
@tags.push( tag )
|
391
426
|
end
|
392
|
-
return [ :start_element,
|
427
|
+
return [ :start_element, tag, attributes ]
|
393
428
|
end
|
394
429
|
else
|
395
|
-
md = @source.match(
|
396
|
-
|
397
|
-
|
398
|
-
end
|
399
|
-
#STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0
|
400
|
-
#return [ :text, "" ] if md[0].length == 0
|
401
|
-
# unnormalized = Text::unnormalize( md[1], self )
|
402
|
-
# return PullEvent.new( :text, md[1], unnormalized )
|
403
|
-
return [ :text, md[1] ]
|
430
|
+
md = @source.match(/([^<]*)/um, true)
|
431
|
+
text = md[1]
|
432
|
+
return [ :text, text ]
|
404
433
|
end
|
405
434
|
rescue REXML::UndefinedNamespaceException
|
406
435
|
raise
|
@@ -442,8 +471,7 @@ module REXML
|
|
442
471
|
|
443
472
|
# Unescapes all possible entities
|
444
473
|
def unnormalize( string, entities=nil, filter=nil )
|
445
|
-
rv = string.
|
446
|
-
rv.gsub!( /\r\n?/, "\n" )
|
474
|
+
rv = string.gsub( /\r\n?/, "\n" )
|
447
475
|
matches = rv.scan( REFERENCE_RE )
|
448
476
|
return rv if matches.size == 0
|
449
477
|
rv.gsub!( /�*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
|
@@ -477,97 +505,168 @@ module REXML
|
|
477
505
|
true
|
478
506
|
end
|
479
507
|
|
480
|
-
def
|
481
|
-
|
482
|
-
unless
|
483
|
-
|
508
|
+
def parse_name(base_error_message)
|
509
|
+
md = @source.match(NAME_PATTERN, true)
|
510
|
+
unless md
|
511
|
+
if @source.match(/\s*\S/um)
|
512
|
+
message = "#{base_error_message}: invalid name"
|
513
|
+
else
|
514
|
+
message = "#{base_error_message}: name is missing"
|
515
|
+
end
|
484
516
|
raise REXML::ParseException.new(message, @source)
|
485
517
|
end
|
486
|
-
[
|
518
|
+
md[1]
|
487
519
|
end
|
488
520
|
|
489
|
-
def
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
521
|
+
def parse_id(base_error_message,
|
522
|
+
accept_external_id:,
|
523
|
+
accept_public_id:)
|
524
|
+
if accept_external_id and (md = @source.match(EXTERNAL_ID_PUBLIC, true))
|
525
|
+
pubid = system = nil
|
526
|
+
pubid_literal = md[1]
|
527
|
+
pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
|
528
|
+
system_literal = md[2]
|
529
|
+
system = system_literal[1..-2] if system_literal # Remove quote
|
530
|
+
["PUBLIC", pubid, system]
|
531
|
+
elsif accept_public_id and (md = @source.match(PUBLIC_ID, true))
|
532
|
+
pubid = system = nil
|
533
|
+
pubid_literal = md[1]
|
534
|
+
pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
|
535
|
+
["PUBLIC", pubid, nil]
|
536
|
+
elsif accept_external_id and (md = @source.match(EXTERNAL_ID_SYSTEM, true))
|
537
|
+
system = nil
|
538
|
+
system_literal = md[1]
|
539
|
+
system = system_literal[1..-2] if system_literal # Remove quote
|
540
|
+
["SYSTEM", nil, system]
|
541
|
+
else
|
542
|
+
details = parse_id_invalid_details(accept_external_id: accept_external_id,
|
543
|
+
accept_public_id: accept_public_id)
|
544
|
+
message = "#{base_error_message}: #{details}"
|
495
545
|
raise REXML::ParseException.new(message, @source)
|
496
546
|
end
|
547
|
+
end
|
497
548
|
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
549
|
+
def parse_id_invalid_details(accept_external_id:,
|
550
|
+
accept_public_id:)
|
551
|
+
public = /\A\s*PUBLIC/um
|
552
|
+
system = /\A\s*SYSTEM/um
|
553
|
+
if (accept_external_id or accept_public_id) and @source.match(/#{public}/um)
|
554
|
+
if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
555
|
+
return "public ID literal is missing"
|
556
|
+
end
|
557
|
+
unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
|
558
|
+
return "invalid public ID literal"
|
559
|
+
end
|
560
|
+
if accept_public_id
|
561
|
+
if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
562
|
+
return "system ID literal is missing"
|
563
|
+
end
|
564
|
+
unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
565
|
+
return "invalid system literal"
|
566
|
+
end
|
567
|
+
"garbage after system literal"
|
568
|
+
else
|
569
|
+
"garbage after public ID literal"
|
570
|
+
end
|
571
|
+
elsif accept_external_id and @source.match(/#{system}/um)
|
572
|
+
if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
573
|
+
return "system literal is missing"
|
574
|
+
end
|
575
|
+
unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
|
576
|
+
return "invalid system literal"
|
577
|
+
end
|
578
|
+
"garbage after system literal"
|
579
|
+
else
|
580
|
+
unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)
|
581
|
+
return "invalid ID type"
|
582
|
+
end
|
583
|
+
"ID type is missing"
|
584
|
+
end
|
585
|
+
end
|
502
586
|
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
587
|
+
def process_instruction(start_position)
|
588
|
+
match_data = @source.match(INSTRUCTION_END, true)
|
589
|
+
unless match_data
|
590
|
+
message = "Invalid processing instruction node"
|
591
|
+
@source.position = start_position
|
592
|
+
raise REXML::ParseException.new(message, @source)
|
593
|
+
end
|
594
|
+
if @document_status.nil? and match_data[1] == "xml"
|
595
|
+
content = match_data[2]
|
596
|
+
version = VERSION.match(content)
|
597
|
+
version = version[1] unless version.nil?
|
598
|
+
encoding = ENCODING.match(content)
|
599
|
+
encoding = encoding[1] unless encoding.nil?
|
600
|
+
if need_source_encoding_update?(encoding)
|
601
|
+
@source.encoding = encoding
|
507
602
|
end
|
603
|
+
if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
|
604
|
+
encoding = "UTF-16"
|
605
|
+
end
|
606
|
+
standalone = STANDALONE.match(content)
|
607
|
+
standalone = standalone[1] unless standalone.nil?
|
608
|
+
return [ :xmldecl, version, encoding, standalone ]
|
609
|
+
end
|
610
|
+
[:processing_instruction, match_data[1], match_data[2]]
|
611
|
+
end
|
508
612
|
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
613
|
+
def parse_attributes(prefixes, curr_ns)
|
614
|
+
attributes = {}
|
615
|
+
closed = false
|
616
|
+
while true
|
617
|
+
if @source.match(">", true)
|
618
|
+
return attributes, closed
|
619
|
+
elsif @source.match("/>", true)
|
620
|
+
closed = true
|
621
|
+
return attributes, closed
|
622
|
+
elsif match = @source.match(QNAME, true)
|
623
|
+
name = match[1]
|
624
|
+
prefix = match[2]
|
625
|
+
local_part = match[3]
|
626
|
+
|
627
|
+
unless @source.match(/\s*=\s*/um, true)
|
518
628
|
message = "Missing attribute equal: <#{name}>"
|
519
629
|
raise REXML::ParseException.new(message, @source)
|
520
630
|
end
|
521
|
-
|
522
|
-
unless quote
|
631
|
+
unless match = @source.match(/(['"])/, true)
|
523
632
|
message = "Missing attribute value start quote: <#{name}>"
|
524
633
|
raise REXML::ParseException.new(message, @source)
|
525
634
|
end
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
scanner << ">"
|
531
|
-
scanner << match_data[1]
|
532
|
-
scanner.pos = pos
|
533
|
-
closed = !match_data[2].nil?
|
534
|
-
next
|
535
|
-
end
|
536
|
-
message =
|
537
|
-
"Missing attribute value end quote: <#{name}>: <#{quote}>"
|
635
|
+
quote = match[1]
|
636
|
+
value = @source.read_until(quote)
|
637
|
+
unless value.chomp!(quote)
|
638
|
+
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
538
639
|
raise REXML::ParseException.new(message, @source)
|
539
640
|
end
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
msg = "The '
|
641
|
+
@source.match(/\s*/um, true)
|
642
|
+
if prefix == "xmlns"
|
643
|
+
if local_part == "xml"
|
644
|
+
if value != "http://www.w3.org/XML/1998/namespace"
|
645
|
+
msg = "The 'xml' prefix must not be bound to any other namespace "+
|
646
|
+
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
647
|
+
raise REXML::ParseException.new( msg, @source, self )
|
648
|
+
end
|
649
|
+
elsif local_part == "xmlns"
|
650
|
+
msg = "The 'xmlns' prefix must not be declared "+
|
550
651
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
551
|
-
raise REXML::ParseException.new( msg, @source, self
|
652
|
+
raise REXML::ParseException.new( msg, @source, self)
|
552
653
|
end
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
raise REXML::ParseException.new( msg, @source, self)
|
654
|
+
curr_ns << local_part
|
655
|
+
elsif prefix
|
656
|
+
prefixes << prefix unless prefix == "xml"
|
557
657
|
end
|
558
|
-
curr_ns << local_part
|
559
|
-
elsif prefix
|
560
|
-
prefixes << prefix unless prefix == "xml"
|
561
|
-
end
|
562
658
|
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
659
|
+
if attributes[name]
|
660
|
+
msg = "Duplicate attribute #{name.inspect}"
|
661
|
+
raise REXML::ParseException.new(msg, @source, self)
|
662
|
+
end
|
567
663
|
|
568
|
-
|
664
|
+
attributes[name] = value
|
665
|
+
else
|
666
|
+
message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
|
667
|
+
raise REXML::ParseException.new(message, @source)
|
668
|
+
end
|
569
669
|
end
|
570
|
-
return attributes, closed
|
571
670
|
end
|
572
671
|
end
|
573
672
|
end
|