moxml 0.1.22 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop.yml +1 -0
- data/.rubocop_todo.yml +680 -110
- data/Rakefile +12 -9
- data/lib/compat/opal/rexml/namespace.rb +8 -5
- data/lib/compat/opal/rexml/parsers/baseparser.rb +276 -212
- data/lib/compat/opal/rexml/source.rb +28 -27
- data/lib/compat/opal/rexml/text.rb +112 -104
- data/lib/compat/opal/rexml/xmltokens.rb +8 -8
- data/lib/compat/opal/rexml_compat.rb +12 -11
- data/lib/moxml/adapter/customized_oga/xml_declaration.rb +8 -1
- data/lib/moxml/adapter/customized_rexml/formatter.rb +4 -4
- data/lib/moxml/adapter/libxml/entity_ref_registry.rb +4 -2
- data/lib/moxml/adapter/libxml/entity_restorer.rb +3 -1
- data/lib/moxml/adapter/libxml.rb +17 -4
- data/lib/moxml/adapter/nokogiri.rb +17 -15
- data/lib/moxml/adapter/oga.rb +43 -62
- data/lib/moxml/adapter/ox.rb +35 -18
- data/lib/moxml/adapter.rb +1 -1
- data/lib/moxml/config.rb +15 -2
- data/lib/moxml/document.rb +2 -8
- data/lib/moxml/entity_registry.rb +8 -4
- data/lib/moxml/entity_registry_opal_data.rb +3 -2
- data/lib/moxml/node.rb +8 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils.rb +1 -0
- data/lib/moxml.rb +7 -0
- data/spec/integration/all_adapters_spec.rb +1 -0
- data/spec/integration/shared_examples/line_ending_behavior.rb +56 -0
- data/spec/moxml/adapter/libxml_internals_spec.rb +4 -2
- data/spec/moxml/adapter/platform_spec.rb +2 -1
- data/spec/moxml/config_spec.rb +33 -0
- metadata +3 -2
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require
|
|
4
|
-
require
|
|
5
|
-
require
|
|
6
|
-
require
|
|
2
|
+
|
|
3
|
+
require "rexml/parseexception"
|
|
4
|
+
require "rexml/undefinednamespaceexception"
|
|
5
|
+
require "rexml/security"
|
|
6
|
+
require "rexml/source"
|
|
7
|
+
require "set"
|
|
7
8
|
require "strscan"
|
|
8
9
|
|
|
9
10
|
module REXML
|
|
@@ -55,25 +56,25 @@ module REXML
|
|
|
55
56
|
#
|
|
56
57
|
# Nat Price gave me some good ideas for the API.
|
|
57
58
|
class BaseParser
|
|
58
|
-
LETTER =
|
|
59
|
-
DIGIT =
|
|
59
|
+
LETTER = "A-Za-z"
|
|
60
|
+
DIGIT = "0-9"
|
|
60
61
|
|
|
61
|
-
COMBININGCHAR =
|
|
62
|
-
EXTENDER =
|
|
62
|
+
COMBININGCHAR = "" # TODO
|
|
63
|
+
EXTENDER = "" # TODO
|
|
63
64
|
|
|
64
|
-
NCNAME_STR= "[#{LETTER}_][-A-Za-z0-9._#{COMBININGCHAR}#{EXTENDER}]*"
|
|
65
|
-
QNAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
|
|
65
|
+
NCNAME_STR = "[#{LETTER}_][-A-Za-z0-9._#{COMBININGCHAR}#{EXTENDER}]*".freeze
|
|
66
|
+
QNAME_STR = "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})".freeze
|
|
66
67
|
QNAME = /(#{QNAME_STR})/
|
|
67
68
|
|
|
68
69
|
# Just for backward compatibility. For example, kramdown uses this.
|
|
69
70
|
# It's not used in REXML.
|
|
70
|
-
UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
|
|
71
|
+
UNAME_STR = "(?:#{NCNAME_STR}:)?#{NCNAME_STR}".freeze
|
|
71
72
|
|
|
72
73
|
NAMECHAR = '[\-\w\.:]'
|
|
73
|
-
NAME = "([\\w:]#{NAMECHAR}*)"
|
|
74
|
-
NMTOKEN = "(?:#{NAMECHAR})+"
|
|
75
|
-
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
|
|
76
|
-
REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)"
|
|
74
|
+
NAME = "([\\w:]#{NAMECHAR}*)".freeze
|
|
75
|
+
NMTOKEN = "(?:#{NAMECHAR})+".freeze
|
|
76
|
+
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*".freeze
|
|
77
|
+
REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)".freeze
|
|
77
78
|
REFERENCE_RE = /#{REFERENCE}/
|
|
78
79
|
|
|
79
80
|
DOCTYPE_START = /^\s*<!DOCTYPE\s/um
|
|
@@ -84,7 +85,7 @@ module REXML
|
|
|
84
85
|
CDATA_START = /^<!\[CDATA\[/u
|
|
85
86
|
CDATA_END = /^\s*\]\s*>/um
|
|
86
87
|
CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
|
|
87
|
-
XMLDECL_START = /^<\?xml\s/u
|
|
88
|
+
XMLDECL_START = /^<\?xml\s/u
|
|
88
89
|
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
|
|
89
90
|
INSTRUCTION_START = /^<\?/u
|
|
90
91
|
INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um
|
|
@@ -99,13 +100,13 @@ module REXML
|
|
|
99
100
|
ELEMENTDECL_START = /^\s*<!ELEMENT/um
|
|
100
101
|
ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
|
|
101
102
|
SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
|
|
102
|
-
ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
|
|
103
|
-
NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
|
|
104
|
-
ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
|
|
105
|
-
ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
|
|
106
|
-
ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
|
|
107
|
-
DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
|
|
108
|
-
ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
|
|
103
|
+
ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)".freeze
|
|
104
|
+
NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)".freeze
|
|
105
|
+
ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))".freeze
|
|
106
|
+
ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})".freeze
|
|
107
|
+
ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')".freeze
|
|
108
|
+
DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))".freeze
|
|
109
|
+
ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}".freeze
|
|
109
110
|
ATTDEF_RE = /#{ATTDEF}/
|
|
110
111
|
ATTLISTDECL_START = /^\s*<!ATTLIST/um
|
|
111
112
|
ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
|
@@ -114,16 +115,16 @@ module REXML
|
|
|
114
115
|
|
|
115
116
|
# Entity constants
|
|
116
117
|
PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
|
|
117
|
-
SYSTEMLITERAL = %
|
|
118
|
-
PUBIDLITERAL = %
|
|
119
|
-
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
|
|
120
|
-
NDATADECL = "\\s+NDATA\\s+#{NAME}"
|
|
121
|
-
PEREFERENCE = "%#{NAME};"
|
|
122
|
-
ENTITYVALUE = %
|
|
123
|
-
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
|
|
124
|
-
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
|
|
125
|
-
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
|
126
|
-
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
|
118
|
+
SYSTEMLITERAL = %{((?:"[^"]*")|(?:'[^']*'))}
|
|
119
|
+
PUBIDLITERAL = %{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}.freeze
|
|
120
|
+
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))".freeze
|
|
121
|
+
NDATADECL = "\\s+NDATA\\s+#{NAME}".freeze
|
|
122
|
+
PEREFERENCE = "%#{NAME};".freeze
|
|
123
|
+
ENTITYVALUE = %{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}.freeze
|
|
124
|
+
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})".freeze
|
|
125
|
+
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))".freeze
|
|
126
|
+
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>".freeze
|
|
127
|
+
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>".freeze
|
|
127
128
|
ENTITYDECL = /\s*(?:#{GEDECL})|\s*(?:#{PEDECL})/um
|
|
128
129
|
|
|
129
130
|
NOTATIONDECL_START = /^\s*<!NOTATION/um
|
|
@@ -134,11 +135,11 @@ module REXML
|
|
|
134
135
|
EREFERENCE = /&(?!#{NAME};)/
|
|
135
136
|
|
|
136
137
|
DEFAULT_ENTITIES = {
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
"apos" => [/'/, "'", "'", /'/]
|
|
141
|
-
}
|
|
138
|
+
"gt" => [/>/, ">", ">", />/],
|
|
139
|
+
"lt" => [/</, "<", "<", /</],
|
|
140
|
+
"quot" => [/"/, """, '"', /"/],
|
|
141
|
+
"apos" => [/'/, "'", "'", /'/],
|
|
142
|
+
}.freeze
|
|
142
143
|
|
|
143
144
|
module Private
|
|
144
145
|
PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
|
|
@@ -147,21 +148,22 @@ module REXML
|
|
|
147
148
|
EQUAL_PATTERN = /\s*=\s*/um
|
|
148
149
|
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
|
149
150
|
NAME_PATTERN = /#{NAME}/um
|
|
150
|
-
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
|
151
|
-
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
|
151
|
+
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>".freeze
|
|
152
|
+
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>".freeze
|
|
152
153
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
|
153
154
|
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
|
|
154
155
|
CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
|
|
155
156
|
DEFAULT_ENTITIES_PATTERNS = {}
|
|
156
|
-
default_entities = [
|
|
157
|
+
default_entities = ["gt", "lt", "quot", "apos", "amp"]
|
|
157
158
|
default_entities.each do |term|
|
|
158
159
|
DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
|
|
159
160
|
end
|
|
161
|
+
DEFAULT_ENTITIES_PATTERNS.freeze
|
|
160
162
|
XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
|
|
161
163
|
end
|
|
162
164
|
private_constant :Private
|
|
163
165
|
|
|
164
|
-
def initialize(
|
|
166
|
+
def initialize(source)
|
|
165
167
|
self.stream = source
|
|
166
168
|
@listeners = []
|
|
167
169
|
@prefixes = Set.new
|
|
@@ -172,17 +174,15 @@ module REXML
|
|
|
172
174
|
@version = nil
|
|
173
175
|
end
|
|
174
176
|
|
|
175
|
-
def add_listener(
|
|
177
|
+
def add_listener(listener)
|
|
176
178
|
@listeners << listener
|
|
177
179
|
end
|
|
178
180
|
|
|
179
|
-
attr_reader :source
|
|
180
|
-
|
|
181
|
-
attr_writer :entity_expansion_limit
|
|
182
|
-
attr_writer :entity_expansion_text_limit
|
|
181
|
+
attr_reader :source, :entity_expansion_count
|
|
182
|
+
attr_writer :entity_expansion_limit, :entity_expansion_text_limit
|
|
183
183
|
|
|
184
|
-
def stream=(
|
|
185
|
-
@source = SourceFactory.create_from(
|
|
184
|
+
def stream=(source)
|
|
185
|
+
@source = SourceFactory.create_from(source)
|
|
186
186
|
reset
|
|
187
187
|
end
|
|
188
188
|
|
|
@@ -193,7 +193,7 @@ module REXML
|
|
|
193
193
|
@tags = []
|
|
194
194
|
@stack = []
|
|
195
195
|
@entities = []
|
|
196
|
-
@namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE}
|
|
196
|
+
@namespaces = { "xml" => Private::XML_PREFIXED_NAMESPACE }
|
|
197
197
|
@namespaces_restore_stack = []
|
|
198
198
|
end
|
|
199
199
|
|
|
@@ -218,7 +218,7 @@ module REXML
|
|
|
218
218
|
|
|
219
219
|
# Push an event back on the head of the stream. This method
|
|
220
220
|
# has (theoretically) infinite depth.
|
|
221
|
-
def unshift
|
|
221
|
+
def unshift(token)
|
|
222
222
|
@stack.unshift(token)
|
|
223
223
|
end
|
|
224
224
|
|
|
@@ -228,17 +228,18 @@ module REXML
|
|
|
228
228
|
# Be aware that this causes the stream to be parsed up to the +depth+
|
|
229
229
|
# event, so you can effectively pre-parse the entire document (pull the
|
|
230
230
|
# entire thing into memory) using this method.
|
|
231
|
-
def peek
|
|
232
|
-
raise %
|
|
231
|
+
def peek(depth = 0)
|
|
232
|
+
raise %[Illegal argument "#{depth}"] if depth < -1
|
|
233
|
+
|
|
233
234
|
temp = []
|
|
234
235
|
if depth == -1
|
|
235
|
-
temp.push(pull
|
|
236
|
+
temp.push(pull) until empty?
|
|
236
237
|
else
|
|
237
|
-
while @stack.size+temp.size < depth+1
|
|
238
|
-
temp.push(pull
|
|
238
|
+
while @stack.size + temp.size < depth + 1
|
|
239
|
+
temp.push(pull)
|
|
239
240
|
end
|
|
240
241
|
end
|
|
241
|
-
@stack += temp if temp.size
|
|
242
|
+
@stack += temp if temp.size.positive?
|
|
242
243
|
@stack[depth]
|
|
243
244
|
end
|
|
244
245
|
|
|
@@ -255,15 +256,17 @@ module REXML
|
|
|
255
256
|
|
|
256
257
|
def pull_event
|
|
257
258
|
if @closed
|
|
258
|
-
x
|
|
259
|
-
|
|
259
|
+
x = @closed
|
|
260
|
+
@closed = nil
|
|
261
|
+
return [:end_element, x]
|
|
260
262
|
end
|
|
261
263
|
if empty?
|
|
262
264
|
if @document_status == :in_doctype
|
|
263
265
|
raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
|
|
264
266
|
end
|
|
267
|
+
|
|
265
268
|
unless @tags.empty?
|
|
266
|
-
path = "
|
|
269
|
+
path = "/#{@tags.join('/')}"
|
|
267
270
|
raise ParseException.new("Missing end tag for '#{path}'", @source)
|
|
268
271
|
end
|
|
269
272
|
|
|
@@ -271,11 +274,12 @@ module REXML
|
|
|
271
274
|
raise ParseException.new("Malformed XML: No root element", @source)
|
|
272
275
|
end
|
|
273
276
|
|
|
274
|
-
return [
|
|
277
|
+
return [:end_document]
|
|
275
278
|
end
|
|
276
|
-
return @stack.shift if @stack.size
|
|
277
|
-
|
|
278
|
-
#STDERR.puts
|
|
279
|
+
return @stack.shift if @stack.size.positive?
|
|
280
|
+
|
|
281
|
+
# STDERR.puts @source.encoding
|
|
282
|
+
# STDERR.puts "BUFFER = #{@source.buffer.inspect}"
|
|
279
283
|
|
|
280
284
|
@source.ensure_buffer
|
|
281
285
|
if @document_status == nil
|
|
@@ -284,15 +288,15 @@ module REXML
|
|
|
284
288
|
return process_instruction
|
|
285
289
|
elsif @source.match?("<!", true)
|
|
286
290
|
if @source.match?("--", true)
|
|
287
|
-
return [
|
|
291
|
+
return [:comment, process_comment]
|
|
288
292
|
elsif @source.match?("DOCTYPE", true)
|
|
289
293
|
base_error_message = "Malformed DOCTYPE"
|
|
290
294
|
unless @source.skip_spaces
|
|
291
|
-
if @source.match?(">")
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
295
|
+
message = if @source.match?(">")
|
|
296
|
+
"#{base_error_message}: name is missing"
|
|
297
|
+
else
|
|
298
|
+
"#{base_error_message}: invalid name"
|
|
299
|
+
end
|
|
296
300
|
@source.position = start_position
|
|
297
301
|
raise REXML::ParseException.new(message, @source)
|
|
298
302
|
end
|
|
@@ -311,7 +315,8 @@ module REXML
|
|
|
311
315
|
accept_public_id: false)
|
|
312
316
|
if id[0] == "SYSTEM"
|
|
313
317
|
# For backward compatibility
|
|
314
|
-
id[1]
|
|
318
|
+
id[1] = id[2]
|
|
319
|
+
id[2] = nil
|
|
315
320
|
end
|
|
316
321
|
@source.skip_spaces
|
|
317
322
|
if @source.match?("[", true)
|
|
@@ -327,7 +332,7 @@ module REXML
|
|
|
327
332
|
args = [:start_doctype, name, *id]
|
|
328
333
|
if @document_status == :after_doctype
|
|
329
334
|
@source.skip_spaces
|
|
330
|
-
@stack << [
|
|
335
|
+
@stack << [:end_doctype]
|
|
331
336
|
end
|
|
332
337
|
return args
|
|
333
338
|
else
|
|
@@ -342,48 +347,61 @@ module REXML
|
|
|
342
347
|
if @source.match?("<!", true)
|
|
343
348
|
if @source.match?("ELEMENT", true)
|
|
344
349
|
md = @source.match(/(.*?)>/um, true)
|
|
345
|
-
|
|
346
|
-
|
|
350
|
+
if md.nil?
|
|
351
|
+
raise REXML::ParseException.new("Bad ELEMENT declaration!",
|
|
352
|
+
@source)
|
|
353
|
+
end
|
|
354
|
+
|
|
355
|
+
return [:elementdecl, "<!ELEMENT#{md[1]}"]
|
|
347
356
|
elsif @source.match?("ENTITY", true)
|
|
348
357
|
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
|
349
358
|
unless match_data
|
|
350
|
-
raise REXML::ParseException.new("Malformed entity declaration",
|
|
359
|
+
raise REXML::ParseException.new("Malformed entity declaration",
|
|
360
|
+
@source)
|
|
351
361
|
end
|
|
362
|
+
|
|
352
363
|
match = [:entitydecl, *match_data.captures.compact]
|
|
353
364
|
ref = false
|
|
354
|
-
if match[1] ==
|
|
365
|
+
if match[1] == "%"
|
|
355
366
|
ref = true
|
|
356
367
|
match.delete_at 1
|
|
357
368
|
end
|
|
358
369
|
# Now we have to sort out what kind of entity reference this is
|
|
359
|
-
|
|
370
|
+
case match[2]
|
|
371
|
+
when "SYSTEM"
|
|
360
372
|
# External reference
|
|
361
373
|
match[3] = match[3][1..-2] # PUBID
|
|
362
374
|
match.delete_at(4) if match.size > 4 # Chop out NDATA decl
|
|
363
375
|
# match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
|
|
364
|
-
|
|
376
|
+
when "PUBLIC"
|
|
365
377
|
# External reference
|
|
366
378
|
match[3] = match[3][1..-2] # PUBID
|
|
367
379
|
match[4] = match[4][1..-2] # HREF
|
|
368
380
|
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
|
|
369
381
|
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
|
|
370
|
-
|
|
371
|
-
raise REXML::ParseException.new(
|
|
382
|
+
when Private::PEREFERENCE_PATTERN
|
|
383
|
+
raise REXML::ParseException.new(
|
|
384
|
+
"Parameter entity references forbidden in internal subset: #{match[2]}", @source
|
|
385
|
+
)
|
|
372
386
|
else
|
|
373
387
|
match[2] = match[2][1..-2]
|
|
374
388
|
match.pop if match.size == 4
|
|
375
389
|
# match is [ :entity, name, value ]
|
|
376
390
|
end
|
|
377
|
-
match <<
|
|
391
|
+
match << "%" if ref
|
|
378
392
|
return match
|
|
379
393
|
elsif @source.match?("ATTLIST", true)
|
|
380
394
|
md = @source.match(Private::ATTLISTDECL_END, true)
|
|
381
|
-
|
|
395
|
+
if md.nil?
|
|
396
|
+
raise REXML::ParseException.new("Bad ATTLIST declaration!",
|
|
397
|
+
@source)
|
|
398
|
+
end
|
|
399
|
+
|
|
382
400
|
element = md[1]
|
|
383
|
-
contents = "<!ATTLIST
|
|
401
|
+
contents = "<!ATTLIST#{md[0]}"
|
|
384
402
|
|
|
385
403
|
pairs = {}
|
|
386
|
-
values = md[0].strip.scan(
|
|
404
|
+
values = md[0].strip.scan(ATTDEF_RE)
|
|
387
405
|
values.each do |attdef|
|
|
388
406
|
unless attdef[3] == "#IMPLIED"
|
|
389
407
|
attdef.compact!
|
|
@@ -395,15 +413,15 @@ module REXML
|
|
|
395
413
|
end
|
|
396
414
|
end
|
|
397
415
|
end
|
|
398
|
-
return [
|
|
416
|
+
return [:attlistdecl, element, pairs, contents]
|
|
399
417
|
elsif @source.match?("NOTATION", true)
|
|
400
418
|
base_error_message = "Malformed notation declaration"
|
|
401
419
|
unless @source.skip_spaces
|
|
402
|
-
if @source.match?(">")
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
420
|
+
message = if @source.match?(">")
|
|
421
|
+
"#{base_error_message}: name is missing"
|
|
422
|
+
else
|
|
423
|
+
"#{base_error_message}: invalid name"
|
|
424
|
+
end
|
|
407
425
|
@source.position = start_position
|
|
408
426
|
raise REXML::ParseException.new(message, @source)
|
|
409
427
|
end
|
|
@@ -418,17 +436,20 @@ module REXML
|
|
|
418
436
|
end
|
|
419
437
|
return [:notationdecl, name, *id]
|
|
420
438
|
elsif @source.match?("--", true)
|
|
421
|
-
return [
|
|
439
|
+
return [:comment, process_comment]
|
|
422
440
|
else
|
|
423
|
-
raise REXML::ParseException.new(
|
|
441
|
+
raise REXML::ParseException.new(
|
|
442
|
+
"Malformed node: Started with '<!' but not a comment nor ELEMENT,ENTITY,ATTLIST,NOTATION", @source
|
|
443
|
+
)
|
|
424
444
|
end
|
|
425
445
|
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
|
426
|
-
return [
|
|
446
|
+
return [:externalentity, match[1]]
|
|
427
447
|
elsif @source.match?(/\]\s*>/um, true)
|
|
428
448
|
@document_status = :after_doctype
|
|
429
|
-
return [
|
|
449
|
+
return [:end_doctype]
|
|
430
450
|
else
|
|
431
|
-
raise ParseException.new("Malformed DOCTYPE: invalid declaration",
|
|
451
|
+
raise ParseException.new("Malformed DOCTYPE: invalid declaration",
|
|
452
|
+
@source)
|
|
432
453
|
end
|
|
433
454
|
end
|
|
434
455
|
if @document_status == :after_doctype
|
|
@@ -445,39 +466,46 @@ module REXML
|
|
|
445
466
|
@namespaces_restore_stack.pop
|
|
446
467
|
last_tag = @tags.pop
|
|
447
468
|
md = @source.match(Private::CLOSE_PATTERN, true)
|
|
448
|
-
if md
|
|
469
|
+
if md && !last_tag
|
|
449
470
|
message = "Unexpected top-level end tag (got '#{md[1]}')"
|
|
450
471
|
raise REXML::ParseException.new(message, @source)
|
|
451
472
|
end
|
|
452
|
-
if md.nil?
|
|
473
|
+
if md.nil? || (last_tag != md[1])
|
|
453
474
|
message = "Missing end tag for '#{last_tag}'"
|
|
454
475
|
message += " (got '#{md[1]}')" if md
|
|
455
476
|
@source.position = start_position if md.nil?
|
|
456
477
|
raise REXML::ParseException.new(message, @source)
|
|
457
478
|
end
|
|
458
|
-
|
|
479
|
+
[:end_element, last_tag]
|
|
459
480
|
elsif @source.match?("!", true)
|
|
460
|
-
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
|
481
|
+
# STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
|
461
482
|
if @source.match?("--", true)
|
|
462
|
-
|
|
483
|
+
[:comment, process_comment]
|
|
463
484
|
elsif @source.match?("[CDATA[", true)
|
|
464
485
|
text = @source.read_until("]]>")
|
|
465
486
|
unless text.end_with?("]]>")
|
|
466
|
-
raise REXML::ParseException.new(
|
|
487
|
+
raise REXML::ParseException.new(
|
|
488
|
+
"Malformed CDATA: Missing end ']]>'", @source
|
|
489
|
+
)
|
|
467
490
|
end
|
|
491
|
+
|
|
468
492
|
text = text[0...-3]
|
|
469
|
-
|
|
493
|
+
[:cdata, text]
|
|
470
494
|
else
|
|
471
|
-
raise REXML::ParseException.new(
|
|
495
|
+
raise REXML::ParseException.new(
|
|
496
|
+
"Malformed node: Started with '<!' but not a comment nor CDATA", @source
|
|
497
|
+
)
|
|
472
498
|
end
|
|
473
499
|
elsif @source.match?("?", true)
|
|
474
|
-
|
|
500
|
+
process_instruction
|
|
475
501
|
else
|
|
476
502
|
# Get the next tag
|
|
477
503
|
md = @source.match(Private::TAG_PATTERN, true)
|
|
478
504
|
unless md
|
|
479
505
|
@source.position = start_position
|
|
480
|
-
raise REXML::ParseException.new(
|
|
506
|
+
raise REXML::ParseException.new(
|
|
507
|
+
"malformed XML: missing tag start", @source
|
|
508
|
+
)
|
|
481
509
|
end
|
|
482
510
|
tag = md[1]
|
|
483
511
|
@document_status = :in_element
|
|
@@ -486,9 +514,9 @@ module REXML
|
|
|
486
514
|
push_namespaces_restore
|
|
487
515
|
attributes, closed = parse_attributes(@prefixes)
|
|
488
516
|
# Verify that all of the prefixes have been defined
|
|
489
|
-
|
|
517
|
+
@prefixes.each do |prefix|
|
|
490
518
|
unless @namespaces.key?(prefix)
|
|
491
|
-
raise UndefinedNamespaceException.new(prefix
|
|
519
|
+
raise UndefinedNamespaceException.new(prefix, @source, self)
|
|
492
520
|
end
|
|
493
521
|
end
|
|
494
522
|
|
|
@@ -496,13 +524,16 @@ module REXML
|
|
|
496
524
|
@closed = tag
|
|
497
525
|
pop_namespaces_restore
|
|
498
526
|
else
|
|
499
|
-
if @tags.empty?
|
|
500
|
-
raise ParseException.new(
|
|
527
|
+
if @tags.empty? && @have_root
|
|
528
|
+
raise ParseException.new(
|
|
529
|
+
"Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source
|
|
530
|
+
)
|
|
501
531
|
end
|
|
502
|
-
|
|
532
|
+
|
|
533
|
+
@tags.push(tag)
|
|
503
534
|
end
|
|
504
535
|
@have_root = true
|
|
505
|
-
|
|
536
|
+
[:start_element, tag, attributes]
|
|
506
537
|
end
|
|
507
538
|
else
|
|
508
539
|
text = @source.read_until("<")
|
|
@@ -513,82 +544,90 @@ module REXML
|
|
|
513
544
|
if @tags.empty?
|
|
514
545
|
unless /^\s*$/.match?(text)
|
|
515
546
|
if @have_root
|
|
516
|
-
raise ParseException.new(
|
|
547
|
+
raise ParseException.new(
|
|
548
|
+
"Malformed XML: Extra content at the end of the document (got '#{text}')", @source
|
|
549
|
+
)
|
|
517
550
|
else
|
|
518
|
-
raise ParseException.new(
|
|
551
|
+
raise ParseException.new(
|
|
552
|
+
"Malformed XML: Content at the start of the document (got '#{text}')", @source
|
|
553
|
+
)
|
|
519
554
|
end
|
|
520
555
|
end
|
|
521
556
|
return pull_event if @have_root
|
|
522
557
|
end
|
|
523
|
-
|
|
558
|
+
[:text, text]
|
|
524
559
|
end
|
|
525
560
|
rescue REXML::UndefinedNamespaceException
|
|
526
561
|
raise
|
|
527
562
|
rescue REXML::ParseException
|
|
528
563
|
raise
|
|
529
|
-
rescue =>
|
|
530
|
-
raise REXML::ParseException.new(
|
|
531
|
-
|
|
564
|
+
rescue StandardError => e
|
|
565
|
+
raise REXML::ParseException.new("Exception parsing",
|
|
566
|
+
@source, self, e || $!)
|
|
532
567
|
end
|
|
533
568
|
# NOTE: The end of the method never runs, because it is unreachable.
|
|
534
569
|
# All branches of code above have explicit unconditional return or raise statements.
|
|
535
570
|
end
|
|
536
571
|
private :pull_event
|
|
537
572
|
|
|
538
|
-
def entity(
|
|
573
|
+
def entity(reference, entities)
|
|
539
574
|
return unless entities
|
|
540
575
|
|
|
541
|
-
value = entities[
|
|
576
|
+
value = entities[reference]
|
|
542
577
|
return if value.nil?
|
|
543
578
|
|
|
544
579
|
record_entity_expansion
|
|
545
|
-
unnormalize(
|
|
580
|
+
unnormalize(value, entities)
|
|
546
581
|
end
|
|
547
582
|
|
|
548
583
|
# Escapes all possible entities
|
|
549
|
-
def normalize(
|
|
584
|
+
def normalize(input, entities = nil, entity_filter = nil)
|
|
550
585
|
copy = input.clone
|
|
551
586
|
# Doing it like this rather than in a loop improves the speed
|
|
552
|
-
copy.gsub!(
|
|
553
|
-
entities
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
587
|
+
copy.gsub!(EREFERENCE, "&")
|
|
588
|
+
if entities
|
|
589
|
+
entities.each do |key, value|
|
|
590
|
+
unless entity_filter && entity_filter.include?(entity)
|
|
591
|
+
copy.gsub!(value, "&#{key};")
|
|
592
|
+
end
|
|
593
|
+
end
|
|
594
|
+
end
|
|
595
|
+
copy.gsub!(EREFERENCE, "&")
|
|
596
|
+
DEFAULT_ENTITIES.each_value do |value|
|
|
597
|
+
copy.gsub!(value[3], value[1])
|
|
560
598
|
end
|
|
561
599
|
copy
|
|
562
600
|
end
|
|
563
601
|
|
|
564
602
|
# Unescapes all possible entities
|
|
565
|
-
def unnormalize(
|
|
566
|
-
if string.include?("\r")
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
matches = rv.scan(
|
|
572
|
-
return rv if matches.
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
603
|
+
def unnormalize(string, entities = nil, filter = nil)
|
|
604
|
+
rv = if string.include?("\r")
|
|
605
|
+
string.gsub(Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n")
|
|
606
|
+
else
|
|
607
|
+
string.dup
|
|
608
|
+
end
|
|
609
|
+
matches = rv.scan(REFERENCE_RE)
|
|
610
|
+
return rv if matches.empty?
|
|
611
|
+
|
|
612
|
+
rv.gsub!(Private::CHARACTER_REFERENCES) do
|
|
613
|
+
m = $1
|
|
614
|
+
code_point = if m.start_with?("x")
|
|
615
|
+
Integer(m[1..], 16)
|
|
616
|
+
else
|
|
617
|
+
Integer(m, 10)
|
|
618
|
+
end
|
|
619
|
+
[code_point].pack("U*")
|
|
620
|
+
end
|
|
621
|
+
matches.collect! { |x| x[0] }.compact!
|
|
583
622
|
if filter
|
|
584
623
|
matches.reject! do |entity_reference|
|
|
585
624
|
filter.include?(entity_reference)
|
|
586
625
|
end
|
|
587
626
|
end
|
|
588
|
-
if matches.size
|
|
627
|
+
if matches.size.positive?
|
|
589
628
|
matches.tally.each do |entity_reference, n|
|
|
590
629
|
entity_expansion_count_before = @entity_expansion_count
|
|
591
|
-
entity_value = entity(
|
|
630
|
+
entity_value = entity(entity_reference, entities)
|
|
592
631
|
if entity_value
|
|
593
632
|
if n > 1
|
|
594
633
|
entity_expansion_count_delta =
|
|
@@ -596,21 +635,22 @@ module REXML
|
|
|
596
635
|
record_entity_expansion(entity_expansion_count_delta * (n - 1))
|
|
597
636
|
end
|
|
598
637
|
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
|
|
599
|
-
rv.gsub!(
|
|
638
|
+
rv.gsub!(re, entity_value)
|
|
600
639
|
if rv.bytesize > @entity_expansion_text_limit
|
|
601
640
|
raise "entity expansion has grown too large"
|
|
602
641
|
end
|
|
603
642
|
else
|
|
604
643
|
er = DEFAULT_ENTITIES[entity_reference]
|
|
605
|
-
rv.gsub!(
|
|
644
|
+
rv.gsub!(er[0], er[2]) if er
|
|
606
645
|
end
|
|
607
646
|
end
|
|
608
|
-
rv.gsub!(
|
|
647
|
+
rv.gsub!(Private::DEFAULT_ENTITIES_PATTERNS["amp"], "&")
|
|
609
648
|
end
|
|
610
649
|
rv
|
|
611
650
|
end
|
|
612
651
|
|
|
613
652
|
private
|
|
653
|
+
|
|
614
654
|
def add_namespace(prefix, uri)
|
|
615
655
|
@namespaces_restore_stack.last[prefix] = @namespaces[prefix]
|
|
616
656
|
if uri.nil?
|
|
@@ -637,7 +677,7 @@ module REXML
|
|
|
637
677
|
end
|
|
638
678
|
end
|
|
639
679
|
|
|
640
|
-
def record_entity_expansion(delta=1)
|
|
680
|
+
def record_entity_expansion(delta = 1)
|
|
641
681
|
@entity_expansion_count += delta
|
|
642
682
|
if @entity_expansion_count > @entity_expansion_limit
|
|
643
683
|
raise "number of entity expansions exceeded, processing aborted."
|
|
@@ -646,7 +686,8 @@ module REXML
|
|
|
646
686
|
|
|
647
687
|
def need_source_encoding_update?(xml_declaration_encoding)
|
|
648
688
|
return false if xml_declaration_encoding.nil?
|
|
649
|
-
return false if /^UTF-16$/i
|
|
689
|
+
return false if /^UTF-16$/i.match?(xml_declaration_encoding)
|
|
690
|
+
|
|
650
691
|
true
|
|
651
692
|
end
|
|
652
693
|
|
|
@@ -657,11 +698,11 @@ module REXML
|
|
|
657
698
|
def parse_name(base_error_message)
|
|
658
699
|
md = @source.match(Private::NAME_PATTERN, true)
|
|
659
700
|
unless md
|
|
660
|
-
if @source.match?(/\S/um)
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
701
|
+
message = if @source.match?(/\S/um)
|
|
702
|
+
"#{base_error_message}: invalid name"
|
|
703
|
+
else
|
|
704
|
+
"#{base_error_message}: name is missing"
|
|
705
|
+
end
|
|
665
706
|
raise REXML::ParseException.new(message, @source)
|
|
666
707
|
end
|
|
667
708
|
md[0]
|
|
@@ -670,19 +711,20 @@ module REXML
|
|
|
670
711
|
def parse_id(base_error_message,
|
|
671
712
|
accept_external_id:,
|
|
672
713
|
accept_public_id:)
|
|
673
|
-
if accept_external_id
|
|
714
|
+
if accept_external_id && (md = @source.match(EXTERNAL_ID_PUBLIC, true))
|
|
674
715
|
pubid = system = nil
|
|
675
716
|
pubid_literal = md[1]
|
|
676
717
|
pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
|
|
677
718
|
system_literal = md[2]
|
|
678
719
|
system = system_literal[1..-2] if system_literal # Remove quote
|
|
679
720
|
["PUBLIC", pubid, system]
|
|
680
|
-
elsif accept_public_id
|
|
681
|
-
pubid =
|
|
721
|
+
elsif accept_public_id && (md = @source.match(PUBLIC_ID, true))
|
|
722
|
+
pubid = nil
|
|
682
723
|
pubid_literal = md[1]
|
|
683
724
|
pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
|
|
684
725
|
["PUBLIC", pubid, nil]
|
|
685
|
-
elsif accept_external_id
|
|
726
|
+
elsif accept_external_id && (md = @source.match(EXTERNAL_ID_SYSTEM,
|
|
727
|
+
true))
|
|
686
728
|
system = nil
|
|
687
729
|
system_literal = md[1]
|
|
688
730
|
system = system_literal[1..-2] if system_literal # Remove quote
|
|
@@ -699,13 +741,14 @@ module REXML
|
|
|
699
741
|
accept_public_id:)
|
|
700
742
|
public = /^\s*PUBLIC/um
|
|
701
743
|
system = /^\s*SYSTEM/um
|
|
702
|
-
if (accept_external_id
|
|
744
|
+
if (accept_external_id || accept_public_id) && @source.match?(/#{public}/um)
|
|
703
745
|
if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
|
704
746
|
return "public ID literal is missing"
|
|
705
747
|
end
|
|
706
748
|
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
|
|
707
749
|
return "invalid public ID literal"
|
|
708
750
|
end
|
|
751
|
+
|
|
709
752
|
if accept_public_id
|
|
710
753
|
if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
|
711
754
|
return "system ID literal is missing"
|
|
@@ -713,22 +756,25 @@ module REXML
|
|
|
713
756
|
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
|
714
757
|
return "invalid system literal"
|
|
715
758
|
end
|
|
759
|
+
|
|
716
760
|
"garbage after system literal"
|
|
717
761
|
else
|
|
718
762
|
"garbage after public ID literal"
|
|
719
763
|
end
|
|
720
|
-
elsif accept_external_id
|
|
764
|
+
elsif accept_external_id && @source.match?(/#{system}/um)
|
|
721
765
|
if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
|
722
766
|
return "system literal is missing"
|
|
723
767
|
end
|
|
724
768
|
unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
|
|
725
769
|
return "invalid system literal"
|
|
726
770
|
end
|
|
771
|
+
|
|
727
772
|
"garbage after system literal"
|
|
728
773
|
else
|
|
729
774
|
unless @source.match?(/^\s*(?:PUBLIC|SYSTEM)\s/um)
|
|
730
775
|
return "invalid ID type"
|
|
731
776
|
end
|
|
777
|
+
|
|
732
778
|
"ID type is missing"
|
|
733
779
|
end
|
|
734
780
|
end
|
|
@@ -736,13 +782,17 @@ module REXML
|
|
|
736
782
|
def process_comment
|
|
737
783
|
text = @source.read_until("-->")
|
|
738
784
|
unless text.end_with?("-->")
|
|
739
|
-
raise REXML::ParseException.new(
|
|
785
|
+
raise REXML::ParseException.new(
|
|
786
|
+
"Unclosed comment: Missing end '-->'", @source
|
|
787
|
+
)
|
|
740
788
|
end
|
|
789
|
+
|
|
741
790
|
text = text[0...-3]
|
|
742
791
|
|
|
743
|
-
if text.include?
|
|
792
|
+
if text.include?("--") || text.end_with?("-")
|
|
744
793
|
raise REXML::ParseException.new("Malformed comment", @source)
|
|
745
794
|
end
|
|
795
|
+
|
|
746
796
|
text
|
|
747
797
|
end
|
|
748
798
|
|
|
@@ -756,13 +806,17 @@ module REXML
|
|
|
756
806
|
content = @source.read_until("?>")
|
|
757
807
|
unless content.end_with?("?>")
|
|
758
808
|
@source.position = start_position
|
|
759
|
-
raise ParseException.new(
|
|
809
|
+
raise ParseException.new(
|
|
810
|
+
"Malformed XML: Unclosed processing instruction: <#{name}>", @source
|
|
811
|
+
)
|
|
760
812
|
end
|
|
761
813
|
content = content[0...-2]
|
|
762
814
|
else # e.g. <?name?>
|
|
763
815
|
content = nil
|
|
764
816
|
unless @source.match?("?>", true)
|
|
765
|
-
raise ParseException.new(
|
|
817
|
+
raise ParseException.new(
|
|
818
|
+
"Malformed XML: Unclosed processing instruction: <#{name}>", @source
|
|
819
|
+
)
|
|
766
820
|
end
|
|
767
821
|
end
|
|
768
822
|
[:processing_instruction, name, content]
|
|
@@ -771,37 +825,51 @@ module REXML
|
|
|
771
825
|
|
|
772
826
|
def xml_declaration
|
|
773
827
|
unless @version.nil?
|
|
774
|
-
raise ParseException.new(
|
|
828
|
+
raise ParseException.new(
|
|
829
|
+
"Malformed XML: XML declaration is duplicated", @source
|
|
830
|
+
)
|
|
775
831
|
end
|
|
776
832
|
if @document_status
|
|
777
|
-
raise ParseException.new(
|
|
833
|
+
raise ParseException.new(
|
|
834
|
+
"Malformed XML: XML declaration is not at the start", @source
|
|
835
|
+
)
|
|
778
836
|
end
|
|
779
837
|
unless @source.skip_spaces
|
|
780
|
-
raise ParseException.new(
|
|
838
|
+
raise ParseException.new(
|
|
839
|
+
"Malformed XML: XML declaration misses spaces before version", @source
|
|
840
|
+
)
|
|
781
841
|
end
|
|
782
842
|
unless @source.match?("version", true)
|
|
783
|
-
raise ParseException.new(
|
|
843
|
+
raise ParseException.new(
|
|
844
|
+
"Malformed XML: XML declaration misses version", @source
|
|
845
|
+
)
|
|
784
846
|
end
|
|
847
|
+
|
|
785
848
|
@version = parse_attribute_value_with_equal("xml")
|
|
786
849
|
unless @source.skip_spaces
|
|
787
850
|
unless @source.match?("?>", true)
|
|
788
|
-
raise ParseException.new("Malformed XML: Unclosed XML declaration",
|
|
851
|
+
raise ParseException.new("Malformed XML: Unclosed XML declaration",
|
|
852
|
+
@source)
|
|
789
853
|
end
|
|
854
|
+
|
|
790
855
|
encoding = normalize_xml_declaration_encoding(@source.encoding)
|
|
791
|
-
return [
|
|
856
|
+
return [:xmldecl, @version, encoding, nil] # e.g. <?xml version="1.0"?>
|
|
792
857
|
end
|
|
793
858
|
|
|
794
859
|
if @source.match?("encoding", true)
|
|
795
860
|
encoding = parse_attribute_value_with_equal("xml")
|
|
796
861
|
unless @source.skip_spaces
|
|
797
862
|
unless @source.match?("?>", true)
|
|
798
|
-
raise ParseException.new(
|
|
863
|
+
raise ParseException.new(
|
|
864
|
+
"Malformed XML: Unclosed XML declaration", @source
|
|
865
|
+
)
|
|
799
866
|
end
|
|
867
|
+
|
|
800
868
|
if need_source_encoding_update?(encoding)
|
|
801
869
|
@source.encoding = encoding
|
|
802
870
|
end
|
|
803
871
|
encoding ||= normalize_xml_declaration_encoding(@source.encoding)
|
|
804
|
-
return [
|
|
872
|
+
return [:xmldecl, @version, encoding, nil] # e.g. <?xml version="1.1" encoding="UTF-8"?>
|
|
805
873
|
end
|
|
806
874
|
end
|
|
807
875
|
|
|
@@ -810,12 +878,15 @@ module REXML
|
|
|
810
878
|
case standalone
|
|
811
879
|
when "yes", "no"
|
|
812
880
|
else
|
|
813
|
-
raise ParseException.new(
|
|
881
|
+
raise ParseException.new(
|
|
882
|
+
"Malformed XML: XML declaration standalone is not yes or no : <#{standalone}>", @source
|
|
883
|
+
)
|
|
814
884
|
end
|
|
815
885
|
end
|
|
816
886
|
@source.skip_spaces
|
|
817
887
|
unless @source.match?("?>", true)
|
|
818
|
-
raise ParseException.new("Malformed XML: Unclosed XML declaration",
|
|
888
|
+
raise ParseException.new("Malformed XML: Unclosed XML declaration",
|
|
889
|
+
@source)
|
|
819
890
|
end
|
|
820
891
|
|
|
821
892
|
if need_source_encoding_update?(encoding)
|
|
@@ -827,7 +898,7 @@ module REXML
|
|
|
827
898
|
# <?xml version="1.1" encoding="UTF-8" ?>
|
|
828
899
|
# <?xml version="1.1" standalone="yes"?>
|
|
829
900
|
# <?xml version="1.1" encoding="UTF-8" standalone="yes" ?>
|
|
830
|
-
[
|
|
901
|
+
[:xmldecl, @version, encoding, standalone]
|
|
831
902
|
end
|
|
832
903
|
|
|
833
904
|
if StringScanner::Version < "3.1.1"
|
|
@@ -843,8 +914,6 @@ module REXML
|
|
|
843
914
|
when 39 # "'".ord
|
|
844
915
|
@source.scan_byte
|
|
845
916
|
"'"
|
|
846
|
-
else
|
|
847
|
-
nil
|
|
848
917
|
end
|
|
849
918
|
end
|
|
850
919
|
end
|
|
@@ -865,14 +934,14 @@ module REXML
|
|
|
865
934
|
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
|
|
866
935
|
raise REXML::ParseException.new(message, @source)
|
|
867
936
|
end
|
|
868
|
-
value
|
|
937
|
+
value[0...-1]
|
|
869
938
|
end
|
|
870
939
|
|
|
871
940
|
def parse_attributes(prefixes)
|
|
872
941
|
attributes = {}
|
|
873
942
|
expanded_names = {}
|
|
874
943
|
closed = false
|
|
875
|
-
|
|
944
|
+
loop do
|
|
876
945
|
if @source.match?(">", true)
|
|
877
946
|
return attributes, closed
|
|
878
947
|
elsif @source.match?("/>", true)
|
|
@@ -887,14 +956,14 @@ module REXML
|
|
|
887
956
|
if prefix == "xmlns"
|
|
888
957
|
if local_part == "xml"
|
|
889
958
|
if value != Private::XML_PREFIXED_NAMESPACE
|
|
890
|
-
msg = "The 'xml' prefix must not be bound to any other namespace "+
|
|
959
|
+
msg = "The 'xml' prefix must not be bound to any other namespace " +
|
|
891
960
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
|
892
|
-
raise REXML::ParseException.new(
|
|
961
|
+
raise REXML::ParseException.new(msg, @source, self)
|
|
893
962
|
end
|
|
894
963
|
elsif local_part == "xmlns"
|
|
895
|
-
msg = "The 'xmlns' prefix must not be declared "+
|
|
964
|
+
msg = "The 'xmlns' prefix must not be declared " +
|
|
896
965
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
|
897
|
-
raise REXML::ParseException.new(
|
|
966
|
+
raise REXML::ParseException.new(msg, @source, self)
|
|
898
967
|
end
|
|
899
968
|
add_namespace(local_part, value)
|
|
900
969
|
elsif prefix
|
|
@@ -911,10 +980,7 @@ module REXML
|
|
|
911
980
|
expanded_name = [uri, local_part]
|
|
912
981
|
existing_prefix = expanded_names[expanded_name]
|
|
913
982
|
if existing_prefix
|
|
914
|
-
message = "Namespace conflict in adding attribute "
|
|
915
|
-
"\"#{local_part}\": " +
|
|
916
|
-
"Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
|
|
917
|
-
"prefix \"#{prefix}\" = \"#{uri}\""
|
|
983
|
+
message = "Namespace conflict in adding attribute \"#{local_part}\": Prefix \"#{existing_prefix}\" = \"#{uri}\" and prefix \"#{prefix}\" = \"#{uri}\""
|
|
918
984
|
raise REXML::ParseException.new(message, @source, self)
|
|
919
985
|
end
|
|
920
986
|
expanded_names[expanded_name] = prefix
|
|
@@ -931,22 +997,20 @@ module REXML
|
|
|
931
997
|
end
|
|
932
998
|
end
|
|
933
999
|
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
end
|
|
952
|
-
=end
|
|
1000
|
+
# case event[0]
|
|
1001
|
+
# when :start_element
|
|
1002
|
+
# when :text
|
|
1003
|
+
# when :end_element
|
|
1004
|
+
# when :processing_instruction
|
|
1005
|
+
# when :cdata
|
|
1006
|
+
# when :comment
|
|
1007
|
+
# when :xmldecl
|
|
1008
|
+
# when :start_doctype
|
|
1009
|
+
# when :end_doctype
|
|
1010
|
+
# when :externalentity
|
|
1011
|
+
# when :elementdecl
|
|
1012
|
+
# when :entity
|
|
1013
|
+
# when :attlistdecl
|
|
1014
|
+
# when :notationdecl
|
|
1015
|
+
# when :end_doctype
|
|
1016
|
+
# end
|