moxml 0.1.21 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/opal.yml +37 -0
- data/.rspec-opal +5 -0
- data/Gemfile +6 -0
- data/Rakefile +67 -0
- data/lib/compat/opal/rexml/namespace.rb +56 -0
- data/lib/compat/opal/rexml/parsers/baseparser.rb +952 -0
- data/lib/compat/opal/rexml/source.rb +213 -0
- data/lib/compat/opal/rexml/text.rb +418 -0
- data/lib/compat/opal/rexml/xmltokens.rb +45 -0
- data/lib/compat/opal/rexml_compat.rb +76 -0
- data/lib/moxml/adapter/customized_rexml/formatter.rb +11 -10
- data/lib/moxml/adapter/headed_ox.rb +2 -6
- data/lib/moxml/adapter/libxml.rb +5 -20
- data/lib/moxml/adapter/nokogiri.rb +7 -18
- data/lib/moxml/adapter/oga.rb +4 -22
- data/lib/moxml/adapter/ox.rb +8 -23
- data/lib/moxml/adapter/rexml.rb +29 -33
- data/lib/moxml/adapter.rb +38 -8
- data/lib/moxml/config.rb +1 -1
- data/lib/moxml/entity_registry.rb +36 -31
- data/lib/moxml/entity_registry_opal_data.rb +2137 -0
- data/lib/moxml/node.rb +19 -26
- data/lib/moxml/sax/namespace_splitter.rb +54 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils.rb +9 -1
- data/spec/consistency/adapter_parity_spec.rb +1 -1
- data/spec/integration/all_adapters_spec.rb +1 -1
- data/spec/integration/w3c_namespace_spec.rb +1 -1
- data/spec/moxml/adapter/ox_spec.rb +8 -0
- data/spec/moxml/adapter/platform_spec.rb +69 -0
- data/spec/moxml/adapter/shared_examples/adapter_contract.rb +0 -6
- data/spec/moxml/entity_registry_spec.rb +10 -0
- data/spec/moxml/native_attachment/opal_spec.rb +39 -2
- data/spec/moxml/node_type_map_spec.rb +43 -0
- data/spec/moxml/opal_rexml_adapter_spec.rb +14 -0
- data/spec/moxml/opal_smoke_spec.rb +61 -0
- data/spec/moxml/sax/namespace_splitter_spec.rb +67 -0
- data/spec/moxml/text_spec.rb +1 -1
- data/spec/spec_helper.rb +32 -13
- data/spec/support/opal.rb +16 -0
- metadata +17 -1
|
@@ -0,0 +1,952 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
require 'rexml/parseexception'
|
|
3
|
+
require 'rexml/undefinednamespaceexception'
|
|
4
|
+
require 'rexml/security'
|
|
5
|
+
require 'rexml/source'
|
|
6
|
+
require 'set'
|
|
7
|
+
require "strscan"
|
|
8
|
+
|
|
9
|
+
module REXML
|
|
10
|
+
module Parsers
|
|
11
|
+
unless [].respond_to?(:tally)
|
|
12
|
+
# Refinement that supplies Enumerable#tally; the surrounding guard only
# defines it when Array does not already respond to :tally.
module EnumerableTally
  refine Enumerable do
    # Counts occurrences of each element.
    #
    # Returns a plain Hash (no default value) mapping element => count.
    def tally
      each_with_object({}) do |element, occurrences|
        occurrences[element] = occurrences.fetch(element, 0) + 1
      end
    end
  end
end
|
|
24
|
+
using EnumerableTally
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
if StringScanner::Version < "3.0.8"
|
|
28
|
+
# Refinement that supplies StringScanner#captures; the surrounding guard only
# defines it for StringScanner versions that compare below "3.0.8".
module StringScannerCaptures
  refine StringScanner do
    # Returns the capture groups of the last match (group 0, the whole
    # match, is excluded) as an Array.
    def captures
      group_indexes = (1...size).to_a
      values_at(*group_indexes)
    end
  end
end
|
|
35
|
+
using StringScannerCaptures
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# = Using the Pull Parser
|
|
39
|
+
# <em>This API is experimental, and subject to change.</em>
|
|
40
|
+
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
|
41
|
+
# while parser.has_next?
|
|
42
|
+
# res = parser.next
|
|
43
|
+
# puts res[1]['att'] if res.start_tag? and res[0] == 'b'
|
|
44
|
+
# end
|
|
45
|
+
# See the PullEvent class for information on the content of the results.
|
|
46
|
+
# The data is identical to the arguments passed for the various events to
|
|
47
|
+
# the StreamListener API.
|
|
48
|
+
#
|
|
49
|
+
# Notice that:
|
|
50
|
+
# parser = PullParser.new( "<a>BAD DOCUMENT" )
|
|
51
|
+
# while parser.has_next?
|
|
52
|
+
# res = parser.next
|
|
53
|
+
# raise res[1] if res.error?
|
|
54
|
+
# end
|
|
55
|
+
#
|
|
56
|
+
# Nat Price gave me some good ideas for the API.
|
|
57
|
+
class BaseParser
|
|
58
|
+
LETTER = 'A-Za-z'
|
|
59
|
+
DIGIT = '0-9'
|
|
60
|
+
|
|
61
|
+
COMBININGCHAR = '' # TODO
|
|
62
|
+
EXTENDER = '' # TODO
|
|
63
|
+
|
|
64
|
+
NCNAME_STR= "[#{LETTER}_][-A-Za-z0-9._#{COMBININGCHAR}#{EXTENDER}]*"
|
|
65
|
+
QNAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
|
|
66
|
+
QNAME = /(#{QNAME_STR})/
|
|
67
|
+
|
|
68
|
+
# Just for backward compatibility. For example, kramdown uses this.
|
|
69
|
+
# It's not used in REXML.
|
|
70
|
+
UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
|
|
71
|
+
|
|
72
|
+
NAMECHAR = '[\-\w\.:]'
|
|
73
|
+
NAME = "([\\w:]#{NAMECHAR}*)"
|
|
74
|
+
NMTOKEN = "(?:#{NAMECHAR})+"
|
|
75
|
+
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
|
|
76
|
+
REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)"
|
|
77
|
+
REFERENCE_RE = /#{REFERENCE}/
|
|
78
|
+
|
|
79
|
+
DOCTYPE_START = /^\s*<!DOCTYPE\s/um
|
|
80
|
+
DOCTYPE_END = /^\s*\]\s*>/um
|
|
81
|
+
ATTRIBUTE_PATTERN = /\s*(#{QNAME_STR})\s*=\s*(["'])(.*?)\4/um
|
|
82
|
+
COMMENT_START = /^<!--/u
|
|
83
|
+
COMMENT_PATTERN = /<!--(.*?)-->/um
|
|
84
|
+
CDATA_START = /^<!\[CDATA\[/u
|
|
85
|
+
CDATA_END = /^\s*\]\s*>/um
|
|
86
|
+
CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
|
|
87
|
+
XMLDECL_START = /^<\?xml\s/u;
|
|
88
|
+
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
|
|
89
|
+
INSTRUCTION_START = /^<\?/u
|
|
90
|
+
INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um
|
|
91
|
+
TAG_MATCH = /^<((?:#{QNAME_STR}))/um
|
|
92
|
+
CLOSE_MATCH = /^\s*<\/(#{QNAME_STR})\s*>/um
|
|
93
|
+
|
|
94
|
+
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
|
|
95
|
+
ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
|
|
96
|
+
STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
|
|
97
|
+
|
|
98
|
+
ENTITY_START = /^\s*<!ENTITY/
|
|
99
|
+
ELEMENTDECL_START = /^\s*<!ELEMENT/um
|
|
100
|
+
ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
|
|
101
|
+
SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
|
|
102
|
+
ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
|
|
103
|
+
NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
|
|
104
|
+
ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
|
|
105
|
+
ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
|
|
106
|
+
ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
|
|
107
|
+
DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
|
|
108
|
+
ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
|
|
109
|
+
ATTDEF_RE = /#{ATTDEF}/
|
|
110
|
+
ATTLISTDECL_START = /^\s*<!ATTLIST/um
|
|
111
|
+
ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
|
112
|
+
|
|
113
|
+
TEXT_PATTERN = /^([^<]*)/um
|
|
114
|
+
|
|
115
|
+
# Entity constants
|
|
116
|
+
PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
|
|
117
|
+
SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
|
|
118
|
+
PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
|
|
119
|
+
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
|
|
120
|
+
NDATADECL = "\\s+NDATA\\s+#{NAME}"
|
|
121
|
+
PEREFERENCE = "%#{NAME};"
|
|
122
|
+
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
|
|
123
|
+
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
|
|
124
|
+
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
|
|
125
|
+
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
|
126
|
+
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
|
127
|
+
ENTITYDECL = /\s*(?:#{GEDECL})|\s*(?:#{PEDECL})/um
|
|
128
|
+
|
|
129
|
+
NOTATIONDECL_START = /^\s*<!NOTATION/um
|
|
130
|
+
EXTERNAL_ID_PUBLIC = /^\s*PUBLIC\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*/um
|
|
131
|
+
EXTERNAL_ID_SYSTEM = /^\s*SYSTEM\s+#{SYSTEMLITERAL}\s*/um
|
|
132
|
+
PUBLIC_ID = /^\s*PUBLIC\s+#{PUBIDLITERAL}\s*/um
|
|
133
|
+
|
|
134
|
+
EREFERENCE = /&(?!#{NAME};)/
|
|
135
|
+
|
|
136
|
+
# Predefined XML entities (other than "amp", which is handled separately).
# Each value is a 4-tuple:
#   [0] Regexp matching the entity reference   (used when unescaping)
#   [1] the entity reference text              (used when escaping)
#   [2] the literal character                  (used when unescaping)
#   [3] Regexp matching the literal character  (used when escaping)
# Slots 0/1 must be the "&name;" forms; if they equal the literal character
# every substitution driven by this table degenerates into a no-op.
DEFAULT_ENTITIES = {
  'gt' => [/&gt;/, '&gt;', '>', />/],
  'lt' => [/&lt;/, '&lt;', '<', /</],
  'quot' => [/&quot;/, '&quot;', '"', /"/],
  "apos" => [/&apos;/, "&apos;", "'", /'/]
}
|
|
142
|
+
|
|
143
|
+
# Internal patterns used by the parser; built from the string/regexp
# fragments declared on the enclosing class.
module Private
  PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
  TAG_PATTERN = /((?:#{QNAME_STR}))\s*/um
  CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
  EQUAL_PATTERN = /\s*=\s*/um
  ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
  NAME_PATTERN = /#{NAME}/um
  GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
  PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
  ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
  CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
  CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
  # Entity name => Regexp matching "&name;" for the five predefined entities.
  DEFAULT_ENTITIES_PATTERNS =
    ['gt', 'lt', 'quot', 'apos', 'amp'].each_with_object({}) do |term, patterns|
      patterns[term] = /&#{term};/
    end
  XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
end
|
|
162
|
+
private_constant :Private
|
|
163
|
+
|
|
164
|
+
# Sets up a parser reading from +source+.
#
# +source+ is handed to #stream=, which wraps it via SourceFactory and
# resets the per-document state. Entity-expansion limits are read from
# Security at construction time.
def initialize(source)
  @listeners = []
  @prefixes = Set.new
  @version = nil
  @entity_expansion_count = 0
  @entity_expansion_limit = Security.entity_expansion_limit
  @entity_expansion_text_limit = Security.entity_expansion_text_limit
  self.stream = source
  @source.ensure_buffer
end
|
|
174
|
+
|
|
175
|
+
# Registers +listener+; #pull calls +listener.receive(event)+ for every
# event it returns. Returns the listener list.
def add_listener(listener)
  @listeners.push(listener)
end
|
|
178
|
+
|
|
179
|
+
attr_reader :source
|
|
180
|
+
attr_reader :entity_expansion_count
|
|
181
|
+
attr_writer :entity_expansion_limit
|
|
182
|
+
attr_writer :entity_expansion_text_limit
|
|
183
|
+
|
|
184
|
+
# Replaces the input: wraps +source+ with SourceFactory.create_from and
# then clears all per-document parsing state via #reset.
def stream=(source)
  @source = SourceFactory.create_from(source)
  reset
end
|
|
188
|
+
|
|
189
|
+
# Clears per-document state: pending self-closing tag, root/document status,
# open-tag list, pushed-back event stack, entities and namespace scopes.
# Only the built-in "xml" prefix stays bound afterwards.
def reset
  @closed = @document_status = nil
  @have_root = false
  @tags, @stack, @entities = [], [], []
  @namespaces = { "xml" => Private::XML_PREFIXED_NAMESPACE }
  @namespaces_restore_stack = []
end
|
|
199
|
+
|
|
200
|
+
# Current position as reported by the source, or 0 when the source does
# not expose a +position+ method (FIXME carried over from the original).
def position
  @source.respond_to?(:position) ? @source.position : 0
end
|
|
208
|
+
|
|
209
|
+
# Returns true if there are no more events
|
|
210
|
+
# True only when both the source and the pushed-back event stack are empty.
def empty?
  @source.empty? && @stack.empty?
end
|
|
213
|
+
|
|
214
|
+
# Returns true if there are more events. Synonymous with !empty?
|
|
215
|
+
# True while the source still has input or pushed-back events remain.
def has_next?
  !@source.empty? || !@stack.empty?
end
|
|
218
|
+
|
|
219
|
+
# Push an event back on the head of the stream. This method
|
|
220
|
+
# has (theoretically) infinite depth.
|
|
221
|
+
# Inserts +token+ at the front of the pushed-back event stack, so it is the
# next event handed out. Returns the stack.
def unshift(token)
  @stack.insert(0, token)
end
|
|
224
|
+
|
|
225
|
+
# Peek at the +depth+ event in the stack. The first element on the stack
|
|
226
|
+
# is at depth 0. If +depth+ is -1, will parse to the end of the input
|
|
227
|
+
# stream and return the last event, which is always :end_document.
|
|
228
|
+
# Be aware that this causes the stream to be parsed up to the +depth+
|
|
229
|
+
# event, so you can effectively pre-parse the entire document (pull the
|
|
230
|
+
# entire thing into memory) using this method.
|
|
231
|
+
# depth:: index of the event to look at (0 = next event, -1 = pull until
#         #empty? and look at the last one).
# Raises RuntimeError when +depth+ is below -1.
# Events pulled while looking ahead are appended to the pushed-back stack,
# so later pulls still see them in order.
def peek(depth = 0)
  raise %Q[Illegal argument "#{depth}"] if depth < -1
  pulled = []
  if depth == -1
    pulled.push(pull) until empty?
  else
    wanted = depth + 1
    pulled.push(pull) while @stack.size + pulled.size < wanted
  end
  @stack += pulled unless pulled.empty?
  @stack[depth]
end
|
|
244
|
+
|
|
245
|
+
# Returns the next event. This is a +PullEvent+ object.
|
|
246
|
+
# Lets the source discard already-parsed content, fetches the next event
# from #pull_event, passes it to every registered listener's +receive+,
# and returns it.
def pull
  @source.drop_parsed_content

  event = pull_event
  @listeners.each { |listener| listener.receive(event) }
  event
end
|
|
255
|
+
|
|
256
|
+
# Parses and returns the next event as an Array whose first element is the
# event type (:start_element, :end_element, :text, :comment, :cdata,
# :start_doctype, :end_doctype, :elementdecl, :entitydecl, :attlistdecl,
# :notationdecl, :externalentity, :end_document, or whatever
# #process_instruction returns).
#
# Raises REXML::ParseException on malformed input and
# REXML::UndefinedNamespaceException for an unbound element/attribute
# prefix. Any other StandardError raised while parsing element content is
# wrapped in a REXML::ParseException.
def pull_event
  # A self-closing tag was reported as :start_element on the previous call;
  # emit the matching :end_element now.
  if @closed
    closed_tag = @closed
    @closed = nil
    return [:end_element, closed_tag]
  end

  if empty?
    if @document_status == :in_doctype
      raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
    end
    unless @tags.empty?
      path = "/" + @tags.join("/")
      raise ParseException.new("Missing end tag for '#{path}'", @source)
    end
    unless @document_status == :in_element
      raise ParseException.new("Malformed XML: No root element", @source)
    end
    return [:end_document]
  end

  # Pushed-back events take priority over new input.
  return @stack.shift unless @stack.empty?

  @source.ensure_buffer

  # --- Prolog: nothing but PIs, comments or a DOCTYPE seen so far ---------
  if @document_status.nil?
    start_position = @source.position
    if @source.match?("<?", true)
      return process_instruction
    elsif @source.match?("<!", true)
      if @source.match?("--", true)
        return [:comment, process_comment]
      elsif @source.match?("DOCTYPE", true)
        base_error_message = "Malformed DOCTYPE"
        unless @source.skip_spaces
          problem = @source.match?(">") ? "name is missing" : "invalid name"
          @source.position = start_position
          raise REXML::ParseException.new("#{base_error_message}: #{problem}", @source)
        end
        name = parse_name(base_error_message)
        @source.skip_spaces
        if @source.match?("[", true)
          id = [nil, nil, nil]
          @document_status = :in_doctype
        elsif @source.match?(">", true)
          id = [nil, nil, nil]
          @document_status = :after_doctype
          @source.ensure_buffer
        else
          id = parse_id(base_error_message,
                        accept_external_id: true,
                        accept_public_id: false)
          # For backward compatibility: a SYSTEM id is reported in slot 1.
          id[1], id[2] = id[2], nil if id[0] == "SYSTEM"
          @source.skip_spaces
          if @source.match?("[", true)
            @document_status = :in_doctype
          elsif @source.match?(">", true)
            @document_status = :after_doctype
            @source.ensure_buffer
          else
            message = "#{base_error_message}: garbage after external ID"
            raise REXML::ParseException.new(message, @source)
          end
        end
        args = [:start_doctype, name, *id]
        # A DOCTYPE without internal subset is already complete: queue its
        # :end_doctype so it is the next event.
        if @document_status == :after_doctype
          @source.skip_spaces
          @stack << [:end_doctype]
        end
        return args
      else
        message = "Invalid XML"
        raise REXML::ParseException.new(message, @source)
      end
    end
  end

  # --- Inside the DOCTYPE internal subset ---------------------------------
  if @document_status == :in_doctype
    @source.skip_spaces
    start_position = @source.position
    if @source.match?("<!", true)
      if @source.match?("ELEMENT", true)
        md = @source.match(/(.*?)>/um, true)
        raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
        return [:elementdecl, "<!ELEMENT" + md[1]]
      elsif @source.match?("ENTITY", true)
        match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
        unless match_data
          raise REXML::ParseException.new("Malformed entity declaration", @source)
        end
        match = [:entitydecl, *match_data.captures.compact]
        ref = false
        if match[1] == '%'
          ref = true
          match.delete_at(1)
        end
        # Now we have to sort out what kind of entity reference this is
        if match[2] == 'SYSTEM'
          # External reference
          match[3] = match[3][1..-2] # PUBID
          match.delete_at(4) if match.size > 4 # Chop out NDATA decl
          # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
        elsif match[2] == 'PUBLIC'
          # External reference
          match[3] = match[3][1..-2] # PUBID
          match[4] = match[4][1..-2] # HREF
          match.delete_at(5) if match.size > 5 # Chop out NDATA decl
          # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
        elsif Private::PEREFERENCE_PATTERN.match?(match[2])
          raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
        else
          match[2] = match[2][1..-2]
          match.pop if match.size == 4
          # match is [ :entity, name, value ]
        end
        match << '%' if ref
        return match
      elsif @source.match?("ATTLIST", true)
        md = @source.match(Private::ATTLISTDECL_END, true)
        raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
        element = md[1]
        contents = "<!ATTLIST" + md[0]

        pairs = {}
        md[0].strip.scan(ATTDEF_RE).each do |attdef|
          next if attdef[3] == "#IMPLIED"

          attdef.compact!
          val = attdef[3]
          val = attdef[4] if val == "#FIXED "
          pairs[attdef[0]] = val
          # A defaulted xmlns:prefix attribute also binds that prefix.
          xmlns = /^xmlns:(.*)/.match(attdef[0])
          @namespaces[xmlns[1]] = val if xmlns
        end
        return [:attlistdecl, element, pairs, contents]
      elsif @source.match?("NOTATION", true)
        base_error_message = "Malformed notation declaration"
        unless @source.skip_spaces
          problem = @source.match?(">") ? "name is missing" : "invalid name"
          @source.position = start_position
          raise REXML::ParseException.new("#{base_error_message}: #{problem}", @source)
        end
        name = parse_name(base_error_message)
        id = parse_id(base_error_message,
                      accept_external_id: true,
                      accept_public_id: true)
        @source.skip_spaces
        unless @source.match?(">", true)
          message = "#{base_error_message}: garbage before end >"
          raise REXML::ParseException.new(message, @source)
        end
        return [:notationdecl, name, *id]
      elsif @source.match?("--", true)
        return [:comment, process_comment]
      else
        raise REXML::ParseException.new("Malformed node: Started with '<!' but not a comment nor ELEMENT,ENTITY,ATTLIST,NOTATION", @source)
      end
    elsif match = @source.match(/(%.*?;)\s*/um, true)
      return [:externalentity, match[1]]
    elsif @source.match?(/\]\s*>/um, true)
      @document_status = :after_doctype
      return [:end_doctype]
    else
      raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
    end
  end

  @source.skip_spaces if @document_status == :after_doctype

  # --- Element content -----------------------------------------------------
  begin
    start_position = @source.position
    if @source.match?("<", true)
      # :text's read_until may remain only "<" in buffer. In the
      # case, buffer is empty here. So we need to fill buffer
      # here explicitly.
      @source.ensure_buffer
      if @source.match?("/", true)
        @namespaces_restore_stack.pop
        last_tag = @tags.pop
        md = @source.match(Private::CLOSE_PATTERN, true)
        if md && !last_tag
          message = "Unexpected top-level end tag (got '#{md[1]}')"
          raise REXML::ParseException.new(message, @source)
        end
        if md.nil? || last_tag != md[1]
          message = "Missing end tag for '#{last_tag}'"
          message += " (got '#{md[1]}')" if md
          @source.position = start_position if md.nil?
          raise REXML::ParseException.new(message, @source)
        end
        return [:end_element, last_tag]
      elsif @source.match?("!", true)
        if @source.match?("--", true)
          return [:comment, process_comment]
        elsif @source.match?("[CDATA[", true)
          text = @source.read_until("]]>")
          unless text.end_with?("]]>")
            raise REXML::ParseException.new("Malformed CDATA: Missing end ']]>'", @source)
          end
          text = text[0...-3]
          return [:cdata, text]
        else
          raise REXML::ParseException.new("Malformed node: Started with '<!' but not a comment nor CDATA", @source)
        end
      elsif @source.match?("?", true)
        return process_instruction
      else
        # Get the next tag
        md = @source.match(Private::TAG_PATTERN, true)
        unless md
          @source.position = start_position
          raise REXML::ParseException.new("malformed XML: missing tag start", @source)
        end
        tag = md[1]
        @document_status = :in_element
        @prefixes.clear
        @prefixes << md[2] if md[2]
        push_namespaces_restore
        attributes, closed = parse_attributes(@prefixes)
        # Verify that all of the prefixes have been defined
        @prefixes.each do |prefix|
          unless @namespaces.key?(prefix)
            raise UndefinedNamespaceException.new(prefix,@source,self)
          end
        end

        if closed
          @closed = tag
          pop_namespaces_restore
        else
          if @tags.empty? && @have_root
            raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
          end
          @tags.push(tag)
        end
        @have_root = true
        return [:start_element, tag, attributes]
      end
    else
      text = @source.read_until("<")
      if text.end_with?("<")
        # Hand the "<" back to the source so the next call sees the tag.
        text = text[0...-1]
        @source.position -= "<".bytesize
      end
      if @tags.empty?
        unless /^\s*$/.match?(text)
          if @have_root
            raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
          else
            raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
          end
        end
        # Whitespace after the root element is skipped, not reported.
        return pull_event if @have_root
      end
      return [:text, text]
    end
  rescue REXML::UndefinedNamespaceException, REXML::ParseException
    raise
  rescue => error
    raise REXML::ParseException.new( "Exception parsing",
                                     @source, self, error )
  end
  # NOTE: The end of the method never runs, because it is unreachable.
  # All branches of code above have explicit unconditional return or raise statements.
end
|
|
536
|
+
private :pull_event
|
|
537
|
+
|
|
538
|
+
# Looks up +reference+ in +entities+ and returns its unnormalized value.
# Returns nil when +entities+ is falsy or holds no (non-nil) value for
# +reference+. Each successful lookup is counted via
# #record_entity_expansion before the value is expanded.
def entity(reference, entities)
  if entities
    value = entities[reference]
    unless value.nil?
      record_entity_expansion
      unnormalize(value, entities)
    end
  end
end
|
|
547
|
+
|
|
548
|
+
# Escapes all possible entities
|
|
549
|
+
# Returns an escaped copy of +input+.
#
# input::         String to escape; it is not modified (frozen input is
#                 accepted).
# entities::      optional Hash of entity name => value; each occurrence of
#                 a value is replaced by "&name;".
# entity_filter:: optional collection of entity names to leave unreplaced.
#
# Bare ampersands (those not starting a named reference) become "&amp;",
# and the characters listed in DEFAULT_ENTITIES are replaced through that
# table.
def normalize(input, entities = nil, entity_filter = nil)
  # dup rather than clone: clone preserves frozenness and would make every
  # gsub! below raise for frozen input.
  copy = input.dup
  # Doing it like this rather than in a loop improves the speed
  copy.gsub!(EREFERENCE, '&amp;')
  if entities
    entities.each do |key, value|
      # Filter on the entity *name* being processed.
      next if entity_filter && entity_filter.include?(key)

      copy.gsub!(value, "&#{key};")
    end
  end
  # Entity values substituted above may themselves contain bare ampersands.
  copy.gsub!(EREFERENCE, '&amp;')
  DEFAULT_ENTITIES.each do |_name, value|
    copy.gsub!(value[3], value[1])
  end
  copy
end
|
|
563
|
+
|
|
564
|
+
# Unescapes all possible entities
|
|
565
|
+
# Returns an unescaped copy of +string+.
#
# string::   text possibly containing character/entity references.
# entities:: optional Hash of entity name => value used to expand "&name;".
# filter::   optional collection of entity names that must NOT be expanded.
#
# Steps: CR / CRLF become LF; numeric character references are decoded;
# named references are expanded through #entity (falling back to
# DEFAULT_ENTITIES); finally "&amp;" becomes "&".
#
# Raises RuntimeError when the expanded text exceeds
# @entity_expansion_text_limit bytes; expansion counting (and its limit) is
# delegated to #record_entity_expansion.
def unnormalize(string, entities = nil, filter = nil)
  rv = if string.include?("\r")
         string.gsub(Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n")
       else
         string.dup
       end
  matches = rv.scan(REFERENCE_RE)
  return rv if matches.size == 0

  rv.gsub!(Private::CHARACTER_REFERENCES) {
    m = $1
    code_point = m.start_with?("x") ? Integer(m[1..-1], 16) : Integer(m, 10)
    [code_point].pack('U*')
  }
  # REFERENCE_RE captures only the name of named references; numeric
  # references yield nil and are dropped here.
  matches.collect! { |x| x[0] }.compact!
  matches.reject! { |entity_reference| filter.include?(entity_reference) } if filter
  if matches.size > 0
    matches.tally.each do |entity_reference, n|
      entity_expansion_count_before = @entity_expansion_count
      entity_value = entity(entity_reference, entities)
      if entity_value
        if n > 1
          # The value was expanded once but is substituted n times: charge
          # the expansions it cost for each additional occurrence.
          entity_expansion_count_delta =
            @entity_expansion_count - entity_expansion_count_before
          record_entity_expansion(entity_expansion_count_delta * (n - 1))
        end
        re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
        # Block form: the value is inserted literally. A replacement
        # *string* would interpret "\0", "\1", "\\" etc. inside the entity
        # value as back-references/escapes and corrupt it.
        rv.gsub!(re) { entity_value }
        if rv.bytesize > @entity_expansion_text_limit
          raise "entity expansion has grown too large"
        end
      else
        er = DEFAULT_ENTITIES[entity_reference]
        rv.gsub!(er[0], er[2]) if er
      end
    end
    rv.gsub!(Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&')
  end
  rv
end
|
|
612
|
+
|
|
613
|
+
private
|
|
614
|
+
# Binds +prefix+ to +uri+ (or unbinds it when +uri+ is nil). The previous
# binding is first saved in the innermost restore frame so that
# #pop_namespaces_restore can undo the change.
def add_namespace(prefix, uri)
  restore_frame = @namespaces_restore_stack.last
  restore_frame[prefix] = @namespaces[prefix]
  uri.nil? ? @namespaces.delete(prefix) : (@namespaces[prefix] = uri)
end
|
|
622
|
+
|
|
623
|
+
# Opens a new, empty restore frame on the namespace restore stack and
# returns it.
def push_namespaces_restore
  {}.tap { |frame| @namespaces_restore_stack.push(frame) }
end
|
|
628
|
+
|
|
629
|
+
# Removes the innermost restore frame and re-applies the bindings it saved:
# a nil entry means the prefix was unbound before and is removed again.
# Returns the popped frame.
def pop_namespaces_restore
  @namespaces_restore_stack.pop.each do |prefix, previous_uri|
    previous_uri.nil? ? @namespaces.delete(prefix) : (@namespaces[prefix] = previous_uri)
  end
end
|
|
639
|
+
|
|
640
|
+
# Adds +delta+ to the running entity-expansion count.
# Raises RuntimeError once the count exceeds @entity_expansion_limit;
# otherwise returns nil.
def record_entity_expansion(delta = 1)
  @entity_expansion_count += delta
  return unless @entity_expansion_count > @entity_expansion_limit

  raise "number of entity expansions exceeded, processing aborted."
end
|
|
646
|
+
|
|
647
|
+
# False when no encoding was declared or the declared encoding is exactly
# "UTF-16" (case-insensitive); true for every other declared encoding.
def need_source_encoding_update?(xml_declaration_encoding)
  if xml_declaration_encoding.nil? || /^UTF-16$/i =~ xml_declaration_encoding
    false
  else
    true
  end
end
|
|
652
|
+
|
|
653
|
+
# Maps "UTF-16BE" / "UTF-16LE" (case-insensitive) to "UTF-16"; returns nil
# for anything else, including nil.
def normalize_xml_declaration_encoding(xml_declaration_encoding)
  "UTF-16" if /^UTF-16(?:BE|LE)$/i.match?(xml_declaration_encoding)
end
|
|
656
|
+
|
|
657
|
+
# Consumes a name from the source and returns the matched text.
#
# Raises REXML::ParseException prefixed with +base_error_message+:
# "invalid name" when non-space input is present but is not a name,
# "name is missing" otherwise.
def parse_name(base_error_message)
  name_match = @source.match(Private::NAME_PATTERN, true)
  return name_match[0] if name_match

  problem = @source.match?(/\S/um) ? "invalid name" : "name is missing"
  raise REXML::ParseException.new("#{base_error_message}: #{problem}", @source)
end
|
|
669
|
+
|
|
670
|
+
# Consumes an external/public identifier from the source.
#
# accept_external_id:: allow 'PUBLIC "pubid" "system"' and 'SYSTEM "system"'.
# accept_public_id::   allow the short form 'PUBLIC "pubid"'.
#
# Returns [type, pubid, system] with the surrounding quotes removed and nil
# for parts that are absent. Raises REXML::ParseException
# ("<base_error_message>: <details>") when no accepted form matches.
def parse_id(base_error_message,
             accept_external_id:,
             accept_public_id:)
  # Strips the first and last character (the quotes) from a captured literal.
  unquote = ->(literal) { literal[1..-2] if literal }

  if accept_external_id && (md = @source.match(EXTERNAL_ID_PUBLIC, true))
    ["PUBLIC", unquote.call(md[1]), unquote.call(md[2])]
  elsif accept_public_id && (md = @source.match(PUBLIC_ID, true))
    ["PUBLIC", unquote.call(md[1]), nil]
  elsif accept_external_id && (md = @source.match(EXTERNAL_ID_SYSTEM, true))
    ["SYSTEM", nil, unquote.call(md[1])]
  else
    details = parse_id_invalid_details(accept_external_id: accept_external_id,
                                       accept_public_id: accept_public_id)
    raise REXML::ParseException.new("#{base_error_message}: #{details}", @source)
  end
end
|
|
697
|
+
|
|
698
|
+
def parse_id_invalid_details(accept_external_id:,
|
|
699
|
+
accept_public_id:)
|
|
700
|
+
public = /^\s*PUBLIC/um
|
|
701
|
+
system = /^\s*SYSTEM/um
|
|
702
|
+
if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
|
|
703
|
+
if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
|
|
704
|
+
return "public ID literal is missing"
|
|
705
|
+
end
|
|
706
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
|
|
707
|
+
return "invalid public ID literal"
|
|
708
|
+
end
|
|
709
|
+
if accept_public_id
|
|
710
|
+
if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
|
|
711
|
+
return "system ID literal is missing"
|
|
712
|
+
end
|
|
713
|
+
unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
|
|
714
|
+
return "invalid system literal"
|
|
715
|
+
end
|
|
716
|
+
"garbage after system literal"
|
|
717
|
+
else
|
|
718
|
+
"garbage after public ID literal"
|
|
719
|
+
end
|
|
720
|
+
elsif accept_external_id and @source.match?(/#{system}/um)
|
|
721
|
+
if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
|
|
722
|
+
return "system literal is missing"
|
|
723
|
+
end
|
|
724
|
+
unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
|
|
725
|
+
return "invalid system literal"
|
|
726
|
+
end
|
|
727
|
+
"garbage after system literal"
|
|
728
|
+
else
|
|
729
|
+
unless @source.match?(/^\s*(?:PUBLIC|SYSTEM)\s/um)
|
|
730
|
+
return "invalid ID type"
|
|
731
|
+
end
|
|
732
|
+
"ID type is missing"
|
|
733
|
+
end
|
|
734
|
+
end
|
|
735
|
+
|
|
736
|
+
# Reads up to the closing "-->" and returns the comment text without it.
#
# Raises REXML::ParseException when the terminator is missing, or when the
# text contains "--" or ends with "-".
def process_comment
  raw = @source.read_until("-->")
  unless raw.end_with?("-->")
    raise REXML::ParseException.new("Unclosed comment: Missing end '-->'", @source)
  end

  comment = raw[0...-3]
  if comment.include?("--") || comment.end_with?("-")
    raise REXML::ParseException.new("Malformed comment", @source)
  end
  comment
end
|
|
748
|
+
|
|
749
|
+
# Parses a processing instruction whose target name starts at the current
# position of @source.
#
# A target of exactly "xml" is delegated to xml_declaration and that result
# is returned as-is. Otherwise returns
# [:processing_instruction, name, content], where content is nil when the
# target is immediately followed by "?>".
#
# Raises ParseException when the closing "?>" is not found.
def process_instruction
  name = parse_name("Malformed XML: Invalid processing instruction node")
  return xml_declaration if name == "xml"

  # PITarget
  content = nil
  if @source.skip_spaces # e.g. <?name content?>
    start_position = @source.position
    content = @source.read_until("?>")
    unless content.end_with?("?>")
      # Rewind before raising so the source sits at the start of the content.
      @source.position = start_position
      raise ParseException.new("Malformed XML: Unclosed processing instruction: <#{name}>", @source)
    end
    content = content[0...-2]
  elsif !@source.match?("?>", true) # e.g. <?name?>
    raise ParseException.new("Malformed XML: Unclosed processing instruction: <#{name}>", @source)
  end

  [:processing_instruction, name, content]
end
|
|
771
|
+
|
|
772
|
+
# Parses the remainder of an XML declaration (the part after "<?xml") from
# @source and records the declared version in @version.
#
# Returns [:xmldecl, version, encoding, standalone]. When no encoding
# attribute is present, encoding is derived from @source.encoding through
# normalize_xml_declaration_encoding; standalone is nil unless declared.
#
# Raises ParseException when a declaration was already seen (@version set),
# when @document_status is set, when the version attribute or the spaces
# before it are missing, when standalone is neither "yes" nor "no", or when
# the closing "?>" is missing.
def xml_declaration
  raise ParseException.new("Malformed XML: XML declaration is duplicated", @source) unless @version.nil?
  raise ParseException.new("Malformed XML: XML declaration is not at the start", @source) if @document_status
  unless @source.skip_spaces
    raise ParseException.new("Malformed XML: XML declaration misses spaces before version", @source)
  end
  unless @source.match?("version", true)
    raise ParseException.new("Malformed XML: XML declaration misses version", @source)
  end

  @version = parse_attribute_value_with_equal("xml")
  unless @source.skip_spaces
    # No space after the version value: only "?>" may follow.
    # e.g. <?xml version="1.0"?>
    unless @source.match?("?>", true)
      raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
    end
    return [:xmldecl, @version, normalize_xml_declaration_encoding(@source.encoding), nil]
  end

  encoding = nil
  standalone = nil

  if @source.match?("encoding", true)
    encoding = parse_attribute_value_with_equal("xml")
    unless @source.skip_spaces
      # No space after the encoding value: only "?>" may follow.
      # e.g. <?xml version="1.1" encoding="UTF-8"?>
      unless @source.match?("?>", true)
        raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
      end
      @source.encoding = encoding if need_source_encoding_update?(encoding)
      encoding ||= normalize_xml_declaration_encoding(@source.encoding)
      return [:xmldecl, @version, encoding, nil]
    end
  end

  if @source.match?("standalone", true)
    standalone = parse_attribute_value_with_equal("xml")
    unless ["yes", "no"].include?(standalone)
      raise ParseException.new("Malformed XML: XML declaration standalone is not yes or no : <#{standalone}>", @source)
    end
  end

  @source.skip_spaces
  unless @source.match?("?>", true)
    raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
  end

  @source.encoding = encoding if need_source_encoding_update?(encoding)
  encoding ||= normalize_xml_declaration_encoding(@source.encoding)

  # e.g. <?xml version="1.0" ?>
  #      <?xml version="1.1" encoding="UTF-8" ?>
  #      <?xml version="1.1" standalone="yes"?>
  #      <?xml version="1.1" encoding="UTF-8" standalone="yes" ?>
  [:xmldecl, @version, encoding, standalone]
end
|
|
832
|
+
|
|
833
|
+
# scan_quote consumes one quote character (' or ") at the current position of
# @source and returns it as a String; it returns nil when no quote is there.
#
# The implementation is chosen once, when this file is loaded, by comparing
# StringScanner::Version with "3.1.1" as strings: below that version a regular
# expression match is used, otherwise the peek_byte / scan_byte calls are used.
if StringScanner::Version < "3.1.1"
  def scan_quote
    quote_match = @source.match(/(['"])/, true)
    quote_match[1] unless quote_match.nil?
  end
else
  def scan_quote
    byte = @source.peek_byte
    # 34 is '"'.ord and 39 is "'".ord
    return nil unless byte == 34 || byte == 39

    @source.scan_byte
    byte == 34 ? '"' : "'"
  end
end
|
|
851
|
+
|
|
852
|
+
# Parses `= <quote>value<quote>` at the current position of @source.
#
# name:: the attribute name; it is only used to build error messages.
#
# Returns the text between the quotes (closing quote removed).
# Raises REXML::ParseException when the equal sign, the opening quote or the
# closing quote is missing. When the closing quote is missing, @source is
# rewound to just after the opening quote before raising.
def parse_attribute_value_with_equal(name)
  unless @source.match?(Private::EQUAL_PATTERN, true)
    message = "Missing attribute equal: <#{name}>"
    raise REXML::ParseException.new(message, @source)
  end

  quote = scan_quote
  unless quote
    message = "Missing attribute value start quote: <#{name}>"
    raise REXML::ParseException.new(message, @source)
  end

  start_position = @source.position
  value = @source.read_until(quote)
  unless value.end_with?(quote)
    @source.position = start_position
    message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
    raise REXML::ParseException.new(message, @source)
  end

  # Strip the closing quote that read_until included.
  value[0...-1]
end
|
|
870
|
+
|
|
871
|
+
# Parses the attribute list of a start tag, up to and including the closing
# ">" or "/>".
#
# prefixes:: a collection that receives (via <<) every attribute prefix other
#            than "xmlns" and "xml".
#
# Returns [attributes, closed]: attributes maps each qualified attribute name
# to its value, and closed is true when the tag ended with "/>".
#
# "xmlns:*" attributes are also passed to add_namespace.
#
# Raises REXML::ParseException for an invalid attribute name, a duplicate
# attribute, an illegal binding of the "xml" / "xmlns" prefixes, or two
# prefixed attributes that share the same namespace URI and local name.
def parse_attributes(prefixes)
  attributes = {}
  seen_expanded_names = {}
  while true
    return attributes, false if @source.match?(">", true)
    return attributes, true if @source.match?("/>", true)

    qname = @source.match(QNAME, true)
    unless qname
      message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
      raise REXML::ParseException.new(message, @source)
    end

    # Read the captures before the source is advanced any further.
    name = qname[1]
    prefix = qname[2]
    local_part = qname[3]
    value = parse_attribute_value_with_equal(name)
    @source.skip_spaces

    namespace_declaration = (prefix == "xmlns")
    if namespace_declaration
      if local_part == "xml"
        if value != Private::XML_PREFIXED_NAMESPACE
          msg = "The 'xml' prefix must not be bound to any other namespace " +
                "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
          raise REXML::ParseException.new(msg, @source, self)
        end
      elsif local_part == "xmlns"
        msg = "The 'xmlns' prefix must not be declared " +
              "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
        raise REXML::ParseException.new(msg, @source, self)
      end
      add_namespace(local_part, value)
    elsif prefix
      prefixes << prefix unless prefix == "xml"
    end

    if attributes[name]
      msg = "Duplicate attribute #{name.inspect}"
      raise REXML::ParseException.new(msg, @source, self)
    end

    unless namespace_declaration
      # Detect two attributes that expand to the same (URI, local name) pair.
      uri = @namespaces[prefix]
      expanded_name = [uri, local_part]
      existing_prefix = seen_expanded_names[expanded_name]
      if existing_prefix
        message = "Namespace conflict in adding attribute " +
                  "\"#{local_part}\": " +
                  "Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
                  "prefix \"#{prefix}\" = \"#{uri}\""
        raise REXML::ParseException.new(message, @source, self)
      end
      seen_expanded_names[expanded_name] = prefix
    end

    attributes[name] = value
  end
end
|
|
930
|
+
end
|
|
931
|
+
end
|
|
932
|
+
end
|
|
933
|
+
|
|
934
|
+
=begin
|
|
935
|
+
case event[0]
|
|
936
|
+
when :start_element
|
|
937
|
+
when :text
|
|
938
|
+
when :end_element
|
|
939
|
+
when :processing_instruction
|
|
940
|
+
when :cdata
|
|
941
|
+
when :comment
|
|
942
|
+
when :xmldecl
|
|
943
|
+
when :start_doctype
|
|
944
|
+
when :end_doctype
|
|
945
|
+
when :externalentity
|
|
946
|
+
when :elementdecl
|
|
947
|
+
when :entity
|
|
948
|
+
when :attlistdecl
|
|
949
|
+
when :notationdecl
|
|
950
|
+
when :end_doctype
|
|
951
|
+
end
|
|
952
|
+
=end
|