rexml 3.1.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rexml might be problematic. Click here for more details.

Files changed (61) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.travis.yml +10 -0
  4. data/Gemfile +6 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +60 -0
  7. data/Rakefile +10 -0
  8. data/bin/console +14 -0
  9. data/bin/setup +8 -0
  10. data/lib/rexml/attlistdecl.rb +63 -0
  11. data/lib/rexml/attribute.rb +192 -0
  12. data/lib/rexml/cdata.rb +68 -0
  13. data/lib/rexml/child.rb +97 -0
  14. data/lib/rexml/comment.rb +80 -0
  15. data/lib/rexml/doctype.rb +270 -0
  16. data/lib/rexml/document.rb +291 -0
  17. data/lib/rexml/dtd/attlistdecl.rb +11 -0
  18. data/lib/rexml/dtd/dtd.rb +47 -0
  19. data/lib/rexml/dtd/elementdecl.rb +18 -0
  20. data/lib/rexml/dtd/entitydecl.rb +57 -0
  21. data/lib/rexml/dtd/notationdecl.rb +40 -0
  22. data/lib/rexml/element.rb +1267 -0
  23. data/lib/rexml/encoding.rb +51 -0
  24. data/lib/rexml/entity.rb +171 -0
  25. data/lib/rexml/formatters/default.rb +112 -0
  26. data/lib/rexml/formatters/pretty.rb +142 -0
  27. data/lib/rexml/formatters/transitive.rb +58 -0
  28. data/lib/rexml/functions.rb +447 -0
  29. data/lib/rexml/instruction.rb +71 -0
  30. data/lib/rexml/light/node.rb +196 -0
  31. data/lib/rexml/namespace.rb +48 -0
  32. data/lib/rexml/node.rb +76 -0
  33. data/lib/rexml/output.rb +30 -0
  34. data/lib/rexml/parent.rb +166 -0
  35. data/lib/rexml/parseexception.rb +52 -0
  36. data/lib/rexml/parsers/baseparser.rb +586 -0
  37. data/lib/rexml/parsers/lightparser.rb +59 -0
  38. data/lib/rexml/parsers/pullparser.rb +197 -0
  39. data/lib/rexml/parsers/sax2parser.rb +273 -0
  40. data/lib/rexml/parsers/streamparser.rb +61 -0
  41. data/lib/rexml/parsers/treeparser.rb +101 -0
  42. data/lib/rexml/parsers/ultralightparser.rb +57 -0
  43. data/lib/rexml/parsers/xpathparser.rb +675 -0
  44. data/lib/rexml/quickpath.rb +266 -0
  45. data/lib/rexml/rexml.rb +32 -0
  46. data/lib/rexml/sax2listener.rb +98 -0
  47. data/lib/rexml/security.rb +28 -0
  48. data/lib/rexml/source.rb +298 -0
  49. data/lib/rexml/streamlistener.rb +93 -0
  50. data/lib/rexml/syncenumerator.rb +33 -0
  51. data/lib/rexml/text.rb +424 -0
  52. data/lib/rexml/undefinednamespaceexception.rb +9 -0
  53. data/lib/rexml/validation/relaxng.rb +539 -0
  54. data/lib/rexml/validation/validation.rb +144 -0
  55. data/lib/rexml/validation/validationexception.rb +10 -0
  56. data/lib/rexml/xmldecl.rb +116 -0
  57. data/lib/rexml/xmltokens.rb +85 -0
  58. data/lib/rexml/xpath.rb +81 -0
  59. data/lib/rexml/xpath_parser.rb +934 -0
  60. data/rexml.gemspec +42 -0
  61. metadata +131 -0
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: false
module REXML
  # Error raised when parsing fails. Besides the message it can carry the
  # source being read, the parser that raised it, and an underlying exception
  # that triggered the failure.
  class ParseException < RuntimeError
    attr_accessor :source, :parser, :continued_exception

    # message::   human-readable description of the failure
    # source::    object being parsed; #to_s reads its +buffer+ and
    #             +current_line+ when present (may be nil)
    # parser::    parser that raised the error (may be nil)
    # exception:: original exception being wrapped (may be nil)
    def initialize( message, source=nil, parser=nil, exception=nil )
      super(message)
      @source = source
      @parser = parser
      @continued_exception = exception
    end

    # Builds the full report: the wrapped exception (if any), the message,
    # and, when a source is attached, line/position plus a slice of the
    # source buffer.
    def to_s
      err = +""

      # Quote the original exception, if there was one
      if @continued_exception
        err << @continued_exception.inspect
        err << "\n"
        # An exception that was constructed but never raised has no backtrace.
        backtrace = @continued_exception.backtrace
        err << backtrace.join("\n") if backtrace
        err << "\n...\n"
      end

      # Get the error message
      err << super

      # Add contextual information
      if @source
        err << "\nLine: #{line}\n"
        err << "Position: #{position}\n"
        err << "Last 80 unconsumed characters:\n"
        snippet = @source.buffer[0..80].force_encoding("ASCII-8BIT").gsub(/\n/, ' ')
        # A non-ASCII message cannot be concatenated with non-ASCII binary
        # bytes; fall back to a binary report only in that case.
        err.force_encoding("ASCII-8BIT") unless Encoding.compatible?(err, snippet)
        err << snippet
      end

      err
    end

    # First element of the source's +current_line+ value, or nil when no
    # such value is available.
    def position
      info = source_current_line
      info[0] if info
    end

    # Third element of the source's +current_line+ value, or nil when no
    # such value is available.
    def line
      info = source_current_line
      info[2] if info
    end

    # The source's +current_line+ value, or nil when there is no source or
    # the source does not provide one.
    def context
      source_current_line
    end

    private

    # Single nil-safe access point to @source.current_line.
    def source_current_line
      return nil unless @source && @source.respond_to?(:current_line)
      @source.current_line
    end
  end
end
@@ -0,0 +1,586 @@
1
+ # frozen_string_literal: false
2
+ require_relative '../parseexception'
3
+ require_relative '../undefinednamespaceexception'
4
+ require_relative '../source'
5
+ require 'set'
6
+ require "strscan"
7
+
8
+ module REXML
9
+ module Parsers
10
+ # = Using the Pull Parser
11
+ # <em>This API is experimental, and subject to change.</em>
12
+ # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
13
+ # while parser.has_next?
14
+ # res = parser.next
15
+ # puts res[1]['att'] if res.start_tag? and res[0] == 'b'
16
+ # end
17
+ # See the PullEvent class for information on the content of the results.
18
+ # The data is identical to the arguments passed for the various events to
19
+ # the StreamListener API.
20
+ #
21
+ # Notice that:
22
+ # parser = PullParser.new( "<a>BAD DOCUMENT" )
23
+ # while parser.has_next?
24
+ # res = parser.next
25
+ # raise res[1] if res.error?
26
+ # end
27
+ #
28
+ # Nat Price gave me some good ideas for the API.
29
+ class BaseParser
# --- Character classes and XML names -------------------------------------
LETTER = '[:alpha:]'
DIGIT = '[:digit:]'

COMBININGCHAR = '' # TODO
EXTENDER = '' # TODO

# NCNAME_STR is a colon-free name; QNAME_STR adds an optional "prefix:".
NCNAME_STR= "[#{LETTER}_][-[:alnum:]._#{COMBININGCHAR}#{EXTENDER}]*"
QNAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
QNAME = /(#{QNAME_STR})/

NAMECHAR = '[\-\w\.:]'
NAME = "([\\w:]#{NAMECHAR}*)"
NMTOKEN = "(?:#{NAMECHAR})+"
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
# Named entity, decimal character or hexadecimal character reference.
REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)"
REFERENCE_RE = /#{REFERENCE}/

# --- Markup recognisers --------------------------------------------------
# *_START patterns classify a lookahead; *_PATTERN patterns capture content.
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
DOCTYPE_END = /\A\s*\]\s*>/um
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
# Captures: 1 qualified name, 2 prefix, 3 local part, 4 quote, 5 value.
ATTRIBUTE_PATTERN = /\s*(#{QNAME_STR})\s*=\s*(["'])(.*?)\4/um
COMMENT_START = /\A<!--/u
COMMENT_PATTERN = /<!--(.*?)-->/um
CDATA_START = /\A<!\[CDATA\[/u
CDATA_END = /\A\s*\]\s*>/um
CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
XMLDECL_START = /\A<\?xml\s/u
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
INSTRUCTION_START = /\A<\?/u
INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um
TAG_MATCH = /^<((?>#{QNAME_STR}))/um
CLOSE_MATCH = /^\s*<\/(#{QNAME_STR})\s*>/um

# --- Pseudo-attributes of the XML declaration ----------------------------
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um

# --- DOCTYPE internal subset ---------------------------------------------
ENTITY_START = /\A\s*<!ENTITY/
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
ELEMENTDECL_START = /\A\s*<!ELEMENT/um
ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
ATTDEF_RE = /#{ATTDEF}/
ATTLISTDECL_START = /\A\s*<!ATTLIST/um
ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
NOTATIONDECL_START = /\A\s*<!NOTATION/um
PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um

# Character data: everything up to (not including) the next '<'.
TEXT_PATTERN = /\A([^<]*)/um

# --- Entity declarations -------------------------------------------------
PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
NDATADECL = "\\s+NDATA\\s+#{NAME}"
PEREFERENCE = "%#{NAME};"
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um

# An ampersand that does not start a named entity reference.
EREFERENCE = /&(?!#{NAME};)/

# Per entity name: [ pattern for the escaped form, escaped form,
#                    literal character, pattern for the literal character ]
DEFAULT_ENTITIES = {
  'gt' => [/&gt;/, '&gt;', '>', />/],
  'lt' => [/&lt;/, '&lt;', '<', /</],
  'quot' => [/&quot;/, '&quot;', '"', /"/],
  "apos" => [/&apos;/, "&apos;", "'", /'/]
}
110
+
# Creates a parser for +source+. The source is installed through #stream=,
# and the parser starts with no registered listeners.
def initialize(source)
  self.stream = source
  @listeners = Array.new
end
115
+
# Registers +listener+; #pull calls +receive+ on every registered listener
# with each event it returns.
def add_listener(listener)
  @listeners.push(listener)
end
119
+
120
+ attr_reader :source
121
+
# Replaces the input with +source+ (wrapped via SourceFactory.create_from)
# and resets all per-document parser state.
def stream=(source)
  @source = SourceFactory.create_from(source)
  @closed = @document_status = nil
  # Each collection must be its own array; they are mutated independently.
  @tags = []
  @stack = []
  @entities = []
  @nsstack = []
end
131
+
# Position reported by the underlying source, or 0 when the source does not
# expose a +position+ method.
def position
  # FIXME
  return 0 unless @source.respond_to?(:position)
  @source.position
end
140
+
# True when there is nothing left to report: the source is exhausted and no
# pushed-back events are waiting on the stack.
def empty?
  @source.empty? && @stack.empty?
end
145
+
# True while the source still has input or pushed-back events remain.
# Synonymous with !empty?
def has_next?
  !@source.empty? || !@stack.empty?
end
150
+
# Pushes +token+ back onto the head of the event stack so it is the next
# event returned. Any number of events may be pushed back.
def unshift(token)
  @stack.insert(0, token)
end
156
+
# Looks ahead at the event +depth+ places from the head of the stack
# (0 is the next event) without removing it. A +depth+ of -1 pulls events
# until the parser reports empty? and returns the last one. Every event
# pulled to satisfy the request is kept on the stack, so peeking far ahead
# holds those events in memory.
#
# Raises RuntimeError when +depth+ is less than -1.
def peek(depth=0)
  raise %Q[Illegal argument "#{depth}"] if depth < -1
  pulled = []
  if depth == -1
    pulled << pull until empty?
  else
    wanted = depth + 1
    pulled << pull while @stack.size + pulled.size < wanted
  end
  @stack += pulled unless pulled.empty?
  @stack[depth]
end
176
+
# Returns the next event and hands the same event to every registered
# listener (via +receive+) before returning it.
def pull
  event = pull_event
  @listeners.each { |listener| listener.receive(event) }
  event
end
185
+
# Parses and returns the next event as an array whose first element is the
# event type: :end_element, :end_document, :comment, :xmldecl,
# :processing_instruction, :start_doctype, :end_doctype, :externalentity,
# :elementdecl, :entitydecl, :attlistdecl, :notationdecl, :cdata,
# :start_element or :text.
#
# Parsing runs in three phases tracked by @document_status: nil (prolog,
# before any DOCTYPE), :in_doctype (inside an internal subset) and
# :after_doctype (document content).
#
# Raises REXML::ParseException for malformed input, including constructs
# whose closing delimiter is missing.
def pull_event
  # A self-closing tag reports its :end_element on the following call.
  if @closed
    x, @closed = @closed, nil
    return [ :end_element, x ]
  end
  return [ :end_document ] if empty?
  # Pushed-back / peeked events take priority over new input.
  return @stack.shift if @stack.size > 0
  @source.read if @source.buffer.size<2

  if @document_status == nil
    # Look at the next whitespace run or "<...>" chunk to classify it.
    word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
    word = word[1] unless word.nil?
    case word
    when COMMENT_START
      md = @source.match( COMMENT_PATTERN, true )
      # The lookahead stops at the first '>', which need not be "-->".
      raise REXML::ParseException.new("Unclosed comment", @source) if md.nil?
      return [ :comment, md[1] ]
    when XMLDECL_START
      md = @source.match( XMLDECL_PATTERN, true )
      # "<?xml ...>" without the closing "?>" passes the lookahead only.
      raise REXML::ParseException.new("Malformed XML declaration", @source) if md.nil?
      results = md[1]
      version = VERSION.match( results )
      version = version[1] unless version.nil?
      encoding = ENCODING.match(results)
      encoding = encoding[1] unless encoding.nil?
      if need_source_encoding_update?(encoding)
        @source.encoding = encoding
      end
      if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
        encoding = "UTF-16"
      end
      standalone = STANDALONE.match(results)
      standalone = standalone[1] unless standalone.nil?
      return [ :xmldecl, version, encoding, standalone ]
    when INSTRUCTION_START
      return process_instruction
    when DOCTYPE_START
      md = @source.match( DOCTYPE_PATTERN, true )
      @nsstack.unshift(Set.new)
      identity = md[1]
      close = md[2]
      id_md = IDENTITY.match(identity)
      name = id_md && id_md[1]
      raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
      pub_sys = id_md[2] && id_md[2].strip
      long_name = id_md[4] && id_md[4].strip
      uri = id_md[6] && id_md[6].strip
      args = [ :start_doctype, name, pub_sys, long_name, uri ]
      if close == ">"
        # No internal subset: queue the matching :end_doctype right away.
        @document_status = :after_doctype
        @source.read if @source.buffer.size<2
        @source.match(/^\s*/um, true)
        @stack << [ :end_doctype ]
      else
        @document_status = :in_doctype
      end
      return args
    when /^\s+/
      # Leading whitespace: left in the buffer and reported by the content
      # branch below.
    else
      @document_status = :after_doctype
      @source.read if @source.buffer.size<2
      @source.match(/\s*/um, true)
      if @source.encoding == "UTF-8"
        @source.buffer.force_encoding(::Encoding::UTF_8)
      end
    end
  end

  if @document_status == :in_doctype
    md = @source.match(/\s*(.*?>)/um)
    if md.nil?
      raise REXML::ParseException.new("Malformed DOCTYPE: unterminated declaration", @source)
    end
    case md[1]
    when SYSTEMENTITY
      match = @source.match( SYSTEMENTITY, true )[1]
      return [ :externalentity, match ]

    when ELEMENTDECL_START
      return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]

    when ENTITY_START
      entity_md = @source.match( ENTITYDECL, true )
      if entity_md.nil?
        raise REXML::ParseException.new("Malformed entity declaration", @source)
      end
      match = entity_md.to_a.compact
      match[0] = :entitydecl
      ref = false
      if match[1] == '%'
        ref = true
        match.delete_at 1
      end
      # Now we have to sort out what kind of entity reference this is
      if match[2] == 'SYSTEM'
        # External reference
        match[3] = match[3][1..-2] # PUBID
        match.delete_at(4) if match.size > 4 # Chop out NDATA decl
        # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
      elsif match[2] == 'PUBLIC'
        # External reference
        match[3] = match[3][1..-2] # PUBID
        match[4] = match[4][1..-2] # HREF
        match.delete_at(5) if match.size > 5 # Chop out NDATA decl
        # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
      else
        match[2] = match[2][1..-2]
        match.pop if match.size == 4
        # match is [ :entity, name, value ]
      end
      match << '%' if ref
      return match
    when ATTLISTDECL_START
      md = @source.match( ATTLISTDECL_PATTERN, true )
      raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
      element = md[1]
      contents = md[0]

      pairs = {}
      values = md[0].scan( ATTDEF_RE )
      values.each do |attdef|
        unless attdef[3] == "#IMPLIED"
          attdef.compact!
          val = attdef[3]
          val = attdef[4] if val == "#FIXED "
          pairs[attdef[0]] = val
          # A defaulted xmlns:prefix attribute declares that prefix.
          if attdef[0] =~ /^xmlns:(.*)/
            @nsstack[0] << $1
          end
        end
      end
      return [ :attlistdecl, element, pairs, contents ]
    when NOTATIONDECL_START
      if (md = @source.match( PUBLIC, true ))
        vals = [md[1],md[2],md[4],md[6]]
      elsif (md = @source.match( SYSTEM, true ))
        vals = [md[1],md[2],nil,md[4]]
      else
        raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
      end
      return [ :notationdecl, *vals ]
    when DOCTYPE_END
      @document_status = :after_doctype
      @source.match( DOCTYPE_END, true )
      return [ :end_doctype ]
    end
  end

  begin
    if @source.buffer[0] == ?<
      if @source.buffer[1] == ?/
        @nsstack.shift
        last_tag = @tags.pop
        md = @source.match( CLOSE_MATCH, true )
        if md.nil? or last_tag != md[1]
          message = "Missing end tag for '#{last_tag}'"
          message << " (got '#{md[1]}')" if md
          raise REXML::ParseException.new(message, @source)
        end
        return [ :end_element, last_tag ]
      elsif @source.buffer[1] == ?!
        md = @source.match(/\A(\s*[^>]*>)/um)
        raise REXML::ParseException.new("Malformed node", @source) unless md
        if md[0][2] == ?-
          md = @source.match( COMMENT_PATTERN, true )
          # Only inspect the comment body when the full comment matched;
          # otherwise fall through to the declaration error below.
          if md
            case md[1]
            when /--/, /-\z/
              raise REXML::ParseException.new("Malformed comment", @source)
            end
            return [ :comment, md[1] ]
          end
        else
          md = @source.match( CDATA_PATTERN, true )
          return [ :cdata, md[1] ] if md
        end
        raise REXML::ParseException.new( "Declarations can only occur "+
          "in the doctype declaration.", @source)
      elsif @source.buffer[1] == ??
        return process_instruction
      else
        # Get the next tag
        md = @source.match(TAG_MATCH, true)
        unless md
          raise REXML::ParseException.new("malformed XML: missing tag start", @source)
        end
        prefixes = Set.new
        prefixes << md[2] if md[2]
        @nsstack.unshift(curr_ns=Set.new)
        attributes, closed = parse_attributes(prefixes, curr_ns)
        # Verify that all of the prefixes have been defined
        prefixes.each do |prefix|
          unless @nsstack.any? { |declared| declared.member?(prefix) }
            raise UndefinedNamespaceException.new(prefix,@source,self)
          end
        end

        if closed
          @closed = md[1]
          @nsstack.shift
        else
          @tags.push( md[1] )
        end
        return [ :start_element, md[1], attributes ]
      end
    else
      md = @source.match( TEXT_PATTERN, true )
      if md[0].length == 0
        @source.match( /(\s+)/, true )
      end
      return [ :text, md[1] ]
    end
  rescue REXML::UndefinedNamespaceException
    raise
  rescue REXML::ParseException
    raise
  rescue => error
    # Anything unexpected is reported as a parse failure that wraps the
    # original exception.
    raise REXML::ParseException.new( "Exception parsing",
      @source, self, error )
  end
  return [ :dummy ]
end
private :pull_event
408
+
# Resolves the entity named +reference+ to its unnormalized replacement
# text. +entities+ (if given) is consulted first, then DEFAULT_ENTITIES.
# Returns nil when the name is unknown.
def entity(reference, entities)
  replacement = entities[reference] if entities
  unless replacement
    builtin = DEFAULT_ENTITIES[reference]
    replacement = builtin[2] if builtin
  end
  return nil unless replacement
  unnormalize(replacement, entities)
end
418
+
# Escapes all possible entities in a copy of +input+ and returns the copy.
#
# input::         text to escape (not modified; may be frozen)
# entities::      optional name => replacement-text mapping; occurrences of
#                 a replacement text are rewritten to "&name;"
# entity_filter:: optional collection of entity names that must NOT be
#                 substituted
def normalize( input, entities=nil, entity_filter=nil )
  # dup, not clone: clone would carry over a frozen state and break gsub!.
  copy = input.dup
  # Doing it like this rather than in a loop improves the speed
  copy.gsub!( EREFERENCE, '&amp;' )
  entities.each do |key, value|
    # Filter on the entity's name.
    next if entity_filter and entity_filter.include?(key)
    copy.gsub!( value, "&#{key};" )
  end if entities
  copy.gsub!( EREFERENCE, '&amp;' )
  DEFAULT_ENTITIES.each_value do |value|
    copy.gsub!( value[3], value[1] )
  end
  copy
end
434
+
# Unescapes all possible entities in a copy of +string+ and returns the
# copy. Line endings ("\r\n" and "\r") are first normalized to "\n".
#
# string::   text to unescape (not modified; may be frozen)
# entities:: optional name => replacement mapping handed to #entity
# filter::   optional collection of entity names to leave untouched
def unnormalize( string, entities=nil, filter=nil )
  # dup, not clone: clone would carry over a frozen state and break gsub!.
  rv = string.dup
  rv.gsub!( /\r\n?/, "\n" )
  matches = rv.scan( REFERENCE_RE )
  return rv if matches.size == 0
  # Numeric character references (decimal and hexadecimal).
  rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
    m=$1
    m = "0#{m}" if m[0] == ?x
    [Integer(m)].pack('U*')
  }
  # Keep only the named references; numeric ones scan as nil.
  names = matches.collect { |captures| captures[0] }.compact.uniq
  if names.size > 0
    names.each do |entity_reference|
      next if filter and filter.include?(entity_reference)
      entity_value = entity( entity_reference, entities )
      if entity_value
        # Literal pattern + block replacement: the name may contain regexp
        # metacharacters such as '.', and the value may contain backslashes
        # that must not be read as back-references.
        rv.gsub!( "&#{entity_reference};" ) { entity_value }
      else
        er = DEFAULT_ENTITIES[entity_reference]
        rv.gsub!( er[0], er[2] ) if er
      end
    end
    rv.gsub!( /&amp;/, '&' )
  end
  rv
end
464
+
465
+ private
# Whether the encoding named in the XML declaration should be applied to the
# source: false when none was declared or when it is exactly "UTF-16"
# (case-insensitive), true otherwise.
def need_source_encoding_update?(xml_declaration_encoding)
  !(xml_declaration_encoding.nil? || /\AUTF-16\z/i =~ xml_declaration_encoding)
end
471
+
# Consumes one processing instruction from the source and returns
# [:processing_instruction, target, content], where content is the second
# capture of INSTRUCTION_PATTERN (nil when the instruction has no content).
#
# Raises REXML::ParseException when the input does not match.
def process_instruction
  pi = @source.match(INSTRUCTION_PATTERN, true)
  if pi.nil?
    message = "Invalid processing instruction node"
    raise REXML::ParseException.new(message, @source)
  end
  target, content = pi[1], pi[2]
  [:processing_instruction, target, content]
end
480
+
# Consumes the remainder of a start tag (everything up to and including the
# closing ">" or "/>") and parses its attributes.
#
# prefixes:: Set that receives every namespace prefix used by an attribute
#            name (other than "xml" and "xmlns")
# curr_ns::  Set that receives every prefix declared via xmlns:prefix="..."
#
# Returns [attributes, closed]: a Hash of qualified name => raw value, and
# whether the tag ended with "/>".
#
# Raises REXML::ParseException for an unterminated tag, a malformed
# attribute, an illegal xml/xmlns binding or a duplicate attribute.
def parse_attributes(prefixes, curr_ns)
  attributes = {}
  closed = false
  tag_rest = @source.match(/^(.*?)(\/)?>/um, true)
  if tag_rest.nil?
    message = "Start tag isn't ended"
    raise REXML::ParseException.new(message, @source)
  end

  raw_attributes = tag_rest[1]
  closed = !tag_rest[2].nil?
  return attributes, closed if raw_attributes.nil? || raw_attributes.empty?

  scanner = StringScanner.new(raw_attributes)
  until scanner.eos?
    break if scanner.scan(/\s+/) && scanner.eos?

    attribute_start = scanner.pos
    loop do
      # Fast path: a complete, well-formed attribute.
      break if scanner.scan(ATTRIBUTE_PATTERN)

      # Slow path: scan piecewise to find out what exactly is wrong.
      unless scanner.scan(QNAME)
        message = "Invalid attribute name: <#{scanner.rest}>"
        raise REXML::ParseException.new(message, @source)
      end
      attr_name = scanner[0]
      unless scanner.scan(/\s*=\s*/um)
        message = "Missing attribute equal: <#{attr_name}>"
        raise REXML::ParseException.new(message, @source)
      end
      quote = scanner.scan(/['"]/)
      unless quote
        message = "Missing attribute value start quote: <#{attr_name}>"
        raise REXML::ParseException.new(message, @source)
      end
      next if scanner.scan(/.*#{Regexp.escape(quote)}/um)

      # The value has no closing quote yet: the ">" that ended the first
      # match was inside the value. Pull in text up to the next ">" and
      # retry this attribute from its start.
      continuation = @source.match(/^(.*?)(\/)?>/um, true)
      if continuation.nil?
        message =
          "Missing attribute value end quote: <#{attr_name}>: <#{quote}>"
        raise REXML::ParseException.new(message, @source)
      end
      scanner << "/" if closed
      scanner << ">"
      scanner << continuation[1]
      scanner.pos = attribute_start
      closed = !continuation[2].nil?
    end

    # Captures of ATTRIBUTE_PATTERN; group 4 is the quote character.
    name = scanner[1]
    prefix = scanner[2]
    local_part = scanner[3]
    value = scanner[5]

    if prefix == "xmlns"
      case local_part
      when "xml"
        if value != "http://www.w3.org/XML/1998/namespace"
          msg = "The 'xml' prefix must not be bound to any other namespace "+
            "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
          raise REXML::ParseException.new( msg, @source, self )
        end
      when "xmlns"
        msg = "The 'xmlns' prefix must not be declared "+
          "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
        raise REXML::ParseException.new( msg, @source, self)
      end
      curr_ns << local_part
    elsif prefix
      prefixes << prefix unless prefix == "xml"
    end

    if attributes.has_key?(name)
      msg = "Duplicate attribute #{name.inspect}"
      raise REXML::ParseException.new(msg, @source, self)
    end

    attributes[name] = value
  end
  return attributes, closed
end
564
+ end
565
+ end
566
+ end
567
+
568
+ =begin
569
+ case event[0]
570
+ when :start_element
571
+ when :text
572
+ when :end_element
573
+ when :processing_instruction
574
+ when :cdata
575
+ when :comment
576
+ when :xmldecl
577
+ when :start_doctype
578
+ when :end_doctype
579
+ when :externalentity
580
+ when :elementdecl
581
+ when :entitydecl
582
+ when :attlistdecl
583
+ when :notationdecl
584
+ when :end_doctype
585
+ end
586
+ =end