moxml 0.1.22 → 0.1.23

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,9 +1,10 @@
1
1
  # frozen_string_literal: true
2
- require 'rexml/parseexception'
3
- require 'rexml/undefinednamespaceexception'
4
- require 'rexml/security'
5
- require 'rexml/source'
6
- require 'set'
2
+
3
+ require "rexml/parseexception"
4
+ require "rexml/undefinednamespaceexception"
5
+ require "rexml/security"
6
+ require "rexml/source"
7
+ require "set"
7
8
  require "strscan"
8
9
 
9
10
  module REXML
@@ -55,25 +56,25 @@ module REXML
55
56
  #
56
57
  # Nat Price gave me some good ideas for the API.
57
58
  class BaseParser
58
- LETTER = 'A-Za-z'
59
- DIGIT = '0-9'
59
+ LETTER = "A-Za-z"
60
+ DIGIT = "0-9"
60
61
 
61
- COMBININGCHAR = '' # TODO
62
- EXTENDER = '' # TODO
62
+ COMBININGCHAR = "" # TODO
63
+ EXTENDER = "" # TODO
63
64
 
64
- NCNAME_STR= "[#{LETTER}_][-A-Za-z0-9._#{COMBININGCHAR}#{EXTENDER}]*"
65
- QNAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
65
+ NCNAME_STR = "[#{LETTER}_][-A-Za-z0-9._#{COMBININGCHAR}#{EXTENDER}]*".freeze
66
+ QNAME_STR = "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})".freeze
66
67
  QNAME = /(#{QNAME_STR})/
67
68
 
68
69
  # Just for backward compatibility. For example, kramdown uses this.
69
70
  # It's not used in REXML.
70
- UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
71
+ UNAME_STR = "(?:#{NCNAME_STR}:)?#{NCNAME_STR}".freeze
71
72
 
72
73
  NAMECHAR = '[\-\w\.:]'
73
- NAME = "([\\w:]#{NAMECHAR}*)"
74
- NMTOKEN = "(?:#{NAMECHAR})+"
75
- NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
76
- REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)"
74
+ NAME = "([\\w:]#{NAMECHAR}*)".freeze
75
+ NMTOKEN = "(?:#{NAMECHAR})+".freeze
76
+ NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*".freeze
77
+ REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)".freeze
77
78
  REFERENCE_RE = /#{REFERENCE}/
78
79
 
79
80
  DOCTYPE_START = /^\s*<!DOCTYPE\s/um
@@ -84,7 +85,7 @@ module REXML
84
85
  CDATA_START = /^<!\[CDATA\[/u
85
86
  CDATA_END = /^\s*\]\s*>/um
86
87
  CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
87
- XMLDECL_START = /^<\?xml\s/u;
88
+ XMLDECL_START = /^<\?xml\s/u
88
89
  XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
89
90
  INSTRUCTION_START = /^<\?/u
90
91
  INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um
@@ -99,13 +100,13 @@ module REXML
99
100
  ELEMENTDECL_START = /^\s*<!ELEMENT/um
100
101
  ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
101
102
  SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
102
- ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
103
- NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
104
- ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
105
- ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
106
- ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
107
- DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
108
- ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
103
+ ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)".freeze
104
+ NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)".freeze
105
+ ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))".freeze
106
+ ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})".freeze
107
+ ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')".freeze
108
+ DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))".freeze
109
+ ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}".freeze
109
110
  ATTDEF_RE = /#{ATTDEF}/
110
111
  ATTLISTDECL_START = /^\s*<!ATTLIST/um
111
112
  ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
@@ -114,16 +115,16 @@ module REXML
114
115
 
115
116
  # Entity constants
116
117
  PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
117
- SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
118
- PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
119
- EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
120
- NDATADECL = "\\s+NDATA\\s+#{NAME}"
121
- PEREFERENCE = "%#{NAME};"
122
- ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
123
- PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
124
- ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
125
- PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
126
- GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
118
+ SYSTEMLITERAL = %{((?:"[^"]*")|(?:'[^']*'))}
119
+ PUBIDLITERAL = %{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}.freeze
120
+ EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))".freeze
121
+ NDATADECL = "\\s+NDATA\\s+#{NAME}".freeze
122
+ PEREFERENCE = "%#{NAME};".freeze
123
+ ENTITYVALUE = %{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}.freeze
124
+ PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})".freeze
125
+ ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))".freeze
126
+ PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>".freeze
127
+ GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>".freeze
127
128
  ENTITYDECL = /\s*(?:#{GEDECL})|\s*(?:#{PEDECL})/um
128
129
 
129
130
  NOTATIONDECL_START = /^\s*<!NOTATION/um
@@ -134,11 +135,11 @@ module REXML
134
135
  EREFERENCE = /&(?!#{NAME};)/
135
136
 
136
137
  DEFAULT_ENTITIES = {
137
- 'gt' => [/&gt;/, '&gt;', '>', />/],
138
- 'lt' => [/&lt;/, '&lt;', '<', /</],
139
- 'quot' => [/&quot;/, '&quot;', '"', /"/],
140
- "apos" => [/&apos;/, "&apos;", "'", /'/]
141
- }
138
+ "gt" => [/&gt;/, "&gt;", ">", />/],
139
+ "lt" => [/&lt;/, "&lt;", "<", /</],
140
+ "quot" => [/&quot;/, "&quot;", '"', /"/],
141
+ "apos" => [/&apos;/, "&apos;", "'", /'/],
142
+ }.freeze
142
143
 
143
144
  module Private
144
145
  PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
@@ -147,21 +148,22 @@ module REXML
147
148
  EQUAL_PATTERN = /\s*=\s*/um
148
149
  ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
149
150
  NAME_PATTERN = /#{NAME}/um
150
- GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
151
- PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
151
+ GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>".freeze
152
+ PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>".freeze
152
153
  ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
153
154
  CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
154
155
  CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
155
156
  DEFAULT_ENTITIES_PATTERNS = {}
156
- default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
157
+ default_entities = ["gt", "lt", "quot", "apos", "amp"]
157
158
  default_entities.each do |term|
158
159
  DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
159
160
  end
161
+ DEFAULT_ENTITIES_PATTERNS.freeze
160
162
  XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
161
163
  end
162
164
  private_constant :Private
163
165
 
164
- def initialize( source )
166
+ def initialize(source)
165
167
  self.stream = source
166
168
  @listeners = []
167
169
  @prefixes = Set.new
@@ -172,17 +174,15 @@ module REXML
172
174
  @version = nil
173
175
  end
174
176
 
175
- def add_listener( listener )
177
+ def add_listener(listener)
176
178
  @listeners << listener
177
179
  end
178
180
 
179
- attr_reader :source
180
- attr_reader :entity_expansion_count
181
- attr_writer :entity_expansion_limit
182
- attr_writer :entity_expansion_text_limit
181
+ attr_reader :source, :entity_expansion_count
182
+ attr_writer :entity_expansion_limit, :entity_expansion_text_limit
183
183
 
184
- def stream=( source )
185
- @source = SourceFactory.create_from( source )
184
+ def stream=(source)
185
+ @source = SourceFactory.create_from(source)
186
186
  reset
187
187
  end
188
188
 
@@ -193,7 +193,7 @@ module REXML
193
193
  @tags = []
194
194
  @stack = []
195
195
  @entities = []
196
- @namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE}
196
+ @namespaces = { "xml" => Private::XML_PREFIXED_NAMESPACE }
197
197
  @namespaces_restore_stack = []
198
198
  end
199
199
 
@@ -218,7 +218,7 @@ module REXML
218
218
 
219
219
  # Push an event back on the head of the stream. This method
220
220
  # has (theoretically) infinite depth.
221
- def unshift token
221
+ def unshift(token)
222
222
  @stack.unshift(token)
223
223
  end
224
224
 
@@ -228,17 +228,18 @@ module REXML
228
228
  # Be aware that this causes the stream to be parsed up to the +depth+
229
229
  # event, so you can effectively pre-parse the entire document (pull the
230
230
  # entire thing into memory) using this method.
231
- def peek depth=0
232
- raise %Q[Illegal argument "#{depth}"] if depth < -1
231
+ def peek(depth = 0)
232
+ raise %[Illegal argument "#{depth}"] if depth < -1
233
+
233
234
  temp = []
234
235
  if depth == -1
235
- temp.push(pull()) until empty?
236
+ temp.push(pull) until empty?
236
237
  else
237
- while @stack.size+temp.size < depth+1
238
- temp.push(pull())
238
+ while @stack.size + temp.size < depth + 1
239
+ temp.push(pull)
239
240
  end
240
241
  end
241
- @stack += temp if temp.size > 0
242
+ @stack += temp if temp.size.positive?
242
243
  @stack[depth]
243
244
  end
244
245
 
@@ -255,15 +256,17 @@ module REXML
255
256
 
256
257
  def pull_event
257
258
  if @closed
258
- x, @closed = @closed, nil
259
- return [ :end_element, x ]
259
+ x = @closed
260
+ @closed = nil
261
+ return [:end_element, x]
260
262
  end
261
263
  if empty?
262
264
  if @document_status == :in_doctype
263
265
  raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
264
266
  end
267
+
265
268
  unless @tags.empty?
266
- path = "/" + @tags.join("/")
269
+ path = "/#{@tags.join('/')}"
267
270
  raise ParseException.new("Missing end tag for '#{path}'", @source)
268
271
  end
269
272
 
@@ -271,11 +274,12 @@ module REXML
271
274
  raise ParseException.new("Malformed XML: No root element", @source)
272
275
  end
273
276
 
274
- return [ :end_document ]
277
+ return [:end_document]
275
278
  end
276
- return @stack.shift if @stack.size > 0
277
- #STDERR.puts @source.encoding
278
- #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
279
+ return @stack.shift if @stack.size.positive?
280
+
281
+ # STDERR.puts @source.encoding
282
+ # STDERR.puts "BUFFER = #{@source.buffer.inspect}"
279
283
 
280
284
  @source.ensure_buffer
281
285
  if @document_status == nil
@@ -284,15 +288,15 @@ module REXML
284
288
  return process_instruction
285
289
  elsif @source.match?("<!", true)
286
290
  if @source.match?("--", true)
287
- return [ :comment, process_comment ]
291
+ return [:comment, process_comment]
288
292
  elsif @source.match?("DOCTYPE", true)
289
293
  base_error_message = "Malformed DOCTYPE"
290
294
  unless @source.skip_spaces
291
- if @source.match?(">")
292
- message = "#{base_error_message}: name is missing"
293
- else
294
- message = "#{base_error_message}: invalid name"
295
- end
295
+ message = if @source.match?(">")
296
+ "#{base_error_message}: name is missing"
297
+ else
298
+ "#{base_error_message}: invalid name"
299
+ end
296
300
  @source.position = start_position
297
301
  raise REXML::ParseException.new(message, @source)
298
302
  end
@@ -311,7 +315,8 @@ module REXML
311
315
  accept_public_id: false)
312
316
  if id[0] == "SYSTEM"
313
317
  # For backward compatibility
314
- id[1], id[2] = id[2], nil
318
+ id[1] = id[2]
319
+ id[2] = nil
315
320
  end
316
321
  @source.skip_spaces
317
322
  if @source.match?("[", true)
@@ -327,7 +332,7 @@ module REXML
327
332
  args = [:start_doctype, name, *id]
328
333
  if @document_status == :after_doctype
329
334
  @source.skip_spaces
330
- @stack << [ :end_doctype ]
335
+ @stack << [:end_doctype]
331
336
  end
332
337
  return args
333
338
  else
@@ -342,48 +347,61 @@ module REXML
342
347
  if @source.match?("<!", true)
343
348
  if @source.match?("ELEMENT", true)
344
349
  md = @source.match(/(.*?)>/um, true)
345
- raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
346
- return [ :elementdecl, "<!ELEMENT" + md[1] ]
350
+ if md.nil?
351
+ raise REXML::ParseException.new("Bad ELEMENT declaration!",
352
+ @source)
353
+ end
354
+
355
+ return [:elementdecl, "<!ELEMENT#{md[1]}"]
347
356
  elsif @source.match?("ENTITY", true)
348
357
  match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
349
358
  unless match_data
350
- raise REXML::ParseException.new("Malformed entity declaration", @source)
359
+ raise REXML::ParseException.new("Malformed entity declaration",
360
+ @source)
351
361
  end
362
+
352
363
  match = [:entitydecl, *match_data.captures.compact]
353
364
  ref = false
354
- if match[1] == '%'
365
+ if match[1] == "%"
355
366
  ref = true
356
367
  match.delete_at 1
357
368
  end
358
369
  # Now we have to sort out what kind of entity reference this is
359
- if match[2] == 'SYSTEM'
370
+ case match[2]
371
+ when "SYSTEM"
360
372
  # External reference
361
373
  match[3] = match[3][1..-2] # PUBID
362
374
  match.delete_at(4) if match.size > 4 # Chop out NDATA decl
363
375
  # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
364
- elsif match[2] == 'PUBLIC'
376
+ when "PUBLIC"
365
377
  # External reference
366
378
  match[3] = match[3][1..-2] # PUBID
367
379
  match[4] = match[4][1..-2] # HREF
368
380
  match.delete_at(5) if match.size > 5 # Chop out NDATA decl
369
381
  # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
370
- elsif Private::PEREFERENCE_PATTERN.match?(match[2])
371
- raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
382
+ when Private::PEREFERENCE_PATTERN
383
+ raise REXML::ParseException.new(
384
+ "Parameter entity references forbidden in internal subset: #{match[2]}", @source
385
+ )
372
386
  else
373
387
  match[2] = match[2][1..-2]
374
388
  match.pop if match.size == 4
375
389
  # match is [ :entity, name, value ]
376
390
  end
377
- match << '%' if ref
391
+ match << "%" if ref
378
392
  return match
379
393
  elsif @source.match?("ATTLIST", true)
380
394
  md = @source.match(Private::ATTLISTDECL_END, true)
381
- raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
395
+ if md.nil?
396
+ raise REXML::ParseException.new("Bad ATTLIST declaration!",
397
+ @source)
398
+ end
399
+
382
400
  element = md[1]
383
- contents = "<!ATTLIST" + md[0]
401
+ contents = "<!ATTLIST#{md[0]}"
384
402
 
385
403
  pairs = {}
386
- values = md[0].strip.scan( ATTDEF_RE )
404
+ values = md[0].strip.scan(ATTDEF_RE)
387
405
  values.each do |attdef|
388
406
  unless attdef[3] == "#IMPLIED"
389
407
  attdef.compact!
@@ -395,15 +413,15 @@ module REXML
395
413
  end
396
414
  end
397
415
  end
398
- return [ :attlistdecl, element, pairs, contents ]
416
+ return [:attlistdecl, element, pairs, contents]
399
417
  elsif @source.match?("NOTATION", true)
400
418
  base_error_message = "Malformed notation declaration"
401
419
  unless @source.skip_spaces
402
- if @source.match?(">")
403
- message = "#{base_error_message}: name is missing"
404
- else
405
- message = "#{base_error_message}: invalid name"
406
- end
420
+ message = if @source.match?(">")
421
+ "#{base_error_message}: name is missing"
422
+ else
423
+ "#{base_error_message}: invalid name"
424
+ end
407
425
  @source.position = start_position
408
426
  raise REXML::ParseException.new(message, @source)
409
427
  end
@@ -418,17 +436,20 @@ module REXML
418
436
  end
419
437
  return [:notationdecl, name, *id]
420
438
  elsif @source.match?("--", true)
421
- return [ :comment, process_comment ]
439
+ return [:comment, process_comment]
422
440
  else
423
- raise REXML::ParseException.new("Malformed node: Started with '<!' but not a comment nor ELEMENT,ENTITY,ATTLIST,NOTATION", @source)
441
+ raise REXML::ParseException.new(
442
+ "Malformed node: Started with '<!' but not a comment nor ELEMENT,ENTITY,ATTLIST,NOTATION", @source
443
+ )
424
444
  end
425
445
  elsif match = @source.match(/(%.*?;)\s*/um, true)
426
- return [ :externalentity, match[1] ]
446
+ return [:externalentity, match[1]]
427
447
  elsif @source.match?(/\]\s*>/um, true)
428
448
  @document_status = :after_doctype
429
- return [ :end_doctype ]
449
+ return [:end_doctype]
430
450
  else
431
- raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
451
+ raise ParseException.new("Malformed DOCTYPE: invalid declaration",
452
+ @source)
432
453
  end
433
454
  end
434
455
  if @document_status == :after_doctype
@@ -445,39 +466,46 @@ module REXML
445
466
  @namespaces_restore_stack.pop
446
467
  last_tag = @tags.pop
447
468
  md = @source.match(Private::CLOSE_PATTERN, true)
448
- if md and !last_tag
469
+ if md && !last_tag
449
470
  message = "Unexpected top-level end tag (got '#{md[1]}')"
450
471
  raise REXML::ParseException.new(message, @source)
451
472
  end
452
- if md.nil? or last_tag != md[1]
473
+ if md.nil? || (last_tag != md[1])
453
474
  message = "Missing end tag for '#{last_tag}'"
454
475
  message += " (got '#{md[1]}')" if md
455
476
  @source.position = start_position if md.nil?
456
477
  raise REXML::ParseException.new(message, @source)
457
478
  end
458
- return [ :end_element, last_tag ]
479
+ [:end_element, last_tag]
459
480
  elsif @source.match?("!", true)
460
- #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
481
+ # STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
461
482
  if @source.match?("--", true)
462
- return [ :comment, process_comment ]
483
+ [:comment, process_comment]
463
484
  elsif @source.match?("[CDATA[", true)
464
485
  text = @source.read_until("]]>")
465
486
  unless text.end_with?("]]>")
466
- raise REXML::ParseException.new("Malformed CDATA: Missing end ']]>'", @source)
487
+ raise REXML::ParseException.new(
488
+ "Malformed CDATA: Missing end ']]>'", @source
489
+ )
467
490
  end
491
+
468
492
  text = text[0...-3]
469
- return [ :cdata, text ]
493
+ [:cdata, text]
470
494
  else
471
- raise REXML::ParseException.new("Malformed node: Started with '<!' but not a comment nor CDATA", @source)
495
+ raise REXML::ParseException.new(
496
+ "Malformed node: Started with '<!' but not a comment nor CDATA", @source
497
+ )
472
498
  end
473
499
  elsif @source.match?("?", true)
474
- return process_instruction
500
+ process_instruction
475
501
  else
476
502
  # Get the next tag
477
503
  md = @source.match(Private::TAG_PATTERN, true)
478
504
  unless md
479
505
  @source.position = start_position
480
- raise REXML::ParseException.new("malformed XML: missing tag start", @source)
506
+ raise REXML::ParseException.new(
507
+ "malformed XML: missing tag start", @source
508
+ )
481
509
  end
482
510
  tag = md[1]
483
511
  @document_status = :in_element
@@ -486,9 +514,9 @@ module REXML
486
514
  push_namespaces_restore
487
515
  attributes, closed = parse_attributes(@prefixes)
488
516
  # Verify that all of the prefixes have been defined
489
- for prefix in @prefixes
517
+ @prefixes.each do |prefix|
490
518
  unless @namespaces.key?(prefix)
491
- raise UndefinedNamespaceException.new(prefix,@source,self)
519
+ raise UndefinedNamespaceException.new(prefix, @source, self)
492
520
  end
493
521
  end
494
522
 
@@ -496,13 +524,16 @@ module REXML
496
524
  @closed = tag
497
525
  pop_namespaces_restore
498
526
  else
499
- if @tags.empty? and @have_root
500
- raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
527
+ if @tags.empty? && @have_root
528
+ raise ParseException.new(
529
+ "Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source
530
+ )
501
531
  end
502
- @tags.push( tag )
532
+
533
+ @tags.push(tag)
503
534
  end
504
535
  @have_root = true
505
- return [ :start_element, tag, attributes ]
536
+ [:start_element, tag, attributes]
506
537
  end
507
538
  else
508
539
  text = @source.read_until("<")
@@ -513,82 +544,90 @@ module REXML
513
544
  if @tags.empty?
514
545
  unless /^\s*$/.match?(text)
515
546
  if @have_root
516
- raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
547
+ raise ParseException.new(
548
+ "Malformed XML: Extra content at the end of the document (got '#{text}')", @source
549
+ )
517
550
  else
518
- raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
551
+ raise ParseException.new(
552
+ "Malformed XML: Content at the start of the document (got '#{text}')", @source
553
+ )
519
554
  end
520
555
  end
521
556
  return pull_event if @have_root
522
557
  end
523
- return [ :text, text ]
558
+ [:text, text]
524
559
  end
525
560
  rescue REXML::UndefinedNamespaceException
526
561
  raise
527
562
  rescue REXML::ParseException
528
563
  raise
529
- rescue => error
530
- raise REXML::ParseException.new( "Exception parsing",
531
- @source, self, (error ? error : $!) )
564
+ rescue StandardError => e
565
+ raise REXML::ParseException.new("Exception parsing",
566
+ @source, self, e || $!)
532
567
  end
533
568
  # NOTE: The end of the method never runs, because it is unreachable.
534
569
  # All branches of code above have explicit unconditional return or raise statements.
535
570
  end
536
571
  private :pull_event
537
572
 
538
- def entity( reference, entities )
573
+ def entity(reference, entities)
539
574
  return unless entities
540
575
 
541
- value = entities[ reference ]
576
+ value = entities[reference]
542
577
  return if value.nil?
543
578
 
544
579
  record_entity_expansion
545
- unnormalize( value, entities )
580
+ unnormalize(value, entities)
546
581
  end
547
582
 
548
583
  # Escapes all possible entities
549
- def normalize( input, entities=nil, entity_filter=nil )
584
+ def normalize(input, entities = nil, entity_filter = nil)
550
585
  copy = input.clone
551
586
  # Doing it like this rather than in a loop improves the speed
552
- copy.gsub!( EREFERENCE, '&amp;' )
553
- entities.each do |key, value|
554
- copy.gsub!( value, "&#{key};" ) unless entity_filter and
555
- entity_filter.include?(entity)
556
- end if entities
557
- copy.gsub!( EREFERENCE, '&amp;' )
558
- DEFAULT_ENTITIES.each do |key, value|
559
- copy.gsub!( value[3], value[1] )
587
+ copy.gsub!(EREFERENCE, "&amp;")
588
+ if entities
589
+ entities.each do |key, value|
590
+ unless entity_filter && entity_filter.include?(entity)
591
+ copy.gsub!(value, "&#{key};")
592
+ end
593
+ end
594
+ end
595
+ copy.gsub!(EREFERENCE, "&amp;")
596
+ DEFAULT_ENTITIES.each_value do |value|
597
+ copy.gsub!(value[3], value[1])
560
598
  end
561
599
  copy
562
600
  end
563
601
 
564
602
  # Unescapes all possible entities
565
- def unnormalize( string, entities=nil, filter=nil )
566
- if string.include?("\r")
567
- rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
568
- else
569
- rv = string.dup
570
- end
571
- matches = rv.scan( REFERENCE_RE )
572
- return rv if matches.size == 0
573
- rv.gsub!( Private::CHARACTER_REFERENCES ) {
574
- m=$1
575
- if m.start_with?("x")
576
- code_point = Integer(m[1..-1], 16)
577
- else
578
- code_point = Integer(m, 10)
579
- end
580
- [code_point].pack('U*')
581
- }
582
- matches.collect!{|x|x[0]}.compact!
603
+ def unnormalize(string, entities = nil, filter = nil)
604
+ rv = if string.include?("\r")
605
+ string.gsub(Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n")
606
+ else
607
+ string.dup
608
+ end
609
+ matches = rv.scan(REFERENCE_RE)
610
+ return rv if matches.empty?
611
+
612
+ rv.gsub!(Private::CHARACTER_REFERENCES) do
613
+ m = $1
614
+ code_point = if m.start_with?("x")
615
+ Integer(m[1..], 16)
616
+ else
617
+ Integer(m, 10)
618
+ end
619
+ [code_point].pack("U*")
620
+ end
621
+ matches.collect! { |x| x[0] }.compact!
583
622
  if filter
584
623
  matches.reject! do |entity_reference|
585
624
  filter.include?(entity_reference)
586
625
  end
587
626
  end
588
- if matches.size > 0
627
+ if matches.size.positive?
589
628
  matches.tally.each do |entity_reference, n|
590
629
  entity_expansion_count_before = @entity_expansion_count
591
- entity_value = entity( entity_reference, entities )
630
+ entity_value = entity(entity_reference, entities)
592
631
  if entity_value
593
632
  if n > 1
594
633
  entity_expansion_count_delta =
@@ -596,21 +635,22 @@ module REXML
596
635
  record_entity_expansion(entity_expansion_count_delta * (n - 1))
597
636
  end
598
637
  re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
599
- rv.gsub!( re, entity_value )
638
+ rv.gsub!(re, entity_value)
600
639
  if rv.bytesize > @entity_expansion_text_limit
601
640
  raise "entity expansion has grown too large"
602
641
  end
603
642
  else
604
643
  er = DEFAULT_ENTITIES[entity_reference]
605
- rv.gsub!( er[0], er[2] ) if er
644
+ rv.gsub!(er[0], er[2]) if er
606
645
  end
607
646
  end
608
- rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
647
+ rv.gsub!(Private::DEFAULT_ENTITIES_PATTERNS["amp"], "&")
609
648
  end
610
649
  rv
611
650
  end
612
651
 
613
652
  private
653
+
614
654
  def add_namespace(prefix, uri)
615
655
  @namespaces_restore_stack.last[prefix] = @namespaces[prefix]
616
656
  if uri.nil?
@@ -637,7 +677,7 @@ module REXML
637
677
  end
638
678
  end
639
679
 
640
- def record_entity_expansion(delta=1)
680
+ def record_entity_expansion(delta = 1)
641
681
  @entity_expansion_count += delta
642
682
  if @entity_expansion_count > @entity_expansion_limit
643
683
  raise "number of entity expansions exceeded, processing aborted."
@@ -646,7 +686,8 @@ module REXML
646
686
 
647
687
  def need_source_encoding_update?(xml_declaration_encoding)
648
688
  return false if xml_declaration_encoding.nil?
649
- return false if /^UTF-16$/i =~ xml_declaration_encoding
689
+ return false if /^UTF-16$/i.match?(xml_declaration_encoding)
690
+
650
691
  true
651
692
  end
652
693
 
@@ -657,11 +698,11 @@ module REXML
657
698
  def parse_name(base_error_message)
658
699
  md = @source.match(Private::NAME_PATTERN, true)
659
700
  unless md
660
- if @source.match?(/\S/um)
661
- message = "#{base_error_message}: invalid name"
662
- else
663
- message = "#{base_error_message}: name is missing"
664
- end
701
+ message = if @source.match?(/\S/um)
702
+ "#{base_error_message}: invalid name"
703
+ else
704
+ "#{base_error_message}: name is missing"
705
+ end
665
706
  raise REXML::ParseException.new(message, @source)
666
707
  end
667
708
  md[0]
@@ -670,19 +711,20 @@ module REXML
670
711
  def parse_id(base_error_message,
671
712
  accept_external_id:,
672
713
  accept_public_id:)
673
- if accept_external_id and (md = @source.match(EXTERNAL_ID_PUBLIC, true))
714
+ if accept_external_id && (md = @source.match(EXTERNAL_ID_PUBLIC, true))
674
715
  pubid = system = nil
675
716
  pubid_literal = md[1]
676
717
  pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
677
718
  system_literal = md[2]
678
719
  system = system_literal[1..-2] if system_literal # Remove quote
679
720
  ["PUBLIC", pubid, system]
680
- elsif accept_public_id and (md = @source.match(PUBLIC_ID, true))
681
- pubid = system = nil
721
+ elsif accept_public_id && (md = @source.match(PUBLIC_ID, true))
722
+ pubid = nil
682
723
  pubid_literal = md[1]
683
724
  pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
684
725
  ["PUBLIC", pubid, nil]
685
- elsif accept_external_id and (md = @source.match(EXTERNAL_ID_SYSTEM, true))
726
+ elsif accept_external_id && (md = @source.match(EXTERNAL_ID_SYSTEM,
727
+ true))
686
728
  system = nil
687
729
  system_literal = md[1]
688
730
  system = system_literal[1..-2] if system_literal # Remove quote
@@ -699,13 +741,14 @@ module REXML
699
741
  accept_public_id:)
700
742
  public = /^\s*PUBLIC/um
701
743
  system = /^\s*SYSTEM/um
702
- if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um)
744
+ if (accept_external_id || accept_public_id) && @source.match?(/#{public}/um)
703
745
  if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um)
704
746
  return "public ID literal is missing"
705
747
  end
706
748
  unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um)
707
749
  return "invalid public ID literal"
708
750
  end
751
+
709
752
  if accept_public_id
710
753
  if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um)
711
754
  return "system ID literal is missing"
@@ -713,22 +756,25 @@ module REXML
713
756
  unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
714
757
  return "invalid system literal"
715
758
  end
759
+
716
760
  "garbage after system literal"
717
761
  else
718
762
  "garbage after public ID literal"
719
763
  end
720
- elsif accept_external_id and @source.match?(/#{system}/um)
764
+ elsif accept_external_id && @source.match?(/#{system}/um)
721
765
  if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um)
722
766
  return "system literal is missing"
723
767
  end
724
768
  unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um)
725
769
  return "invalid system literal"
726
770
  end
771
+
727
772
  "garbage after system literal"
728
773
  else
729
774
  unless @source.match?(/^\s*(?:PUBLIC|SYSTEM)\s/um)
730
775
  return "invalid ID type"
731
776
  end
777
+
732
778
  "ID type is missing"
733
779
  end
734
780
  end
@@ -736,13 +782,17 @@ module REXML
736
782
  def process_comment
737
783
  text = @source.read_until("-->")
738
784
  unless text.end_with?("-->")
739
- raise REXML::ParseException.new("Unclosed comment: Missing end '-->'", @source)
785
+ raise REXML::ParseException.new(
786
+ "Unclosed comment: Missing end '-->'", @source
787
+ )
740
788
  end
789
+
741
790
  text = text[0...-3]
742
791
 
743
- if text.include? "--" or text.end_with?("-")
792
+ if text.include?("--") || text.end_with?("-")
744
793
  raise REXML::ParseException.new("Malformed comment", @source)
745
794
  end
795
+
746
796
  text
747
797
  end
748
798
 
@@ -756,13 +806,17 @@ module REXML
756
806
  content = @source.read_until("?>")
757
807
  unless content.end_with?("?>")
758
808
  @source.position = start_position
759
- raise ParseException.new("Malformed XML: Unclosed processing instruction: <#{name}>", @source)
809
+ raise ParseException.new(
810
+ "Malformed XML: Unclosed processing instruction: <#{name}>", @source
811
+ )
760
812
  end
761
813
  content = content[0...-2]
762
814
  else # e.g. <?name?>
763
815
  content = nil
764
816
  unless @source.match?("?>", true)
765
- raise ParseException.new("Malformed XML: Unclosed processing instruction: <#{name}>", @source)
817
+ raise ParseException.new(
818
+ "Malformed XML: Unclosed processing instruction: <#{name}>", @source
819
+ )
766
820
  end
767
821
  end
768
822
  [:processing_instruction, name, content]
@@ -771,37 +825,51 @@ module REXML
771
825
 
772
826
  def xml_declaration
773
827
  unless @version.nil?
774
- raise ParseException.new("Malformed XML: XML declaration is duplicated", @source)
828
+ raise ParseException.new(
829
+ "Malformed XML: XML declaration is duplicated", @source
830
+ )
775
831
  end
776
832
  if @document_status
777
- raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
833
+ raise ParseException.new(
834
+ "Malformed XML: XML declaration is not at the start", @source
835
+ )
778
836
  end
779
837
  unless @source.skip_spaces
780
- raise ParseException.new("Malformed XML: XML declaration misses spaces before version", @source)
838
+ raise ParseException.new(
839
+ "Malformed XML: XML declaration misses spaces before version", @source
840
+ )
781
841
  end
782
842
  unless @source.match?("version", true)
783
- raise ParseException.new("Malformed XML: XML declaration misses version", @source)
843
+ raise ParseException.new(
844
+ "Malformed XML: XML declaration misses version", @source
845
+ )
784
846
  end
847
+
785
848
  @version = parse_attribute_value_with_equal("xml")
786
849
  unless @source.skip_spaces
787
850
  unless @source.match?("?>", true)
788
- raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
851
+ raise ParseException.new("Malformed XML: Unclosed XML declaration",
852
+ @source)
789
853
  end
854
+
790
855
  encoding = normalize_xml_declaration_encoding(@source.encoding)
791
- return [ :xmldecl, @version, encoding, nil ] # e.g. <?xml version="1.0"?>
856
+ return [:xmldecl, @version, encoding, nil] # e.g. <?xml version="1.0"?>
792
857
  end
793
858
 
794
859
  if @source.match?("encoding", true)
795
860
  encoding = parse_attribute_value_with_equal("xml")
796
861
  unless @source.skip_spaces
797
862
  unless @source.match?("?>", true)
798
- raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
863
+ raise ParseException.new(
864
+ "Malformed XML: Unclosed XML declaration", @source
865
+ )
799
866
  end
867
+
800
868
  if need_source_encoding_update?(encoding)
801
869
  @source.encoding = encoding
802
870
  end
803
871
  encoding ||= normalize_xml_declaration_encoding(@source.encoding)
804
- return [ :xmldecl, @version, encoding, nil ] # e.g. <?xml version="1.1" encoding="UTF-8"?>
872
+ return [:xmldecl, @version, encoding, nil] # e.g. <?xml version="1.1" encoding="UTF-8"?>
805
873
  end
806
874
  end
807
875
 
@@ -810,12 +878,15 @@ module REXML
810
878
  case standalone
811
879
  when "yes", "no"
812
880
  else
813
- raise ParseException.new("Malformed XML: XML declaration standalone is not yes or no : <#{standalone}>", @source)
881
+ raise ParseException.new(
882
+ "Malformed XML: XML declaration standalone is not yes or no : <#{standalone}>", @source
883
+ )
814
884
  end
815
885
  end
816
886
  @source.skip_spaces
817
887
  unless @source.match?("?>", true)
818
- raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
888
+ raise ParseException.new("Malformed XML: Unclosed XML declaration",
889
+ @source)
819
890
  end
820
891
 
821
892
  if need_source_encoding_update?(encoding)
@@ -827,7 +898,7 @@ module REXML
827
898
  # <?xml version="1.1" encoding="UTF-8" ?>
828
899
  # <?xml version="1.1" standalone="yes"?>
829
900
  # <?xml version="1.1" encoding="UTF-8" standalone="yes" ?>
830
- [ :xmldecl, @version, encoding, standalone ]
901
+ [:xmldecl, @version, encoding, standalone]
831
902
  end
832
903
 
833
904
  if StringScanner::Version < "3.1.1"
@@ -843,8 +914,6 @@ module REXML
843
914
  when 39 # "'".ord
844
915
  @source.scan_byte
845
916
  "'"
846
- else
847
- nil
848
917
  end
849
918
  end
850
919
  end
@@ -865,14 +934,14 @@ module REXML
865
934
  message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
866
935
  raise REXML::ParseException.new(message, @source)
867
936
  end
868
- value = value[0...-1]
937
+ value[0...-1]
869
938
  end
870
939
 
871
940
  def parse_attributes(prefixes)
872
941
  attributes = {}
873
942
  expanded_names = {}
874
943
  closed = false
875
- while true
944
+ loop do
876
945
  if @source.match?(">", true)
877
946
  return attributes, closed
878
947
  elsif @source.match?("/>", true)
@@ -887,14 +956,14 @@ module REXML
887
956
  if prefix == "xmlns"
888
957
  if local_part == "xml"
889
958
  if value != Private::XML_PREFIXED_NAMESPACE
890
- msg = "The 'xml' prefix must not be bound to any other namespace "+
959
+ msg = "The 'xml' prefix must not be bound to any other namespace " +
891
960
  "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
892
- raise REXML::ParseException.new( msg, @source, self )
961
+ raise REXML::ParseException.new(msg, @source, self)
893
962
  end
894
963
  elsif local_part == "xmlns"
895
- msg = "The 'xmlns' prefix must not be declared "+
964
+ msg = "The 'xmlns' prefix must not be declared " +
896
965
  "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
897
- raise REXML::ParseException.new( msg, @source, self)
966
+ raise REXML::ParseException.new(msg, @source, self)
898
967
  end
899
968
  add_namespace(local_part, value)
900
969
  elsif prefix
@@ -911,10 +980,7 @@ module REXML
911
980
  expanded_name = [uri, local_part]
912
981
  existing_prefix = expanded_names[expanded_name]
913
982
  if existing_prefix
914
- message = "Namespace conflict in adding attribute " +
915
- "\"#{local_part}\": " +
916
- "Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
917
- "prefix \"#{prefix}\" = \"#{uri}\""
983
+ message = "Namespace conflict in adding attribute \"#{local_part}\": Prefix \"#{existing_prefix}\" = \"#{uri}\" and prefix \"#{prefix}\" = \"#{uri}\""
918
984
  raise REXML::ParseException.new(message, @source, self)
919
985
  end
920
986
  expanded_names[expanded_name] = prefix
@@ -931,22 +997,20 @@ module REXML
931
997
  end
932
998
  end
933
999
 
934
- =begin
935
- case event[0]
936
- when :start_element
937
- when :text
938
- when :end_element
939
- when :processing_instruction
940
- when :cdata
941
- when :comment
942
- when :xmldecl
943
- when :start_doctype
944
- when :end_doctype
945
- when :externalentity
946
- when :elementdecl
947
- when :entity
948
- when :attlistdecl
949
- when :notationdecl
950
- when :end_doctype
951
- end
952
- =end
1000
+ # case event[0]
1001
+ # when :start_element
1002
+ # when :text
1003
+ # when :end_element
1004
+ # when :processing_instruction
1005
+ # when :cdata
1006
+ # when :comment
1007
+ # when :xmldecl
1008
+ # when :start_doctype
1009
+ # when :end_doctype
1010
+ # when :externalentity
1011
+ # when :elementdecl
1012
+ # when :entity
1013
+ # when :attlistdecl
1014
+ # when :notationdecl
1015
+ # when :end_doctype
1016
+ # end