brakeman 4.10.1 → 5.0.0.pre1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (123) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +9 -7
  3. data/README.md +1 -1
  4. data/bundle/load.rb +8 -9
  5. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/CHANGELOG.md +1 -8
  6. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/FAQ.md +0 -0
  7. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/Gemfile +0 -0
  8. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/MIT-LICENSE +0 -0
  9. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/README.md +0 -0
  10. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/REFERENCE.md +5 -9
  11. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/TODO +0 -0
  12. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/haml.gemspec +1 -1
  13. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml.rb +0 -0
  14. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/attribute_builder.rb +0 -0
  15. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/attribute_compiler.rb +0 -0
  16. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/attribute_parser.rb +0 -0
  17. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/buffer.rb +0 -0
  18. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/compiler.rb +0 -0
  19. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/engine.rb +0 -0
  20. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/error.rb +0 -0
  21. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/escapable.rb +0 -0
  22. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/exec.rb +0 -0
  23. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/filters.rb +0 -0
  24. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/generator.rb +0 -0
  25. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/helpers.rb +0 -0
  26. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/helpers/action_view_extensions.rb +0 -0
  27. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/helpers/action_view_mods.rb +0 -0
  28. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/helpers/action_view_xss_mods.rb +0 -0
  29. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/helpers/safe_erubi_template.rb +0 -0
  30. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/helpers/safe_erubis_template.rb +0 -0
  31. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/helpers/xss_mods.rb +0 -0
  32. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/options.rb +0 -0
  33. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/parser.rb +3 -31
  34. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/plugin.rb +0 -0
  35. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/railtie.rb +0 -0
  36. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/sass_rails_filter.rb +0 -0
  37. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/template.rb +0 -0
  38. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/template/options.rb +0 -0
  39. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/temple_engine.rb +0 -0
  40. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/temple_line_counter.rb +0 -0
  41. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/util.rb +1 -1
  42. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/version.rb +1 -1
  43. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/yard/default/fulldoc/html/css/common.sass +0 -0
  44. data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/yard/default/layout/html/footer.erb +0 -0
  45. data/lib/brakeman.rb +6 -0
  46. data/lib/brakeman/app_tree.rb +36 -3
  47. data/lib/brakeman/checks/check_execute.rb +1 -1
  48. data/lib/brakeman/checks/check_regex_dos.rb +1 -1
  49. data/lib/brakeman/checks/check_unsafe_reflection_methods.rb +68 -0
  50. data/lib/brakeman/checks/check_verb_confusion.rb +75 -0
  51. data/lib/brakeman/file_parser.rb +19 -23
  52. data/lib/brakeman/options.rb +5 -1
  53. data/lib/brakeman/parsers/template_parser.rb +2 -3
  54. data/lib/brakeman/processors/alias_processor.rb +2 -2
  55. data/lib/brakeman/processors/controller_processor.rb +1 -1
  56. data/lib/brakeman/processors/lib/file_type_detector.rb +64 -0
  57. data/lib/brakeman/processors/output_processor.rb +1 -1
  58. data/lib/brakeman/processors/template_alias_processor.rb +0 -5
  59. data/lib/brakeman/report.rb +8 -0
  60. data/lib/brakeman/report/report_sonar.rb +38 -0
  61. data/lib/brakeman/rescanner.rb +7 -5
  62. data/lib/brakeman/scanner.rb +42 -18
  63. data/lib/brakeman/tracker.rb +6 -0
  64. data/lib/brakeman/tracker/controller.rb +1 -1
  65. data/lib/brakeman/util.rb +9 -4
  66. data/lib/brakeman/version.rb +1 -1
  67. data/lib/brakeman/warning_codes.rb +2 -0
  68. data/lib/ruby_parser/bm_sexp.rb +9 -9
  69. metadata +49 -99
  70. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/Gemfile +0 -6
  71. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/LICENSE.txt +0 -22
  72. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/NEWS.md +0 -141
  73. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/README.md +0 -60
  74. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/attlistdecl.rb +0 -63
  75. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/attribute.rb +0 -205
  76. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/cdata.rb +0 -68
  77. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/child.rb +0 -97
  78. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/comment.rb +0 -80
  79. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/doctype.rb +0 -287
  80. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/document.rb +0 -291
  81. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/dtd/attlistdecl.rb +0 -11
  82. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/dtd/dtd.rb +0 -47
  83. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/dtd/elementdecl.rb +0 -18
  84. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/dtd/entitydecl.rb +0 -57
  85. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/dtd/notationdecl.rb +0 -40
  86. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/element.rb +0 -1269
  87. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/encoding.rb +0 -51
  88. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/entity.rb +0 -171
  89. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/formatters/default.rb +0 -116
  90. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/formatters/pretty.rb +0 -142
  91. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/formatters/transitive.rb +0 -58
  92. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/functions.rb +0 -447
  93. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/instruction.rb +0 -79
  94. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/light/node.rb +0 -196
  95. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/namespace.rb +0 -59
  96. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/node.rb +0 -76
  97. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/output.rb +0 -30
  98. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/parent.rb +0 -166
  99. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/parseexception.rb +0 -52
  100. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/parsers/baseparser.rb +0 -594
  101. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/parsers/lightparser.rb +0 -59
  102. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/parsers/pullparser.rb +0 -197
  103. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/parsers/sax2parser.rb +0 -273
  104. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/parsers/streamparser.rb +0 -61
  105. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/parsers/treeparser.rb +0 -101
  106. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/parsers/ultralightparser.rb +0 -57
  107. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/parsers/xpathparser.rb +0 -675
  108. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/quickpath.rb +0 -266
  109. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/rexml.rb +0 -32
  110. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/sax2listener.rb +0 -98
  111. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/security.rb +0 -28
  112. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/source.rb +0 -298
  113. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/streamlistener.rb +0 -93
  114. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/text.rb +0 -424
  115. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/undefinednamespaceexception.rb +0 -9
  116. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/validation/relaxng.rb +0 -539
  117. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/validation/validation.rb +0 -144
  118. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/validation/validationexception.rb +0 -10
  119. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/xmldecl.rb +0 -130
  120. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/xmltokens.rb +0 -85
  121. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/xpath.rb +0 -81
  122. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/xpath_parser.rb +0 -968
  123. data/bundle/ruby/2.7.0/gems/rexml-3.2.4/rexml.gemspec +0 -84
@@ -1,52 +0,0 @@
1
- # frozen_string_literal: false
2
- module REXML
3
- class ParseException < RuntimeError
4
- attr_accessor :source, :parser, :continued_exception
5
-
6
- def initialize( message, source=nil, parser=nil, exception=nil )
7
- super(message)
8
- @source = source
9
- @parser = parser
10
- @continued_exception = exception
11
- end
12
-
13
- def to_s
14
- # Quote the original exception, if there was one
15
- if @continued_exception
16
- err = @continued_exception.inspect
17
- err << "\n"
18
- err << @continued_exception.backtrace.join("\n")
19
- err << "\n...\n"
20
- else
21
- err = ""
22
- end
23
-
24
- # Get the stack trace and error message
25
- err << super
26
-
27
- # Add contextual information
28
- if @source
29
- err << "\nLine: #{line}\n"
30
- err << "Position: #{position}\n"
31
- err << "Last 80 unconsumed characters:\n"
32
- err << @source.buffer[0..80].force_encoding("ASCII-8BIT").gsub(/\n/, ' ')
33
- end
34
-
35
- err
36
- end
37
-
38
- def position
39
- @source.current_line[0] if @source and defined? @source.current_line and
40
- @source.current_line
41
- end
42
-
43
- def line
44
- @source.current_line[2] if @source and defined? @source.current_line and
45
- @source.current_line
46
- end
47
-
48
- def context
49
- @source.current_line
50
- end
51
- end
52
- end
@@ -1,594 +0,0 @@
1
- # frozen_string_literal: false
2
- require_relative '../parseexception'
3
- require_relative '../undefinednamespaceexception'
4
- require_relative '../source'
5
- require 'set'
6
- require "strscan"
7
-
8
- module REXML
9
- module Parsers
10
- # = Using the Pull Parser
11
- # <em>This API is experimental, and subject to change.</em>
12
- # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
13
- # while parser.has_next?
14
- # res = parser.next
15
- # puts res[1]['att'] if res.start_tag? and res[0] == 'b'
16
- # end
17
- # See the PullEvent class for information on the content of the results.
18
- # The data is identical to the arguments passed for the various events to
19
- # the StreamListener API.
20
- #
21
- # Notice that:
22
- # parser = PullParser.new( "<a>BAD DOCUMENT" )
23
- # while parser.has_next?
24
- # res = parser.next
25
- # raise res[1] if res.error?
26
- # end
27
- #
28
- # Nat Price gave me some good ideas for the API.
29
- class BaseParser
30
- LETTER = '[:alpha:]'
31
- DIGIT = '[:digit:]'
32
-
33
- COMBININGCHAR = '' # TODO
34
- EXTENDER = '' # TODO
35
-
36
- NCNAME_STR= "[#{LETTER}_][-[:alnum:]._#{COMBININGCHAR}#{EXTENDER}]*"
37
- QNAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
38
- QNAME = /(#{QNAME_STR})/
39
-
40
- # Just for backward compatibility. For example, kramdown uses this.
41
- # It's not used in REXML.
42
- UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
43
-
44
- NAMECHAR = '[\-\w\.:]'
45
- NAME = "([\\w:]#{NAMECHAR}*)"
46
- NMTOKEN = "(?:#{NAMECHAR})+"
47
- NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
48
- REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)"
49
- REFERENCE_RE = /#{REFERENCE}/
50
-
51
- DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
52
- DOCTYPE_END = /\A\s*\]\s*>/um
53
- DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
54
- ATTRIBUTE_PATTERN = /\s*(#{QNAME_STR})\s*=\s*(["'])(.*?)\4/um
55
- COMMENT_START = /\A<!--/u
56
- COMMENT_PATTERN = /<!--(.*?)-->/um
57
- CDATA_START = /\A<!\[CDATA\[/u
58
- CDATA_END = /\A\s*\]\s*>/um
59
- CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
60
- XMLDECL_START = /\A<\?xml\s/u;
61
- XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
62
- INSTRUCTION_START = /\A<\?/u
63
- INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um
64
- TAG_MATCH = /^<((?>#{QNAME_STR}))/um
65
- CLOSE_MATCH = /^\s*<\/(#{QNAME_STR})\s*>/um
66
-
67
- VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
68
- ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
69
- STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
70
-
71
- ENTITY_START = /\A\s*<!ENTITY/
72
- IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
73
- ELEMENTDECL_START = /\A\s*<!ELEMENT/um
74
- ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
75
- SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
76
- ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
77
- NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
78
- ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
79
- ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
80
- ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
81
- DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
82
- ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
83
- ATTDEF_RE = /#{ATTDEF}/
84
- ATTLISTDECL_START = /\A\s*<!ATTLIST/um
85
- ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
86
- NOTATIONDECL_START = /\A\s*<!NOTATION/um
87
- PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
88
- SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
89
-
90
- TEXT_PATTERN = /\A([^<]*)/um
91
-
92
- # Entity constants
93
- PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
94
- SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
95
- PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
96
- EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
97
- NDATADECL = "\\s+NDATA\\s+#{NAME}"
98
- PEREFERENCE = "%#{NAME};"
99
- ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
100
- PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
101
- ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
102
- PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
103
- GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
104
- ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
105
-
106
- EREFERENCE = /&(?!#{NAME};)/
107
-
108
- DEFAULT_ENTITIES = {
109
- 'gt' => [/&gt;/, '&gt;', '>', />/],
110
- 'lt' => [/&lt;/, '&lt;', '<', /</],
111
- 'quot' => [/&quot;/, '&quot;', '"', /"/],
112
- "apos" => [/&apos;/, "&apos;", "'", /'/]
113
- }
114
-
115
- def initialize( source )
116
- self.stream = source
117
- @listeners = []
118
- end
119
-
120
- def add_listener( listener )
121
- @listeners << listener
122
- end
123
-
124
- attr_reader :source
125
-
126
- def stream=( source )
127
- @source = SourceFactory.create_from( source )
128
- @closed = nil
129
- @document_status = nil
130
- @tags = []
131
- @stack = []
132
- @entities = []
133
- @nsstack = []
134
- end
135
-
136
- def position
137
- if @source.respond_to? :position
138
- @source.position
139
- else
140
- # FIXME
141
- 0
142
- end
143
- end
144
-
145
- # Returns true if there are no more events
146
- def empty?
147
- return (@source.empty? and @stack.empty?)
148
- end
149
-
150
- # Returns true if there are more events. Synonymous with !empty?
151
- def has_next?
152
- return !(@source.empty? and @stack.empty?)
153
- end
154
-
155
- # Push an event back on the head of the stream. This method
156
- # has (theoretically) infinite depth.
157
- def unshift token
158
- @stack.unshift(token)
159
- end
160
-
161
- # Peek at the +depth+ event in the stack. The first element on the stack
162
- # is at depth 0. If +depth+ is -1, will parse to the end of the input
163
- # stream and return the last event, which is always :end_document.
164
- # Be aware that this causes the stream to be parsed up to the +depth+
165
- # event, so you can effectively pre-parse the entire document (pull the
166
- # entire thing into memory) using this method.
167
- def peek depth=0
168
- raise %Q[Illegal argument "#{depth}"] if depth < -1
169
- temp = []
170
- if depth == -1
171
- temp.push(pull()) until empty?
172
- else
173
- while @stack.size+temp.size < depth+1
174
- temp.push(pull())
175
- end
176
- end
177
- @stack += temp if temp.size > 0
178
- @stack[depth]
179
- end
180
-
181
- # Returns the next event. This is a +PullEvent+ object.
182
- def pull
183
- pull_event.tap do |event|
184
- @listeners.each do |listener|
185
- listener.receive event
186
- end
187
- end
188
- end
189
-
190
- def pull_event
191
- if @closed
192
- x, @closed = @closed, nil
193
- return [ :end_element, x ]
194
- end
195
- return [ :end_document ] if empty?
196
- return @stack.shift if @stack.size > 0
197
- #STDERR.puts @source.encoding
198
- @source.read if @source.buffer.size<2
199
- #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
200
- if @document_status == nil
201
- #@source.consume( /^\s*/um )
202
- word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
203
- word = word[1] unless word.nil?
204
- #STDERR.puts "WORD = #{word.inspect}"
205
- case word
206
- when COMMENT_START
207
- return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
208
- when XMLDECL_START
209
- #STDERR.puts "XMLDECL"
210
- results = @source.match( XMLDECL_PATTERN, true )[1]
211
- version = VERSION.match( results )
212
- version = version[1] unless version.nil?
213
- encoding = ENCODING.match(results)
214
- encoding = encoding[1] unless encoding.nil?
215
- if need_source_encoding_update?(encoding)
216
- @source.encoding = encoding
217
- end
218
- if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
219
- encoding = "UTF-16"
220
- end
221
- standalone = STANDALONE.match(results)
222
- standalone = standalone[1] unless standalone.nil?
223
- return [ :xmldecl, version, encoding, standalone ]
224
- when INSTRUCTION_START
225
- return process_instruction
226
- when DOCTYPE_START
227
- md = @source.match( DOCTYPE_PATTERN, true )
228
- @nsstack.unshift(curr_ns=Set.new)
229
- identity = md[1]
230
- close = md[2]
231
- identity =~ IDENTITY
232
- name = $1
233
- raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
234
- pub_sys = $2.nil? ? nil : $2.strip
235
- long_name = $4.nil? ? nil : $4.strip
236
- uri = $6.nil? ? nil : $6.strip
237
- args = [ :start_doctype, name, pub_sys, long_name, uri ]
238
- if close == ">"
239
- @document_status = :after_doctype
240
- @source.read if @source.buffer.size<2
241
- md = @source.match(/^\s*/um, true)
242
- @stack << [ :end_doctype ]
243
- else
244
- @document_status = :in_doctype
245
- end
246
- return args
247
- when /^\s+/
248
- else
249
- @document_status = :after_doctype
250
- @source.read if @source.buffer.size<2
251
- md = @source.match(/\s*/um, true)
252
- if @source.encoding == "UTF-8"
253
- @source.buffer.force_encoding(::Encoding::UTF_8)
254
- end
255
- end
256
- end
257
- if @document_status == :in_doctype
258
- md = @source.match(/\s*(.*?>)/um)
259
- case md[1]
260
- when SYSTEMENTITY
261
- match = @source.match( SYSTEMENTITY, true )[1]
262
- return [ :externalentity, match ]
263
-
264
- when ELEMENTDECL_START
265
- return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
266
-
267
- when ENTITY_START
268
- match = @source.match( ENTITYDECL, true ).to_a.compact
269
- match[0] = :entitydecl
270
- ref = false
271
- if match[1] == '%'
272
- ref = true
273
- match.delete_at 1
274
- end
275
- # Now we have to sort out what kind of entity reference this is
276
- if match[2] == 'SYSTEM'
277
- # External reference
278
- match[3] = match[3][1..-2] # PUBID
279
- match.delete_at(4) if match.size > 4 # Chop out NDATA decl
280
- # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
281
- elsif match[2] == 'PUBLIC'
282
- # External reference
283
- match[3] = match[3][1..-2] # PUBID
284
- match[4] = match[4][1..-2] # HREF
285
- match.delete_at(5) if match.size > 5 # Chop out NDATA decl
286
- # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
287
- else
288
- match[2] = match[2][1..-2]
289
- match.pop if match.size == 4
290
- # match is [ :entity, name, value ]
291
- end
292
- match << '%' if ref
293
- return match
294
- when ATTLISTDECL_START
295
- md = @source.match( ATTLISTDECL_PATTERN, true )
296
- raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
297
- element = md[1]
298
- contents = md[0]
299
-
300
- pairs = {}
301
- values = md[0].scan( ATTDEF_RE )
302
- values.each do |attdef|
303
- unless attdef[3] == "#IMPLIED"
304
- attdef.compact!
305
- val = attdef[3]
306
- val = attdef[4] if val == "#FIXED "
307
- pairs[attdef[0]] = val
308
- if attdef[0] =~ /^xmlns:(.*)/
309
- @nsstack[0] << $1
310
- end
311
- end
312
- end
313
- return [ :attlistdecl, element, pairs, contents ]
314
- when NOTATIONDECL_START
315
- md = nil
316
- if @source.match( PUBLIC )
317
- md = @source.match( PUBLIC, true )
318
- vals = [md[1],md[2],md[4],md[6]]
319
- elsif @source.match( SYSTEM )
320
- md = @source.match( SYSTEM, true )
321
- vals = [md[1],md[2],nil,md[4]]
322
- else
323
- raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
324
- end
325
- return [ :notationdecl, *vals ]
326
- when DOCTYPE_END
327
- @document_status = :after_doctype
328
- @source.match( DOCTYPE_END, true )
329
- return [ :end_doctype ]
330
- end
331
- end
332
- begin
333
- if @source.buffer[0] == ?<
334
- if @source.buffer[1] == ?/
335
- @nsstack.shift
336
- last_tag = @tags.pop
337
- md = @source.match( CLOSE_MATCH, true )
338
- if md and !last_tag
339
- message = "Unexpected top-level end tag (got '#{md[1]}')"
340
- raise REXML::ParseException.new(message, @source)
341
- end
342
- if md.nil? or last_tag != md[1]
343
- message = "Missing end tag for '#{last_tag}'"
344
- message << " (got '#{md[1]}')" if md
345
- raise REXML::ParseException.new(message, @source)
346
- end
347
- return [ :end_element, last_tag ]
348
- elsif @source.buffer[1] == ?!
349
- md = @source.match(/\A(\s*[^>]*>)/um)
350
- #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
351
- raise REXML::ParseException.new("Malformed node", @source) unless md
352
- if md[0][2] == ?-
353
- md = @source.match( COMMENT_PATTERN, true )
354
-
355
- case md[1]
356
- when /--/, /-\z/
357
- raise REXML::ParseException.new("Malformed comment", @source)
358
- end
359
-
360
- return [ :comment, md[1] ] if md
361
- else
362
- md = @source.match( CDATA_PATTERN, true )
363
- return [ :cdata, md[1] ] if md
364
- end
365
- raise REXML::ParseException.new( "Declarations can only occur "+
366
- "in the doctype declaration.", @source)
367
- elsif @source.buffer[1] == ??
368
- return process_instruction
369
- else
370
- # Get the next tag
371
- md = @source.match(TAG_MATCH, true)
372
- unless md
373
- raise REXML::ParseException.new("malformed XML: missing tag start", @source)
374
- end
375
- prefixes = Set.new
376
- prefixes << md[2] if md[2]
377
- @nsstack.unshift(curr_ns=Set.new)
378
- attributes, closed = parse_attributes(prefixes, curr_ns)
379
- # Verify that all of the prefixes have been defined
380
- for prefix in prefixes
381
- unless @nsstack.find{|k| k.member?(prefix)}
382
- raise UndefinedNamespaceException.new(prefix,@source,self)
383
- end
384
- end
385
-
386
- if closed
387
- @closed = md[1]
388
- @nsstack.shift
389
- else
390
- @tags.push( md[1] )
391
- end
392
- return [ :start_element, md[1], attributes ]
393
- end
394
- else
395
- md = @source.match( TEXT_PATTERN, true )
396
- if md[0].length == 0
397
- @source.match( /(\s+)/, true )
398
- end
399
- #STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0
400
- #return [ :text, "" ] if md[0].length == 0
401
- # unnormalized = Text::unnormalize( md[1], self )
402
- # return PullEvent.new( :text, md[1], unnormalized )
403
- return [ :text, md[1] ]
404
- end
405
- rescue REXML::UndefinedNamespaceException
406
- raise
407
- rescue REXML::ParseException
408
- raise
409
- rescue => error
410
- raise REXML::ParseException.new( "Exception parsing",
411
- @source, self, (error ? error : $!) )
412
- end
413
- return [ :dummy ]
414
- end
415
- private :pull_event
416
-
417
- def entity( reference, entities )
418
- value = nil
419
- value = entities[ reference ] if entities
420
- if not value
421
- value = DEFAULT_ENTITIES[ reference ]
422
- value = value[2] if value
423
- end
424
- unnormalize( value, entities ) if value
425
- end
426
-
427
- # Escapes all possible entities
428
- def normalize( input, entities=nil, entity_filter=nil )
429
- copy = input.clone
430
- # Doing it like this rather than in a loop improves the speed
431
- copy.gsub!( EREFERENCE, '&amp;' )
432
- entities.each do |key, value|
433
- copy.gsub!( value, "&#{key};" ) unless entity_filter and
434
- entity_filter.include?(entity)
435
- end if entities
436
- copy.gsub!( EREFERENCE, '&amp;' )
437
- DEFAULT_ENTITIES.each do |key, value|
438
- copy.gsub!( value[3], value[1] )
439
- end
440
- copy
441
- end
442
-
443
- # Unescapes all possible entities
444
- def unnormalize( string, entities=nil, filter=nil )
445
- rv = string.clone
446
- rv.gsub!( /\r\n?/, "\n" )
447
- matches = rv.scan( REFERENCE_RE )
448
- return rv if matches.size == 0
449
- rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
450
- m=$1
451
- m = "0#{m}" if m[0] == ?x
452
- [Integer(m)].pack('U*')
453
- }
454
- matches.collect!{|x|x[0]}.compact!
455
- if matches.size > 0
456
- matches.each do |entity_reference|
457
- unless filter and filter.include?(entity_reference)
458
- entity_value = entity( entity_reference, entities )
459
- if entity_value
460
- re = /&#{entity_reference};/
461
- rv.gsub!( re, entity_value )
462
- else
463
- er = DEFAULT_ENTITIES[entity_reference]
464
- rv.gsub!( er[0], er[2] ) if er
465
- end
466
- end
467
- end
468
- rv.gsub!( /&amp;/, '&' )
469
- end
470
- rv
471
- end
472
-
473
- private
474
- def need_source_encoding_update?(xml_declaration_encoding)
475
- return false if xml_declaration_encoding.nil?
476
- return false if /\AUTF-16\z/i =~ xml_declaration_encoding
477
- true
478
- end
479
-
480
- def process_instruction
481
- match_data = @source.match(INSTRUCTION_PATTERN, true)
482
- unless match_data
483
- message = "Invalid processing instruction node"
484
- raise REXML::ParseException.new(message, @source)
485
- end
486
- [:processing_instruction, match_data[1], match_data[2]]
487
- end
488
-
489
- def parse_attributes(prefixes, curr_ns)
490
- attributes = {}
491
- closed = false
492
- match_data = @source.match(/^(.*?)(\/)?>/um, true)
493
- if match_data.nil?
494
- message = "Start tag isn't ended"
495
- raise REXML::ParseException.new(message, @source)
496
- end
497
-
498
- raw_attributes = match_data[1]
499
- closed = !match_data[2].nil?
500
- return attributes, closed if raw_attributes.nil?
501
- return attributes, closed if raw_attributes.empty?
502
-
503
- scanner = StringScanner.new(raw_attributes)
504
- until scanner.eos?
505
- if scanner.scan(/\s+/)
506
- break if scanner.eos?
507
- end
508
-
509
- pos = scanner.pos
510
- loop do
511
- break if scanner.scan(ATTRIBUTE_PATTERN)
512
- unless scanner.scan(QNAME)
513
- message = "Invalid attribute name: <#{scanner.rest}>"
514
- raise REXML::ParseException.new(message, @source)
515
- end
516
- name = scanner[0]
517
- unless scanner.scan(/\s*=\s*/um)
518
- message = "Missing attribute equal: <#{name}>"
519
- raise REXML::ParseException.new(message, @source)
520
- end
521
- quote = scanner.scan(/['"]/)
522
- unless quote
523
- message = "Missing attribute value start quote: <#{name}>"
524
- raise REXML::ParseException.new(message, @source)
525
- end
526
- unless scanner.scan(/.*#{Regexp.escape(quote)}/um)
527
- match_data = @source.match(/^(.*?)(\/)?>/um, true)
528
- if match_data
529
- scanner << "/" if closed
530
- scanner << ">"
531
- scanner << match_data[1]
532
- scanner.pos = pos
533
- closed = !match_data[2].nil?
534
- next
535
- end
536
- message =
537
- "Missing attribute value end quote: <#{name}>: <#{quote}>"
538
- raise REXML::ParseException.new(message, @source)
539
- end
540
- end
541
- name = scanner[1]
542
- prefix = scanner[2]
543
- local_part = scanner[3]
544
- # quote = scanner[4]
545
- value = scanner[5]
546
- if prefix == "xmlns"
547
- if local_part == "xml"
548
- if value != "http://www.w3.org/XML/1998/namespace"
549
- msg = "The 'xml' prefix must not be bound to any other namespace "+
550
- "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
551
- raise REXML::ParseException.new( msg, @source, self )
552
- end
553
- elsif local_part == "xmlns"
554
- msg = "The 'xmlns' prefix must not be declared "+
555
- "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
556
- raise REXML::ParseException.new( msg, @source, self)
557
- end
558
- curr_ns << local_part
559
- elsif prefix
560
- prefixes << prefix unless prefix == "xml"
561
- end
562
-
563
- if attributes.has_key?(name)
564
- msg = "Duplicate attribute #{name.inspect}"
565
- raise REXML::ParseException.new(msg, @source, self)
566
- end
567
-
568
- attributes[name] = value
569
- end
570
- return attributes, closed
571
- end
572
- end
573
- end
574
- end
575
-
576
- =begin
577
- case event[0]
578
- when :start_element
579
- when :text
580
- when :end_element
581
- when :processing_instruction
582
- when :cdata
583
- when :comment
584
- when :xmldecl
585
- when :start_doctype
586
- when :end_doctype
587
- when :externalentity
588
- when :elementdecl
589
- when :entity
590
- when :attlistdecl
591
- when :notationdecl
592
- when :end_doctype
593
- end
594
- =end