brakeman 4.10.1 → 5.0.0.pre1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGES.md +9 -7
- data/README.md +1 -1
- data/bundle/load.rb +8 -9
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/CHANGELOG.md +1 -8
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/FAQ.md +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/Gemfile +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/MIT-LICENSE +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/README.md +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/REFERENCE.md +5 -9
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/TODO +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/haml.gemspec +1 -1
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/attribute_builder.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/attribute_compiler.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/attribute_parser.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/buffer.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/compiler.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/engine.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/error.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/escapable.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/exec.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/filters.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/generator.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/helpers.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/helpers/action_view_extensions.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/helpers/action_view_mods.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/helpers/action_view_xss_mods.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/helpers/safe_erubi_template.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/helpers/safe_erubis_template.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/helpers/xss_mods.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/options.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/parser.rb +3 -31
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/plugin.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/railtie.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/sass_rails_filter.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/template.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/template/options.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/temple_engine.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/temple_line_counter.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/util.rb +1 -1
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/lib/haml/version.rb +1 -1
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/yard/default/fulldoc/html/css/common.sass +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.2.1 → haml-5.2.0}/yard/default/layout/html/footer.erb +0 -0
- data/lib/brakeman.rb +6 -0
- data/lib/brakeman/app_tree.rb +36 -3
- data/lib/brakeman/checks/check_execute.rb +1 -1
- data/lib/brakeman/checks/check_regex_dos.rb +1 -1
- data/lib/brakeman/checks/check_unsafe_reflection_methods.rb +68 -0
- data/lib/brakeman/checks/check_verb_confusion.rb +75 -0
- data/lib/brakeman/file_parser.rb +19 -23
- data/lib/brakeman/options.rb +5 -1
- data/lib/brakeman/parsers/template_parser.rb +2 -3
- data/lib/brakeman/processors/alias_processor.rb +2 -2
- data/lib/brakeman/processors/controller_processor.rb +1 -1
- data/lib/brakeman/processors/lib/file_type_detector.rb +64 -0
- data/lib/brakeman/processors/output_processor.rb +1 -1
- data/lib/brakeman/processors/template_alias_processor.rb +0 -5
- data/lib/brakeman/report.rb +8 -0
- data/lib/brakeman/report/report_sonar.rb +38 -0
- data/lib/brakeman/rescanner.rb +7 -5
- data/lib/brakeman/scanner.rb +42 -18
- data/lib/brakeman/tracker.rb +6 -0
- data/lib/brakeman/tracker/controller.rb +1 -1
- data/lib/brakeman/util.rb +9 -4
- data/lib/brakeman/version.rb +1 -1
- data/lib/brakeman/warning_codes.rb +2 -0
- data/lib/ruby_parser/bm_sexp.rb +9 -9
- metadata +49 -99
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/Gemfile +0 -6
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/LICENSE.txt +0 -22
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/NEWS.md +0 -141
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/README.md +0 -60
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/attlistdecl.rb +0 -63
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/attribute.rb +0 -205
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/cdata.rb +0 -68
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/child.rb +0 -97
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/comment.rb +0 -80
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/doctype.rb +0 -287
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/document.rb +0 -291
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/dtd/attlistdecl.rb +0 -11
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/dtd/dtd.rb +0 -47
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/dtd/elementdecl.rb +0 -18
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/dtd/entitydecl.rb +0 -57
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/dtd/notationdecl.rb +0 -40
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/element.rb +0 -1269
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/encoding.rb +0 -51
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/entity.rb +0 -171
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/formatters/default.rb +0 -116
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/formatters/pretty.rb +0 -142
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/formatters/transitive.rb +0 -58
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/functions.rb +0 -447
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/instruction.rb +0 -79
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/light/node.rb +0 -196
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/namespace.rb +0 -59
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/node.rb +0 -76
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/output.rb +0 -30
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/parent.rb +0 -166
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/parseexception.rb +0 -52
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/parsers/baseparser.rb +0 -594
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/parsers/lightparser.rb +0 -59
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/parsers/pullparser.rb +0 -197
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/parsers/sax2parser.rb +0 -273
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/parsers/streamparser.rb +0 -61
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/parsers/treeparser.rb +0 -101
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/parsers/ultralightparser.rb +0 -57
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/parsers/xpathparser.rb +0 -675
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/quickpath.rb +0 -266
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/rexml.rb +0 -32
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/sax2listener.rb +0 -98
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/security.rb +0 -28
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/source.rb +0 -298
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/streamlistener.rb +0 -93
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/text.rb +0 -424
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/undefinednamespaceexception.rb +0 -9
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/validation/relaxng.rb +0 -539
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/validation/validation.rb +0 -144
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/validation/validationexception.rb +0 -10
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/xmldecl.rb +0 -130
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/xmltokens.rb +0 -85
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/xpath.rb +0 -81
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/lib/rexml/xpath_parser.rb +0 -968
- data/bundle/ruby/2.7.0/gems/rexml-3.2.4/rexml.gemspec +0 -84
@@ -1,52 +0,0 @@
|
|
1
|
-
# frozen_string_literal: false
|
2
|
-
module REXML
  # Error raised when a document cannot be parsed.
  #
  # Besides the message it can carry the source being read, the parser that
  # raised it, and an underlying exception that triggered the failure
  # (+continued_exception+). #to_s folds all of that into one report.
  class ParseException < RuntimeError
    attr_accessor :source, :parser, :continued_exception

    # message::   human readable description of the failure
    # source::    object being parsed; #to_s reads its +buffer+ and
    #             +current_line+ when it is given
    # parser::    the parser that raised the error (stored only)
    # exception:: an underlying exception to quote in #to_s
    def initialize( message, source=nil, parser=nil, exception=nil )
      super(message)
      @source = source
      @parser = parser
      @continued_exception = exception
    end

    # Builds the full report: the quoted underlying exception (if any),
    # the message, and - when a source is attached - line, position and the
    # head of the unconsumed buffer.
    def to_s
      # Always accumulate into a fresh, mutable string so that the string
      # returned by the wrapped exception's #inspect is never modified.
      err = +""

      # Quote the original exception, if there was one
      if @continued_exception
        err << @continued_exception.inspect
        err << "\n"
        # An exception that was constructed but never raised has a nil
        # backtrace; Array() turns that into an empty list instead of
        # failing with NoMethodError.
        err << Array(@continued_exception.backtrace).join("\n")
        err << "\n...\n"
      end

      # Get the stack trace and error message
      err << super

      # Add contextual information
      if @source
        err << "\nLine: #{line}\n"
        err << "Position: #{position}\n"
        err << "Last 80 unconsumed characters:\n"
        err << @source.buffer[0..80].force_encoding("ASCII-8BIT").gsub(/\n/, ' ')
      end

      err
    end

    # First element of the source's +current_line+, or nil when there is no
    # source, the source has no +current_line+, or it returns nil.
    def position
      @source.current_line[0] if @source and defined? @source.current_line and
        @source.current_line
    end

    # Third element of the source's +current_line+, with the same nil
    # handling as #position.
    def line
      @source.current_line[2] if @source and defined? @source.current_line and
        @source.current_line
    end

    # The source's raw +current_line+ value, or nil when no source is
    # attached (previously this raised NoMethodError on a nil source).
    def context
      @source.current_line if @source
    end
  end
end
|
@@ -1,594 +0,0 @@
|
|
1
|
-
# frozen_string_literal: false
|
2
|
-
require_relative '../parseexception'
|
3
|
-
require_relative '../undefinednamespaceexception'
|
4
|
-
require_relative '../source'
|
5
|
-
require 'set'
|
6
|
-
require "strscan"
|
7
|
-
|
8
|
-
module REXML
|
9
|
-
module Parsers
|
10
|
-
# = Using the Pull Parser
|
11
|
-
# <em>This API is experimental, and subject to change.</em>
|
12
|
-
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
13
|
-
# while parser.has_next?
|
14
|
-
# res = parser.next
|
15
|
-
# puts res[1]['att'] if res.start_tag? and res[0] == 'b'
|
16
|
-
# end
|
17
|
-
# See the PullEvent class for information on the content of the results.
|
18
|
-
# The data is identical to the arguments passed for the various events to
|
19
|
-
# the StreamListener API.
|
20
|
-
#
|
21
|
-
# Notice that:
|
22
|
-
# parser = PullParser.new( "<a>BAD DOCUMENT" )
|
23
|
-
# while parser.has_next?
|
24
|
-
# res = parser.next
|
25
|
-
# raise res[1] if res.error?
|
26
|
-
# end
|
27
|
-
#
|
28
|
-
# Nat Price gave me some good ideas for the API.
|
29
|
-
class BaseParser
|
30
|
-
# POSIX bracket-expression names; they are spliced inside [...] below.
LETTER = '[:alpha:]'
DIGIT = '[:digit:]'

COMBININGCHAR = '' # TODO
EXTENDER = '' # TODO

# Name without a colon, and an optionally prefixed name. QNAME_STR captures
# the prefix and the local part as two groups.
NCNAME_STR= "[#{LETTER}_][-[:alnum:]._#{COMBININGCHAR}#{EXTENDER}]*"
QNAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
QNAME = /(#{QNAME_STR})/

# Just for backward compatibility. For example, kramdown uses this.
# It's not used in REXML.
UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"

# String fragments (not Regexp objects) that are interpolated into the
# patterns further down. NAME contributes one capture group wherever it
# is interpolated.
NAMECHAR = '[\-\w\.:]'
NAME = "([\\w:]#{NAMECHAR}*)"
NMTOKEN = "(?:#{NAMECHAR})+"
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)"
REFERENCE_RE = /#{REFERENCE}/

# *_START patterns are anchored probes used to classify the next chunk of
# input; the matching *_PATTERN is then used to consume it.
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
DOCTYPE_END = /\A\s*\]\s*>/um
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
# Groups: 1 full name, 2 prefix, 3 local part, 4 opening quote, 5 value.
# \4 requires the closing quote to be the same character as the opening one.
ATTRIBUTE_PATTERN = /\s*(#{QNAME_STR})\s*=\s*(["'])(.*?)\4/um
COMMENT_START = /\A<!--/u
COMMENT_PATTERN = /<!--(.*?)-->/um
CDATA_START = /\A<!\[CDATA\[/u
# NOTE(review): identical to DOCTYPE_END; confirm this is intentional.
CDATA_END = /\A\s*\]\s*>/um
CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
XMLDECL_START = /\A<\?xml\s/u;
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
INSTRUCTION_START = /\A<\?/u
INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um
# Groups: 1 full tag name (atomic group), 2 prefix, 3 local part.
TAG_MATCH = /^<((?>#{QNAME_STR}))/um
CLOSE_MATCH = /^\s*<\/(#{QNAME_STR})\s*>/um

# Pseudo-attributes picked out of the text of an XML declaration.
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um

ENTITY_START = /\A\s*<!ENTITY/
# Applied to the text captured by DOCTYPE_PATTERN; groups 1, 2, 4 and 6 are
# the ones read back by the DOCTYPE handling in pull_event.
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
ELEMENTDECL_START = /\A\s*<!ELEMENT/um
ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
ATTDEF_RE = /#{ATTDEF}/
ATTLISTDECL_START = /\A\s*<!ATTLIST/um
ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
NOTATIONDECL_START = /\A\s*<!NOTATION/um
# The two NOTATION forms; \3 and \5 force matching quote characters.
PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um

# Everything up to (not including) the next '<'; may match the empty string.
TEXT_PATTERN = /\A([^<]*)/um

# Entity constants
PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
NDATADECL = "\\s+NDATA\\s+#{NAME}"
PEREFERENCE = "%#{NAME};"
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
# NOTE(review): the leading \s* belongs to the first alternative only,
# because | binds looser than concatenation - confirm that is intended.
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um

# An '&' that does not start a named reference (negative lookahead).
EREFERENCE = /&(?!#{NAME};)/
|
107
|
-
|
108
|
-
# The predefined XML entities, keyed by entity name. Each row is
#   [ pattern matching the escaped form, escaped form,
#     literal character, pattern matching the literal character ]
# normalize reads columns 3 and 1 (literal -> escaped); unnormalize and
# entity read columns 0 and 2 (escaped -> literal). The escaped columns must
# therefore hold the "&name;" spelling, not the bare character.
DEFAULT_ENTITIES = {
  'gt' => [/&gt;/, '&gt;', '>', />/],
  'lt' => [/&lt;/, '&lt;', '<', /</],
  'quot' => [/&quot;/, '&quot;', '"', /"/],
  "apos" => [/&apos;/, "&apos;", "'", /'/]
}
|
114
|
-
|
115
|
-
# Builds a parser for +source+.
# The source is handed to #stream= (which also resets the parse state) and
# the parser starts with no registered listeners.
def initialize( source )
  self.stream = source
  @listeners = Array.new
end
|
119
|
-
|
120
|
-
# Registers +listener+. Every event produced by #pull is passed to the
# +receive+ method of each registered listener, in registration order.
# Returns the listener list.
def add_listener( listener )
  @listeners.push( listener )
end
|
123
|
-
|
124
|
-
attr_reader :source
|
125
|
-
|
126
|
-
# Points the parser at a new input and clears all parsing state.
# +source+ is wrapped via SourceFactory.create_from; the pending-close
# marker and document status are reset to nil, and the tag, event, entity
# and namespace stacks each start as a new empty array.
def stream=( source )
  @source = SourceFactory.create_from( source )
  @closed = @document_status = nil
  @tags, @stack, @entities, @nsstack = [], [], [], []
end
|
135
|
-
|
136
|
-
# Position reported by the source, or 0 when the source does not respond
# to +position+.
def position
  # FIXME
  return 0 unless @source.respond_to? :position

  @source.position
end
|
144
|
-
|
145
|
-
# True when nothing is left to read from the source and no events are
# waiting on the stack.
def empty?
  @source.empty? && @stack.empty?
end
|
149
|
-
|
150
|
-
# True while the source still has input or events are waiting on the
# stack; the logical opposite of #empty?.
def has_next?
  !@source.empty? || !@stack.empty?
end
|
154
|
-
|
155
|
-
# Puts +token+ back at the front of the pending-event stack, so it is the
# next event handed out. Any number of events may be pushed back.
def unshift token
  @stack.insert( 0, token )
end
|
160
|
-
|
161
|
-
# Looks ahead at the event +depth+ places from the front of the stream
# without consuming it (depth 0 is the next event). A depth of -1 parses
# everything that remains and returns the final event.
# Looking ahead pulls events and keeps them on the stack, so a deep peek
# parses - and holds in memory - that much of the document.
# Raises RuntimeError when +depth+ is below -1.
def peek depth=0
  raise %Q[Illegal argument "#{depth}"] if depth < -1
  pulled = []
  if depth == -1
    pulled.push(pull()) until empty?
  else
    wanted = depth+1
    pulled.push(pull()) while @stack.size+pulled.size < wanted
  end
  @stack += pulled unless pulled.size == 0
  @stack[depth]
end
|
180
|
-
|
181
|
-
# Produces the next event and returns it, after handing it to the
# +receive+ method of every registered listener.
def pull
  event = pull_event
  @listeners.each { |listener| listener.receive event }
  event
end
|
189
|
-
|
190
|
-
# Produces the next parse event as an array whose first element is the
# event type symbol (:start_element, :end_element, :text, :comment, ...).
#
# Order of precedence:
# 1. a pending :end_element recorded for a self-closing tag (@closed),
# 2. :end_document when source and stack are both empty,
# 3. an event previously pushed onto @stack,
# 4. freshly parsed input, driven by @document_status
#    (nil -> prolog, :in_doctype -> internal subset, :after_doctype -> body).
#
# Raises REXML::ParseException on malformed input and
# UndefinedNamespaceException for an undeclared prefix; any other error
# raised while parsing the body is wrapped in a ParseException.
def pull_event
  if @closed
    # A self-closing tag was reported as :start_element on the previous
    # call; emit its matching :end_element now.
    x, @closed = @closed, nil
    return [ :end_element, x ]
  end
  return [ :end_document ] if empty?
  return @stack.shift if @stack.size > 0
  #STDERR.puts @source.encoding
  @source.read if @source.buffer.size<2
  #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
  if @document_status == nil
    # Prolog: peek (without consuming) at the next whitespace run or
    # <...> chunk and classify it.
    #@source.consume( /^\s*/um )
    word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
    word = word[1] unless word.nil?
    #STDERR.puts "WORD = #{word.inspect}"
    case word
    when COMMENT_START
      return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
    when XMLDECL_START
      #STDERR.puts "XMLDECL"
      results = @source.match( XMLDECL_PATTERN, true )[1]
      version = VERSION.match( results )
      version = version[1] unless version.nil?
      encoding = ENCODING.match(results)
      encoding = encoding[1] unless encoding.nil?
      if need_source_encoding_update?(encoding)
        @source.encoding = encoding
      end
      # No declared encoding but the source is UTF-16BE/LE: report the
      # generic "UTF-16" in the event.
      if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
        encoding = "UTF-16"
      end
      standalone = STANDALONE.match(results)
      standalone = standalone[1] unless standalone.nil?
      return [ :xmldecl, version, encoding, standalone ]
    when INSTRUCTION_START
      return process_instruction
    when DOCTYPE_START
      md = @source.match( DOCTYPE_PATTERN, true )
      @nsstack.unshift(curr_ns=Set.new)
      identity = md[1]
      close = md[2]
      # IDENTITY sets $1, $2, $4 and $6, which are read just below.
      identity =~ IDENTITY
      name = $1
      raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
      pub_sys = $2.nil? ? nil : $2.strip
      long_name = $4.nil? ? nil : $4.strip
      uri = $6.nil? ? nil : $6.strip
      args = [ :start_doctype, name, pub_sys, long_name, uri ]
      if close == ">"
        # DOCTYPE without an internal subset: queue its :end_doctype so it
        # is returned by the next call.
        @document_status = :after_doctype
        @source.read if @source.buffer.size<2
        md = @source.match(/^\s*/um, true)
        @stack << [ :end_doctype ]
      else
        # DOCTYPE opened with '[': subsequent calls parse declarations.
        @document_status = :in_doctype
      end
      return args
    when /^\s+/
      # Leading whitespace: no status change here; handled by the generic
      # code at the bottom of the method.
    else
      # Anything else ends the prolog.
      @document_status = :after_doctype
      @source.read if @source.buffer.size<2
      md = @source.match(/\s*/um, true)
      if @source.encoding == "UTF-8"
        @source.buffer.force_encoding(::Encoding::UTF_8)
      end
    end
  end
  if @document_status == :in_doctype
    # Internal subset: peek at everything up to the next '>' and dispatch
    # on the kind of declaration.
    # NOTE(review): md is used without a nil check; if nothing up to a '>'
    # can be matched, md[1] raises NoMethodError outside the begin/rescue.
    md = @source.match(/\s*(.*?>)/um)
    case md[1]
    when SYSTEMENTITY
      match = @source.match( SYSTEMENTITY, true )[1]
      return [ :externalentity, match ]

    when ELEMENTDECL_START
      return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]

    when ENTITY_START
      # Drop the groups that did not participate, then reuse slot 0 (the
      # whole match) for the event type.
      match = @source.match( ENTITYDECL, true ).to_a.compact
      match[0] = :entitydecl
      ref = false
      if match[1] == '%'
        # Parameter entity: remember it and re-append '%' at the end.
        ref = true
        match.delete_at 1
      end
      # Now we have to sort out what kind of entity reference this is
      if match[2] == 'SYSTEM'
        # External reference
        match[3] = match[3][1..-2] # PUBID
        match.delete_at(4) if match.size > 4 # Chop out NDATA decl
        # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
      elsif match[2] == 'PUBLIC'
        # External reference
        match[3] = match[3][1..-2] # PUBID
        match[4] = match[4][1..-2] # HREF
        match.delete_at(5) if match.size > 5 # Chop out NDATA decl
        # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
      else
        # Internal entity: strip the surrounding quotes from the value.
        match[2] = match[2][1..-2]
        match.pop if match.size == 4
        # match is [ :entity, name, value ]
      end
      match << '%' if ref
      return match
    when ATTLISTDECL_START
      md = @source.match( ATTLISTDECL_PATTERN, true )
      raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
      element = md[1]
      contents = md[0]

      # Collect attribute name => default value for every attribute that
      # is not #IMPLIED.
      pairs = {}
      values = md[0].scan( ATTDEF_RE )
      values.each do |attdef|
        unless attdef[3] == "#IMPLIED"
          attdef.compact!
          val = attdef[3]
          val = attdef[4] if val == "#FIXED "
          pairs[attdef[0]] = val
          # A defaulted xmlns:prefix attribute declares that prefix.
          if attdef[0] =~ /^xmlns:(.*)/
            @nsstack[0] << $1
          end
        end
      end
      return [ :attlistdecl, element, pairs, contents ]
    when NOTATIONDECL_START
      md = nil
      # Each form is probed first and consumed only when it matches.
      if @source.match( PUBLIC )
        md = @source.match( PUBLIC, true )
        vals = [md[1],md[2],md[4],md[6]]
      elsif @source.match( SYSTEM )
        md = @source.match( SYSTEM, true )
        vals = [md[1],md[2],nil,md[4]]
      else
        raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
      end
      return [ :notationdecl, *vals ]
    when DOCTYPE_END
      @document_status = :after_doctype
      @source.match( DOCTYPE_END, true )
      return [ :end_doctype ]
    end
  end
  begin
    # Document body: dispatch on the first two characters of the buffer.
    if @source.buffer[0] == ?<
      if @source.buffer[1] == ?/
        # End tag: must match the most recently opened tag.
        @nsstack.shift
        last_tag = @tags.pop
        md = @source.match( CLOSE_MATCH, true )
        if md and !last_tag
          message = "Unexpected top-level end tag (got '#{md[1]}')"
          raise REXML::ParseException.new(message, @source)
        end
        if md.nil? or last_tag != md[1]
          message = "Missing end tag for '#{last_tag}'"
          message << " (got '#{md[1]}')" if md
          raise REXML::ParseException.new(message, @source)
        end
        return [ :end_element, last_tag ]
      elsif @source.buffer[1] == ?!
        # "<!" starts either a comment ("<!-") or a CDATA section.
        md = @source.match(/\A(\s*[^>]*>)/um)
        #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
        raise REXML::ParseException.new("Malformed node", @source) unless md
        if md[0][2] == ?-
          md = @source.match( COMMENT_PATTERN, true )

          # NOTE(review): if COMMENT_PATTERN does not match, md is nil and
          # md[1] raises NoMethodError, which the generic rescue below turns
          # into "Exception parsing"; the `if md` guard further down can
          # therefore never be false.
          case md[1]
          when /--/, /-\z/
            raise REXML::ParseException.new("Malformed comment", @source)
          end

          return [ :comment, md[1] ] if md
        else
          md = @source.match( CDATA_PATTERN, true )
          return [ :cdata, md[1] ] if md
        end
        raise REXML::ParseException.new( "Declarations can only occur "+
          "in the doctype declaration.", @source)
      elsif @source.buffer[1] == ??
        return process_instruction
      else
        # Get the next tag
        md = @source.match(TAG_MATCH, true)
        unless md
          raise REXML::ParseException.new("malformed XML: missing tag start", @source)
        end
        # md[1] is the full tag name, md[2] its namespace prefix (if any).
        prefixes = Set.new
        prefixes << md[2] if md[2]
        @nsstack.unshift(curr_ns=Set.new)
        attributes, closed = parse_attributes(prefixes, curr_ns)
        # Verify that all of the prefixes have been defined
        for prefix in prefixes
          unless @nsstack.find{|k| k.member?(prefix)}
            raise UndefinedNamespaceException.new(prefix,@source,self)
          end
        end

        if closed
          # Self-closing: remember the name so the next call returns
          # :end_element, and drop this element's namespace scope.
          @closed = md[1]
          @nsstack.shift
        else
          @tags.push( md[1] )
        end
        return [ :start_element, md[1], attributes ]
      end
    else
      # Character data up to the next '<'.
      md = @source.match( TEXT_PATTERN, true )
      if md[0].length == 0
        @source.match( /(\s+)/, true )
      end
      #STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0
      #return [ :text, "" ] if md[0].length == 0
      # unnormalized = Text::unnormalize( md[1], self )
      # return PullEvent.new( :text, md[1], unnormalized )
      return [ :text, md[1] ]
    end
  rescue REXML::UndefinedNamespaceException
    raise
  rescue REXML::ParseException
    raise
  rescue => error
    # Anything unexpected is reported as a ParseException that carries the
    # original error as its continued exception.
    raise REXML::ParseException.new( "Exception parsing",
      @source, self, (error ? error : $!) )
  end
  return [ :dummy ]
end
private :pull_event
|
416
|
-
|
417
|
-
# Resolves the entity named +reference+.
# The value is looked up in +entities+ first (when given) and then in
# DEFAULT_ENTITIES, where the literal character (column 2) is used.
# The value found is passed through #unnormalize before being returned;
# nil is returned when the name is unknown.
def entity( reference, entities )
  value = entities ? entities[ reference ] : nil
  unless value
    default = DEFAULT_ENTITIES[ reference ]
    value = default ? default[2] : default
  end
  return nil unless value

  unnormalize( value, entities )
end
|
426
|
-
|
427
|
-
# Escapes +input+ for use as XML text and returns the escaped copy; the
# argument itself is never modified.
#
# input::         the string to escape (may be frozen)
# entities::      optional hash of entity name => literal value (or
#                 pattern); each occurrence of the value is replaced by
#                 "&name;"
# entity_filter:: optional collection of entity names from +entities+ that
#                 must NOT be substituted
#
# Steps, in order: bare '&' characters (those not starting a named
# reference) become '&amp;'; the custom entities are applied; '&' is
# escaped again; finally the characters listed in DEFAULT_ENTITIES are
# replaced by their escaped forms.
def normalize( input, entities=nil, entity_filter=nil )
  # dup rather than clone: clone would carry over a frozen state and make
  # the in-place substitutions below raise FrozenError.
  copy = input.dup
  # Doing it like this rather than in a loop improves the speed
  copy.gsub!( EREFERENCE, '&amp;' )
  if entities
    entities.each do |key, value|
      # The filter holds entity names, so it is checked against the key.
      next if entity_filter and entity_filter.include?(key)
      copy.gsub!( value, "&#{key};" )
    end
  end
  copy.gsub!( EREFERENCE, '&amp;' )
  DEFAULT_ENTITIES.each_value do |value|
    # value[3] matches the literal character, value[1] is its escaped form
    copy.gsub!( value[3], value[1] )
  end
  copy
end
|
442
|
-
|
443
|
-
# Expands the references in +string+ and returns the expanded copy; the
# argument itself is never modified.
#
# string::   the text to unescape (may be frozen)
# entities:: optional hash of entity name => value, consulted (through
#            #entity) before DEFAULT_ENTITIES
# filter::   optional collection of entity names to leave unexpanded
#
# Steps, in order: line endings ("\r\n" or a lone "\r") become "\n";
# numeric character references (decimal "&#65;" and hexadecimal "&#x41;")
# are replaced by the character they denote; named references are resolved;
# finally "&amp;" becomes "&". When the text contains no reference at all
# it is returned right after the line-ending step.
def unnormalize( string, entities=nil, filter=nil )
  # dup rather than clone: clone would carry over a frozen state and make
  # the in-place substitutions below raise FrozenError.
  rv = string.dup
  rv.gsub!( /\r\n?/, "\n" )
  matches = rv.scan( REFERENCE_RE )
  return rv if matches.size == 0
  rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
    m = Regexp.last_match(1)
    # "x41" -> "0x41" so that Integer() reads it as hexadecimal
    m = "0#{m}" if m[0] == ?x
    [Integer(m)].pack('U*')
  }
  # REFERENCE_RE has one capture group (the entity name); numeric
  # references leave it nil, so compact! keeps only the named ones.
  matches.collect!{|x|x[0]}.compact!
  if matches.size > 0
    matches.each do |entity_reference|
      unless filter and filter.include?(entity_reference)
        entity_value = entity( entity_reference, entities )
        if entity_value
          re = /&#{entity_reference};/
          rv.gsub!( re, entity_value )
        else
          er = DEFAULT_ENTITIES[entity_reference]
          rv.gsub!( er[0], er[2] ) if er
        end
      end
    end
    # "&amp;" goes last so that a freshly produced '&' is not re-read as
    # the start of another reference.
    rv.gsub!( /&amp;/, '&' )
  end
  rv
end
|
472
|
-
|
473
|
-
private
|
474
|
-
# Decides whether the encoding named in the XML declaration should be
# applied to the source: false when no encoding was declared or when it is
# exactly "UTF-16" (compared case-insensitively), true for anything else.
def need_source_encoding_update?(xml_declaration_encoding)
  !(xml_declaration_encoding.nil? || /\AUTF-16\z/i =~ xml_declaration_encoding)
end
|
479
|
-
|
480
|
-
# Consumes one processing instruction from the source and returns
# [:processing_instruction, target, content], where target and content are
# the two capture groups of INSTRUCTION_PATTERN.
# Raises REXML::ParseException when the pattern does not match.
def process_instruction
  md = @source.match(INSTRUCTION_PATTERN, true)
  return [:processing_instruction, md[1], md[2]] if md

  raise REXML::ParseException.new("Invalid processing instruction node", @source)
end
|
488
|
-
|
489
|
-
# Parses the attribute part of a start tag, beginning right after the tag
# name, and consumes the input up to and including the closing '>'.
#
# prefixes:: set that receives every namespace prefix used by an attribute
#            (other than "xml" and "xmlns")
# curr_ns::  set that receives every prefix declared via xmlns:prefix
#
# Returns [attributes, closed]: a hash of attribute name => raw value, and
# whether the tag ended with "/>".
# Raises REXML::ParseException for an unterminated tag, a malformed
# attribute, an illegal xml/xmlns declaration or a duplicate attribute.
def parse_attributes(prefixes, curr_ns)
  attributes = {}
  closed = false
  # Group 1: everything before the first '>' (minus an optional '/');
  # group 2: the '/' of a self-closing tag.
  match_data = @source.match(/^(.*?)(\/)?>/um, true)
  if match_data.nil?
    message = "Start tag isn't ended"
    raise REXML::ParseException.new(message, @source)
  end

  raw_attributes = match_data[1]
  closed = !match_data[2].nil?
  return attributes, closed if raw_attributes.nil?
  return attributes, closed if raw_attributes.empty?

  scanner = StringScanner.new(raw_attributes)
  until scanner.eos?
    if scanner.scan(/\s+/)
      break if scanner.eos?
    end

    # Remember where this attribute starts so scanning can restart from
    # here after more input has been appended (see below).
    pos = scanner.pos
    loop do
      # Fast path: a complete name="value" pair.
      break if scanner.scan(ATTRIBUTE_PATTERN)
      # Otherwise scan piece by piece to find out what is wrong.
      unless scanner.scan(QNAME)
        message = "Invalid attribute name: <#{scanner.rest}>"
        raise REXML::ParseException.new(message, @source)
      end
      name = scanner[0]
      unless scanner.scan(/\s*=\s*/um)
        message = "Missing attribute equal: <#{name}>"
        raise REXML::ParseException.new(message, @source)
      end
      quote = scanner.scan(/['"]/)
      unless quote
        message = "Missing attribute value start quote: <#{name}>"
        raise REXML::ParseException.new(message, @source)
      end
      unless scanner.scan(/.*#{Regexp.escape(quote)}/um)
        # No closing quote in what was read so far: the first '>' was
        # inside the attribute value. Read up to the next '>', put back the
        # characters that were split off, and rescan from the start of this
        # attribute.
        match_data = @source.match(/^(.*?)(\/)?>/um, true)
        if match_data
          scanner << "/" if closed
          scanner << ">"
          scanner << match_data[1]
          scanner.pos = pos
          closed = !match_data[2].nil?
          next
        end
        message =
          "Missing attribute value end quote: <#{name}>: <#{quote}>"
        raise REXML::ParseException.new(message, @source)
      end
    end
    # Capture groups of ATTRIBUTE_PATTERN from the successful scan.
    name = scanner[1]
    prefix = scanner[2]
    local_part = scanner[3]
    # quote = scanner[4]
    value = scanner[5]
    if prefix == "xmlns"
      if local_part == "xml"
        if value != "http://www.w3.org/XML/1998/namespace"
          msg = "The 'xml' prefix must not be bound to any other namespace "+
            "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
          raise REXML::ParseException.new( msg, @source, self )
        end
      elsif local_part == "xmlns"
        msg = "The 'xmlns' prefix must not be declared "+
          "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
        raise REXML::ParseException.new( msg, @source, self)
      end
      # xmlns:prefix="..." declares +prefix+ for this element's scope.
      curr_ns << local_part
    elsif prefix
      # A prefixed attribute uses a namespace that the caller must verify.
      prefixes << prefix unless prefix == "xml"
    end

    if attributes.has_key?(name)
      msg = "Duplicate attribute #{name.inspect}"
      raise REXML::ParseException.new(msg, @source, self)
    end

    attributes[name] = value
  end
  return attributes, closed
end
|
572
|
-
end
|
573
|
-
end
|
574
|
-
end
|
575
|
-
|
576
|
-
=begin
|
577
|
-
case event[0]
|
578
|
-
when :start_element
|
579
|
-
when :text
|
580
|
-
when :end_element
|
581
|
-
when :processing_instruction
|
582
|
-
when :cdata
|
583
|
-
when :comment
|
584
|
-
when :xmldecl
|
585
|
-
when :start_doctype
|
586
|
-
when :end_doctype
|
587
|
-
when :externalentity
|
588
|
-
when :elementdecl
|
589
|
-
when :entity
|
590
|
-
when :attlistdecl
|
591
|
-
when :notationdecl
|
592
|
-
when :end_doctype
|
593
|
-
end
|
594
|
-
=end
|