brakeman 4.10.0 → 5.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGES.md +46 -0
- data/README.md +11 -2
- data/bundle/load.rb +5 -3
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/CHANGELOG.md +16 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/FAQ.md +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/Gemfile +1 -4
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/MIT-LICENSE +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/README.md +2 -3
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/REFERENCE.md +29 -7
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/TODO +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/haml.gemspec +2 -1
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/attribute_builder.rb +3 -3
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/attribute_compiler.rb +42 -31
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/attribute_parser.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/buffer.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/compiler.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/engine.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/error.rb +0 -0
- data/bundle/ruby/2.7.0/gems/haml-5.2.1/lib/haml/escapable.rb +77 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/exec.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/filters.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/generator.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/helpers.rb +7 -1
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/helpers/action_view_extensions.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/helpers/action_view_mods.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/helpers/action_view_xss_mods.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/helpers/safe_erubi_template.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/helpers/safe_erubis_template.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/helpers/xss_mods.rb +6 -3
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/options.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/parser.rb +32 -4
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/plugin.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/railtie.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/sass_rails_filter.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/template.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/template/options.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/temple_engine.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/temple_line_counter.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/util.rb +1 -1
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/version.rb +1 -1
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/yard/default/fulldoc/html/css/common.sass +0 -0
- data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/yard/default/layout/html/footer.erb +0 -0
- data/bundle/ruby/2.7.0/gems/parallel-1.20.1/MIT-LICENSE.txt +20 -0
- data/bundle/ruby/2.7.0/gems/parallel-1.20.1/lib/parallel.rb +523 -0
- data/bundle/ruby/2.7.0/gems/parallel-1.20.1/lib/parallel/processor_count.rb +42 -0
- data/bundle/ruby/2.7.0/gems/parallel-1.20.1/lib/parallel/version.rb +3 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/LICENSE.txt +22 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/NEWS.md +178 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/README.md +48 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml.rb +3 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/attlistdecl.rb +63 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/attribute.rb +205 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/cdata.rb +68 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/child.rb +97 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/comment.rb +80 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/doctype.rb +311 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/document.rb +451 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/dtd/attlistdecl.rb +11 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/dtd/dtd.rb +47 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/dtd/elementdecl.rb +18 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/dtd/entitydecl.rb +57 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/dtd/notationdecl.rb +40 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/element.rb +2599 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/encoding.rb +51 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/entity.rb +171 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/formatters/default.rb +116 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/formatters/pretty.rb +142 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/formatters/transitive.rb +58 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/functions.rb +447 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/instruction.rb +79 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/light/node.rb +188 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/namespace.rb +59 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/node.rb +76 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/output.rb +30 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/parent.rb +166 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/parseexception.rb +52 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +694 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/parsers/lightparser.rb +59 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/parsers/pullparser.rb +197 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/parsers/sax2parser.rb +273 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/parsers/streamparser.rb +61 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/parsers/treeparser.rb +101 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/parsers/ultralightparser.rb +57 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/parsers/xpathparser.rb +689 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/quickpath.rb +266 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/rexml.rb +37 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/sax2listener.rb +98 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/security.rb +28 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/source.rb +298 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/streamlistener.rb +93 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/text.rb +424 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/undefinednamespaceexception.rb +9 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/validation/relaxng.rb +539 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/validation/validation.rb +144 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/validation/validationexception.rb +10 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/xmldecl.rb +130 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/xmltokens.rb +85 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/xpath.rb +81 -0
- data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/xpath_parser.rb +974 -0
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/History.rdoc +25 -0
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/Manifest.txt +2 -0
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/README.rdoc +0 -0
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/compare/normalize.rb +2 -2
- data/bundle/ruby/2.7.0/gems/ruby_parser-3.16.0/debugging.md +190 -0
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/rp_extensions.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/rp_stringscanner.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby20_parser.rb +2392 -2384
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby20_parser.y +6 -1
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby21_parser.rb +2553 -2550
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby21_parser.y +6 -1
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby22_parser.rb +2491 -2471
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby22_parser.y +6 -1
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby23_parser.rb +2422 -2403
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby23_parser.y +6 -1
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby24_parser.rb +2460 -2450
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby24_parser.y +6 -1
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby25_parser.rb +2450 -2441
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby25_parser.y +6 -1
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby26_parser.rb +2444 -2433
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby26_parser.y +7 -1
- data/bundle/ruby/2.7.0/gems/ruby_parser-3.16.0/lib/ruby27_parser.rb +7310 -0
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby27_parser.y +21 -1
- data/bundle/ruby/2.7.0/gems/ruby_parser-3.16.0/lib/ruby30_parser.rb +7310 -0
- data/bundle/ruby/2.7.0/gems/ruby_parser-3.16.0/lib/ruby30_parser.y +2677 -0
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby_lexer.rb +19 -0
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby_lexer.rex +1 -1
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby_lexer.rex.rb +1 -1
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby_parser.rb +2 -0
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby_parser.yy +27 -1
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby_parser_extras.rb +2 -2
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/tools/munge.rb +2 -2
- data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/tools/ripper.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{sexp_processor-4.15.1 → sexp_processor-4.15.3}/History.rdoc +12 -0
- data/bundle/ruby/2.7.0/gems/{sexp_processor-4.15.1 → sexp_processor-4.15.3}/Manifest.txt +0 -0
- data/bundle/ruby/2.7.0/gems/{sexp_processor-4.15.1 → sexp_processor-4.15.3}/README.rdoc +0 -0
- data/bundle/ruby/2.7.0/gems/{sexp_processor-4.15.1 → sexp_processor-4.15.3}/lib/composite_sexp_processor.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{sexp_processor-4.15.1 → sexp_processor-4.15.3}/lib/pt_testcase.rb +2 -2
- data/bundle/ruby/2.7.0/gems/{sexp_processor-4.15.1 → sexp_processor-4.15.3}/lib/sexp.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{sexp_processor-4.15.1 → sexp_processor-4.15.3}/lib/sexp_matcher.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{sexp_processor-4.15.1 → sexp_processor-4.15.3}/lib/sexp_processor.rb +1 -1
- data/bundle/ruby/2.7.0/gems/{sexp_processor-4.15.1 → sexp_processor-4.15.3}/lib/strict_sexp.rb +0 -0
- data/bundle/ruby/2.7.0/gems/{sexp_processor-4.15.1 → sexp_processor-4.15.3}/lib/unique.rb +0 -0
- data/lib/brakeman.rb +21 -4
- data/lib/brakeman/app_tree.rb +36 -3
- data/lib/brakeman/checks/base_check.rb +7 -1
- data/lib/brakeman/checks/check_detailed_exceptions.rb +1 -1
- data/lib/brakeman/checks/check_evaluation.rb +1 -1
- data/lib/brakeman/checks/check_execute.rb +2 -1
- data/lib/brakeman/checks/check_mass_assignment.rb +4 -6
- data/lib/brakeman/checks/check_regex_dos.rb +1 -1
- data/lib/brakeman/checks/check_sanitize_methods.rb +2 -1
- data/lib/brakeman/checks/check_sql.rb +16 -3
- data/lib/brakeman/checks/check_unsafe_reflection_methods.rb +68 -0
- data/lib/brakeman/checks/check_verb_confusion.rb +75 -0
- data/lib/brakeman/file_parser.rb +50 -22
- data/lib/brakeman/options.rb +5 -1
- data/lib/brakeman/parsers/template_parser.rb +26 -3
- data/lib/brakeman/processors/alias_processor.rb +91 -19
- data/lib/brakeman/processors/base_processor.rb +4 -4
- data/lib/brakeman/processors/controller_alias_processor.rb +6 -43
- data/lib/brakeman/processors/controller_processor.rb +1 -1
- data/lib/brakeman/processors/haml_template_processor.rb +8 -1
- data/lib/brakeman/processors/lib/call_conversion_helper.rb +10 -0
- data/lib/brakeman/processors/lib/file_type_detector.rb +64 -0
- data/lib/brakeman/processors/lib/rails3_config_processor.rb +16 -16
- data/lib/brakeman/processors/lib/rails4_config_processor.rb +2 -1
- data/lib/brakeman/processors/library_processor.rb +9 -0
- data/lib/brakeman/processors/output_processor.rb +1 -1
- data/lib/brakeman/processors/template_alias_processor.rb +5 -0
- data/lib/brakeman/report.rb +12 -1
- data/lib/brakeman/report/ignore/interactive.rb +1 -1
- data/lib/brakeman/report/report_base.rb +0 -2
- data/lib/brakeman/report/report_csv.rb +37 -60
- data/lib/brakeman/report/report_github.rb +31 -0
- data/lib/brakeman/report/report_junit.rb +2 -2
- data/lib/brakeman/report/report_sarif.rb +1 -1
- data/lib/brakeman/report/report_sonar.rb +38 -0
- data/lib/brakeman/report/report_tabs.rb +1 -1
- data/lib/brakeman/report/report_text.rb +1 -1
- data/lib/brakeman/rescanner.rb +7 -5
- data/lib/brakeman/scanner.rb +47 -18
- data/lib/brakeman/tracker.rb +39 -4
- data/lib/brakeman/tracker/collection.rb +27 -5
- data/lib/brakeman/tracker/config.rb +73 -0
- data/lib/brakeman/tracker/controller.rb +1 -1
- data/lib/brakeman/tracker/method_info.rb +29 -0
- data/lib/brakeman/util.rb +17 -4
- data/lib/brakeman/version.rb +1 -1
- data/lib/brakeman/warning.rb +10 -2
- data/lib/brakeman/warning_codes.rb +2 -0
- data/lib/ruby_parser/bm_sexp.rb +9 -9
- metadata +149 -84
- data/bundle/ruby/2.7.0/gems/haml-5.1.2/lib/haml/escapable.rb +0 -50
- data/bundle/ruby/2.7.0/gems/ruby_parser-3.15.0/debugging.md +0 -57
- data/bundle/ruby/2.7.0/gems/ruby_parser-3.15.0/lib/ruby27_parser.rb +0 -7224
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: false
|
2
|
+
module REXML
  # Error raised when an XML document cannot be parsed.
  #
  # Besides the message it can carry the source being read, the parser that
  # was running, and an underlying exception that triggered the failure; all
  # three are optional and are folded into #to_s when present.
  class ParseException < RuntimeError
    attr_accessor :source, :parser, :continued_exception

    # message::   human readable description of the failure
    # source::    object being parsed; #to_s reads +buffer+ from it and, when
    #             it responds to it, +current_line+
    # parser::    the parser that raised (stored only)
    # exception:: an underlying exception to quote in #to_s
    def initialize( message, source=nil, parser=nil, exception=nil )
      super(message)
      @source = source
      @parser = parser
      @continued_exception = exception
    end

    # Builds the full report: the wrapped exception (if any), the message,
    # and -- when a source is attached -- line, position and the head of the
    # unconsumed buffer.
    def to_s
      err = +""

      # Quote the original exception, if there was one
      if @continued_exception
        err << @continued_exception.inspect
        err << "\n"
        # An exception that was constructed but never raised has no backtrace.
        backtrace = @continued_exception.backtrace
        err << backtrace.join("\n") if backtrace
        err << "\n...\n"
      end

      # Get the stack trace and error message
      err << super

      # Add contextual information
      if @source
        err << "\nLine: #{line}\n"
        err << "Position: #{position}\n"
        err << "Last 80 unconsumed characters:\n"
        # [0, 80] yields at most 80 characters ([0..80] would yield 81).
        snippet = @source.buffer[0, 80].force_encoding("ASCII-8BIT").gsub(/\n/, ' ')
        # Appending binary non-ASCII bytes to a non-ASCII text string raises
        # Encoding::CompatibilityError; drop to binary only in that case so
        # every previously working combination keeps its encoding.
        err.force_encoding("ASCII-8BIT") unless err.ascii_only? or snippet.ascii_only?
        err << snippet
      end

      err
    end

    # First element of the source's current_line value, or nil when no
    # source / no current line is available.
    def position
      info = current_line_info
      info[0] if info
    end

    # Third element of the source's current_line value, or nil when no
    # source / no current line is available.
    def line
      info = current_line_info
      info[2] if info
    end

    # The raw current_line value of the source, or nil when there is no
    # source or it does not provide one.
    def context
      current_line_info
    end

    private

    # Returns @source.current_line when a source is attached and exposes
    # that method; nil otherwise.
    def current_line_info
      return nil unless @source and @source.respond_to?(:current_line)
      @source.current_line
    end
  end
end
|
@@ -0,0 +1,694 @@
|
|
1
|
+
# frozen_string_literal: false
|
2
|
+
require_relative '../parseexception'
|
3
|
+
require_relative '../undefinednamespaceexception'
|
4
|
+
require_relative '../source'
|
5
|
+
require 'set'
|
6
|
+
require "strscan"
|
7
|
+
|
8
|
+
module REXML
|
9
|
+
module Parsers
|
10
|
+
# = Using the Pull Parser
|
11
|
+
# <em>This API is experimental, and subject to change.</em>
|
12
|
+
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
|
13
|
+
# while parser.has_next?
|
14
|
+
# res = parser.next
|
15
|
+
# puts res[1]['att'] if res.start_tag? and res[0] == 'b'
|
16
|
+
# end
|
17
|
+
# See the PullEvent class for information on the content of the results.
|
18
|
+
# The data is identical to the arguments passed for the various events to
|
19
|
+
# the StreamListener API.
|
20
|
+
#
|
21
|
+
# Notice that:
|
22
|
+
# parser = PullParser.new( "<a>BAD DOCUMENT" )
|
23
|
+
# while parser.has_next?
|
24
|
+
# res = parser.next
|
25
|
+
# raise res[1] if res.error?
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# Nat Price gave me some good ideas for the API.
|
29
|
+
class BaseParser
|
30
|
+
# --- Name building blocks -------------------------------------------------
# String fragments (not Regexps) so they can be interpolated into the
# patterns below.
LETTER = '[:alpha:]'
DIGIT = '[:digit:]'

COMBININGCHAR = '' # TODO
EXTENDER = '' # TODO

# Name without a colon; QNAME_STR captures an optional prefix (group 1) and
# the local part (group 2).
NCNAME_STR= "[#{LETTER}_][-[:alnum:]._#{COMBININGCHAR}#{EXTENDER}]*"
QNAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
QNAME = /(#{QNAME_STR})/

# Just for backward compatibility. For example, kramdown uses this.
# It's not used in REXML.
UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"

NAMECHAR = '[\-\w\.:]'
NAME = "([\\w:]#{NAMECHAR}*)"
NMTOKEN = "(?:#{NAMECHAR})+"
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
# Named, decimal or hexadecimal reference.
REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)"
REFERENCE_RE = /#{REFERENCE}/

# --- Markup recognisers ---------------------------------------------------
# *_START patterns are anchored with \A and only identify what comes next;
# the matching *_PATTERN captures the content.
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
DOCTYPE_END = /\A\s*\]\s*>/um
# \4 refers back to the opening quote: groups 1-3 come from the wrapped
# QNAME_STR, group 4 is the quote, group 5 the value.
ATTRIBUTE_PATTERN = /\s*(#{QNAME_STR})\s*=\s*(["'])(.*?)\4/um
COMMENT_START = /\A<!--/u
COMMENT_PATTERN = /<!--(.*?)-->/um
CDATA_START = /\A<!\[CDATA\[/u
# NOTE(review): identical to DOCTYPE_END rather than matching "]]>" --
# confirm whether this is intentional.
CDATA_END = /\A\s*\]\s*>/um
CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
XMLDECL_START = /\A<\?xml\s/u;
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
INSTRUCTION_START = /\A<\?/u
INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um
TAG_MATCH = /\A<((?>#{QNAME_STR}))/um
CLOSE_MATCH = /\A\s*<\/(#{QNAME_STR})\s*>/um

# Pseudo-attributes looked up inside the text captured by XMLDECL_PATTERN.
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um

# --- DOCTYPE internal subset ----------------------------------------------
# NOTE(review): unlike its neighbours this pattern carries no /um flags.
ENTITY_START = /\A\s*<!ENTITY/
ELEMENTDECL_START = /\A\s*<!ELEMENT/um
ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
ATTDEF_RE = /#{ATTDEF}/
ATTLISTDECL_START = /\A\s*<!ATTLIST/um
ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um

# Everything up to (not including) the next "<"; may match the empty string.
TEXT_PATTERN = /\A([^<]*)/um

# Entity constants
PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
NDATADECL = "\\s+NDATA\\s+#{NAME}"
PEREFERENCE = "%#{NAME};"
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
# Parameter-entity ("%") and general-entity declarations.
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
# Not anchored; the leading \s* belongs to the first alternative only.
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um

NOTATIONDECL_START = /\A\s*<!NOTATION/um
EXTERNAL_ID_PUBLIC = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*/um
EXTERNAL_ID_SYSTEM = /\A\s*SYSTEM\s+#{SYSTEMLITERAL}\s*/um
PUBLIC_ID = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s*/um

# An ampersand that does NOT start a named reference ("&name;").
EREFERENCE = /&(?!#{NAME};)/
|
107
|
+
|
108
|
+
# The four predefined XML entities handled here, keyed by entity name.
# Each value is a 4-element array:
#   [0] Regexp matching the escaped form   (e.g. /&gt;/)
#   [1] the escaped form as a String       (e.g. '&gt;')
#   [2] the literal character              (e.g. '>')
#   [3] Regexp matching the literal character
# #normalize replaces [3] with [1]; #entity returns [2].
# The escaped forms must be spelled with the entity text itself: if [0]/[1]
# hold the bare character, every substitution maps a character to itself.
DEFAULT_ENTITIES = {
  'gt' => [/&gt;/, '&gt;', '>', />/],
  'lt' => [/&lt;/, '&lt;', '<', /</],
  'quot' => [/&quot;/, '&quot;', '"', /"/],
  "apos" => [/&apos;/, "&apos;", "'", /'/]
}
|
114
|
+
|
115
|
+
# Builds a parser over +source+. The source is installed through #stream=,
# after which the listener list starts out empty.
def initialize( source )
  self.stream = source
  @listeners = Array.new
end
|
119
|
+
|
120
|
+
# Registers +listener+; #pull hands every event it produces to each
# registered listener via +receive+. Returns the listener list.
def add_listener( listener )
  @listeners.push( listener )
end
|
123
|
+
|
124
|
+
attr_reader :source
|
125
|
+
|
126
|
+
# Points the parser at a new input. The input is wrapped with
# SourceFactory.create_from and all per-document parsing state
# (pending close tag, document status, tag/event/entity/namespace stacks)
# is reset.
def stream=( source )
  @source = SourceFactory.create_from( source )
  @closed = @document_status = nil
  @tags, @stack, @entities = [], [], []
  @nsstack = []
end
|
135
|
+
|
136
|
+
# Current position as reported by the source, or 0 when the source does
# not expose a +position+ method.
def position
  # FIXME
  return 0 unless @source.respond_to? :position
  @source.position
end
|
144
|
+
|
145
|
+
# True when nothing is left to deliver: the source is exhausted and no
# events are queued on the stack.
def empty?
  @source.empty? && @stack.empty?
end
|
149
|
+
|
150
|
+
# True while the source still has input or events are queued on the
# stack. Synonymous with !empty?
def has_next?
  !@source.empty? || !@stack.empty?
end
|
154
|
+
|
155
|
+
# Puts +token+ back at the front of the pending-event stack so it is the
# next event delivered. Any number of events can be pushed back this way.
# Returns the stack.
def unshift token
  @stack.insert(0, token)
end
|
160
|
+
|
161
|
+
# Looks ahead at the event +depth+ positions from the head of the stack
# without consuming it (depth 0 is the next event). Events are pulled from
# the input as needed and queued, so peeking far ahead pre-parses -- and
# keeps in memory -- that much of the document. With +depth+ -1 the parser
# pulls until #empty? reports true and returns the last queued event.
# Raises a RuntimeError for any depth below -1.
def peek depth=0
  raise %Q[Illegal argument "#{depth}"] if depth < -1
  pulled = []
  if depth == -1
    pulled << pull until empty?
  else
    needed = depth + 1
    pulled << pull while @stack.size + pulled.size < needed
  end
  @stack += pulled unless pulled.empty?
  @stack[depth]
end
|
180
|
+
|
181
|
+
# Produces the next event via #pull_event, passes it to +receive+ on every
# registered listener in registration order, and returns the event.
def pull
  event = pull_event
  @listeners.each { |listener| listener.receive event }
  event
end
|
189
|
+
|
190
|
+
# Produces the next parse event as an array whose first element is the
# event type (:start_element, :end_element, :text, :comment, :cdata,
# :xmldecl, :start_doctype, :end_doctype, :entitydecl, :attlistdecl,
# :elementdecl, :notationdecl, :externalentity, :end_document, ...).
#
# Order of work:
# 1. a pending :end_element for a self-closed tag (@closed);
# 2. :end_document when nothing is left, or an event queued on @stack;
# 3. prolog handling while @document_status is nil;
# 4. internal-subset declarations while @document_status is :in_doctype;
# 5. document content (tags, comments, CDATA, instructions, text).
#
# Raises REXML::ParseException for malformed input. Every place that
# dereferences the result of @source.match checks for nil first, so a
# truncated comment, XML declaration, DOCTYPE subset or entity declaration
# is reported as a ParseException rather than a NoMethodError.
def pull_event
  if @closed
    x, @closed = @closed, nil
    return [ :end_element, x ]
  end
  return [ :end_document ] if empty?
  return @stack.shift if @stack.size > 0

  if @document_status == nil
    word = @source.match( /\A((?:\s+)|(?:<[^>]*>))/um )
    word = word[1] unless word.nil?
    case word
    when COMMENT_START
      md = @source.match( COMMENT_PATTERN, true )
      raise REXML::ParseException.new("Unclosed comment", @source) unless md
      return [ :comment, md[1] ]
    when XMLDECL_START
      md = @source.match( XMLDECL_PATTERN, true )
      raise REXML::ParseException.new("Malformed XML declaration", @source) unless md
      results = md[1]
      version = VERSION.match( results )
      version = version[1] unless version.nil?
      encoding = ENCODING.match(results)
      encoding = encoding[1] unless encoding.nil?
      if need_source_encoding_update?(encoding)
        @source.encoding = encoding
      end
      if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
        encoding = "UTF-16"
      end
      standalone = STANDALONE.match(results)
      standalone = standalone[1] unless standalone.nil?
      return [ :xmldecl, version, encoding, standalone ]
    when INSTRUCTION_START
      return process_instruction
    when DOCTYPE_START
      base_error_message = "Malformed DOCTYPE"
      @source.match(DOCTYPE_START, true)
      @nsstack.unshift(Set.new)
      name = parse_name(base_error_message)
      if @source.match(/\A\s*\[/um, true)
        id = [nil, nil, nil]
        @document_status = :in_doctype
      elsif @source.match(/\A\s*>/um, true)
        id = [nil, nil, nil]
        @document_status = :after_doctype
      else
        id = parse_id(base_error_message,
                      accept_external_id: true,
                      accept_public_id: false)
        if id[0] == "SYSTEM"
          # For backward compatibility
          id[1], id[2] = id[2], nil
        end
        if @source.match(/\A\s*\[/um, true)
          @document_status = :in_doctype
        elsif @source.match(/\A\s*>/um, true)
          @document_status = :after_doctype
        else
          message = "#{base_error_message}: garbage after external ID"
          raise REXML::ParseException.new(message, @source)
        end
      end
      args = [:start_doctype, name, *id]
      if @document_status == :after_doctype
        # No internal subset: queue the matching :end_doctype right away.
        @source.match(/\A\s*/um, true)
        @stack << [ :end_doctype ]
      end
      return args
    when /\A\s+/
      # Leading whitespace: stay in the prolog state.
    else
      @document_status = :after_doctype
      if @source.encoding == "UTF-8"
        @source.buffer.force_encoding(::Encoding::UTF_8)
      end
    end
  end

  if @document_status == :in_doctype
    md = @source.match(/\A\s*(.*?>)/um)
    unless md
      raise REXML::ParseException.new("Malformed DOCTYPE: unterminated declaration", @source)
    end
    case md[1]
    when SYSTEMENTITY
      match = @source.match( SYSTEMENTITY, true )[1]
      return [ :externalentity, match ]

    when ELEMENTDECL_START
      return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]

    when ENTITY_START
      entity_md = @source.match( ENTITYDECL, true )
      unless entity_md
        raise REXML::ParseException.new("Malformed entity declaration", @source)
      end
      match = entity_md.to_a.compact
      match[0] = :entitydecl
      ref = false
      if match[1] == '%'
        ref = true
        match.delete_at 1
      end
      # Now we have to sort out what kind of entity reference this is
      if match[2] == 'SYSTEM'
        # External reference
        match[3] = match[3][1..-2] # PUBID
        match.delete_at(4) if match.size > 4 # Chop out NDATA decl
        # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
      elsif match[2] == 'PUBLIC'
        # External reference
        match[3] = match[3][1..-2] # PUBID
        match[4] = match[4][1..-2] # HREF
        match.delete_at(5) if match.size > 5 # Chop out NDATA decl
        # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
      else
        match[2] = match[2][1..-2]
        match.pop if match.size == 4
        # match is [ :entity, name, value ]
      end
      match << '%' if ref
      return match
    when ATTLISTDECL_START
      md = @source.match( ATTLISTDECL_PATTERN, true )
      raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
      element = md[1]
      contents = md[0]

      pairs = {}
      values = md[0].scan( ATTDEF_RE )
      values.each do |attdef|
        unless attdef[3] == "#IMPLIED"
          attdef.compact!
          val = attdef[3]
          val = attdef[4] if val == "#FIXED "
          pairs[attdef[0]] = val
          if attdef[0] =~ /^xmlns:(.*)/
            @nsstack[0] << $1
          end
        end
      end
      return [ :attlistdecl, element, pairs, contents ]
    when NOTATIONDECL_START
      base_error_message = "Malformed notation declaration"
      unless @source.match(/\A\s*<!NOTATION\s+/um, true)
        if @source.match(/\A\s*<!NOTATION\s*>/um)
          message = "#{base_error_message}: name is missing"
        else
          message = "#{base_error_message}: invalid declaration name"
        end
        raise REXML::ParseException.new(message, @source)
      end
      name = parse_name(base_error_message)
      id = parse_id(base_error_message,
                    accept_external_id: true,
                    accept_public_id: true)
      unless @source.match(/\A\s*>/um, true)
        message = "#{base_error_message}: garbage before end >"
        raise REXML::ParseException.new(message, @source)
      end
      return [:notationdecl, name, *id]
    when DOCTYPE_END
      @document_status = :after_doctype
      @source.match( DOCTYPE_END, true )
      return [ :end_doctype ]
    end
  end

  if @document_status == :after_doctype
    @source.match(/\A\s*/um, true)
  end

  begin
    @source.read if @source.buffer.size<2
    if @source.buffer[0] == '<'
      if @source.buffer[1] == '/'
        # End tag: must match the most recently opened element.
        @nsstack.shift
        last_tag = @tags.pop
        md = @source.match( CLOSE_MATCH, true )
        if md and !last_tag
          message = "Unexpected top-level end tag (got '#{md[1]}')"
          raise REXML::ParseException.new(message, @source)
        end
        if md.nil? or last_tag != md[1]
          message = "Missing end tag for '#{last_tag}'"
          message << " (got '#{md[1]}')" if md
          raise REXML::ParseException.new(message, @source)
        end
        return [ :end_element, last_tag ]
      elsif @source.buffer[1] == '!'
        md = @source.match(/\A(\s*[^>]*>)/um)
        raise REXML::ParseException.new("Malformed node", @source) unless md
        if md[0][2] == '-'
          md = @source.match( COMMENT_PATTERN, true )
          # No closing "-->" anywhere ahead.
          raise REXML::ParseException.new("Unclosed comment", @source) unless md

          case md[1]
          when /--/, /-\z/
            raise REXML::ParseException.new("Malformed comment", @source)
          end

          return [ :comment, md[1] ]
        else
          md = @source.match( CDATA_PATTERN, true )
          return [ :cdata, md[1] ] if md
        end
        raise REXML::ParseException.new( "Declarations can only occur "+
          "in the doctype declaration.", @source)
      elsif @source.buffer[1] == '?'
        return process_instruction
      else
        # Get the next tag
        md = @source.match(TAG_MATCH, true)
        unless md
          raise REXML::ParseException.new("malformed XML: missing tag start", @source)
        end
        @document_status = :in_element
        prefixes = Set.new
        prefixes << md[2] if md[2]
        @nsstack.unshift(curr_ns=Set.new)
        attributes, closed = parse_attributes(prefixes, curr_ns)
        # Verify that all of the prefixes have been defined
        prefixes.each do |prefix|
          unless @nsstack.any? { |declared| declared.member?(prefix) }
            raise UndefinedNamespaceException.new(prefix,@source,self)
          end
        end

        if closed
          # Self-closing tag: remember it so the next call emits :end_element.
          @closed = md[1]
          @nsstack.shift
        else
          @tags.push( md[1] )
        end
        return [ :start_element, md[1], attributes ]
      end
    else
      md = @source.match( TEXT_PATTERN, true )
      if md[0].length == 0
        @source.match( /(\s+)/, true )
      end
      return [ :text, md[1] ]
    end
  rescue REXML::UndefinedNamespaceException
    raise
  rescue REXML::ParseException
    raise
  rescue => error
    # Anything unexpected is wrapped so callers only have to handle
    # ParseException; the original error is kept as continued_exception.
    raise REXML::ParseException.new( "Exception parsing",
      @source, self, (error ? error : $!) )
  end
  return [ :dummy ]
end
|
436
|
+
private :pull_event
|
437
|
+
|
438
|
+
# Looks up the replacement text for the entity named +reference+.
#
# +entities+ (may be nil) is consulted first via <tt>entities[reference]</tt>;
# when that yields nothing, DEFAULT_ENTITIES is consulted and element [2]
# of its entry is used. The text found is passed through #unnormalize
# (with the same +entities+) before being returned.
#
# Returns nil when no replacement text is found in either place.
def entity( reference, entities )
  resolved = entities && entities[ reference ]
  unless resolved
    builtin = DEFAULT_ENTITIES[ reference ]
    resolved = builtin[2] if builtin
  end
  return nil unless resolved

  unnormalize( resolved, entities )
end
|
447
|
+
|
448
|
+
# Escapes all possible entities
#
# input::         the text to escape; it is not modified, a copy is returned.
# entities::      optional collection yielding (key, value) pairs; every
#                 occurrence of +value+ in the text is replaced by
#                 "&key;".
# entity_filter:: optional collection of keys (anything responding to
#                 +include?+) that must NOT be substituted.
#
# Bare ampersands (those matching EREFERENCE) are rewritten to "&amp;"
# both before and after the custom entity pass, then every entry of
# DEFAULT_ENTITIES is applied using element [3] as the pattern and
# element [1] as the replacement.
def normalize( input, entities=nil, entity_filter=nil )
  # dup, not clone: clone would carry over a frozen flag and make the
  # in-place substitutions below raise.
  copy = input.dup
  # Doing it like this rather than in a loop improves the speed
  copy.gsub!( EREFERENCE, '&amp;' )
  entities.each do |key, value|
    # The filter holds entity names, so it is checked against the key.
    next if entity_filter and entity_filter.include?(key)
    copy.gsub!( value, "&#{key};" )
  end if entities
  # The substitutions above may have introduced new bare ampersands.
  copy.gsub!( EREFERENCE, '&amp;' )
  DEFAULT_ENTITIES.each_value do |forms|
    copy.gsub!( forms[3], forms[1] )
  end
  copy
end
|
463
|
+
|
464
|
+
# Unescapes all possible entities
#
# string::   the text to unescape; it is not modified, a copy is returned.
# entities:: optional lookup passed on to #entity for resolving named
#            references.
# filter::   optional collection (anything responding to +include?+) of
#            entity names that must be left untouched.
#
# Steps, in order: line endings "\r\n" / "\r" become "\n"; numeric
# character references (decimal and "x"-prefixed hex) are replaced by
# the corresponding UTF-8 character; each named reference found by
# REFERENCE_RE is replaced by the value from #entity, falling back to
# the DEFAULT_ENTITIES entry; finally "&amp;" becomes "&".
def unnormalize( string, entities=nil, filter=nil )
  # dup, not clone: clone would carry over a frozen flag and make the
  # in-place substitutions below raise.
  rv = string.dup
  rv.gsub!( /\r\n?/, "\n" )
  matches = rv.scan( REFERENCE_RE )
  return rv if matches.size == 0
  # Leading zeros are matched outside the capture, so only the
  # significant digits reach Integer().
  rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
    m=$1
    m = "0#{m}" if m[0] == ?x   # "x41" -> "0x41" so Integer() parses hex
    [Integer(m)].pack('U*')
  }
  # Keep only the first capture of each match; matches without one
  # (nil) are dropped.
  matches.collect!{|x|x[0]}.compact!
  if matches.size > 0
    matches.each do |entity_reference|
      unless filter and filter.include?(entity_reference)
        entity_value = entity( entity_reference, entities )
        if entity_value
          re = /&#{entity_reference};/
          rv.gsub!( re, entity_value )
        else
          er = DEFAULT_ENTITIES[entity_reference]
          rv.gsub!( er[0], er[2] ) if er
        end
      end
    end
    # Done last so that "&amp;lt;" ends up as "&lt;", not "<".
    rv.gsub!( /&amp;/, '&' )
  end
  rv
end
|
493
|
+
|
494
|
+
private
|
495
|
+
# Tells whether the encoding named in the XML declaration differs from
# "UTF-16" (compared case-insensitively, whole string).
#
# Returns false when +xml_declaration_encoding+ is nil or is exactly
# "UTF-16" in any letter case; true for every other value.
def need_source_encoding_update?(xml_declaration_encoding)
  return false if xml_declaration_encoding.nil?

  # match? is enough here: no captures are needed and no global match
  # data is set.
  !/\AUTF-16\z/i.match?(xml_declaration_encoding)
end
|
500
|
+
|
501
|
+
# Consumes optional leading whitespace followed by a NAME from @source
# and returns the first capture of that match.
#
# Raises REXML::ParseException when no name matches. The message is
# +base_error_message+ followed by ": invalid name" if some
# non-whitespace character is next in the source, or by
# ": name is missing" otherwise.
def parse_name(base_error_message)
  name_match = @source.match(/\A\s*#{NAME}/um, true)
  return name_match[1] if name_match

  reason = @source.match(/\A\s*\S/um) ? "invalid name" : "name is missing"
  raise REXML::ParseException.new("#{base_error_message}: #{reason}", @source)
end
|
513
|
+
|
514
|
+
# Consumes an identifier from @source and returns it as a three-element
# array: <tt>[kind, public_id, system_id]</tt>.
#
# The patterns are tried in this order, each only when its flag allows:
# EXTERNAL_ID_PUBLIC (+accept_external_id+) -> ["PUBLIC", pubid, system],
# PUBLIC_ID (+accept_public_id+)            -> ["PUBLIC", pubid, nil],
# EXTERNAL_ID_SYSTEM (+accept_external_id+) -> ["SYSTEM", nil, system].
# Captured literals have their first and last character (the quotes)
# removed.
#
# Raises REXML::ParseException with +base_error_message+ plus the text
# from #parse_id_invalid_details when nothing matches.
def parse_id(base_error_message,
             accept_external_id:,
             accept_public_id:)
  # Drops the surrounding quote characters; nil stays nil.
  unquote = lambda { |literal| literal && literal[1..-2] }

  if accept_external_id and (md = @source.match(EXTERNAL_ID_PUBLIC, true))
    return ["PUBLIC", unquote.call(md[1]), unquote.call(md[2])]
  end
  if accept_public_id and (md = @source.match(PUBLIC_ID, true))
    return ["PUBLIC", unquote.call(md[1]), nil]
  end
  if accept_external_id and (md = @source.match(EXTERNAL_ID_SYSTEM, true))
    return ["SYSTEM", nil, unquote.call(md[1])]
  end

  details = parse_id_invalid_details(accept_external_id: accept_external_id,
                                     accept_public_id: accept_public_id)
  raise REXML::ParseException.new("#{base_error_message}: #{details}", @source)
end
|
541
|
+
|
542
|
+
# Builds the detail text for a failed #parse_id by probing @source
# (without consuming it) with progressively longer patterns and
# returning the description of the first part that does not fit.
#
# Returns one of the fixed strings below; never raises by itself.
def parse_id_invalid_details(accept_external_id:,
                             accept_public_id:)
  public_kw = /\A\s*PUBLIC/um
  system_kw = /\A\s*SYSTEM/um

  if (accept_external_id or accept_public_id) and @source.match(/#{public_kw}/um)
    if @source.match(/#{public_kw}(?:\s+[^'"]|\s*[\[>])/um)
      return "public ID literal is missing"
    end
    unless @source.match(/#{public_kw}\s+#{PUBIDLITERAL}/um)
      return "invalid public ID literal"
    end
    return "garbage after public ID literal" unless accept_public_id

    if @source.match(/#{public_kw}\s+#{PUBIDLITERAL}\s+[^'"]/um)
      return "system ID literal is missing"
    end
    unless @source.match(/#{public_kw}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
      return "invalid system literal"
    end
    return "garbage after system literal"
  end

  if accept_external_id and @source.match(/#{system_kw}/um)
    if @source.match(/#{system_kw}(?:\s+[^'"]|\s*[\[>])/um)
      return "system literal is missing"
    end
    unless @source.match(/#{system_kw}\s+#{SYSTEMLITERAL}/um)
      return "invalid system literal"
    end
    return "garbage after system literal"
  end

  # Neither keyword branch applied.
  return "invalid ID type" unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)

  "ID type is missing"
end
|
579
|
+
|
580
|
+
# Consumes a processing instruction from @source using
# INSTRUCTION_PATTERN and returns
# <tt>[:processing_instruction, capture1, capture2]</tt>.
#
# Raises REXML::ParseException when the pattern does not match.
def process_instruction
  instruction = @source.match(INSTRUCTION_PATTERN, true)
  unless instruction
    raise REXML::ParseException.new("Invalid processing instruction node", @source)
  end

  first_capture, second_capture = instruction[1], instruction[2]
  [:processing_instruction, first_capture, second_capture]
end
|
588
|
+
|
589
|
+
# Consumes the remainder of a start tag from @source and parses its
# attributes.
#
# prefixes:: collection that receives (via <tt><<</tt>) every attribute
#            prefix other than "xmlns" and "xml".
# curr_ns::  collection that receives (via <tt><<</tt>) the local part of
#            every "xmlns:..." attribute.
#
# Returns two values: a Hash of attribute name => raw value, and a flag
# that is true when the tag text ended with "/>".
#
# Raises REXML::ParseException for an unterminated start tag, a
# malformed attribute, an illegal binding of the "xml" / "xmlns"
# prefixes, or a duplicate attribute name.
def parse_attributes(prefixes, curr_ns)
  attributes = {}
  closed = false
  tag_rest = @source.match(/^(.*?)(\/)?>/um, true)
  if tag_rest.nil?
    raise REXML::ParseException.new("Start tag isn't ended", @source)
  end

  raw_attributes = tag_rest[1]
  closed = !tag_rest[2].nil?
  return attributes, closed if raw_attributes.nil? || raw_attributes.empty?

  scanner = StringScanner.new(raw_attributes)
  until scanner.eos?
    # Skip whitespace between attributes; stop if nothing else follows.
    break if scanner.scan(/\s+/) && scanner.eos?

    start_pos = scanner.pos
    loop do
      break if scanner.scan(ATTRIBUTE_PATTERN)

      # The full pattern failed: scan piece by piece to find out which
      # part is wrong and report it.
      unless scanner.scan(QNAME)
        raise REXML::ParseException.new(
          "Invalid attribute name: <#{scanner.rest}>", @source)
      end
      attr_name = scanner[0]
      unless scanner.scan(/\s*=\s*/um)
        raise REXML::ParseException.new(
          "Missing attribute equal: <#{attr_name}>", @source)
      end
      quote = scanner.scan(/['"]/)
      unless quote
        raise REXML::ParseException.new(
          "Missing attribute value start quote: <#{attr_name}>", @source)
      end
      next if scanner.scan(/.*#{Regexp.escape(quote)}/um)

      # No closing quote in the text read so far -- presumably the ">"
      # that ended it was inside the quoted value. Read up to the next
      # ">" from the source, put back the terminator consumed earlier,
      # and retry this attribute from its start.
      continuation = @source.match(/^(.*?)(\/)?>/um, true)
      unless continuation
        raise REXML::ParseException.new(
          "Missing attribute value end quote: <#{attr_name}>: <#{quote}>",
          @source)
      end
      scanner << "/" if closed
      scanner << ">"
      scanner << continuation[1]
      scanner.pos = start_pos
      closed = !continuation[2].nil?
    end

    name = scanner[1]
    prefix = scanner[2]
    local_part = scanner[3]
    # scanner[4] is the quote character
    value = scanner[5]

    if prefix == "xmlns"
      case local_part
      when "xml"
        if value != "http://www.w3.org/XML/1998/namespace"
          msg = "The 'xml' prefix must not be bound to any other namespace " \
                "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
          raise REXML::ParseException.new( msg, @source, self )
        end
      when "xmlns"
        msg = "The 'xmlns' prefix must not be declared " \
              "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
        raise REXML::ParseException.new( msg, @source, self)
      end
      curr_ns << local_part
    elsif prefix
      prefixes << prefix unless prefix == "xml"
    end

    if attributes.key?(name)
      raise REXML::ParseException.new(
        "Duplicate attribute #{name.inspect}", @source, self)
    end

    attributes[name] = value
  end
  return attributes, closed
end
|
672
|
+
end
|
673
|
+
end
|
674
|
+
end
|
675
|
+
|
676
|
+
=begin
|
677
|
+
case event[0]
|
678
|
+
when :start_element
|
679
|
+
when :text
|
680
|
+
when :end_element
|
681
|
+
when :processing_instruction
|
682
|
+
when :cdata
|
683
|
+
when :comment
|
684
|
+
when :xmldecl
|
685
|
+
when :start_doctype
|
686
|
+
when :end_doctype
|
687
|
+
when :externalentity
|
688
|
+
when :elementdecl
|
689
|
+
when :entity
|
690
|
+
when :attlistdecl
|
691
|
+
when :notationdecl
|
692
|
+
when :end_doctype
|
693
|
+
end
|
694
|
+
=end
|