brakeman 4.10.0 → 5.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (186) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +52 -0
  3. data/README.md +11 -2
  4. data/bundle/load.rb +4 -3
  5. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/CHANGELOG.md +16 -0
  6. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/FAQ.md +0 -0
  7. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/Gemfile +1 -4
  8. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/MIT-LICENSE +0 -0
  9. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/README.md +2 -3
  10. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/REFERENCE.md +29 -7
  11. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/TODO +0 -0
  12. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/haml.gemspec +2 -1
  13. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml.rb +0 -0
  14. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/attribute_builder.rb +3 -3
  15. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/attribute_compiler.rb +42 -31
  16. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/attribute_parser.rb +0 -0
  17. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/buffer.rb +0 -0
  18. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/compiler.rb +0 -0
  19. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/engine.rb +0 -0
  20. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/error.rb +0 -0
  21. data/bundle/ruby/2.7.0/gems/haml-5.2.1/lib/haml/escapable.rb +77 -0
  22. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/exec.rb +0 -0
  23. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/filters.rb +0 -0
  24. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/generator.rb +0 -0
  25. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/helpers.rb +7 -1
  26. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/helpers/action_view_extensions.rb +0 -0
  27. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/helpers/action_view_mods.rb +0 -0
  28. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/helpers/action_view_xss_mods.rb +0 -0
  29. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/helpers/safe_erubi_template.rb +0 -0
  30. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/helpers/safe_erubis_template.rb +0 -0
  31. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/helpers/xss_mods.rb +6 -3
  32. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/options.rb +0 -0
  33. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/parser.rb +32 -4
  34. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/plugin.rb +0 -0
  35. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/railtie.rb +0 -0
  36. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/sass_rails_filter.rb +0 -0
  37. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/template.rb +0 -0
  38. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/template/options.rb +0 -0
  39. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/temple_engine.rb +0 -0
  40. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/temple_line_counter.rb +0 -0
  41. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/util.rb +1 -1
  42. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/lib/haml/version.rb +1 -1
  43. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/yard/default/fulldoc/html/css/common.sass +0 -0
  44. data/bundle/ruby/2.7.0/gems/{haml-5.1.2 → haml-5.2.1}/yard/default/layout/html/footer.erb +0 -0
  45. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/LICENSE.txt +22 -0
  46. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/NEWS.md +178 -0
  47. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/README.md +48 -0
  48. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml.rb +3 -0
  49. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/attlistdecl.rb +63 -0
  50. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/attribute.rb +205 -0
  51. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/cdata.rb +68 -0
  52. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/child.rb +97 -0
  53. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/comment.rb +80 -0
  54. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/doctype.rb +311 -0
  55. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/document.rb +451 -0
  56. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/dtd/attlistdecl.rb +11 -0
  57. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/dtd/dtd.rb +47 -0
  58. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/dtd/elementdecl.rb +18 -0
  59. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/dtd/entitydecl.rb +57 -0
  60. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/dtd/notationdecl.rb +40 -0
  61. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/element.rb +2599 -0
  62. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/encoding.rb +51 -0
  63. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/entity.rb +171 -0
  64. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/formatters/default.rb +116 -0
  65. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/formatters/pretty.rb +142 -0
  66. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/formatters/transitive.rb +58 -0
  67. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/functions.rb +447 -0
  68. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/instruction.rb +79 -0
  69. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/light/node.rb +188 -0
  70. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/namespace.rb +59 -0
  71. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/node.rb +76 -0
  72. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/output.rb +30 -0
  73. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/parent.rb +166 -0
  74. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/parseexception.rb +52 -0
  75. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +694 -0
  76. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/parsers/lightparser.rb +59 -0
  77. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/parsers/pullparser.rb +197 -0
  78. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/parsers/sax2parser.rb +273 -0
  79. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/parsers/streamparser.rb +61 -0
  80. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/parsers/treeparser.rb +101 -0
  81. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/parsers/ultralightparser.rb +57 -0
  82. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/parsers/xpathparser.rb +689 -0
  83. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/quickpath.rb +266 -0
  84. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/rexml.rb +37 -0
  85. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/sax2listener.rb +98 -0
  86. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/security.rb +28 -0
  87. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/source.rb +298 -0
  88. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/streamlistener.rb +93 -0
  89. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/text.rb +424 -0
  90. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/undefinednamespaceexception.rb +9 -0
  91. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/validation/relaxng.rb +539 -0
  92. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/validation/validation.rb +144 -0
  93. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/validation/validationexception.rb +10 -0
  94. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/xmldecl.rb +130 -0
  95. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/xmltokens.rb +85 -0
  96. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/xpath.rb +81 -0
  97. data/bundle/ruby/2.7.0/gems/rexml-3.2.5/lib/rexml/xpath_parser.rb +974 -0
  98. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/History.rdoc +25 -0
  99. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/Manifest.txt +2 -0
  100. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/README.rdoc +0 -0
  101. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/compare/normalize.rb +2 -2
  102. data/bundle/ruby/2.7.0/gems/ruby_parser-3.16.0/debugging.md +190 -0
  103. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/rp_extensions.rb +0 -0
  104. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/rp_stringscanner.rb +0 -0
  105. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby20_parser.rb +2550 -2537
  106. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby20_parser.y +9 -1
  107. data/bundle/ruby/2.7.0/gems/ruby_parser-3.16.0/lib/ruby21_parser.rb +7148 -0
  108. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby21_parser.y +9 -1
  109. data/bundle/ruby/2.7.0/gems/ruby_parser-3.16.0/lib/ruby22_parser.rb +7185 -0
  110. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby22_parser.y +9 -1
  111. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby23_parser.rb +2585 -2561
  112. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby23_parser.y +9 -1
  113. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby24_parser.rb +2622 -2607
  114. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby24_parser.y +9 -1
  115. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby25_parser.rb +2612 -2598
  116. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby25_parser.y +9 -1
  117. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby26_parser.rb +2610 -2594
  118. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby26_parser.y +10 -1
  119. data/bundle/ruby/2.7.0/gems/ruby_parser-3.16.0/lib/ruby27_parser.rb +7358 -0
  120. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby27_parser.y +47 -1
  121. data/bundle/ruby/2.7.0/gems/ruby_parser-3.16.0/lib/ruby30_parser.rb +7358 -0
  122. data/bundle/ruby/2.7.0/gems/ruby_parser-3.16.0/lib/ruby30_parser.y +2703 -0
  123. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby_lexer.rb +19 -0
  124. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby_lexer.rex +1 -1
  125. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby_lexer.rex.rb +1 -1
  126. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby_parser.rb +2 -0
  127. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby_parser.yy +57 -1
  128. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/lib/ruby_parser_extras.rb +2 -2
  129. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/tools/munge.rb +2 -2
  130. data/bundle/ruby/2.7.0/gems/{ruby_parser-3.15.0 → ruby_parser-3.16.0}/tools/ripper.rb +1 -1
  131. data/bundle/ruby/2.7.0/gems/{sexp_processor-4.15.1 → sexp_processor-4.15.3}/History.rdoc +12 -0
  132. data/bundle/ruby/2.7.0/gems/{sexp_processor-4.15.1 → sexp_processor-4.15.3}/Manifest.txt +0 -0
  133. data/bundle/ruby/2.7.0/gems/{sexp_processor-4.15.1 → sexp_processor-4.15.3}/README.rdoc +0 -0
  134. data/bundle/ruby/2.7.0/gems/{sexp_processor-4.15.1 → sexp_processor-4.15.3}/lib/composite_sexp_processor.rb +0 -0
  135. data/bundle/ruby/2.7.0/gems/{sexp_processor-4.15.1 → sexp_processor-4.15.3}/lib/pt_testcase.rb +2 -2
  136. data/bundle/ruby/2.7.0/gems/{sexp_processor-4.15.1 → sexp_processor-4.15.3}/lib/sexp.rb +0 -0
  137. data/bundle/ruby/2.7.0/gems/{sexp_processor-4.15.1 → sexp_processor-4.15.3}/lib/sexp_matcher.rb +0 -0
  138. data/bundle/ruby/2.7.0/gems/{sexp_processor-4.15.1 → sexp_processor-4.15.3}/lib/sexp_processor.rb +1 -1
  139. data/bundle/ruby/2.7.0/gems/{sexp_processor-4.15.1 → sexp_processor-4.15.3}/lib/strict_sexp.rb +0 -0
  140. data/bundle/ruby/2.7.0/gems/{sexp_processor-4.15.1 → sexp_processor-4.15.3}/lib/unique.rb +0 -0
  141. data/lib/brakeman.rb +17 -4
  142. data/lib/brakeman/app_tree.rb +36 -3
  143. data/lib/brakeman/checks/base_check.rb +7 -1
  144. data/lib/brakeman/checks/check_execute.rb +2 -1
  145. data/lib/brakeman/checks/check_mass_assignment.rb +4 -6
  146. data/lib/brakeman/checks/check_regex_dos.rb +1 -1
  147. data/lib/brakeman/checks/check_sanitize_methods.rb +2 -1
  148. data/lib/brakeman/checks/check_sql.rb +1 -1
  149. data/lib/brakeman/checks/check_unsafe_reflection_methods.rb +68 -0
  150. data/lib/brakeman/checks/check_verb_confusion.rb +75 -0
  151. data/lib/brakeman/file_parser.rb +24 -18
  152. data/lib/brakeman/options.rb +5 -1
  153. data/lib/brakeman/parsers/template_parser.rb +26 -3
  154. data/lib/brakeman/processors/alias_processor.rb +40 -13
  155. data/lib/brakeman/processors/base_processor.rb +4 -4
  156. data/lib/brakeman/processors/controller_processor.rb +1 -1
  157. data/lib/brakeman/processors/haml_template_processor.rb +8 -1
  158. data/lib/brakeman/processors/lib/file_type_detector.rb +64 -0
  159. data/lib/brakeman/processors/lib/rails3_config_processor.rb +16 -16
  160. data/lib/brakeman/processors/lib/rails4_config_processor.rb +2 -1
  161. data/lib/brakeman/processors/output_processor.rb +1 -1
  162. data/lib/brakeman/processors/template_alias_processor.rb +5 -0
  163. data/lib/brakeman/report.rb +8 -0
  164. data/lib/brakeman/report/report_base.rb +0 -2
  165. data/lib/brakeman/report/report_csv.rb +37 -60
  166. data/lib/brakeman/report/report_junit.rb +2 -2
  167. data/lib/brakeman/report/report_sarif.rb +1 -1
  168. data/lib/brakeman/report/report_sonar.rb +38 -0
  169. data/lib/brakeman/report/report_tabs.rb +1 -1
  170. data/lib/brakeman/report/report_text.rb +1 -1
  171. data/lib/brakeman/rescanner.rb +7 -5
  172. data/lib/brakeman/scanner.rb +44 -18
  173. data/lib/brakeman/tracker.rb +6 -0
  174. data/lib/brakeman/tracker/config.rb +73 -0
  175. data/lib/brakeman/tracker/controller.rb +1 -1
  176. data/lib/brakeman/util.rb +9 -4
  177. data/lib/brakeman/version.rb +1 -1
  178. data/lib/brakeman/warning.rb +10 -2
  179. data/lib/brakeman/warning_codes.rb +2 -0
  180. data/lib/ruby_parser/bm_sexp.rb +9 -9
  181. metadata +143 -84
  182. data/bundle/ruby/2.7.0/gems/haml-5.1.2/lib/haml/escapable.rb +0 -50
  183. data/bundle/ruby/2.7.0/gems/ruby_parser-3.15.0/debugging.md +0 -57
  184. data/bundle/ruby/2.7.0/gems/ruby_parser-3.15.0/lib/ruby21_parser.rb +0 -7140
  185. data/bundle/ruby/2.7.0/gems/ruby_parser-3.15.0/lib/ruby22_parser.rb +0 -7160
  186. data/bundle/ruby/2.7.0/gems/ruby_parser-3.15.0/lib/ruby27_parser.rb +0 -7224
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: false
2
module REXML
  # Error raised when a document cannot be parsed.
  #
  # Besides the message it can carry the source being read, the parser that
  # was running, and an underlying ("continued") exception that triggered
  # the failure. #to_s folds all of that into one diagnostic string.
  class ParseException < RuntimeError
    attr_accessor :source, :parser, :continued_exception

    # message::   human readable description of the problem
    # source::    object being parsed; #to_s reads +buffer+ from it and, when
    #             it responds to +current_line+, uses that for line/position
    # parser::    the parser that raised (stored only)
    # exception:: an underlying exception to quote in #to_s
    def initialize( message, source=nil, parser=nil, exception=nil )
      super(message)
      @source = source
      @parser = parser
      @continued_exception = exception
    end

    # Builds the full diagnostic: the quoted underlying exception (if any),
    # the message, and source context (if a source was given).
    def to_s
      err = +""

      # Quote the original exception, if there was one
      if @continued_exception
        err << @continued_exception.inspect
        err << "\n"
        # backtrace is nil for an exception that was built but never raised
        err << (@continued_exception.backtrace || []).join("\n")
        err << "\n...\n"
      end

      # Get the error message
      err << super

      # Add contextual information
      if @source
        err << "\nLine: #{line}\n"
        err << "Position: #{position}\n"
        err << "Last 80 unconsumed characters:\n"
        # NOTE(review): 0..80 is inclusive, so this prints up to 81
        # characters; kept as-is to leave the message unchanged.
        err << @source.buffer[0..80].force_encoding("ASCII-8BIT").gsub(/\n/, ' ')
      end

      err
    end

    # First element of the source's +current_line+ result, or nil when the
    # source is absent or cannot report it.
    def position
      info = current_line_info
      info[0] if info
    end

    # Third element of the source's +current_line+ result, or nil when the
    # source is absent or cannot report it.
    def line
      info = current_line_info
      info[2] if info
    end

    # The raw +current_line+ result of the source, or nil when unavailable.
    def context
      current_line_info
    end

    private

    # Single place that decides whether +current_line+ can be asked for.
    def current_line_info
      @source.current_line if @source and defined?(@source.current_line)
    end
  end
end
@@ -0,0 +1,694 @@
1
+ # frozen_string_literal: false
2
+ require_relative '../parseexception'
3
+ require_relative '../undefinednamespaceexception'
4
+ require_relative '../source'
5
+ require 'set'
6
+ require "strscan"
7
+
8
+ module REXML
9
+ module Parsers
10
+ # = Using the Pull Parser
11
+ # <em>This API is experimental, and subject to change.</em>
12
+ # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
13
+ # while parser.has_next?
14
+ # res = parser.next
15
+ # puts res[1]['att'] if res.start_tag? and res[0] == 'b'
16
+ # end
17
+ # See the PullEvent class for information on the content of the results.
18
+ # The data is identical to the arguments passed for the various events to
19
+ # the StreamListener API.
20
+ #
21
+ # Notice that:
22
+ # parser = PullParser.new( "<a>BAD DOCUMENT" )
23
+ # while parser.has_next?
24
+ # res = parser.next
25
+ # raise res[1] if res.error?
26
+ # end
27
+ #
28
+ # Nat Price gave me some good ideas for the API.
29
+ class BaseParser
30
+ LETTER = '[:alpha:]' # POSIX class name; only meaningful inside a [...] set (see NCNAME_STR)
31
+ DIGIT = '[:digit:]' # POSIX class name; not referenced by the other constants shown here
32
+
33
+ COMBININGCHAR = '' # TODO: still empty, so it adds nothing to NCNAME_STR
34
+ EXTENDER = '' # TODO: still empty, so it adds nothing to NCNAME_STR
35
+
36
+ NCNAME_STR= "[#{LETTER}_][-[:alnum:]._#{COMBININGCHAR}#{EXTENDER}]*"
37
+ QNAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})" # group 1: optional prefix, group 2: local name
38
+ QNAME = /(#{QNAME_STR})/
39
+
40
+ # Kept only for backward compatibility: external code (kramdown, for
41
+ # example) uses it. REXML itself does not.
42
+ UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
43
+
44
+ NAMECHAR = '[\-\w\.:]'
45
+ NAME = "([\\w:]#{NAMECHAR}*)" # note: contributes one capture group wherever it is interpolated
46
+ NMTOKEN = "(?:#{NAMECHAR})+"
47
+ NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
48
+ REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)"
49
+ REFERENCE_RE = /#{REFERENCE}/
50
+
51
+ DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
52
+ DOCTYPE_END = /\A\s*\]\s*>/um
53
+ ATTRIBUTE_PATTERN = /\s*(#{QNAME_STR})\s*=\s*(["'])(.*?)\4/um # \4 is the opening quote (groups 1-3 come from the name)
54
+ COMMENT_START = /\A<!--/u
55
+ COMMENT_PATTERN = /<!--(.*?)-->/um
56
+ CDATA_START = /\A<!\[CDATA\[/u
57
+ CDATA_END = /\A\s*\]\s*>/um # NOTE(review): same pattern as DOCTYPE_END (matches "]>", not "]]>") - confirm intent
58
+ CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
59
+ XMLDECL_START = /\A<\?xml\s/u;
60
+ XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
61
+ INSTRUCTION_START = /\A<\?/u
62
+ INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um
63
+ TAG_MATCH = /\A<((?>#{QNAME_STR}))/um # group 1: full tag name, group 2: prefix (nil when absent)
64
+ CLOSE_MATCH = /\A\s*<\/(#{QNAME_STR})\s*>/um
65
+
66
+ VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
67
+ ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
68
+ STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
69
+
70
+ ENTITY_START = /\A\s*<!ENTITY/
71
+ ELEMENTDECL_START = /\A\s*<!ELEMENT/um
72
+ ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um
73
+ SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um
74
+ ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
75
+ NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
76
+ ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
77
+ ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
78
+ ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
79
+ DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
80
+ ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
81
+ ATTDEF_RE = /#{ATTDEF}/
82
+ ATTLISTDECL_START = /\A\s*<!ATTLIST/um
83
+ ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
84
+
85
+ TEXT_PATTERN = /\A([^<]*)/um
86
+
87
+ # Entity constants. PUBIDCHAR is a list of characters meant to be placed
# inside a [...] set (see PUBIDLITERAL), not a pattern on its own.
88
+ PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
89
+ SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
90
+ PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
91
+ EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
92
+ NDATADECL = "\\s+NDATA\\s+#{NAME}"
93
+ PEREFERENCE = "%#{NAME};"
94
+ ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
95
+ PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
96
+ ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
97
+ PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
98
+ GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
99
+ ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um # NOTE(review): the leading \s* belongs to the GEDECL alternative only
100
+
101
+ NOTATIONDECL_START = /\A\s*<!NOTATION/um
102
+ EXTERNAL_ID_PUBLIC = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*/um
103
+ EXTERNAL_ID_SYSTEM = /\A\s*SYSTEM\s+#{SYSTEMLITERAL}\s*/um
104
+ PUBLIC_ID = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s*/um
105
+
106
+ EREFERENCE = /&(?!#{NAME};)/ # an ampersand that does not start a named reference
107
+
108
+ DEFAULT_ENTITIES = { # name => [escaped regexp, escaped text, raw character, raw regexp]
109
+ 'gt' => [/&gt;/, '&gt;', '>', />/],
110
+ 'lt' => [/&lt;/, '&lt;', '<', /</],
111
+ 'quot' => [/&quot;/, '&quot;', '"', /"/],
112
+ "apos" => [/&apos;/, "&apos;", "'", /'/]
113
+ }
114
+
115
# Creates a parser over +source+ with no listeners registered.
# +source+ is handed to #stream=, which also resets all parse state.
def initialize( source )
  @listeners = []
  self.stream = source
end
119
+
120
# Registers +listener+; #pull calls +receive+ on every registered
# listener for each event it returns.
def add_listener( listener )
  @listeners.push(listener)
end
123
+
124
+ attr_reader :source # the object currently being parsed; assigned by stream=
125
+
126
# Points the parser at a new input and clears every piece of per-document
# state (pending self-closed tag, document status, open tags, queued
# events, entities and the namespace stack).
def stream=( source )
  @source = SourceFactory.create_from( source )
  @closed = @document_status = nil
  @tags, @stack, @entities = [], [], []
  @nsstack = []
end
135
+
136
# Current position as reported by the source, or 0 when the source does
# not expose a +position+ method.
def position
  # FIXME: 0 is only a placeholder for sources without #position
  @source.respond_to?(:position) ? @source.position : 0
end
144
+
145
+ # Returns true if there are no more events
146
# True when the source is exhausted and no events are queued.
def empty?
  @source.empty? && @stack.empty?
end
149
+
150
+ # Returns true if there are more events. Synonymous with !empty?
151
# True while the source still has input or events are queued.
# The opposite of #empty?.
def has_next?
  !@source.empty? || !@stack.empty?
end
154
+
155
+ # Push an event back on the head of the stream. This method
156
+ # has (theoretically) infinite depth.
157
# Puts +token+ back at the head of the pending-event queue so it is the
# next event returned. May be called any number of times.
def unshift token
  @stack.insert(0, token)
end
160
+
161
+ # Peek at the +depth+ event in the stack. The first element on the stack
162
+ # is at depth 0. If +depth+ is -1, will parse to the end of the input
163
+ # stream and return the last event, which is always :end_document.
164
+ # Be aware that this causes the stream to be parsed up to the +depth+
165
+ # event, so you can effectively pre-parse the entire document (pull the
166
+ # entire thing into memory) using this method.
167
# Looks ahead at the event +depth+ positions from the head of the queue
# without consuming it (0 is the next event). With +depth+ == -1 the whole
# input is parsed and the last event is returned.
# Everything pulled while looking ahead is kept in the queue, so a deep
# peek holds that much of the document in memory.
# Raises RuntimeError for +depth+ below -1.
def peek depth=0
  raise %Q[Illegal argument "#{depth}"] if depth < -1
  pulled = []
  if depth == -1
    pulled << pull until empty?
  else
    wanted = depth + 1
    pulled << pull while @stack.size + pulled.size < wanted
  end
  @stack += pulled unless pulled.empty?
  @stack[depth]
end
180
+
181
+ # Returns the next event. This is a +PullEvent+ object.
182
# Returns the next event after passing it to +receive+ on every
# registered listener.
def pull
  event = pull_event
  @listeners.each { |listener| listener.receive event }
  event
end
189
+
190
# Reads just enough of the source to produce the next event and returns
# it as an Array whose first element is the event type: :xmldecl,
# :comment, :start_doctype, :end_doctype, :externalentity, :elementdecl,
# :entitydecl, :attlistdecl, :notationdecl, :start_element, :end_element,
# :cdata, :text, :end_document, or whatever process_instruction returns.
#
# @document_status drives the parsing:
#   nil            - nothing but prolog content has been seen so far
#   :in_doctype    - inside the [...] internal subset of a DOCTYPE
#   :after_doctype - the DOCTYPE is finished, or the prolog was left
#   :in_element    - at least one start tag has been read
#
# Raises REXML::ParseException for malformed input and
# UndefinedNamespaceException for a prefix that was never declared. A
# pattern that unexpectedly fails to match is reported as a
# ParseException instead of surfacing as NoMethodError on nil.
def pull_event
  # A self-closing tag was reported as :start_element by the previous
  # call; its matching :end_element is due now.
  if @closed
    x, @closed = @closed, nil
    return [ :end_element, x ]
  end
  return [ :end_document ] if empty?
  return @stack.shift if @stack.size > 0

  if @document_status == nil
    word = @source.match( /\A((?:\s+)|(?:<[^>]*>))/um )
    word = word[1] unless word.nil?
    case word
    when COMMENT_START
      md = @source.match( COMMENT_PATTERN, true )
      raise REXML::ParseException.new("Unclosed comment", @source) if md.nil?
      return [ :comment, md[1] ]
    when XMLDECL_START
      md = @source.match( XMLDECL_PATTERN, true )
      raise REXML::ParseException.new("Malformed XML declaration", @source) if md.nil?
      results = md[1]
      version = VERSION.match( results )
      version = version[1] unless version.nil?
      encoding = ENCODING.match(results)
      encoding = encoding[1] unless encoding.nil?
      if need_source_encoding_update?(encoding)
        @source.encoding = encoding
      end
      if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
        encoding = "UTF-16"
      end
      standalone = STANDALONE.match(results)
      standalone = standalone[1] unless standalone.nil?
      return [ :xmldecl, version, encoding, standalone ]
    when INSTRUCTION_START
      return process_instruction
    when DOCTYPE_START
      base_error_message = "Malformed DOCTYPE"
      @source.match(DOCTYPE_START, true)
      @nsstack.unshift(Set.new)
      name = parse_name(base_error_message)
      if @source.match(/\A\s*\[/um, true)
        id = [nil, nil, nil]
        @document_status = :in_doctype
      elsif @source.match(/\A\s*>/um, true)
        id = [nil, nil, nil]
        @document_status = :after_doctype
      else
        id = parse_id(base_error_message,
                      accept_external_id: true,
                      accept_public_id: false)
        if id[0] == "SYSTEM"
          # For backward compatibility
          id[1], id[2] = id[2], nil
        end
        if @source.match(/\A\s*\[/um, true)
          @document_status = :in_doctype
        elsif @source.match(/\A\s*>/um, true)
          @document_status = :after_doctype
        else
          message = "#{base_error_message}: garbage after external ID"
          raise REXML::ParseException.new(message, @source)
        end
      end
      args = [:start_doctype, name, *id]
      if @document_status == :after_doctype
        # No internal subset: queue the matching :end_doctype right away.
        @source.match(/\A\s*/um, true)
        @stack << [ :end_doctype ]
      end
      return args
    when /\A\s+/
      # Leading whitespace: nothing to do here, it is picked up by the
      # text branch further down.
    else
      @document_status = :after_doctype
      if @source.encoding == "UTF-8"
        @source.buffer.force_encoding(::Encoding::UTF_8)
      end
    end
  end

  if @document_status == :in_doctype
    md = @source.match(/\A\s*(.*?>)/um)
    if md.nil?
      message = "Malformed DOCTYPE: unterminated internal subset"
      raise REXML::ParseException.new(message, @source)
    end
    case md[1]
    when SYSTEMENTITY
      match = @source.match( SYSTEMENTITY, true )[1]
      return [ :externalentity, match ]

    when ELEMENTDECL_START
      return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]

    when ENTITY_START
      entity_md = @source.match( ENTITYDECL, true )
      if entity_md.nil?
        raise REXML::ParseException.new("Malformed entity declaration", @source)
      end
      match = entity_md.to_a.compact
      match[0] = :entitydecl
      ref = false
      if match[1] == '%'
        ref = true
        match.delete_at 1
      end
      # Now we have to sort out what kind of entity reference this is
      if match[2] == 'SYSTEM'
        # External reference
        match[3] = match[3][1..-2] # PUBID
        match.delete_at(4) if match.size > 4 # Chop out NDATA decl
        # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
      elsif match[2] == 'PUBLIC'
        # External reference
        match[3] = match[3][1..-2] # PUBID
        match[4] = match[4][1..-2] # HREF
        match.delete_at(5) if match.size > 5 # Chop out NDATA decl
        # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
      else
        match[2] = match[2][1..-2]
        match.pop if match.size == 4
        # match is [ :entity, name, value ]
      end
      match << '%' if ref
      return match
    when ATTLISTDECL_START
      md = @source.match( ATTLISTDECL_PATTERN, true )
      raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
      element = md[1]
      contents = md[0]

      pairs = {}
      values = md[0].scan( ATTDEF_RE )
      values.each do |attdef|
        unless attdef[3] == "#IMPLIED"
          attdef.compact!
          val = attdef[3]
          val = attdef[4] if val == "#FIXED "
          pairs[attdef[0]] = val
          # A default for an xmlns:* attribute declares that prefix.
          if attdef[0] =~ /^xmlns:(.*)/
            @nsstack[0] << $1
          end
        end
      end
      return [ :attlistdecl, element, pairs, contents ]
    when NOTATIONDECL_START
      base_error_message = "Malformed notation declaration"
      unless @source.match(/\A\s*<!NOTATION\s+/um, true)
        if @source.match(/\A\s*<!NOTATION\s*>/um)
          message = "#{base_error_message}: name is missing"
        else
          message = "#{base_error_message}: invalid declaration name"
        end
        raise REXML::ParseException.new(message, @source)
      end
      name = parse_name(base_error_message)
      id = parse_id(base_error_message,
                    accept_external_id: true,
                    accept_public_id: true)
      unless @source.match(/\A\s*>/um, true)
        message = "#{base_error_message}: garbage before end >"
        raise REXML::ParseException.new(message, @source)
      end
      return [:notationdecl, name, *id]
    when DOCTYPE_END
      @document_status = :after_doctype
      @source.match( DOCTYPE_END, true )
      return [ :end_doctype ]
    end
  end

  if @document_status == :after_doctype
    @source.match(/\A\s*/um, true)
  end

  begin
    @source.read if @source.buffer.size<2
    if @source.buffer[0] == ?<
      if @source.buffer[1] == ?/
        @nsstack.shift
        last_tag = @tags.pop
        md = @source.match( CLOSE_MATCH, true )
        if md and !last_tag
          message = "Unexpected top-level end tag (got '#{md[1]}')"
          raise REXML::ParseException.new(message, @source)
        end
        if md.nil? or last_tag != md[1]
          message = "Missing end tag for '#{last_tag}'"
          message << " (got '#{md[1]}')" if md
          raise REXML::ParseException.new(message, @source)
        end
        return [ :end_element, last_tag ]
      elsif @source.buffer[1] == ?!
        md = @source.match(/\A(\s*[^>]*>)/um)
        raise REXML::ParseException.new("Malformed node", @source) unless md
        if md[0][2] == ?-
          md = @source.match( COMMENT_PATTERN, true )
          # Check before inspecting the body: "<!-" without a closing
          # "-->" used to surface as a generic "Exception parsing".
          raise REXML::ParseException.new("Unclosed comment", @source) if md.nil?

          case md[1]
          when /--/, /-\z/
            raise REXML::ParseException.new("Malformed comment", @source)
          end

          return [ :comment, md[1] ]
        else
          md = @source.match( CDATA_PATTERN, true )
          return [ :cdata, md[1] ] if md
        end
        raise REXML::ParseException.new( "Declarations can only occur "+
          "in the doctype declaration.", @source)
      elsif @source.buffer[1] == ??
        return process_instruction
      else
        # Get the next tag
        md = @source.match(TAG_MATCH, true)
        unless md
          raise REXML::ParseException.new("malformed XML: missing tag start", @source)
        end
        @document_status = :in_element
        prefixes = Set.new
        prefixes << md[2] if md[2]
        @nsstack.unshift(curr_ns=Set.new)
        attributes, closed = parse_attributes(prefixes, curr_ns)
        # Verify that all of the prefixes have been defined
        prefixes.each do |prefix|
          unless @nsstack.find{|k| k.member?(prefix)}
            raise UndefinedNamespaceException.new(prefix,@source,self)
          end
        end

        if closed
          # Self-closing tag: remember the name so the next call emits
          # the :end_element.
          @closed = md[1]
          @nsstack.shift
        else
          @tags.push( md[1] )
        end
        return [ :start_element, md[1], attributes ]
      end
    else
      md = @source.match( TEXT_PATTERN, true )
      if md[0].length == 0
        @source.match( /(\s+)/, true )
      end
      return [ :text, md[1] ]
    end
  rescue REXML::UndefinedNamespaceException
    raise
  rescue REXML::ParseException
    raise
  rescue => error
    # Anything unexpected is wrapped so callers only see ParseException.
    raise REXML::ParseException.new( "Exception parsing",
      @source, self, error )
  end
  return [ :dummy ]
end
private :pull_event
437
+
438
# Resolves the entity named +reference+ to its unnormalized value.
# +entities+ (a Hash-like lookup, may be nil) is consulted first, then the
# built-in DEFAULT_ENTITIES. Returns nil when the name is unknown.
def entity( reference, entities )
  value = entities && entities[ reference ]
  unless value
    builtin = DEFAULT_ENTITIES[ reference ]
    value = builtin[2] if builtin
  end
  return nil unless value
  unnormalize( value, entities )
end
447
+
448
+ # Escapes all possible entities
449
# Escapes +input+ for inclusion in XML and returns the escaped copy;
# +input+ itself is never modified.
#
# input::         the String to escape (may be frozen)
# entities::      optional name => value pairs; each occurrence of a value
#                 is replaced by its <tt>&name;</tt> reference
# entity_filter:: optional collection of entity names that must NOT be
#                 substituted
#
# Bare ampersands become <tt>&amp;</tt>, then the characters listed in
# DEFAULT_ENTITIES are escaped.
def normalize( input, entities=nil, entity_filter=nil )
  # dup, not clone: clone would keep a frozen flag and gsub! would raise.
  copy = input.dup
  # Doing it like this rather than in a loop improves the speed
  copy.gsub!( EREFERENCE, '&amp;' )
  if entities
    entities.each do |key, value|
      # Filter on the entity's name (key).
      next if entity_filter and entity_filter.include?(key)
      copy.gsub!( value, "&#{key};" )
    end
  end
  copy.gsub!( EREFERENCE, '&amp;' )
  DEFAULT_ENTITIES.each do |key, value|
    copy.gsub!( value[3], value[1] )
  end
  copy
end
463
+
464
# Unescapes all possible entities.
#
# string::   String to unescape. It is never modified; a new String is
#            returned. Frozen strings are accepted.
# entities:: Optional mapping handed to #entity when resolving named
#            references.
# filter::   Optional collection of entity names (must respond to include?)
#            that are left untouched.
#
# Line endings ("\r\n" and lone "\r") are converted to "\n" first. Numeric
# character references (decimal and hexadecimal) are replaced by the
# character they denote, then named references are resolved through #entity,
# and finally "&amp;" becomes "&".
def unnormalize( string, entities=nil, filter=nil )
  # dup, not clone: clone would carry over a frozen flag and make the
  # in-place gsub! calls below raise.
  rv = string.dup
  rv.gsub!( /\r\n?/, "\n" )
  matches = rv.scan( REFERENCE_RE )
  return rv if matches.empty?
  rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
    code = Regexp.last_match(1)
    # "x41" -> "0x41" so Integer() parses it as hexadecimal
    code = "0#{code}" if code.start_with?("x")
    [Integer(code)].pack('U*')
  }
  # NOTE(review): assumes REFERENCE_RE has one capture group holding the
  # entity name (nil for numeric references) - confirm against its definition.
  names = matches.map { |m| m[0] }.compact
  unless names.empty?
    names.each do |entity_reference|
      next if filter and filter.include?(entity_reference)
      entity_value = entity( entity_reference, entities )
      if entity_value
        # Literal String pattern: names may contain "." which must not act
        # as a regexp wildcard.
        rv.gsub!( "&#{entity_reference};", entity_value )
      else
        er = DEFAULT_ENTITIES[entity_reference]
        rv.gsub!( er[0], er[2] ) if er
      end
    end
    rv.gsub!( /&amp;/, '&' )
  end
  rv
end
493
+
494
+ private
495
# Tells whether the encoding named in the XML declaration requires the
# source encoding to be updated: false when no encoding was declared (nil)
# or when it is exactly "UTF-16" (case-insensitive), true otherwise.
def need_source_encoding_update?(xml_declaration_encoding)
  if xml_declaration_encoding.nil?
    false
  elsif /\AUTF-16\z/i =~ xml_declaration_encoding
    false
  else
    true
  end
end
500
+
501
# Consumes optional whitespace followed by a NAME from @source and returns
# the first capture of the match.
#
# Raises REXML::ParseException, prefixed with +base_error_message+, when no
# name matches: "invalid name" if some non-whitespace text follows,
# "name is missing" otherwise.
def parse_name(base_error_message)
  name_match = @source.match(/\A\s*#{NAME}/um, true)
  return name_match[1] if name_match

  message =
    if @source.match(/\A\s*\S/um)
      "#{base_error_message}: invalid name"
    else
      "#{base_error_message}: name is missing"
    end
  raise REXML::ParseException.new(message, @source)
end
513
+
514
# Consumes an external or public identifier from @source.
#
# Returns a three-element array [type, public_id, system_id] where type is
# "PUBLIC" or "SYSTEM" and a missing ID is nil. The captured literals have
# their first and last character (the quotes) removed.
#
# accept_external_id:: try EXTERNAL_ID_PUBLIC and EXTERNAL_ID_SYSTEM
# accept_public_id::   try PUBLIC_ID
#
# The patterns are tried in the order EXTERNAL_ID_PUBLIC, PUBLIC_ID,
# EXTERNAL_ID_SYSTEM. When none matches, REXML::ParseException is raised
# with +base_error_message+ followed by the details from
# #parse_id_invalid_details.
def parse_id(base_error_message,
             accept_external_id:,
             accept_public_id:)
  # Remove quote
  unquote = ->(literal) { literal && literal[1..-2] }

  if accept_external_id
    found = @source.match(EXTERNAL_ID_PUBLIC, true)
    return ["PUBLIC", unquote.call(found[1]), unquote.call(found[2])] if found
  end

  if accept_public_id
    found = @source.match(PUBLIC_ID, true)
    return ["PUBLIC", unquote.call(found[1]), nil] if found
  end

  if accept_external_id
    found = @source.match(EXTERNAL_ID_SYSTEM, true)
    return ["SYSTEM", nil, unquote.call(found[1])] if found
  end

  details = parse_id_invalid_details(accept_external_id: accept_external_id,
                                     accept_public_id: accept_public_id)
  message = "#{base_error_message}: #{details}"
  raise REXML::ParseException.new(message, @source)
end
541
+
542
# Builds the detail part of the error message for an ID that #parse_id
# could not parse. @source is only inspected here (no match is consumed)
# and the first applicable description is returned as a String.
def parse_id_invalid_details(accept_external_id:,
                             accept_public_id:)
  public_kw = /\A\s*PUBLIC/um
  system_kw = /\A\s*SYSTEM/um

  if (accept_external_id || accept_public_id) && @source.match(/#{public_kw}/um)
    if @source.match(/#{public_kw}(?:\s+[^'"]|\s*[\[>])/um)
      return "public ID literal is missing"
    end
    unless @source.match(/#{public_kw}\s+#{PUBIDLITERAL}/um)
      return "invalid public ID literal"
    end
    return "garbage after public ID literal" unless accept_public_id

    if @source.match(/#{public_kw}\s+#{PUBIDLITERAL}\s+[^'"]/um)
      return "system ID literal is missing"
    end
    unless @source.match(/#{public_kw}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
      return "invalid system literal"
    end
    return "garbage after system literal"
  end

  if accept_external_id && @source.match(/#{system_kw}/um)
    if @source.match(/#{system_kw}(?:\s+[^'"]|\s*[\[>])/um)
      return "system literal is missing"
    end
    unless @source.match(/#{system_kw}\s+#{SYSTEMLITERAL}/um)
      return "invalid system literal"
    end
    return "garbage after system literal"
  end

  return "invalid ID type" unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um)

  "ID type is missing"
end
579
+
580
# Consumes a processing instruction from @source using INSTRUCTION_PATTERN.
#
# Returns [:processing_instruction, first_capture, second_capture].
# Raises REXML::ParseException when the pattern does not match.
def process_instruction
  pi_match = @source.match(INSTRUCTION_PATTERN, true)
  if pi_match
    [:processing_instruction, pi_match[1], pi_match[2]]
  else
    raise REXML::ParseException.new("Invalid processing instruction node", @source)
  end
end
588
+
589
# Consumes the remainder of a start tag (up to and including ">") from
# @source and parses its attributes.
#
# prefixes:: collection that receives (via <<) every attribute prefix other
#            than "xmlns" and "xml"
# curr_ns::  collection that receives (via <<) the local part of every
#            "xmlns:..." attribute
#
# Returns [attributes, closed]: a Hash of attribute name => value, and true
# when the tag ended with "/>".
#
# Raises REXML::ParseException for an unterminated tag, a malformed
# attribute, an illegal binding of the "xml"/"xmlns" prefixes, or a
# duplicated attribute name.
def parse_attributes(prefixes, curr_ns)
  attributes = {}
  tag_tail = @source.match(/^(.*?)(\/)?>/um, true)
  if tag_tail.nil?
    raise REXML::ParseException.new("Start tag isn't ended", @source)
  end

  raw_attributes = tag_tail[1]
  closed = !tag_tail[2].nil?
  return attributes, closed if raw_attributes.nil? || raw_attributes.empty?

  scanner = StringScanner.new(raw_attributes)
  until scanner.eos?
    # Skip leading whitespace; stop when nothing but whitespace was left.
    break if scanner.scan(/\s+/) && scanner.eos?

    attribute_start = scanner.pos
    until scanner.scan(ATTRIBUTE_PATTERN)
      # The full pattern failed: walk the pieces to find out why.
      unless scanner.scan(QNAME)
        raise REXML::ParseException.new(
          "Invalid attribute name: <#{scanner.rest}>", @source)
      end
      qname = scanner[0]
      unless scanner.scan(/\s*=\s*/um)
        raise REXML::ParseException.new(
          "Missing attribute equal: <#{qname}>", @source)
      end
      opening_quote = scanner.scan(/['"]/)
      unless opening_quote
        raise REXML::ParseException.new(
          "Missing attribute value start quote: <#{qname}>", @source)
      end
      next if scanner.scan(/.*#{Regexp.escape(opening_quote)}/um)

      # No closing quote in the text captured so far: pull the text up to
      # the next ">" from @source, append it (restoring the "/" and ">"
      # that ended the previous capture) and retry from the attribute start.
      more = @source.match(/^(.*?)(\/)?>/um, true)
      unless more
        raise REXML::ParseException.new(
          "Missing attribute value end quote: <#{qname}>: <#{opening_quote}>",
          @source)
      end
      scanner << "/" if closed
      scanner << ">"
      scanner << more[1]
      scanner.pos = attribute_start
      closed = !more[2].nil?
    end

    # Captures of ATTRIBUTE_PATTERN; group 4 (the quote) is not needed.
    name = scanner[1]
    prefix = scanner[2]
    local_part = scanner[3]
    value = scanner[5]

    if prefix == "xmlns"
      case local_part
      when "xml"
        if value != "http://www.w3.org/XML/1998/namespace"
          msg = "The 'xml' prefix must not be bound to any other namespace " \
                "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
          raise REXML::ParseException.new( msg, @source, self )
        end
      when "xmlns"
        msg = "The 'xmlns' prefix must not be declared " \
              "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
        raise REXML::ParseException.new( msg, @source, self)
      end
      curr_ns << local_part
    elsif prefix
      prefixes << prefix unless prefix == "xml"
    end

    if attributes.has_key?(name)
      raise REXML::ParseException.new(
        "Duplicate attribute #{name.inspect}", @source, self)
    end

    attributes[name] = value
  end
  return attributes, closed
end
672
+ end
673
+ end
674
+ end
675
+
676
+ =begin
677
+ case event[0]
678
+ when :start_element
679
+ when :text
680
+ when :end_element
681
+ when :processing_instruction
682
+ when :cdata
683
+ when :comment
684
+ when :xmldecl
685
+ when :start_doctype
686
+ when :end_doctype
687
+ when :externalentity
688
+ when :elementdecl
689
+ when :entity
690
+ when :attlistdecl
691
+ when :notationdecl
692
+ when :end_doctype
693
+ end
694
+ =end