arachni 0.2.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (262) hide show
  1. data/ACKNOWLEDGMENTS.md +14 -0
  2. data/AUTHORS.md +6 -0
  3. data/CHANGELOG.md +162 -0
  4. data/CONTRIBUTORS.md +10 -0
  5. data/EXPLOITATION.md +429 -0
  6. data/HACKING.md +101 -0
  7. data/LICENSE.md +341 -0
  8. data/README.md +350 -0
  9. data/Rakefile +86 -0
  10. data/bin/arachni +22 -0
  11. data/bin/arachni_web +77 -0
  12. data/bin/arachni_xmlrpc +21 -0
  13. data/bin/arachni_xmlrpcd +82 -0
  14. data/bin/arachni_xmlrpcd_monitor +74 -0
  15. data/conf/README.webui.yaml.txt +44 -0
  16. data/conf/webui.yaml +11 -0
  17. data/external/metasploit/LICENSE +24 -0
  18. data/external/metasploit/modules/exploits/unix/webapp/arachni_exec.rb +142 -0
  19. data/external/metasploit/modules/exploits/unix/webapp/arachni_path_traversal.rb +113 -0
  20. data/external/metasploit/modules/exploits/unix/webapp/arachni_php_eval.rb +150 -0
  21. data/external/metasploit/modules/exploits/unix/webapp/arachni_php_include.rb +141 -0
  22. data/external/metasploit/modules/exploits/unix/webapp/arachni_sqlmap.rb +92 -0
  23. data/external/metasploit/plugins/arachni.rb +536 -0
  24. data/getoptslong.rb +241 -0
  25. data/lib/anemone.rb +2 -0
  26. data/lib/anemone/cookie_store.rb +35 -0
  27. data/lib/anemone/core.rb +371 -0
  28. data/lib/anemone/exceptions.rb +5 -0
  29. data/lib/anemone/http.rb +144 -0
  30. data/lib/anemone/page.rb +337 -0
  31. data/lib/anemone/page_store.rb +160 -0
  32. data/lib/anemone/storage.rb +34 -0
  33. data/lib/anemone/storage/base.rb +75 -0
  34. data/lib/anemone/storage/exceptions.rb +15 -0
  35. data/lib/anemone/storage/mongodb.rb +89 -0
  36. data/lib/anemone/storage/pstore.rb +50 -0
  37. data/lib/anemone/storage/redis.rb +90 -0
  38. data/lib/anemone/storage/tokyo_cabinet.rb +57 -0
  39. data/lib/anemone/tentacle.rb +40 -0
  40. data/lib/arachni.rb +16 -0
  41. data/lib/audit_store.rb +346 -0
  42. data/lib/component_manager.rb +293 -0
  43. data/lib/component_options.rb +395 -0
  44. data/lib/exceptions.rb +76 -0
  45. data/lib/framework.rb +637 -0
  46. data/lib/http.rb +809 -0
  47. data/lib/issue.rb +302 -0
  48. data/lib/module.rb +4 -0
  49. data/lib/module/auditor.rb +455 -0
  50. data/lib/module/base.rb +188 -0
  51. data/lib/module/element_db.rb +158 -0
  52. data/lib/module/key_filler.rb +87 -0
  53. data/lib/module/manager.rb +87 -0
  54. data/lib/module/output.rb +68 -0
  55. data/lib/module/trainer.rb +240 -0
  56. data/lib/module/utilities.rb +110 -0
  57. data/lib/options.rb +547 -0
  58. data/lib/parser.rb +2 -0
  59. data/lib/parser/auditable.rb +522 -0
  60. data/lib/parser/elements.rb +296 -0
  61. data/lib/parser/page.rb +149 -0
  62. data/lib/parser/parser.rb +717 -0
  63. data/lib/plugin.rb +4 -0
  64. data/lib/plugin/base.rb +110 -0
  65. data/lib/plugin/manager.rb +162 -0
  66. data/lib/report.rb +4 -0
  67. data/lib/report/base.rb +119 -0
  68. data/lib/report/manager.rb +92 -0
  69. data/lib/rpc/xml/client/base.rb +71 -0
  70. data/lib/rpc/xml/client/dispatcher.rb +49 -0
  71. data/lib/rpc/xml/client/instance.rb +88 -0
  72. data/lib/rpc/xml/server/base.rb +90 -0
  73. data/lib/rpc/xml/server/dispatcher.rb +357 -0
  74. data/lib/rpc/xml/server/framework.rb +206 -0
  75. data/lib/rpc/xml/server/instance.rb +191 -0
  76. data/lib/rpc/xml/server/module/manager.rb +46 -0
  77. data/lib/rpc/xml/server/options.rb +124 -0
  78. data/lib/rpc/xml/server/output.rb +299 -0
  79. data/lib/rpc/xml/server/plugin/manager.rb +58 -0
  80. data/lib/ruby.rb +5 -0
  81. data/lib/ruby/object.rb +32 -0
  82. data/lib/ruby/string.rb +74 -0
  83. data/lib/ruby/xmlrpc/server.rb +27 -0
  84. data/lib/spider.rb +200 -0
  85. data/lib/typhoeus/request.rb +91 -0
  86. data/lib/typhoeus/response.rb +34 -0
  87. data/lib/ui/cli/cli.rb +744 -0
  88. data/lib/ui/cli/output.rb +279 -0
  89. data/lib/ui/web/log.rb +82 -0
  90. data/lib/ui/web/output_stream.rb +94 -0
  91. data/lib/ui/web/report_manager.rb +222 -0
  92. data/lib/ui/web/server.rb +903 -0
  93. data/lib/ui/web/server/db/placeholder +0 -0
  94. data/lib/ui/web/server/public/banner.png +0 -0
  95. data/lib/ui/web/server/public/bodybg-small.png +0 -0
  96. data/lib/ui/web/server/public/bodybg.png +0 -0
  97. data/lib/ui/web/server/public/css/smoothness/images/pbar-ani.gif +0 -0
  98. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_flat_0_aaaaaa_40x100.png +0 -0
  99. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_flat_75_ffffff_40x100.png +0 -0
  100. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_55_fbf9ee_1x400.png +0 -0
  101. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_65_ffffff_1x400.png +0 -0
  102. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_75_dadada_1x400.png +0 -0
  103. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_75_e6e6e6_1x400.png +0 -0
  104. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_95_fef1ec_1x400.png +0 -0
  105. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_highlight-soft_75_cccccc_1x100.png +0 -0
  106. data/lib/ui/web/server/public/css/smoothness/images/ui-icons_222222_256x240.png +0 -0
  107. data/lib/ui/web/server/public/css/smoothness/images/ui-icons_2e83ff_256x240.png +0 -0
  108. data/lib/ui/web/server/public/css/smoothness/images/ui-icons_454545_256x240.png +0 -0
  109. data/lib/ui/web/server/public/css/smoothness/images/ui-icons_888888_256x240.png +0 -0
  110. data/lib/ui/web/server/public/css/smoothness/images/ui-icons_cd0a0a_256x240.png +0 -0
  111. data/lib/ui/web/server/public/css/smoothness/jquery-ui-1.8.9.custom.css +573 -0
  112. data/lib/ui/web/server/public/favicon.ico +0 -0
  113. data/lib/ui/web/server/public/footer.jpg +0 -0
  114. data/lib/ui/web/server/public/icons/error.png +0 -0
  115. data/lib/ui/web/server/public/icons/info.png +0 -0
  116. data/lib/ui/web/server/public/icons/ok.png +0 -0
  117. data/lib/ui/web/server/public/icons/status.png +0 -0
  118. data/lib/ui/web/server/public/js/jquery-1.4.4.min.js +167 -0
  119. data/lib/ui/web/server/public/js/jquery-ui-1.8.9.custom.min.js +781 -0
  120. data/lib/ui/web/server/public/logo.png +0 -0
  121. data/lib/ui/web/server/public/nav-left.jpg +0 -0
  122. data/lib/ui/web/server/public/nav-right.jpg +0 -0
  123. data/lib/ui/web/server/public/nav-selected-left.jpg +0 -0
  124. data/lib/ui/web/server/public/nav-selected-right.jpg +0 -0
  125. data/lib/ui/web/server/public/reports/placeholder +1 -0
  126. data/lib/ui/web/server/public/sidebar-bottom.jpg +0 -0
  127. data/lib/ui/web/server/public/sidebar-h4.jpg +0 -0
  128. data/lib/ui/web/server/public/sidebar-top.jpg +0 -0
  129. data/lib/ui/web/server/public/spider.png +0 -0
  130. data/lib/ui/web/server/public/style.css +604 -0
  131. data/lib/ui/web/server/tmp/placeholder +0 -0
  132. data/lib/ui/web/server/views/dispatcher.erb +85 -0
  133. data/lib/ui/web/server/views/dispatcher_error.erb +14 -0
  134. data/lib/ui/web/server/views/error.erb +1 -0
  135. data/lib/ui/web/server/views/flash.erb +18 -0
  136. data/lib/ui/web/server/views/home.erb +14 -0
  137. data/lib/ui/web/server/views/instance.erb +213 -0
  138. data/lib/ui/web/server/views/layout.erb +95 -0
  139. data/lib/ui/web/server/views/log.erb +40 -0
  140. data/lib/ui/web/server/views/modules.erb +71 -0
  141. data/lib/ui/web/server/views/options.erb +23 -0
  142. data/lib/ui/web/server/views/output_results.erb +51 -0
  143. data/lib/ui/web/server/views/plugins.erb +42 -0
  144. data/lib/ui/web/server/views/report_formats.erb +30 -0
  145. data/lib/ui/web/server/views/reports.erb +55 -0
  146. data/lib/ui/web/server/views/settings.erb +120 -0
  147. data/lib/ui/web/server/views/welcome.erb +38 -0
  148. data/lib/ui/xmlrpc/dispatcher_monitor.rb +204 -0
  149. data/lib/ui/xmlrpc/xmlrpc.rb +843 -0
  150. data/logs/placeholder +0 -0
  151. data/metamodules/autothrottle.rb +74 -0
  152. data/metamodules/timeout_notice.rb +118 -0
  153. data/metamodules/uniformity.rb +98 -0
  154. data/modules/audit/code_injection.rb +136 -0
  155. data/modules/audit/code_injection_timing.rb +115 -0
  156. data/modules/audit/code_injection_timing/payloads.txt +4 -0
  157. data/modules/audit/csrf.rb +301 -0
  158. data/modules/audit/ldapi.rb +103 -0
  159. data/modules/audit/ldapi/errors.txt +26 -0
  160. data/modules/audit/os_cmd_injection.rb +103 -0
  161. data/modules/audit/os_cmd_injection/payloads.txt +2 -0
  162. data/modules/audit/os_cmd_injection_timing.rb +104 -0
  163. data/modules/audit/os_cmd_injection_timing/payloads.txt +3 -0
  164. data/modules/audit/path_traversal.rb +141 -0
  165. data/modules/audit/response_splitting.rb +105 -0
  166. data/modules/audit/rfi.rb +193 -0
  167. data/modules/audit/sqli.rb +120 -0
  168. data/modules/audit/sqli/regexp_ids.txt +90 -0
  169. data/modules/audit/sqli_blind_rdiff.rb +321 -0
  170. data/modules/audit/sqli_blind_timing.rb +103 -0
  171. data/modules/audit/sqli_blind_timing/payloads.txt +51 -0
  172. data/modules/audit/trainer.rb +89 -0
  173. data/modules/audit/unvalidated_redirect.rb +90 -0
  174. data/modules/audit/xpath.rb +104 -0
  175. data/modules/audit/xpath/errors.txt +26 -0
  176. data/modules/audit/xss.rb +99 -0
  177. data/modules/audit/xss_event.rb +134 -0
  178. data/modules/audit/xss_path.rb +125 -0
  179. data/modules/audit/xss_script_tag.rb +112 -0
  180. data/modules/audit/xss_tag.rb +112 -0
  181. data/modules/audit/xss_uri.rb +125 -0
  182. data/modules/recon/allowed_methods.rb +104 -0
  183. data/modules/recon/backdoors.rb +131 -0
  184. data/modules/recon/backdoors/filenames.txt +16 -0
  185. data/modules/recon/backup_files.rb +177 -0
  186. data/modules/recon/backup_files/extensions.txt +28 -0
  187. data/modules/recon/common_directories.rb +138 -0
  188. data/modules/recon/common_directories/directories.txt +265 -0
  189. data/modules/recon/common_files.rb +138 -0
  190. data/modules/recon/common_files/filenames.txt +17 -0
  191. data/modules/recon/directory_listing.rb +171 -0
  192. data/modules/recon/grep/captcha.rb +62 -0
  193. data/modules/recon/grep/credit_card.rb +85 -0
  194. data/modules/recon/grep/cvs_svn_users.rb +73 -0
  195. data/modules/recon/grep/emails.rb +59 -0
  196. data/modules/recon/grep/html_objects.rb +53 -0
  197. data/modules/recon/grep/private_ip.rb +54 -0
  198. data/modules/recon/grep/ssn.rb +53 -0
  199. data/modules/recon/htaccess_limit.rb +82 -0
  200. data/modules/recon/http_put.rb +95 -0
  201. data/modules/recon/interesting_responses.rb +118 -0
  202. data/modules/recon/unencrypted_password_forms.rb +119 -0
  203. data/modules/recon/webdav.rb +126 -0
  204. data/modules/recon/xst.rb +107 -0
  205. data/path_extractors/anchors.rb +35 -0
  206. data/path_extractors/forms.rb +35 -0
  207. data/path_extractors/frames.rb +38 -0
  208. data/path_extractors/generic.rb +39 -0
  209. data/path_extractors/links.rb +35 -0
  210. data/path_extractors/meta_refresh.rb +39 -0
  211. data/path_extractors/scripts.rb +37 -0
  212. data/path_extractors/sitemap.rb +31 -0
  213. data/plugins/autologin.rb +137 -0
  214. data/plugins/content_types.rb +90 -0
  215. data/plugins/cookie_collector.rb +99 -0
  216. data/plugins/form_dicattack.rb +185 -0
  217. data/plugins/healthmap.rb +94 -0
  218. data/plugins/http_dicattack.rb +133 -0
  219. data/plugins/metamodules.rb +118 -0
  220. data/plugins/proxy.rb +248 -0
  221. data/plugins/proxy/server.rb +66 -0
  222. data/plugins/waf_detector.rb +184 -0
  223. data/profiles/comprehensive.afp +74 -0
  224. data/profiles/full.afp +75 -0
  225. data/reports/afr.rb +59 -0
  226. data/reports/ap.rb +55 -0
  227. data/reports/html.rb +179 -0
  228. data/reports/html/default.erb +967 -0
  229. data/reports/metareport.rb +139 -0
  230. data/reports/metareport/arachni_metareport.rb +174 -0
  231. data/reports/plugin_formatters/html/content_types.rb +82 -0
  232. data/reports/plugin_formatters/html/cookie_collector.rb +66 -0
  233. data/reports/plugin_formatters/html/form_dicattack.rb +54 -0
  234. data/reports/plugin_formatters/html/healthmap.rb +76 -0
  235. data/reports/plugin_formatters/html/http_dicattack.rb +54 -0
  236. data/reports/plugin_formatters/html/metaformatters/timeout_notice.rb +65 -0
  237. data/reports/plugin_formatters/html/metaformatters/uniformity.rb +71 -0
  238. data/reports/plugin_formatters/html/metamodules.rb +93 -0
  239. data/reports/plugin_formatters/html/waf_detector.rb +54 -0
  240. data/reports/plugin_formatters/stdout/content_types.rb +73 -0
  241. data/reports/plugin_formatters/stdout/cookie_collector.rb +61 -0
  242. data/reports/plugin_formatters/stdout/form_dicattack.rb +52 -0
  243. data/reports/plugin_formatters/stdout/healthmap.rb +72 -0
  244. data/reports/plugin_formatters/stdout/http_dicattack.rb +53 -0
  245. data/reports/plugin_formatters/stdout/metaformatters/timeout_notice.rb +55 -0
  246. data/reports/plugin_formatters/stdout/metaformatters/uniformity.rb +68 -0
  247. data/reports/plugin_formatters/stdout/metamodules.rb +89 -0
  248. data/reports/plugin_formatters/stdout/waf_detector.rb +48 -0
  249. data/reports/plugin_formatters/xml/content_types.rb +91 -0
  250. data/reports/plugin_formatters/xml/cookie_collector.rb +70 -0
  251. data/reports/plugin_formatters/xml/form_dicattack.rb +57 -0
  252. data/reports/plugin_formatters/xml/healthmap.rb +82 -0
  253. data/reports/plugin_formatters/xml/http_dicattack.rb +57 -0
  254. data/reports/plugin_formatters/xml/metaformatters/timeout_notice.rb +67 -0
  255. data/reports/plugin_formatters/xml/metaformatters/uniformity.rb +82 -0
  256. data/reports/plugin_formatters/xml/metamodules.rb +91 -0
  257. data/reports/plugin_formatters/xml/waf_detector.rb +58 -0
  258. data/reports/stdout.rb +182 -0
  259. data/reports/txt.rb +77 -0
  260. data/reports/xml.rb +231 -0
  261. data/reports/xml/buffer.rb +98 -0
  262. metadata +516 -0
@@ -0,0 +1,5 @@
1
module Anemone
  # Error type raised by the Anemone crawler code.
  #
  # An instance can carry another exception object in +wrapped_exception+
  # so the underlying cause can travel with it.
  class Error < ::StandardError
    # Reader and writer for the exception this error wraps.
    attr_reader :wrapped_exception
    attr_writer :wrapped_exception
  end
end
@@ -0,0 +1,144 @@
1
+ =begin
2
+ Arachni
3
+ Copyright (c) 2010-2011 Tasos "Zapotek" Laskos <tasos.laskos@gmail.com>
4
+
5
+ This is free software; you can copy and distribute and modify
6
+ this program under the term of the GPL v2.0 License
7
+ (See LICENSE file for details)
8
+
9
+ =end
10
+
11
+ require Arachni::Options.instance.dir['lib'] + 'anemone/page'
12
+ require Arachni::Options.instance.dir['lib'] + 'anemone/cookie_store'
13
+
14
+
15
+ #
16
+ # Overrides Anemone's HTTP class methods:
17
+ # o refresh_connection( ): added proxy support
18
+ # o get_response( ): upped the retry counter to 7 and generalized exception handling
19
+ #
20
+ # @author: Tasos "Zapotek" Laskos
21
+ # <tasos.laskos@gmail.com>
22
+ # <zapotek@segfault.gr>
23
+ # @version: 0.1.1
24
+ #
25
+ module Anemone
26
+
27
+ class HTTP
28
+
29
+ include Arachni::UI::Output
30
+
31
+ # Maximum number of redirects to follow on each get_response
32
+ REDIRECT_LIMIT = 5
33
+
34
+ # CookieStore for this HTTP client
35
+ attr_reader :cookie_store
36
+
37
def initialize(opts = {})
  # Keeps the option hash, starts with an empty connection table and builds
  # the cookie store from the :cookies option.
  @opts         = opts
  @connections  = {}
  @cookie_store = CookieStore.new(opts[:cookies])
end
42
+
43
+ #
44
+ # Fetch a single Page from the response of an HTTP request to *url*.
45
+ # Just gets the final destination page.
46
+ #
47
def fetch_page(url, referer = nil, depth = nil)
  # Delegates to #fetch_pages and keeps only the last page it produced.
  pages = fetch_pages(url, referer, depth)
  pages.last
end
50
+
51
+ #
52
+ # Create new Pages from the response of an HTTP request to *url*,
53
+ # including redirects
54
+ #
55
def fetch_pages(url, referer = nil, depth = nil)
  # Builds one Page per response yielded by #get and returns them as an Array.
  # Any StandardError raised along the way is turned into a single-element
  # Array holding a Page that carries the exception under :error.
  url = URI(url) unless url.is_a?(URI)
  collected = []

  get(url, referer) do |response, code, location, redirect_to, response_time|
    page_params = {
      :body          => response.body.dup,
      :code          => code,
      :headers       => response.headers_hash,
      :referer       => referer,
      :depth         => depth,
      :redirect_to   => redirect_to,
      :response_time => response_time
    }
    collected << Page.new(location, page_params)
  end

  collected
rescue => e
  # Only dump the exception details when the :verbose option is set.
  if verbose?
    puts e.inspect
    puts e.backtrace
  end
  [Page.new(url, :error => e)]
end
78
+
79
+ #
80
+ # The maximum number of redirects to follow
81
+ #
82
def redirect_limit
  # The :redirect_limit option wins when it is truthy; otherwise the
  # REDIRECT_LIMIT constant is used.
  configured = @opts[:redirect_limit]
  configured ? configured : REDIRECT_LIMIT
end
85
+
86
+ #
87
+ # The user-agent string which will be sent with each request,
88
+ # or nil if no such option is set
89
+ #
90
def user_agent
  # Plain lookup of the :user_agent option; nil when it was never set.
  agent = @opts[:user_agent]
  agent
end
93
+
94
+ #
95
+ # Does this HTTP client accept cookies from the server?
96
+ #
97
def accept_cookies?
  # Returns the raw :accept_cookies option value (not coerced to a boolean).
  setting = @opts[:accept_cookies]
  setting
end
100
+
101
+ private
102
+
103
+ #
104
+ # Retrieve HTTP responses for *url*, including redirects.
105
+ # Yields the response object, response code, and URI location
106
+ # for each response.
107
+ #
108
def get(url, referer = nil)
  # Performs a single #get_response call and yields, in order: the response,
  # its code, the requested url, an empty redirect target and the response
  # time.
  res = get_response(url, referer)
  yield(res, res.code, url, '', res.time)
end
112
+
113
+ #
114
+ # Get an HTTPResponse for *url*, sending the appropriate User-Agent string
115
+ #
116
def get_response(url, referer = nil)
  # Issues a synchronous GET through Arachni::HTTP (following redirects,
  # with :remove_id set) and returns the response of that request.
  # A Referer header is sent only when a referer was given.
  headers = referer ? { 'Referer' => referer.to_s } : {}

  request = Arachni::HTTP.instance.get(
    url.to_s,
    :headers         => headers,
    :follow_location => true,
    :async           => false,
    :remove_id       => true
  )

  request.response
end
129
+
130
+
131
def verbose?
  # Returns the raw :verbose option value.
  flag = @opts[:verbose]
  flag
end
134
+
135
+ #
136
+ # Allowed to connect to the requested url?
137
+ #
138
def allowed?(to_url, from_url)
  # A target without a host is always allowed; otherwise the target host
  # has to equal the host of the originating URL.
  return true if to_url.host.nil?

  to_url.host == from_url.host
end
141
+
142
+
143
+ end
144
+ end
@@ -0,0 +1,337 @@
1
+ =begin
2
+ Arachni
3
+ Copyright (c) 2010-2011 Tasos "Zapotek" Laskos <tasos.laskos@gmail.com>
4
+
5
+ This is free software; you can copy and distribute and modify
6
+ this program under the term of the GPL v2.0 License
7
+ (See LICENSE file for details)
8
+
9
+ =end
10
+
11
+ require 'nokogiri'
12
+ require 'ostruct'
13
+ require 'webrick/cookie'
14
+
15
+ #
16
+ # Overrides Anemone's Page class methods:<br/>
17
+ # o in_domain?( uri ): adding support for subdomain crawling<br/>
18
+ # o links(): adding support for frame and iframe src URLs<br/>
19
+ #
20
+ # @author: Tasos "Zapotek" Laskos
21
+ # <tasos.laskos@gmail.com>
22
+ # <zapotek@segfault.gr>
23
+ # @version: 0.1
24
+ #
25
+ module Anemone
26
+
27
module Extractors
  #
  # Abstract base class for path-extractor components.
  #
  # An extractor pulls paths out of a web page so that the Spider has
  # something to follow.
  #
  # @author: Tasos "Zapotek" Laskos
  #                                      <tasos.laskos@gmail.com>
  #                                      <zapotek@segfault.gr>
  # @version: 0.1
  # @abstract
  #
  class Paths

    #
    # To be implemented by every extractor; implementations must return
    # an array of paths as plain strings. This base version does nothing
    # and returns nil.
    #
    # @param    [Nokogiri]  doc   Nokogiri document
    #
    # @return   [Array<String>]  paths
    #
    def parse( doc )
      nil
    end
  end
end
55
+
56
+ class Page
57
+
58
+ include Arachni::UI::Output
59
+
60
+ # The URL of the page
61
+ attr_reader :url
62
+ # The raw HTTP response body of the page
63
+ attr_reader :body
64
+ # Headers of the HTTP response
65
+ attr_reader :headers
66
+ # URL of the page this one redirected to, if any
67
+ attr_reader :redirect_to
68
+ # Exception object, if one was raised during HTTP#fetch_page
69
+ attr_reader :error
70
+
71
+ # OpenStruct for user-stored data
72
+ attr_accessor :data
73
+ # Integer response code of the page
74
+ attr_accessor :code
75
+ # Boolean indicating whether or not this page has been visited in PageStore#shortest_paths!
76
+ attr_accessor :visited
77
+ # Depth of this page from the root of the crawl. This is not necessarily the
78
+ # shortest path; use PageStore#shortest_paths! to find that value.
79
+ attr_accessor :depth
80
+ # URL of the page that brought us to this page
81
+ attr_accessor :referer
82
+ # Response time of the request for this page in milliseconds
83
+ attr_accessor :response_time
84
+
85
+ #
86
+ # Create a new page
87
+ #
88
def initialize(url, params = {})
  # Populates the page from +url+ and the optional +params+ hash
  # (:code, :headers, :aka, :referer, :depth, :redirect_to,
  # :response_time, :body, :error).
  @url  = url
  @data = OpenStruct.new

  # Note: a missing 'content-type' entry is written into the headers hash
  # that was passed in.
  @headers = params[:headers] || {}
  @headers['content-type'] ||= ['']

  @code    = params[:code]
  @aliases = Array(params[:aka]).compact
  @referer = params[:referer]
  @depth   = params[:depth] || 0

  # Keep this ahead of the @body assignment: #to_absolute goes through
  # #base and #doc, which read @body, so the order is observable.
  @redirect_to   = to_absolute(params[:redirect_to])
  @response_time = params[:response_time]
  @body          = params[:body]
  @error         = params[:error]

  # A page counts as fetched as soon as a response code was supplied.
  @fetched = !@code.nil?
end
105
+
106
+ #
107
+ # Runs all Spider (path extraction) modules and returns an array of paths
108
+ #
109
+ # @return [Array] paths
110
+ #
111
def run_modules
  # Runs every available path-extractor component against #doc and returns
  # the flattened, de-duplicated list of paths they produced.
  #
  # Always returns an Array: if an extractor (or loading one) fails, the
  # error is reported through print_error / print_debug_backtrace and an
  # empty Array is returned, so callers can iterate the result safely.
  #
  # @return   [Array]   paths
  opts = Arachni::Options.instance
  require opts.dir['lib'] + 'component_manager'

  lib = opts.dir['root'] + 'path_extractors/'

  begin
    # The manager is shared through a class variable and built on first use.
    @@manager ||= ::Arachni::ComponentManager.new( lib, Extractors )

    @@manager.available.map { |name|
      @@manager[name].new.run( doc )
    }.flatten.uniq

  # StandardError covers runtime failures, ScriptError covers LoadError and
  # SyntaxError; signals and SystemExit are deliberately left to propagate.
  rescue ::StandardError, ::ScriptError => e
    print_error( e.to_s )
    print_debug_backtrace( e )
    []
  end
end
131
+
132
def dir( url )
  # Returns a URI made of the parent directory of +url+'s path plus a
  # trailing slash.
  path   = URI( url.to_s ).path
  parent = File.dirname( path )
  URI( parent + '/' )
end
135
+
136
+ #
137
+ # Array of distinct links to follow
138
+ #
139
+ # @return [Array<URI>]
140
+ #
141
def links
  # Array of distinct, in-domain links found on this page, as absolute
  # URIs. The result is memoized in @links; a page without a document
  # yields an empty Array.
  #
  # @return   [Array<URI>]
  return @links unless @links.nil?
  @links = []
  return @links unless doc

  # #run_modules may hand back a non-Array when it had to rescue an error;
  # treat that as "no paths" instead of failing on #each.
  paths = run_modules
  paths = [] unless paths.is_a?( Array )

  paths.each do |path|
    next if path.nil? || path.empty?

    # paths that cannot be parsed or resolved are skipped
    begin
      abs = to_absolute( URI( path ) )
    rescue
      next
    end

    @links << abs if in_domain?( abs )
  end

  @links.uniq!
  @links
end
162
+
163
+ #
164
+ # Nokogiri document for the HTML body
165
+ #
166
def doc
  # Nokogiri document for the body, parsed on first use and cached in @doc.
  # Returns nil when the content type is a String that does not contain
  # 'text', when there is no body, or when parsing raises.
  content = Arachni::HTTP.content_type( @headers )
  return if content.is_a?( String ) && !content.substring?( 'text' )

  return @doc if @doc
  return nil unless @body

  begin
    @doc = Nokogiri::HTML( @body )
  rescue
    nil
  end
end
173
+
174
+ #
175
+ # Delete the Nokogiri document and response body to conserve memory
176
+ #
177
def discard_doc!
  # Calls #links first so the links are extracted before the document
  # goes away, then clears both the body and the parsed document.
  links
  @body = nil
  @doc  = nil
end
181
+
182
+ #
183
+ # Was the page successfully fetched?
184
+ # +true+ if the page was fetched with no error, +false+ otherwise.
185
+ #
186
def fetched?
  # Returns the @fetched flag as stored.
  return @fetched
end
189
+
190
+ #
191
+ # Array of cookies received with this page as WEBrick::Cookie objects.
192
+ #
193
def cookies
  # Parses the 'Set-Cookie' header with WEBrick::Cookie; any StandardError
  # raised while doing so results in an empty Array.
  begin
    WEBrick::Cookie.parse_set_cookies(@headers['Set-Cookie'])
  rescue
    []
  end
end
196
+
197
+ #
198
+ # The content-type returned by the HTTP request for this page
199
+ #
200
def content_type
  # First entry of the 'content-type' header value.
  declared = headers['content-type']
  declared.first
end
203
+
204
+ #
205
+ # Returns +true+ if the page is a HTML document, returns +false+
206
+ # otherwise.
207
+ #
208
def html?
  # +true+ when the content type starts with text/html or
  # application/xhtml+xml, +false+ otherwise.
  #
  # The '+' is escaped so it matches a literal plus sign (unescaped it is a
  # quantifier and the XHTML type never matched), and \A anchors at the very
  # start of the string rather than at the start of any line.
  !!(content_type =~ %r{\A(text/html|application/xhtml\+xml)\b})
end
211
+
212
+ #
213
+ # Returns +true+ if the page is a HTTP redirect, returns +false+
214
+ # otherwise.
215
+ #
216
def redirect?
  # +true+ when the response code is in the 300..308 range, +false+
  # otherwise (including when no code is set). The upper bound is 308 so
  # that "308 Permanent Redirect" is recognised as well.
  (300..308).include?(@code)
end
219
+
220
+ #
221
+ # Returns +true+ if the page was not found (returned 404 code),
222
+ # returns +false+ otherwise.
223
+ #
224
def not_found?
  # +true+ only for a 404 response code.
  @code == 404
end
227
+
228
+ #
229
+ # Converts relative URL *link* into an absolute URL based on the
230
+ # location of the page
231
+ #
232
def to_absolute(link)
  # Converts +link+ into an absolute URI, resolved against the page's
  # <base href> when #base returns one and against the page URL otherwise.
  # Returns nil for a nil link. An empty resulting path becomes '/'.
  return nil if link.nil?

  # URI.encode was removed in Ruby 3.0; the RFC 2396 parser's #escape is
  # the implementation it used to delegate to.
  parser = defined?( URI::RFC2396_PARSER ) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER

  # remove anchor
  link = parser.escape( link.to_s.gsub( /#[a-zA-Z0-9_-]*$/, '' ) )

  base_href = base
  base_url  = base_href ? URI( base_href ) : @url.dup

  absolute = base_url.merge( URI( link ) )
  absolute.path = '/' if absolute.path.empty?

  absolute
end
251
+
252
def base
  # href of the first <base href="..."> element in the document (as a
  # copy), or nil when it cannot be obtained for any reason.
  doc.search( '//base[@href]' )[0]['href'].dup
rescue
  nil
end
260
+
261
+ #
262
+ # Returns +true+ if *uri* is in the same domain as the page, returns
263
+ # +false+ otherwise.
264
+ #
265
+ # The added code enables optional subdomain crawling.
266
+ #
267
def in_domain?( uri )
  # Without the follow_subdomains option the hosts must be equal; with it,
  # only the values returned by #extract_domain are compared.
  unless Arachni::Options.instance.follow_subdomains
    return uri.host == @url.host
  end

  extract_domain( uri ) == extract_domain( @url )
end
274
+
275
+ #
276
+ # Extracts the domain from a URI object
277
+ #
278
+ # @param [URI] url
279
+ #
280
+ # @return [String]
281
+ #
282
def extract_domain( url )
  # Returns the last two dot-separated labels of the URI's host
  # (e.g. 'www.example.com' => 'example.com').
  #
  # A single-label host (e.g. 'localhost') is returned as that label, so two
  # different single-label hosts no longer compare equal. A URI without a
  # host still yields +false+, as before.
  #
  # NOTE(review): multi-part public suffixes such as 'co.uk' are not
  # handled; 'a.co.uk' and 'b.co.uk' both map to 'co.uk'.
  #
  # @param   [URI]  url
  #
  # @return  [String, false]
  host = url.host
  return false unless host

  labels = host.split( '.' )
  return labels.first if labels.length == 1

  labels.last( 2 ).join( '.' )
end
292
+
293
+
294
def marshal_dump
  # State handed to Marshal, as an Array in this fixed order (it must match
  # the order #marshal_load unpacks).
  [
    :@url, :@headers, :@data, :@body, :@links, :@code, :@visited,
    :@depth, :@referer, :@redirect_to, :@response_time, :@fetched
  ].map { |ivar| instance_variable_get( ivar ) }
end
297
+
298
def marshal_load(ary)
  # Restores the instance variables from the Array built by #marshal_dump,
  # position by position.
  @url, @headers, @data, @body,
  @links, @code, @visited, @depth,
  @referer, @redirect_to, @response_time, @fetched = ary
end
301
+
302
def to_hash
  # Flattens the page into a Hash with String keys. Headers and user data
  # are Marshal-dumped; URL-like values are stringified. Calls #links, so
  # link extraction happens here if it has not happened yet.
  serialized = {}
  serialized['url']           = @url.to_s
  serialized['headers']       = Marshal.dump(@headers)
  serialized['data']          = Marshal.dump(@data)
  serialized['body']          = @body
  serialized['links']         = links.map { |link| link.to_s }
  serialized['code']          = @code
  serialized['visited']       = @visited
  serialized['depth']         = @depth
  serialized['referer']       = @referer.to_s
  serialized['redirect_to']   = @redirect_to.to_s
  serialized['response_time'] = @response_time
  serialized['fetched']       = @fetched
  serialized
end
316
+
317
def self.from_hash(hash)
  # Rebuilds a page from the Hash produced by #to_hash.
  #
  # An empty or missing 'redirect_to' is restored as nil instead of an
  # empty URI, mirroring a page that never redirected (#to_hash writes ''
  # for a nil redirect target).
  #
  # NOTE(review): 'headers' and 'data' go through Marshal.load, which must
  # only ever be fed data from a trusted store.
  page = self.new(URI(hash['url']))

  redirect_to = hash['redirect_to'].to_s

  {
    '@headers'       => Marshal.load(hash['headers']),
    '@data'          => Marshal.load(hash['data']),
    '@body'          => hash['body'],
    '@links'         => hash['links'].map { |link| URI(link) },
    '@code'          => hash['code'].to_i,
    '@visited'       => hash['visited'],
    '@depth'         => hash['depth'].to_i,
    '@referer'       => hash['referer'],
    '@redirect_to'   => redirect_to.empty? ? nil : URI(redirect_to),
    '@response_time' => hash['response_time'].to_i,
    '@fetched'       => hash['fetched']
  }.each do |var, value|
    page.instance_variable_set(var, value)
  end

  page
end
335
+
336
+ end
337
+ end