arachni 0.2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (262) hide show
  1. data/ACKNOWLEDGMENTS.md +14 -0
  2. data/AUTHORS.md +6 -0
  3. data/CHANGELOG.md +162 -0
  4. data/CONTRIBUTORS.md +10 -0
  5. data/EXPLOITATION.md +429 -0
  6. data/HACKING.md +101 -0
  7. data/LICENSE.md +341 -0
  8. data/README.md +350 -0
  9. data/Rakefile +86 -0
  10. data/bin/arachni +22 -0
  11. data/bin/arachni_web +77 -0
  12. data/bin/arachni_xmlrpc +21 -0
  13. data/bin/arachni_xmlrpcd +82 -0
  14. data/bin/arachni_xmlrpcd_monitor +74 -0
  15. data/conf/README.webui.yaml.txt +44 -0
  16. data/conf/webui.yaml +11 -0
  17. data/external/metasploit/LICENSE +24 -0
  18. data/external/metasploit/modules/exploits/unix/webapp/arachni_exec.rb +142 -0
  19. data/external/metasploit/modules/exploits/unix/webapp/arachni_path_traversal.rb +113 -0
  20. data/external/metasploit/modules/exploits/unix/webapp/arachni_php_eval.rb +150 -0
  21. data/external/metasploit/modules/exploits/unix/webapp/arachni_php_include.rb +141 -0
  22. data/external/metasploit/modules/exploits/unix/webapp/arachni_sqlmap.rb +92 -0
  23. data/external/metasploit/plugins/arachni.rb +536 -0
  24. data/getoptslong.rb +241 -0
  25. data/lib/anemone.rb +2 -0
  26. data/lib/anemone/cookie_store.rb +35 -0
  27. data/lib/anemone/core.rb +371 -0
  28. data/lib/anemone/exceptions.rb +5 -0
  29. data/lib/anemone/http.rb +144 -0
  30. data/lib/anemone/page.rb +337 -0
  31. data/lib/anemone/page_store.rb +160 -0
  32. data/lib/anemone/storage.rb +34 -0
  33. data/lib/anemone/storage/base.rb +75 -0
  34. data/lib/anemone/storage/exceptions.rb +15 -0
  35. data/lib/anemone/storage/mongodb.rb +89 -0
  36. data/lib/anemone/storage/pstore.rb +50 -0
  37. data/lib/anemone/storage/redis.rb +90 -0
  38. data/lib/anemone/storage/tokyo_cabinet.rb +57 -0
  39. data/lib/anemone/tentacle.rb +40 -0
  40. data/lib/arachni.rb +16 -0
  41. data/lib/audit_store.rb +346 -0
  42. data/lib/component_manager.rb +293 -0
  43. data/lib/component_options.rb +395 -0
  44. data/lib/exceptions.rb +76 -0
  45. data/lib/framework.rb +637 -0
  46. data/lib/http.rb +809 -0
  47. data/lib/issue.rb +302 -0
  48. data/lib/module.rb +4 -0
  49. data/lib/module/auditor.rb +455 -0
  50. data/lib/module/base.rb +188 -0
  51. data/lib/module/element_db.rb +158 -0
  52. data/lib/module/key_filler.rb +87 -0
  53. data/lib/module/manager.rb +87 -0
  54. data/lib/module/output.rb +68 -0
  55. data/lib/module/trainer.rb +240 -0
  56. data/lib/module/utilities.rb +110 -0
  57. data/lib/options.rb +547 -0
  58. data/lib/parser.rb +2 -0
  59. data/lib/parser/auditable.rb +522 -0
  60. data/lib/parser/elements.rb +296 -0
  61. data/lib/parser/page.rb +149 -0
  62. data/lib/parser/parser.rb +717 -0
  63. data/lib/plugin.rb +4 -0
  64. data/lib/plugin/base.rb +110 -0
  65. data/lib/plugin/manager.rb +162 -0
  66. data/lib/report.rb +4 -0
  67. data/lib/report/base.rb +119 -0
  68. data/lib/report/manager.rb +92 -0
  69. data/lib/rpc/xml/client/base.rb +71 -0
  70. data/lib/rpc/xml/client/dispatcher.rb +49 -0
  71. data/lib/rpc/xml/client/instance.rb +88 -0
  72. data/lib/rpc/xml/server/base.rb +90 -0
  73. data/lib/rpc/xml/server/dispatcher.rb +357 -0
  74. data/lib/rpc/xml/server/framework.rb +206 -0
  75. data/lib/rpc/xml/server/instance.rb +191 -0
  76. data/lib/rpc/xml/server/module/manager.rb +46 -0
  77. data/lib/rpc/xml/server/options.rb +124 -0
  78. data/lib/rpc/xml/server/output.rb +299 -0
  79. data/lib/rpc/xml/server/plugin/manager.rb +58 -0
  80. data/lib/ruby.rb +5 -0
  81. data/lib/ruby/object.rb +32 -0
  82. data/lib/ruby/string.rb +74 -0
  83. data/lib/ruby/xmlrpc/server.rb +27 -0
  84. data/lib/spider.rb +200 -0
  85. data/lib/typhoeus/request.rb +91 -0
  86. data/lib/typhoeus/response.rb +34 -0
  87. data/lib/ui/cli/cli.rb +744 -0
  88. data/lib/ui/cli/output.rb +279 -0
  89. data/lib/ui/web/log.rb +82 -0
  90. data/lib/ui/web/output_stream.rb +94 -0
  91. data/lib/ui/web/report_manager.rb +222 -0
  92. data/lib/ui/web/server.rb +903 -0
  93. data/lib/ui/web/server/db/placeholder +0 -0
  94. data/lib/ui/web/server/public/banner.png +0 -0
  95. data/lib/ui/web/server/public/bodybg-small.png +0 -0
  96. data/lib/ui/web/server/public/bodybg.png +0 -0
  97. data/lib/ui/web/server/public/css/smoothness/images/pbar-ani.gif +0 -0
  98. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_flat_0_aaaaaa_40x100.png +0 -0
  99. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_flat_75_ffffff_40x100.png +0 -0
  100. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_55_fbf9ee_1x400.png +0 -0
  101. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_65_ffffff_1x400.png +0 -0
  102. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_75_dadada_1x400.png +0 -0
  103. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_75_e6e6e6_1x400.png +0 -0
  104. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_95_fef1ec_1x400.png +0 -0
  105. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_highlight-soft_75_cccccc_1x100.png +0 -0
  106. data/lib/ui/web/server/public/css/smoothness/images/ui-icons_222222_256x240.png +0 -0
  107. data/lib/ui/web/server/public/css/smoothness/images/ui-icons_2e83ff_256x240.png +0 -0
  108. data/lib/ui/web/server/public/css/smoothness/images/ui-icons_454545_256x240.png +0 -0
  109. data/lib/ui/web/server/public/css/smoothness/images/ui-icons_888888_256x240.png +0 -0
  110. data/lib/ui/web/server/public/css/smoothness/images/ui-icons_cd0a0a_256x240.png +0 -0
  111. data/lib/ui/web/server/public/css/smoothness/jquery-ui-1.8.9.custom.css +573 -0
  112. data/lib/ui/web/server/public/favicon.ico +0 -0
  113. data/lib/ui/web/server/public/footer.jpg +0 -0
  114. data/lib/ui/web/server/public/icons/error.png +0 -0
  115. data/lib/ui/web/server/public/icons/info.png +0 -0
  116. data/lib/ui/web/server/public/icons/ok.png +0 -0
  117. data/lib/ui/web/server/public/icons/status.png +0 -0
  118. data/lib/ui/web/server/public/js/jquery-1.4.4.min.js +167 -0
  119. data/lib/ui/web/server/public/js/jquery-ui-1.8.9.custom.min.js +781 -0
  120. data/lib/ui/web/server/public/logo.png +0 -0
  121. data/lib/ui/web/server/public/nav-left.jpg +0 -0
  122. data/lib/ui/web/server/public/nav-right.jpg +0 -0
  123. data/lib/ui/web/server/public/nav-selected-left.jpg +0 -0
  124. data/lib/ui/web/server/public/nav-selected-right.jpg +0 -0
  125. data/lib/ui/web/server/public/reports/placeholder +1 -0
  126. data/lib/ui/web/server/public/sidebar-bottom.jpg +0 -0
  127. data/lib/ui/web/server/public/sidebar-h4.jpg +0 -0
  128. data/lib/ui/web/server/public/sidebar-top.jpg +0 -0
  129. data/lib/ui/web/server/public/spider.png +0 -0
  130. data/lib/ui/web/server/public/style.css +604 -0
  131. data/lib/ui/web/server/tmp/placeholder +0 -0
  132. data/lib/ui/web/server/views/dispatcher.erb +85 -0
  133. data/lib/ui/web/server/views/dispatcher_error.erb +14 -0
  134. data/lib/ui/web/server/views/error.erb +1 -0
  135. data/lib/ui/web/server/views/flash.erb +18 -0
  136. data/lib/ui/web/server/views/home.erb +14 -0
  137. data/lib/ui/web/server/views/instance.erb +213 -0
  138. data/lib/ui/web/server/views/layout.erb +95 -0
  139. data/lib/ui/web/server/views/log.erb +40 -0
  140. data/lib/ui/web/server/views/modules.erb +71 -0
  141. data/lib/ui/web/server/views/options.erb +23 -0
  142. data/lib/ui/web/server/views/output_results.erb +51 -0
  143. data/lib/ui/web/server/views/plugins.erb +42 -0
  144. data/lib/ui/web/server/views/report_formats.erb +30 -0
  145. data/lib/ui/web/server/views/reports.erb +55 -0
  146. data/lib/ui/web/server/views/settings.erb +120 -0
  147. data/lib/ui/web/server/views/welcome.erb +38 -0
  148. data/lib/ui/xmlrpc/dispatcher_monitor.rb +204 -0
  149. data/lib/ui/xmlrpc/xmlrpc.rb +843 -0
  150. data/logs/placeholder +0 -0
  151. data/metamodules/autothrottle.rb +74 -0
  152. data/metamodules/timeout_notice.rb +118 -0
  153. data/metamodules/uniformity.rb +98 -0
  154. data/modules/audit/code_injection.rb +136 -0
  155. data/modules/audit/code_injection_timing.rb +115 -0
  156. data/modules/audit/code_injection_timing/payloads.txt +4 -0
  157. data/modules/audit/csrf.rb +301 -0
  158. data/modules/audit/ldapi.rb +103 -0
  159. data/modules/audit/ldapi/errors.txt +26 -0
  160. data/modules/audit/os_cmd_injection.rb +103 -0
  161. data/modules/audit/os_cmd_injection/payloads.txt +2 -0
  162. data/modules/audit/os_cmd_injection_timing.rb +104 -0
  163. data/modules/audit/os_cmd_injection_timing/payloads.txt +3 -0
  164. data/modules/audit/path_traversal.rb +141 -0
  165. data/modules/audit/response_splitting.rb +105 -0
  166. data/modules/audit/rfi.rb +193 -0
  167. data/modules/audit/sqli.rb +120 -0
  168. data/modules/audit/sqli/regexp_ids.txt +90 -0
  169. data/modules/audit/sqli_blind_rdiff.rb +321 -0
  170. data/modules/audit/sqli_blind_timing.rb +103 -0
  171. data/modules/audit/sqli_blind_timing/payloads.txt +51 -0
  172. data/modules/audit/trainer.rb +89 -0
  173. data/modules/audit/unvalidated_redirect.rb +90 -0
  174. data/modules/audit/xpath.rb +104 -0
  175. data/modules/audit/xpath/errors.txt +26 -0
  176. data/modules/audit/xss.rb +99 -0
  177. data/modules/audit/xss_event.rb +134 -0
  178. data/modules/audit/xss_path.rb +125 -0
  179. data/modules/audit/xss_script_tag.rb +112 -0
  180. data/modules/audit/xss_tag.rb +112 -0
  181. data/modules/audit/xss_uri.rb +125 -0
  182. data/modules/recon/allowed_methods.rb +104 -0
  183. data/modules/recon/backdoors.rb +131 -0
  184. data/modules/recon/backdoors/filenames.txt +16 -0
  185. data/modules/recon/backup_files.rb +177 -0
  186. data/modules/recon/backup_files/extensions.txt +28 -0
  187. data/modules/recon/common_directories.rb +138 -0
  188. data/modules/recon/common_directories/directories.txt +265 -0
  189. data/modules/recon/common_files.rb +138 -0
  190. data/modules/recon/common_files/filenames.txt +17 -0
  191. data/modules/recon/directory_listing.rb +171 -0
  192. data/modules/recon/grep/captcha.rb +62 -0
  193. data/modules/recon/grep/credit_card.rb +85 -0
  194. data/modules/recon/grep/cvs_svn_users.rb +73 -0
  195. data/modules/recon/grep/emails.rb +59 -0
  196. data/modules/recon/grep/html_objects.rb +53 -0
  197. data/modules/recon/grep/private_ip.rb +54 -0
  198. data/modules/recon/grep/ssn.rb +53 -0
  199. data/modules/recon/htaccess_limit.rb +82 -0
  200. data/modules/recon/http_put.rb +95 -0
  201. data/modules/recon/interesting_responses.rb +118 -0
  202. data/modules/recon/unencrypted_password_forms.rb +119 -0
  203. data/modules/recon/webdav.rb +126 -0
  204. data/modules/recon/xst.rb +107 -0
  205. data/path_extractors/anchors.rb +35 -0
  206. data/path_extractors/forms.rb +35 -0
  207. data/path_extractors/frames.rb +38 -0
  208. data/path_extractors/generic.rb +39 -0
  209. data/path_extractors/links.rb +35 -0
  210. data/path_extractors/meta_refresh.rb +39 -0
  211. data/path_extractors/scripts.rb +37 -0
  212. data/path_extractors/sitemap.rb +31 -0
  213. data/plugins/autologin.rb +137 -0
  214. data/plugins/content_types.rb +90 -0
  215. data/plugins/cookie_collector.rb +99 -0
  216. data/plugins/form_dicattack.rb +185 -0
  217. data/plugins/healthmap.rb +94 -0
  218. data/plugins/http_dicattack.rb +133 -0
  219. data/plugins/metamodules.rb +118 -0
  220. data/plugins/proxy.rb +248 -0
  221. data/plugins/proxy/server.rb +66 -0
  222. data/plugins/waf_detector.rb +184 -0
  223. data/profiles/comprehensive.afp +74 -0
  224. data/profiles/full.afp +75 -0
  225. data/reports/afr.rb +59 -0
  226. data/reports/ap.rb +55 -0
  227. data/reports/html.rb +179 -0
  228. data/reports/html/default.erb +967 -0
  229. data/reports/metareport.rb +139 -0
  230. data/reports/metareport/arachni_metareport.rb +174 -0
  231. data/reports/plugin_formatters/html/content_types.rb +82 -0
  232. data/reports/plugin_formatters/html/cookie_collector.rb +66 -0
  233. data/reports/plugin_formatters/html/form_dicattack.rb +54 -0
  234. data/reports/plugin_formatters/html/healthmap.rb +76 -0
  235. data/reports/plugin_formatters/html/http_dicattack.rb +54 -0
  236. data/reports/plugin_formatters/html/metaformatters/timeout_notice.rb +65 -0
  237. data/reports/plugin_formatters/html/metaformatters/uniformity.rb +71 -0
  238. data/reports/plugin_formatters/html/metamodules.rb +93 -0
  239. data/reports/plugin_formatters/html/waf_detector.rb +54 -0
  240. data/reports/plugin_formatters/stdout/content_types.rb +73 -0
  241. data/reports/plugin_formatters/stdout/cookie_collector.rb +61 -0
  242. data/reports/plugin_formatters/stdout/form_dicattack.rb +52 -0
  243. data/reports/plugin_formatters/stdout/healthmap.rb +72 -0
  244. data/reports/plugin_formatters/stdout/http_dicattack.rb +53 -0
  245. data/reports/plugin_formatters/stdout/metaformatters/timeout_notice.rb +55 -0
  246. data/reports/plugin_formatters/stdout/metaformatters/uniformity.rb +68 -0
  247. data/reports/plugin_formatters/stdout/metamodules.rb +89 -0
  248. data/reports/plugin_formatters/stdout/waf_detector.rb +48 -0
  249. data/reports/plugin_formatters/xml/content_types.rb +91 -0
  250. data/reports/plugin_formatters/xml/cookie_collector.rb +70 -0
  251. data/reports/plugin_formatters/xml/form_dicattack.rb +57 -0
  252. data/reports/plugin_formatters/xml/healthmap.rb +82 -0
  253. data/reports/plugin_formatters/xml/http_dicattack.rb +57 -0
  254. data/reports/plugin_formatters/xml/metaformatters/timeout_notice.rb +67 -0
  255. data/reports/plugin_formatters/xml/metaformatters/uniformity.rb +82 -0
  256. data/reports/plugin_formatters/xml/metamodules.rb +91 -0
  257. data/reports/plugin_formatters/xml/waf_detector.rb +58 -0
  258. data/reports/stdout.rb +182 -0
  259. data/reports/txt.rb +77 -0
  260. data/reports/xml.rb +231 -0
  261. data/reports/xml/buffer.rb +98 -0
  262. metadata +516 -0
@@ -0,0 +1,5 @@
1
+ module Anemone
2
+ class Error < ::StandardError
3
+ attr_accessor :wrapped_exception
4
+ end
5
+ end
@@ -0,0 +1,144 @@
1
+ =begin
2
+ Arachni
3
+ Copyright (c) 2010-2011 Tasos "Zapotek" Laskos <tasos.laskos@gmail.com>
4
+
5
+ This is free software; you can copy and distribute and modify
6
+ this program under the term of the GPL v2.0 License
7
+ (See LICENSE file for details)
8
+
9
+ =end
10
+
11
+ require Arachni::Options.instance.dir['lib'] + 'anemone/page'
12
+ require Arachni::Options.instance.dir['lib'] + 'anemone/cookie_store'
13
+
14
+
15
+ #
16
+ # Overrides Anemone's HTTP class methods:
17
+ # o refresh_connection( ): added proxy support
18
+ # o get_response( ): upped the retry counter to 7 and generalized exception handling
19
+ #
20
+ # @author: Tasos "Zapotek" Laskos
21
+ # <tasos.laskos@gmail.com>
22
+ # <zapotek@segfault.gr>
23
+ # @version: 0.1.1
24
+ #
25
+ module Anemone
26
+
27
+ class HTTP
28
+
29
+ include Arachni::UI::Output
30
+
31
+ # Maximum number of redirects to follow on each get_response
32
+ REDIRECT_LIMIT = 5
33
+
34
+ # CookieStore for this HTTP client
35
+ attr_reader :cookie_store
36
+
37
+ def initialize(opts = {})
38
+ @connections = {}
39
+ @opts = opts
40
+ @cookie_store = CookieStore.new(@opts[:cookies])
41
+ end
42
+
43
+ #
44
+ # Fetch a single Page from the response of an HTTP request to *url*.
45
+ # Just gets the final destination page.
46
+ #
47
+ def fetch_page(url, referer = nil, depth = nil)
48
+ fetch_pages(url, referer, depth).last
49
+ end
50
+
51
+ #
52
+ # Create new Pages from the response of an HTTP request to *url*,
53
+ # including redirects
54
+ #
55
+ def fetch_pages(url, referer = nil, depth = nil)
56
+ begin
57
+ url = URI(url) unless url.is_a?(URI)
58
+ pages = []
59
+ get(url, referer) do |response, code, location, redirect_to, response_time|
60
+ pages << Page.new(location, :body => response.body.dup,
61
+ :code => code,
62
+ :headers => response.headers_hash,
63
+ :referer => referer,
64
+ :depth => depth,
65
+ :redirect_to => redirect_to,
66
+ :response_time => response_time)
67
+ end
68
+
69
+ return pages
70
+ rescue => e
71
+ if verbose?
72
+ puts e.inspect
73
+ puts e.backtrace
74
+ end
75
+ return [Page.new(url, :error => e)]
76
+ end
77
+ end
78
+
79
+ #
80
+ # The maximum number of redirects to follow
81
+ #
82
+ def redirect_limit
83
+ @opts[:redirect_limit] || REDIRECT_LIMIT
84
+ end
85
+
86
+ #
87
+ # The user-agent string which will be sent with each request,
88
+ # or nil if no such option is set
89
+ #
90
+ def user_agent
91
+ @opts[:user_agent]
92
+ end
93
+
94
+ #
95
+ # Does this HTTP client accept cookies from the server?
96
+ #
97
+ def accept_cookies?
98
+ @opts[:accept_cookies]
99
+ end
100
+
101
+ private
102
+
103
+ #
104
+ # Retrieve HTTP responses for *url*, including redirects.
105
+ # Yields the response object, response code, and URI location
106
+ # for each response.
107
+ #
108
+ def get(url, referer = nil)
109
+ response = get_response(url, referer)
110
+ yield response, response.code, url, '', response.time
111
+ end
112
+
113
+ #
114
+ # Get an HTTPResponse for *url*, sending the appropriate User-Agent string
115
+ #
116
+ def get_response(url, referer = nil)
117
+ opts = {}
118
+ opts['Referer'] = referer.to_s if referer
119
+
120
+ response = Arachni::HTTP.instance.get( url.to_s,
121
+ :headers => opts,
122
+ :follow_location => true,
123
+ :async => false,
124
+ :remove_id => true
125
+ ).response
126
+
127
+ return response
128
+ end
129
+
130
+
131
+ def verbose?
132
+ @opts[:verbose]
133
+ end
134
+
135
+ #
136
+ # Allowed to connect to the requested url?
137
+ #
138
+ def allowed?(to_url, from_url)
139
+ to_url.host.nil? || (to_url.host == from_url.host)
140
+ end
141
+
142
+
143
+ end
144
+ end
@@ -0,0 +1,337 @@
1
+ =begin
2
+ Arachni
3
+ Copyright (c) 2010-2011 Tasos "Zapotek" Laskos <tasos.laskos@gmail.com>
4
+
5
+ This is free software; you can copy and distribute and modify
6
+ this program under the term of the GPL v2.0 License
7
+ (See LICENSE file for details)
8
+
9
+ =end
10
+
11
+ require 'nokogiri'
12
+ require 'ostruct'
13
+ require 'webrick/cookie'
14
+
15
+ #
16
+ # Overrides Anemone's Page class methods:<br/>
17
+ # o in_domain?( uri ): adding support for subdomain crawling<br/>
18
+ # o links(): adding support for frame and iframe src URLs<br/>
19
+ #
20
+ # @author: Tasos "Zapotek" Laskos
21
+ # <tasos.laskos@gmail.com>
22
+ # <zapotek@segfault.gr>
23
+ # @version: 0.1
24
+ #
25
+ module Anemone
26
+
27
+ module Extractors
28
+ #
29
+ # Base Spider parser class for modules.
30
+ #
31
+ # The aim of such modules is to extract paths from a webpage for the Spider to follow.
32
+ #
33
+ #
34
+ # @author: Tasos "Zapotek" Laskos
35
+ # <tasos.laskos@gmail.com>
36
+ # <zapotek@segfault.gr>
37
+ # @version: 0.1
38
+ # @abstract
39
+ #
40
+ class Paths
41
+
42
+ #
43
+ # This method must be implemented by all modules and must return an array
44
+ # of paths as plain strings
45
+ #
46
+ # @param [Nokogiri] Nokogiri document
47
+ #
48
+ # @return [Array<String>] paths
49
+ #
50
+ def parse( doc )
51
+
52
+ end
53
+ end
54
+ end
55
+
56
+ class Page
57
+
58
+ include Arachni::UI::Output
59
+
60
+ # The URL of the page
61
+ attr_reader :url
62
+ # The raw HTTP response body of the page
63
+ attr_reader :body
64
+ # Headers of the HTTP response
65
+ attr_reader :headers
66
+ # URL of the page this one redirected to, if any
67
+ attr_reader :redirect_to
68
+ # Exception object, if one was raised during HTTP#fetch_page
69
+ attr_reader :error
70
+
71
+ # OpenStruct for user-stored data
72
+ attr_accessor :data
73
+ # Integer response code of the page
74
+ attr_accessor :code
75
+ # Boolean indicating whether or not this page has been visited in PageStore#shortest_paths!
76
+ attr_accessor :visited
77
+ # Depth of this page from the root of the crawl. This is not necessarily the
78
+ # shortest path; use PageStore#shortest_paths! to find that value.
79
+ attr_accessor :depth
80
+ # URL of the page that brought us to this page
81
+ attr_accessor :referer
82
+ # Response time of the request for this page in milliseconds
83
+ attr_accessor :response_time
84
+
85
+ #
86
+ # Create a new page
87
+ #
88
+ def initialize(url, params = {})
89
+ @url = url
90
+ @data = OpenStruct.new
91
+
92
+ @code = params[:code]
93
+ @headers = params[:headers] || {}
94
+ @headers['content-type'] ||= ['']
95
+ @aliases = Array(params[:aka]).compact
96
+ @referer = params[:referer]
97
+ @depth = params[:depth] || 0
98
+ @redirect_to = to_absolute(params[:redirect_to])
99
+ @response_time = params[:response_time]
100
+ @body = params[:body]
101
+ @error = params[:error]
102
+
103
+ @fetched = !params[:code].nil?
104
+ end
105
+
106
+ #
107
+ # Runs all Spider (path extraction) modules and returns an array of paths
108
+ #
109
+ # @return [Array] paths
110
+ #
111
+ def run_modules
112
+ opts = Arachni::Options.instance
113
+ require opts.dir['lib'] + 'component_manager'
114
+
115
+ lib = opts.dir['root'] + 'path_extractors/'
116
+
117
+
118
+ begin
119
+ @@manager ||= ::Arachni::ComponentManager.new( lib, Extractors )
120
+
121
+ return @@manager.available.map {
122
+ |name|
123
+ @@manager[name].new.run( doc )
124
+ }.flatten.uniq
125
+
126
+ rescue ::Exception => e
127
+ print_error( e.to_s )
128
+ print_debug_backtrace( e )
129
+ end
130
+ end
131
+
132
+ def dir( url )
133
+ URI( File.dirname( URI( url.to_s ).path ) + '/' )
134
+ end
135
+
136
+ #
137
+ # Array of distinct links to follow
138
+ #
139
+ # @return [Array<URI>]
140
+ #
141
+ def links
142
+ return @links unless @links.nil?
143
+ @links = []
144
+ return @links if !doc
145
+
146
+ run_modules( ).each {
147
+ |path|
148
+ next if path.nil? or path.empty?
149
+ abs = to_absolute( URI( path ) ) rescue next
150
+
151
+ if in_domain?( abs )
152
+ @links << abs
153
+ # force dir listing
154
+ # ap to_absolute( get_path( abs.to_s ).to_s ).to_s
155
+ # @links << to_absolute( dir( abs.to_s ).to_s ) rescue next
156
+ end
157
+ }
158
+
159
+ @links.uniq!
160
+ return @links
161
+ end
162
+
163
+ #
164
+ # Nokogiri document for the HTML body
165
+ #
166
+ def doc
167
+ type = Arachni::HTTP.content_type( @headers )
168
+ return if type.is_a?( String) && !type.substring?( 'text' )
169
+
170
+ return @doc if @doc
171
+ @doc = Nokogiri::HTML( @body ) if @body rescue nil
172
+ end
173
+
174
+ #
175
+ # Delete the Nokogiri document and response body to conserve memory
176
+ #
177
+ def discard_doc!
178
+ links # force parsing of page links before we trash the document
179
+ @doc = @body = nil
180
+ end
181
+
182
+ #
183
+ # Was the page successfully fetched?
184
+ # +true+ if the page was fetched with no error, +false+ otherwise.
185
+ #
186
+ def fetched?
187
+ @fetched
188
+ end
189
+
190
+ #
191
+ # Array of cookies received with this page as WEBrick::Cookie objects.
192
+ #
193
+ def cookies
194
+ WEBrick::Cookie.parse_set_cookies(@headers['Set-Cookie']) rescue []
195
+ end
196
+
197
+ #
198
+ # The content-type returned by the HTTP request for this page
199
+ #
200
+ def content_type
201
+ headers['content-type'].first
202
+ end
203
+
204
+ #
205
+ # Returns +true+ if the page is a HTML document, returns +false+
206
+ # otherwise.
207
+ #
208
+ def html?
209
+ !!(content_type =~ %r{^(text/html|application/xhtml+xml)\b})
210
+ end
211
+
212
+ #
213
+ # Returns +true+ if the page is a HTTP redirect, returns +false+
214
+ # otherwise.
215
+ #
216
+ def redirect?
217
+ (300..307).include?(@code)
218
+ end
219
+
220
+ #
221
+ # Returns +true+ if the page was not found (returned 404 code),
222
+ # returns +false+ otherwise.
223
+ #
224
+ def not_found?
225
+ 404 == @code
226
+ end
227
+
228
+ #
229
+ # Converts relative URL *link* into an absolute URL based on the
230
+ # location of the page
231
+ #
232
+ def to_absolute(link)
233
+ return nil if link.nil?
234
+
235
+ # remove anchor
236
+ link = URI.encode(link.to_s.gsub(/#[a-zA-Z0-9_-]*$/,''))
237
+
238
+ if url = base
239
+ base_url = URI(url)
240
+ else
241
+ base_url = @url.dup
242
+ end
243
+
244
+ relative = URI(link)
245
+ absolute = base_url.merge(relative)
246
+
247
+ absolute.path = '/' if absolute.path.empty?
248
+
249
+ return absolute
250
+ end
251
+
252
+ def base
253
+ begin
254
+ tmp = doc.search( '//base[@href]' )
255
+ return tmp[0]['href'].dup
256
+ rescue
257
+ return
258
+ end
259
+ end
260
+
261
+ #
262
+ # Returns +true+ if *uri* is in the same domain as the page, returns
263
+ # +false+ otherwise.
264
+ #
265
+ # The added code enables optional subdomain crawling.
266
+ #
267
+ def in_domain?( uri )
268
+ if( Arachni::Options.instance.follow_subdomains )
269
+ return extract_domain( uri ) == extract_domain( @url )
270
+ end
271
+
272
+ uri.host == @url.host
273
+ end
274
+
275
+ #
276
+ # Extracts the domain from a URI object
277
+ #
278
+ # @param [URI] url
279
+ #
280
+ # @return [String]
281
+ #
282
+ def extract_domain( url )
283
+
284
+ if !url.host then return false end
285
+
286
+ splits = url.host.split( /\./ )
287
+
288
+ if splits.length == 1 then return true end
289
+
290
+ splits[-2] + "." + splits[-1]
291
+ end
292
+
293
+
294
+ def marshal_dump
295
+ [@url, @headers, @data, @body, @links, @code, @visited, @depth, @referer, @redirect_to, @response_time, @fetched]
296
+ end
297
+
298
+ def marshal_load(ary)
299
+ @url, @headers, @data, @body, @links, @code, @visited, @depth, @referer, @redirect_to, @response_time, @fetched = ary
300
+ end
301
+
302
+ def to_hash
303
+ {'url' => @url.to_s,
304
+ 'headers' => Marshal.dump(@headers),
305
+ 'data' => Marshal.dump(@data),
306
+ 'body' => @body,
307
+ 'links' => links.map(&:to_s),
308
+ 'code' => @code,
309
+ 'visited' => @visited,
310
+ 'depth' => @depth,
311
+ 'referer' => @referer.to_s,
312
+ 'redirect_to' => @redirect_to.to_s,
313
+ 'response_time' => @response_time,
314
+ 'fetched' => @fetched}
315
+ end
316
+
317
+ def self.from_hash(hash)
318
+ page = self.new(URI(hash['url']))
319
+ {'@headers' => Marshal.load(hash['headers']),
320
+ '@data' => Marshal.load(hash['data']),
321
+ '@body' => hash['body'],
322
+ '@links' => hash['links'].map { |link| URI(link) },
323
+ '@code' => hash['code'].to_i,
324
+ '@visited' => hash['visited'],
325
+ '@depth' => hash['depth'].to_i,
326
+ '@referer' => hash['referer'],
327
+ '@redirect_to' => URI(hash['redirect_to']),
328
+ '@response_time' => hash['response_time'].to_i,
329
+ '@fetched' => hash['fetched']
330
+ }.each do |var, value|
331
+ page.instance_variable_set(var, value)
332
+ end
333
+ page
334
+ end
335
+
336
+ end
337
+ end