arachni 0.2.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/ACKNOWLEDGMENTS.md +14 -0
- data/AUTHORS.md +6 -0
- data/CHANGELOG.md +162 -0
- data/CONTRIBUTORS.md +10 -0
- data/EXPLOITATION.md +429 -0
- data/HACKING.md +101 -0
- data/LICENSE.md +341 -0
- data/README.md +350 -0
- data/Rakefile +86 -0
- data/bin/arachni +22 -0
- data/bin/arachni_web +77 -0
- data/bin/arachni_xmlrpc +21 -0
- data/bin/arachni_xmlrpcd +82 -0
- data/bin/arachni_xmlrpcd_monitor +74 -0
- data/conf/README.webui.yaml.txt +44 -0
- data/conf/webui.yaml +11 -0
- data/external/metasploit/LICENSE +24 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_exec.rb +142 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_path_traversal.rb +113 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_php_eval.rb +150 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_php_include.rb +141 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_sqlmap.rb +92 -0
- data/external/metasploit/plugins/arachni.rb +536 -0
- data/getoptslong.rb +241 -0
- data/lib/anemone.rb +2 -0
- data/lib/anemone/cookie_store.rb +35 -0
- data/lib/anemone/core.rb +371 -0
- data/lib/anemone/exceptions.rb +5 -0
- data/lib/anemone/http.rb +144 -0
- data/lib/anemone/page.rb +337 -0
- data/lib/anemone/page_store.rb +160 -0
- data/lib/anemone/storage.rb +34 -0
- data/lib/anemone/storage/base.rb +75 -0
- data/lib/anemone/storage/exceptions.rb +15 -0
- data/lib/anemone/storage/mongodb.rb +89 -0
- data/lib/anemone/storage/pstore.rb +50 -0
- data/lib/anemone/storage/redis.rb +90 -0
- data/lib/anemone/storage/tokyo_cabinet.rb +57 -0
- data/lib/anemone/tentacle.rb +40 -0
- data/lib/arachni.rb +16 -0
- data/lib/audit_store.rb +346 -0
- data/lib/component_manager.rb +293 -0
- data/lib/component_options.rb +395 -0
- data/lib/exceptions.rb +76 -0
- data/lib/framework.rb +637 -0
- data/lib/http.rb +809 -0
- data/lib/issue.rb +302 -0
- data/lib/module.rb +4 -0
- data/lib/module/auditor.rb +455 -0
- data/lib/module/base.rb +188 -0
- data/lib/module/element_db.rb +158 -0
- data/lib/module/key_filler.rb +87 -0
- data/lib/module/manager.rb +87 -0
- data/lib/module/output.rb +68 -0
- data/lib/module/trainer.rb +240 -0
- data/lib/module/utilities.rb +110 -0
- data/lib/options.rb +547 -0
- data/lib/parser.rb +2 -0
- data/lib/parser/auditable.rb +522 -0
- data/lib/parser/elements.rb +296 -0
- data/lib/parser/page.rb +149 -0
- data/lib/parser/parser.rb +717 -0
- data/lib/plugin.rb +4 -0
- data/lib/plugin/base.rb +110 -0
- data/lib/plugin/manager.rb +162 -0
- data/lib/report.rb +4 -0
- data/lib/report/base.rb +119 -0
- data/lib/report/manager.rb +92 -0
- data/lib/rpc/xml/client/base.rb +71 -0
- data/lib/rpc/xml/client/dispatcher.rb +49 -0
- data/lib/rpc/xml/client/instance.rb +88 -0
- data/lib/rpc/xml/server/base.rb +90 -0
- data/lib/rpc/xml/server/dispatcher.rb +357 -0
- data/lib/rpc/xml/server/framework.rb +206 -0
- data/lib/rpc/xml/server/instance.rb +191 -0
- data/lib/rpc/xml/server/module/manager.rb +46 -0
- data/lib/rpc/xml/server/options.rb +124 -0
- data/lib/rpc/xml/server/output.rb +299 -0
- data/lib/rpc/xml/server/plugin/manager.rb +58 -0
- data/lib/ruby.rb +5 -0
- data/lib/ruby/object.rb +32 -0
- data/lib/ruby/string.rb +74 -0
- data/lib/ruby/xmlrpc/server.rb +27 -0
- data/lib/spider.rb +200 -0
- data/lib/typhoeus/request.rb +91 -0
- data/lib/typhoeus/response.rb +34 -0
- data/lib/ui/cli/cli.rb +744 -0
- data/lib/ui/cli/output.rb +279 -0
- data/lib/ui/web/log.rb +82 -0
- data/lib/ui/web/output_stream.rb +94 -0
- data/lib/ui/web/report_manager.rb +222 -0
- data/lib/ui/web/server.rb +903 -0
- data/lib/ui/web/server/db/placeholder +0 -0
- data/lib/ui/web/server/public/banner.png +0 -0
- data/lib/ui/web/server/public/bodybg-small.png +0 -0
- data/lib/ui/web/server/public/bodybg.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/pbar-ani.gif +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_flat_0_aaaaaa_40x100.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_flat_75_ffffff_40x100.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_55_fbf9ee_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_65_ffffff_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_75_dadada_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_75_e6e6e6_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_95_fef1ec_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_highlight-soft_75_cccccc_1x100.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_222222_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_2e83ff_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_454545_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_888888_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_cd0a0a_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/jquery-ui-1.8.9.custom.css +573 -0
- data/lib/ui/web/server/public/favicon.ico +0 -0
- data/lib/ui/web/server/public/footer.jpg +0 -0
- data/lib/ui/web/server/public/icons/error.png +0 -0
- data/lib/ui/web/server/public/icons/info.png +0 -0
- data/lib/ui/web/server/public/icons/ok.png +0 -0
- data/lib/ui/web/server/public/icons/status.png +0 -0
- data/lib/ui/web/server/public/js/jquery-1.4.4.min.js +167 -0
- data/lib/ui/web/server/public/js/jquery-ui-1.8.9.custom.min.js +781 -0
- data/lib/ui/web/server/public/logo.png +0 -0
- data/lib/ui/web/server/public/nav-left.jpg +0 -0
- data/lib/ui/web/server/public/nav-right.jpg +0 -0
- data/lib/ui/web/server/public/nav-selected-left.jpg +0 -0
- data/lib/ui/web/server/public/nav-selected-right.jpg +0 -0
- data/lib/ui/web/server/public/reports/placeholder +1 -0
- data/lib/ui/web/server/public/sidebar-bottom.jpg +0 -0
- data/lib/ui/web/server/public/sidebar-h4.jpg +0 -0
- data/lib/ui/web/server/public/sidebar-top.jpg +0 -0
- data/lib/ui/web/server/public/spider.png +0 -0
- data/lib/ui/web/server/public/style.css +604 -0
- data/lib/ui/web/server/tmp/placeholder +0 -0
- data/lib/ui/web/server/views/dispatcher.erb +85 -0
- data/lib/ui/web/server/views/dispatcher_error.erb +14 -0
- data/lib/ui/web/server/views/error.erb +1 -0
- data/lib/ui/web/server/views/flash.erb +18 -0
- data/lib/ui/web/server/views/home.erb +14 -0
- data/lib/ui/web/server/views/instance.erb +213 -0
- data/lib/ui/web/server/views/layout.erb +95 -0
- data/lib/ui/web/server/views/log.erb +40 -0
- data/lib/ui/web/server/views/modules.erb +71 -0
- data/lib/ui/web/server/views/options.erb +23 -0
- data/lib/ui/web/server/views/output_results.erb +51 -0
- data/lib/ui/web/server/views/plugins.erb +42 -0
- data/lib/ui/web/server/views/report_formats.erb +30 -0
- data/lib/ui/web/server/views/reports.erb +55 -0
- data/lib/ui/web/server/views/settings.erb +120 -0
- data/lib/ui/web/server/views/welcome.erb +38 -0
- data/lib/ui/xmlrpc/dispatcher_monitor.rb +204 -0
- data/lib/ui/xmlrpc/xmlrpc.rb +843 -0
- data/logs/placeholder +0 -0
- data/metamodules/autothrottle.rb +74 -0
- data/metamodules/timeout_notice.rb +118 -0
- data/metamodules/uniformity.rb +98 -0
- data/modules/audit/code_injection.rb +136 -0
- data/modules/audit/code_injection_timing.rb +115 -0
- data/modules/audit/code_injection_timing/payloads.txt +4 -0
- data/modules/audit/csrf.rb +301 -0
- data/modules/audit/ldapi.rb +103 -0
- data/modules/audit/ldapi/errors.txt +26 -0
- data/modules/audit/os_cmd_injection.rb +103 -0
- data/modules/audit/os_cmd_injection/payloads.txt +2 -0
- data/modules/audit/os_cmd_injection_timing.rb +104 -0
- data/modules/audit/os_cmd_injection_timing/payloads.txt +3 -0
- data/modules/audit/path_traversal.rb +141 -0
- data/modules/audit/response_splitting.rb +105 -0
- data/modules/audit/rfi.rb +193 -0
- data/modules/audit/sqli.rb +120 -0
- data/modules/audit/sqli/regexp_ids.txt +90 -0
- data/modules/audit/sqli_blind_rdiff.rb +321 -0
- data/modules/audit/sqli_blind_timing.rb +103 -0
- data/modules/audit/sqli_blind_timing/payloads.txt +51 -0
- data/modules/audit/trainer.rb +89 -0
- data/modules/audit/unvalidated_redirect.rb +90 -0
- data/modules/audit/xpath.rb +104 -0
- data/modules/audit/xpath/errors.txt +26 -0
- data/modules/audit/xss.rb +99 -0
- data/modules/audit/xss_event.rb +134 -0
- data/modules/audit/xss_path.rb +125 -0
- data/modules/audit/xss_script_tag.rb +112 -0
- data/modules/audit/xss_tag.rb +112 -0
- data/modules/audit/xss_uri.rb +125 -0
- data/modules/recon/allowed_methods.rb +104 -0
- data/modules/recon/backdoors.rb +131 -0
- data/modules/recon/backdoors/filenames.txt +16 -0
- data/modules/recon/backup_files.rb +177 -0
- data/modules/recon/backup_files/extensions.txt +28 -0
- data/modules/recon/common_directories.rb +138 -0
- data/modules/recon/common_directories/directories.txt +265 -0
- data/modules/recon/common_files.rb +138 -0
- data/modules/recon/common_files/filenames.txt +17 -0
- data/modules/recon/directory_listing.rb +171 -0
- data/modules/recon/grep/captcha.rb +62 -0
- data/modules/recon/grep/credit_card.rb +85 -0
- data/modules/recon/grep/cvs_svn_users.rb +73 -0
- data/modules/recon/grep/emails.rb +59 -0
- data/modules/recon/grep/html_objects.rb +53 -0
- data/modules/recon/grep/private_ip.rb +54 -0
- data/modules/recon/grep/ssn.rb +53 -0
- data/modules/recon/htaccess_limit.rb +82 -0
- data/modules/recon/http_put.rb +95 -0
- data/modules/recon/interesting_responses.rb +118 -0
- data/modules/recon/unencrypted_password_forms.rb +119 -0
- data/modules/recon/webdav.rb +126 -0
- data/modules/recon/xst.rb +107 -0
- data/path_extractors/anchors.rb +35 -0
- data/path_extractors/forms.rb +35 -0
- data/path_extractors/frames.rb +38 -0
- data/path_extractors/generic.rb +39 -0
- data/path_extractors/links.rb +35 -0
- data/path_extractors/meta_refresh.rb +39 -0
- data/path_extractors/scripts.rb +37 -0
- data/path_extractors/sitemap.rb +31 -0
- data/plugins/autologin.rb +137 -0
- data/plugins/content_types.rb +90 -0
- data/plugins/cookie_collector.rb +99 -0
- data/plugins/form_dicattack.rb +185 -0
- data/plugins/healthmap.rb +94 -0
- data/plugins/http_dicattack.rb +133 -0
- data/plugins/metamodules.rb +118 -0
- data/plugins/proxy.rb +248 -0
- data/plugins/proxy/server.rb +66 -0
- data/plugins/waf_detector.rb +184 -0
- data/profiles/comprehensive.afp +74 -0
- data/profiles/full.afp +75 -0
- data/reports/afr.rb +59 -0
- data/reports/ap.rb +55 -0
- data/reports/html.rb +179 -0
- data/reports/html/default.erb +967 -0
- data/reports/metareport.rb +139 -0
- data/reports/metareport/arachni_metareport.rb +174 -0
- data/reports/plugin_formatters/html/content_types.rb +82 -0
- data/reports/plugin_formatters/html/cookie_collector.rb +66 -0
- data/reports/plugin_formatters/html/form_dicattack.rb +54 -0
- data/reports/plugin_formatters/html/healthmap.rb +76 -0
- data/reports/plugin_formatters/html/http_dicattack.rb +54 -0
- data/reports/plugin_formatters/html/metaformatters/timeout_notice.rb +65 -0
- data/reports/plugin_formatters/html/metaformatters/uniformity.rb +71 -0
- data/reports/plugin_formatters/html/metamodules.rb +93 -0
- data/reports/plugin_formatters/html/waf_detector.rb +54 -0
- data/reports/plugin_formatters/stdout/content_types.rb +73 -0
- data/reports/plugin_formatters/stdout/cookie_collector.rb +61 -0
- data/reports/plugin_formatters/stdout/form_dicattack.rb +52 -0
- data/reports/plugin_formatters/stdout/healthmap.rb +72 -0
- data/reports/plugin_formatters/stdout/http_dicattack.rb +53 -0
- data/reports/plugin_formatters/stdout/metaformatters/timeout_notice.rb +55 -0
- data/reports/plugin_formatters/stdout/metaformatters/uniformity.rb +68 -0
- data/reports/plugin_formatters/stdout/metamodules.rb +89 -0
- data/reports/plugin_formatters/stdout/waf_detector.rb +48 -0
- data/reports/plugin_formatters/xml/content_types.rb +91 -0
- data/reports/plugin_formatters/xml/cookie_collector.rb +70 -0
- data/reports/plugin_formatters/xml/form_dicattack.rb +57 -0
- data/reports/plugin_formatters/xml/healthmap.rb +82 -0
- data/reports/plugin_formatters/xml/http_dicattack.rb +57 -0
- data/reports/plugin_formatters/xml/metaformatters/timeout_notice.rb +67 -0
- data/reports/plugin_formatters/xml/metaformatters/uniformity.rb +82 -0
- data/reports/plugin_formatters/xml/metamodules.rb +91 -0
- data/reports/plugin_formatters/xml/waf_detector.rb +58 -0
- data/reports/stdout.rb +182 -0
- data/reports/txt.rb +77 -0
- data/reports/xml.rb +231 -0
- data/reports/xml/buffer.rb +98 -0
- metadata +516 -0
data/lib/anemone/http.rb
ADDED
@@ -0,0 +1,144 @@
|
|
1
|
+
=begin
|
2
|
+
Arachni
|
3
|
+
Copyright (c) 2010-2011 Tasos "Zapotek" Laskos <tasos.laskos@gmail.com>
|
4
|
+
|
5
|
+
This is free software; you can copy and distribute and modify
|
6
|
+
this program under the term of the GPL v2.0 License
|
7
|
+
(See LICENSE file for details)
|
8
|
+
|
9
|
+
=end
|
10
|
+
|
11
|
+
require Arachni::Options.instance.dir['lib'] + 'anemone/page'
|
12
|
+
require Arachni::Options.instance.dir['lib'] + 'anemone/cookie_store'
|
13
|
+
|
14
|
+
|
15
|
+
#
|
16
|
+
# Overrides Anemone's HTTP class methods:
|
17
|
+
# o refresh_connection( ): added proxy support
|
18
|
+
# o get_response( ): upped the retry counter to 7 and generalized exception handling
|
19
|
+
#
|
20
|
+
# @author: Tasos "Zapotek" Laskos
|
21
|
+
# <tasos.laskos@gmail.com>
|
22
|
+
# <zapotek@segfault.gr>
|
23
|
+
# @version: 0.1.1
|
24
|
+
#
|
25
|
+
module Anemone
|
26
|
+
|
27
|
+
class HTTP
|
28
|
+
|
29
|
+
include Arachni::UI::Output
|
30
|
+
|
31
|
+
# Maximum number of redirects to follow on each get_response
|
32
|
+
REDIRECT_LIMIT = 5
|
33
|
+
|
34
|
+
# CookieStore for this HTTP client
|
35
|
+
attr_reader :cookie_store
|
36
|
+
|
37
|
+
#
# Sets up the state of the client.
#
# @param [Hash] opts  client options; +:cookies+ is read here to seed the
#                     cookie store, the remaining keys are read lazily by
#                     the accessor methods of this class
#
def initialize(opts = {})
    @opts         = opts
    @connections  = {}
    # the cookie jar starts out with whatever cookies were configured up-front
    @cookie_store = CookieStore.new(@opts[:cookies])
end
|
42
|
+
|
43
|
+
#
|
44
|
+
# Fetch a single Page from the response of an HTTP request to *url*.
|
45
|
+
# Just gets the final destination page.
|
46
|
+
#
|
47
|
+
#
# Fetches *url* and returns only the last of the Pages produced by
# #fetch_pages (i.e. the final destination page).
#
# @param [URI, String] url
# @param [URI, String, nil] referer
# @param [Integer, nil] depth
#
# @return [Page]
#
def fetch_page(url, referer = nil, depth = nil)
    fetch_pages(url, referer, depth).last
end
|
50
|
+
|
51
|
+
#
|
52
|
+
# Create new Pages from the response of an HTTP request to *url*,
|
53
|
+
# including redirects
|
54
|
+
#
|
55
|
+
#
# Creates new Pages from the response of an HTTP request to *url*.
#
# Any StandardError raised while requesting the URL or wrapping the response
# is captured and handed back as a single Page carrying it in its +:error+
# param, so callers always receive an Array.
#
# @param [URI, String] url      converted to a URI when it is not one already
# @param [URI, String, nil] referer
# @param [Integer, nil] depth
#
# @return [Array<Page>]
#
def fetch_pages(url, referer = nil, depth = nil)
    url   = URI(url) unless url.is_a?(URI)
    pages = []

    get(url, referer) do |response, code, location, redirect_to, response_time|
        pages << Page.new(location,
            :body          => response.body.dup,
            :code          => code,
            :headers       => response.headers_hash,
            :referer       => referer,
            :depth         => depth,
            :redirect_to   => redirect_to,
            :response_time => response_time)
    end

    pages
rescue => e
    if verbose?
        puts e.inspect
        puts e.backtrace
    end

    # report the failure as a page of its own instead of raising
    [Page.new(url, :error => e)]
end
|
78
|
+
|
79
|
+
#
|
80
|
+
# The maximum number of redirects to follow
|
81
|
+
#
|
82
|
+
#
# The maximum number of redirects to follow.
#
# @return [Integer] the +:redirect_limit+ option or, when that is not set,
#                   REDIRECT_LIMIT
#
def redirect_limit
    @opts[:redirect_limit] || REDIRECT_LIMIT
end
|
85
|
+
|
86
|
+
#
|
87
|
+
# The user-agent string which will be sent with each request,
|
88
|
+
# or nil if no such option is set
|
89
|
+
#
|
90
|
+
#
# The user-agent string which will be sent with each request.
#
# @return [String, nil] the +:user_agent+ option, nil if it is not set
#
def user_agent
    @opts[:user_agent]
end
|
93
|
+
|
94
|
+
#
|
95
|
+
# Does this HTTP client accept cookies from the server?
|
96
|
+
#
|
97
|
+
#
# Does this HTTP client accept cookies from the server?
#
# @return [Object, nil] the value of the +:accept_cookies+ option
#
def accept_cookies?
    @opts[:accept_cookies]
end
|
100
|
+
|
101
|
+
private
|
102
|
+
|
103
|
+
#
|
104
|
+
# Retrieve HTTP responses for *url*, including redirects.
|
105
|
+
# Yields the response object, response code, and URI location
|
106
|
+
# for each response.
|
107
|
+
#
|
108
|
+
#
# Retrieves the HTTP response for *url* and yields it exactly once.
#
# @param [URI, String] url
# @param [URI, String, nil] referer
#
# @yield [response, code, location, redirect_to, response_time]
#   the response object, its code, *url* itself as the location, an empty
#   String as the redirect target and the response's time
#
def get(url, referer = nil)
    response = get_response(url, referer)
    yield response, response.code, url, '', response.time
end
|
112
|
+
|
113
|
+
#
|
114
|
+
# Get an HTTPResponse for *url*, sending the appropriate User-Agent string
|
115
|
+
#
|
116
|
+
#
# Performs a synchronous GET request for *url* through Arachni::HTTP and
# returns its response.
#
# @param [URI, String] url
# @param [URI, String, nil] referer  sent as the +Referer+ header when given
#
# @return [Object] whatever +#response+ of the performed request returns
#
def get_response(url, referer = nil)
    # named "headers" to avoid confusion with the client-wide @opts
    headers = {}
    headers['Referer'] = referer.to_s if referer

    Arachni::HTTP.instance.get( url.to_s,
        :headers         => headers,
        :follow_location => true,
        :async           => false,
        :remove_id       => true
    ).response
end
|
129
|
+
|
130
|
+
|
131
|
+
#
# Should errors be printed while fetching pages?
#
# @return [Object, nil] the value of the +:verbose+ option
#
def verbose?
    @opts[:verbose]
end
|
134
|
+
|
135
|
+
#
|
136
|
+
# Allowed to connect to the requested url?
|
137
|
+
#
|
138
|
+
#
# Allowed to connect to the requested url?
#
# @param [URI] to_url    where we want to go
# @param [URI] from_url  where we are coming from
#
# @return [Boolean] true when *to_url* has no host at all or shares the
#                   host of *from_url*
#
def allowed?(to_url, from_url)
    to_url.host.nil? || (to_url.host == from_url.host)
end
|
141
|
+
|
142
|
+
|
143
|
+
end
|
144
|
+
end
|
data/lib/anemone/page.rb
ADDED
@@ -0,0 +1,337 @@
|
|
1
|
+
=begin
|
2
|
+
Arachni
|
3
|
+
Copyright (c) 2010-2011 Tasos "Zapotek" Laskos <tasos.laskos@gmail.com>
|
4
|
+
|
5
|
+
This is free software; you can copy and distribute and modify
|
6
|
+
this program under the term of the GPL v2.0 License
|
7
|
+
(See LICENSE file for details)
|
8
|
+
|
9
|
+
=end
|
10
|
+
|
11
|
+
require 'nokogiri'
|
12
|
+
require 'ostruct'
|
13
|
+
require 'webrick/cookie'
|
14
|
+
|
15
|
+
#
|
16
|
+
# Overrides Anemone's Page class methods:<br/>
|
17
|
+
# o in_domain?( uri ): adding support for subdomain crawling<br/>
|
18
|
+
# o links(): adding support for frame and iframe src URLs<br/>
|
19
|
+
#
|
20
|
+
# @author: Tasos "Zapotek" Laskos
|
21
|
+
# <tasos.laskos@gmail.com>
|
22
|
+
# <zapotek@segfault.gr>
|
23
|
+
# @version: 0.1
|
24
|
+
#
|
25
|
+
module Anemone
|
26
|
+
|
27
|
+
module Extractors
|
28
|
+
#
|
29
|
+
# Base Spider parser class for modules.
|
30
|
+
#
|
31
|
+
# The aim of such modules is to extract paths from a webpage for the Spider to follow.
|
32
|
+
#
|
33
|
+
#
|
34
|
+
# @author: Tasos "Zapotek" Laskos
|
35
|
+
# <tasos.laskos@gmail.com>
|
36
|
+
# <zapotek@segfault.gr>
|
37
|
+
# @version: 0.1
|
38
|
+
# @abstract
|
39
|
+
#
|
40
|
+
class Paths

    #
    # This method must be implemented by all modules and must return an array
    # of paths as plain strings.
    #
    # The base implementation extracts nothing.
    #
    # @param [Nokogiri] doc  Nokogiri document
    #
    # @return [Array<String>] paths
    #
    def parse( doc )
        []
    end

    #
    # Entry point invoked by Page#run_modules on every extractor instance.
    #
    # Delegates to #parse so that an extractor which only implements the
    # documented #parse method can still be run.
    #
    # @param [Nokogiri] doc  Nokogiri document
    #
    # @return [Array<String>] paths
    #
    def run( doc )
        parse( doc )
    end

end
|
54
|
+
end
|
55
|
+
|
56
|
+
class Page
|
57
|
+
|
58
|
+
include Arachni::UI::Output
|
59
|
+
|
60
|
+
# The URL of the page
|
61
|
+
attr_reader :url
|
62
|
+
# The raw HTTP response body of the page
|
63
|
+
attr_reader :body
|
64
|
+
# Headers of the HTTP response
|
65
|
+
attr_reader :headers
|
66
|
+
# URL of the page this one redirected to, if any
|
67
|
+
attr_reader :redirect_to
|
68
|
+
# Exception object, if one was raised during HTTP#fetch_page
|
69
|
+
attr_reader :error
|
70
|
+
|
71
|
+
# OpenStruct for user-stored data
|
72
|
+
attr_accessor :data
|
73
|
+
# Integer response code of the page
|
74
|
+
attr_accessor :code
|
75
|
+
# Boolean indicating whether or not this page has been visited in PageStore#shortest_paths!
|
76
|
+
attr_accessor :visited
|
77
|
+
# Depth of this page from the root of the crawl. This is not necessarily the
|
78
|
+
# shortest path; use PageStore#shortest_paths! to find that value.
|
79
|
+
attr_accessor :depth
|
80
|
+
# URL of the page that brought us to this page
|
81
|
+
attr_accessor :referer
|
82
|
+
# Response time of the request for this page in milliseconds
|
83
|
+
attr_accessor :response_time
|
84
|
+
|
85
|
+
#
|
86
|
+
# Create a new page
|
87
|
+
#
|
88
|
+
#
# Create a new page
#
# @param [URI] url       location of the page
# @param [Hash] params   optional attributes; the keys read here are
#                        +:code+, +:headers+, +:aka+, +:referer+, +:depth+,
#                        +:redirect_to+, +:response_time+, +:body+ and +:error+
#
def initialize(url, params = {})
    @url  = url
    @data = OpenStruct.new

    @code = params[:code]

    # work on a copy: the default content-type added below must not leak
    # into the hash that belongs to the caller
    @headers = ( params[:headers] || {} ).dup
    @headers['content-type'] ||= ['']

    @aliases       = Array(params[:aka]).compact
    @referer       = params[:referer]
    @depth         = params[:depth] || 0
    # resolved before @body is assigned, as in the original statement order
    @redirect_to   = to_absolute(params[:redirect_to])
    @response_time = params[:response_time]
    @body          = params[:body]
    @error         = params[:error]

    # a page counts as fetched as soon as it has a response code
    @fetched = !params[:code].nil?
end
|
105
|
+
|
106
|
+
#
|
107
|
+
# Runs all Spider (path extraction) modules and returns an array of paths
|
108
|
+
#
|
109
|
+
# @return [Array] paths
|
110
|
+
#
|
111
|
+
#
# Runs all Spider (path extraction) modules and returns an array of paths
#
# Errors raised while loading or running the extractors are printed and
# result in an empty array, so the return value can always be iterated.
#
# @return [Array] paths
#
def run_modules
    opts = Arachni::Options.instance
    require opts.dir['lib'] + 'component_manager'

    lib = opts.dir['root'] + 'path_extractors/'

    begin
        # the manager is created once and shared between all pages
        @@manager ||= ::Arachni::ComponentManager.new( lib, Extractors )

        return @@manager.available.map {
            |name|
            @@manager[name].new.run( doc )
        }.flatten.uniq

    # ScriptError covers extractors that fail to load; interrupts and
    # exit requests are deliberately left alone
    rescue ::StandardError, ::ScriptError => e
        print_error( e.to_s )
        print_debug_backtrace( e )
    end

    []
end
|
131
|
+
|
132
|
+
#
# Returns the parent directory of the path of *url*, with a trailing slash.
#
# @param [URI, String] url
#
# @return [URI] path-only URI such as <em>/some/dir/</em>
#
def dir( url )
    path = URI( url.to_s ).path.to_s

    # an empty path is the root
    parent = File.dirname( path.empty? ? '/' : path )

    # File.dirname already returns '/' for top-level entries, don't double it
    parent += '/' unless parent.end_with?( '/' )

    URI( parent )
end
|
135
|
+
|
136
|
+
#
|
137
|
+
# Array of distinct links to follow
|
138
|
+
#
|
139
|
+
# @return [Array<URI>]
|
140
|
+
#
|
141
|
+
#
# Array of distinct links to follow
#
# The result is computed once and memoized. Only paths which can be turned
# into absolute URLs and which are in the domain of this page are kept.
#
# @return [Array<URI>]
#
def links
    return @links unless @links.nil?

    @links = []
    return @links if !doc

    # Array() copes with run_modules returning nothing at all
    Array( run_modules ).each {
        |path|
        next if path.nil? || path.empty?

        begin
            # hand over the raw path, to_absolute takes care of escaping it
            abs = to_absolute( path )
        rescue
            next
        end

        @links << abs if in_domain?( abs )
    }

    @links.uniq!
    @links
end
|
162
|
+
|
163
|
+
#
|
164
|
+
# Nokogiri document for the HTML body
|
165
|
+
#
|
166
|
+
#
# Nokogiri document for the HTML body
#
# @return [Nokogiri::HTML::Document, nil]
#   nil when the content-type is a String which does not contain 'text',
#   when there is no body, or when the body could not be parsed
#
def doc
    type = Arachni::HTTP.content_type( @headers )
    return if type.is_a?( String ) && !type.substring?( 'text' )

    return @doc if @doc
    return if !@body

    begin
        @doc = Nokogiri::HTML( @body )
    rescue
        # an unparsable body is treated like a missing one
        nil
    end
end
|
173
|
+
|
174
|
+
#
|
175
|
+
# Delete the Nokogiri document and response body to conserve memory
|
176
|
+
#
|
177
|
+
#
# Delete the Nokogiri document and response body to conserve memory
#
# The links are extracted first since that requires the document.
#
def discard_doc!
    links # force parsing of page links before we trash the document
    @doc = @body = nil
end
|
181
|
+
|
182
|
+
#
|
183
|
+
# Was the page successfully fetched?
|
184
|
+
# +true+ if the page was fetched with no error, +false+ otherwise.
|
185
|
+
#
|
186
|
+
#
# Was the page successfully fetched?
#
# @return [Boolean] the flag set when the page was created, which is true
#                   when a response code was supplied
#
def fetched?
    @fetched
end
|
189
|
+
|
190
|
+
#
|
191
|
+
# Array of cookies received with this page as WEBrick::Cookie objects.
|
192
|
+
#
|
193
|
+
#
# Array of cookies received with this page as WEBrick::Cookie objects.
#
# @return [Array<WEBrick::Cookie>] empty when there is no +Set-Cookie+
#                                  header or when it can not be parsed
#
def cookies
    raw = @headers['Set-Cookie']
    # nothing to parse, no need to rely on an exception for this
    return [] if raw.nil?

    WEBrick::Cookie.parse_set_cookies( raw )
rescue
    # best effort: malformed cookie headers yield no cookies
    []
end
|
196
|
+
|
197
|
+
#
|
198
|
+
# The content-type returned by the HTTP request for this page
|
199
|
+
#
|
200
|
+
#
# The content-type returned by the HTTP request for this page
#
# Header names are matched case-insensitively and both String and Array
# header values are supported. When several matching headers exist the
# first non-empty value wins.
#
# @return [String, nil] nil if there is no content-type header at all
#
def content_type
    candidates = headers.map {
        |name, value|
        Array( value ).first if name.to_s.downcase == 'content-type'
    }.compact

    # prefer a real value over the empty default added at initialization
    candidates.find { |value| !value.to_s.empty? } || candidates.first
end
|
203
|
+
|
204
|
+
#
|
205
|
+
# Returns +true+ if the page is a HTML document, returns +false+
|
206
|
+
# otherwise.
|
207
|
+
#
|
208
|
+
#
# Returns +true+ if the page is a HTML document, returns +false+
# otherwise.
#
# Both <em>text/html</em> and <em>application/xhtml+xml</em> count as HTML.
#
# @return [Boolean]
#
def html?
    # the "+" must be escaped, otherwise it acts as a quantifier and
    # "application/xhtml+xml" never matches
    !!(content_type =~ %r{\A(text/html|application/xhtml\+xml)\b}i)
end
|
211
|
+
|
212
|
+
#
|
213
|
+
# Returns +true+ if the page is a HTTP redirect, returns +false+
|
214
|
+
# otherwise.
|
215
|
+
#
|
216
|
+
#
# Returns +true+ if the page is a HTTP redirect, returns +false+
# otherwise.
#
# All codes from 300 up to and including 308 (Permanent Redirect) are
# considered to be redirects.
#
# @return [Boolean]
#
def redirect?
    (300..308).include?(@code)
end
|
219
|
+
|
220
|
+
#
|
221
|
+
# Returns +true+ if the page was not found (returned 404 code),
|
222
|
+
# returns +false+ otherwise.
|
223
|
+
#
|
224
|
+
#
# Returns +true+ if the page was not found (returned 404 code),
# returns +false+ otherwise.
#
# @return [Boolean]
#
def not_found?
    @code == 404
end
|
227
|
+
|
228
|
+
#
|
229
|
+
# Converts relative URL *link* into an absolute URL based on the
|
230
|
+
# location of the page
|
231
|
+
#
|
232
|
+
#
# Converts relative URL *link* into an absolute URL based on the
# location of the page
#
# The fragment of the link is dropped and unsafe characters are escaped.
# If the document declares a <em>base href</em> it is used as the base of
# the resolution, otherwise the URL of the page is.
#
# @param [URI, String, nil] link
#
# @return [URI, nil] nil if *link* is nil
#
def to_absolute(link)
    return nil if link.nil?

    # remove the whole fragment (whatever characters it is made of) before
    # escaping, otherwise the "#" would end up as "%23" in the path
    link = URI::DEFAULT_PARSER.escape( link.to_s.sub( /#.*\z/m, '' ) )

    if href = base
        base_url = URI( href )
        # a relative <base href> is itself relative to the URL of the page
        base_url = @url.merge( base_url ) if base_url.relative?
    else
        base_url = @url.dup
    end

    absolute = base_url.merge( URI( link ) )
    absolute.path = '/' if absolute.path.empty?

    absolute
end
|
251
|
+
|
252
|
+
#
# Returns the value of the +href+ attribute of the first <em>base</em>
# element of the document.
#
# @return [String, nil] nil if there is no document, no such element or
#                       if looking it up failed
#
def base
    document = doc
    return if !document

    node = document.search( '//base[@href]' )[0]
    return if !node

    node['href'].dup
rescue
    # best effort, a broken document simply has no base
    nil
end
|
260
|
+
|
261
|
+
#
|
262
|
+
# Returns +true+ if *uri* is in the same domain as the page, returns
|
263
|
+
# +false+ otherwise.
|
264
|
+
#
|
265
|
+
# The added code enables optional subdomain crawling.
|
266
|
+
#
|
267
|
+
#
# Returns +true+ if *uri* is in the same domain as the page, returns
# +false+ otherwise.
#
# When the +follow_subdomains+ option is enabled the domains extracted by
# #extract_domain are compared, otherwise the hosts have to be the same.
# Host names are compared case-insensitively.
#
# @param [URI] uri
#
# @return [Boolean]
#
def in_domain?( uri )
    if Arachni::Options.instance.follow_subdomains
        return extract_domain( uri ) == extract_domain( @url )
    end

    uri.host.to_s.downcase == @url.host.to_s.downcase
end
|
274
|
+
|
275
|
+
#
|
276
|
+
# Extracts the domain from a URI object
|
277
|
+
#
|
278
|
+
# @param [URI] url
|
279
|
+
#
|
280
|
+
# @return [String]
|
281
|
+
#
|
282
|
+
#
# Extracts the domain from a URI object
#
# The domain is made up of the last two labels of the host. Hosts which
# can not be split that way (single-label names, IPv4 addresses) are
# returned whole so that different hosts never compare as equal.
#
# NOTE(review): multi-label public suffixes (e.g. "co.uk") are not
# recognised, "a.co.uk" and "b.co.uk" both yield "co.uk".
#
# @param [URI] url
#
# @return [String, false] false if the URI has no host
#
def extract_domain( url )
    host = url.host
    return false if !host

    host   = host.downcase
    labels = host.split( '.' )

    return host if labels.length < 2
    return host if host =~ /\A\d{1,3}(\.\d{1,3}){3}\z/

    labels[-2] + "." + labels[-1]
end
|
292
|
+
|
293
|
+
|
294
|
+
#
# State to serialize, in the order expected by #marshal_load.
#
# @return [Array]
#
def marshal_dump
    [
        @url, @headers, @data, @body, @links, @code, @visited, @depth,
        @referer, @redirect_to, @response_time, @fetched
    ]
end
|
297
|
+
|
298
|
+
#
# Restores the state from an array in the layout produced by #marshal_dump.
#
# @param [Array] ary
#
def marshal_load(ary)
    @url, @headers, @data, @body, @links, @code,
        @visited, @depth, @referer, @redirect_to,
        @response_time, @fetched = ary
end
|
301
|
+
|
302
|
+
#
# Converts the page to a Hash with String keys.
#
# Headers and user data are stored as Marshal dumps; URLs, links, referer
# and redirect target are stored as Strings.
#
# @return [Hash]
#
def to_hash
    {
        'url'           => @url.to_s,
        'headers'       => Marshal.dump(@headers),
        'data'          => Marshal.dump(@data),
        'body'          => @body,
        'links'         => links.map(&:to_s),
        'code'          => @code,
        'visited'       => @visited,
        'depth'         => @depth,
        'referer'       => @referer.to_s,
        'redirect_to'   => @redirect_to.to_s,
        'response_time' => @response_time,
        'fetched'       => @fetched
    }
end
|
316
|
+
|
317
|
+
#
# Rebuilds a page from a Hash in the layout produced by #to_hash.
#
# NOTE(review): 'headers' and 'data' are passed to Marshal.load, which is
# not safe for untrusted input -- confirm that the hashes only ever come
# from our own storage.
#
# @param [Hash] hash
#
# @return [Page]
#
def self.from_hash(hash)
    page = self.new(URI(hash['url']))

    # #to_hash stores a missing redirect target as an empty String,
    # restore it as nil instead of as an empty URI
    redirect_to = hash['redirect_to'].to_s

    {
        '@headers'       => Marshal.load(hash['headers']),
        '@data'          => Marshal.load(hash['data']),
        '@body'          => hash['body'],
        '@links'         => Array(hash['links']).map { |link| URI(link) },
        '@code'          => hash['code'].to_i,
        '@visited'       => hash['visited'],
        '@depth'         => hash['depth'].to_i,
        '@referer'       => hash['referer'],
        '@redirect_to'   => redirect_to.empty? ? nil : URI(redirect_to),
        '@response_time' => hash['response_time'].to_i,
        '@fetched'       => hash['fetched']
    }.each do |var, value|
        page.instance_variable_set(var, value)
    end

    page
end
|
335
|
+
|
336
|
+
end
|
337
|
+
end
|