arachni 0.2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ACKNOWLEDGMENTS.md +14 -0
- data/AUTHORS.md +6 -0
- data/CHANGELOG.md +162 -0
- data/CONTRIBUTORS.md +10 -0
- data/EXPLOITATION.md +429 -0
- data/HACKING.md +101 -0
- data/LICENSE.md +341 -0
- data/README.md +350 -0
- data/Rakefile +86 -0
- data/bin/arachni +22 -0
- data/bin/arachni_web +77 -0
- data/bin/arachni_xmlrpc +21 -0
- data/bin/arachni_xmlrpcd +82 -0
- data/bin/arachni_xmlrpcd_monitor +74 -0
- data/conf/README.webui.yaml.txt +44 -0
- data/conf/webui.yaml +11 -0
- data/external/metasploit/LICENSE +24 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_exec.rb +142 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_path_traversal.rb +113 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_php_eval.rb +150 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_php_include.rb +141 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_sqlmap.rb +92 -0
- data/external/metasploit/plugins/arachni.rb +536 -0
- data/getoptslong.rb +241 -0
- data/lib/anemone.rb +2 -0
- data/lib/anemone/cookie_store.rb +35 -0
- data/lib/anemone/core.rb +371 -0
- data/lib/anemone/exceptions.rb +5 -0
- data/lib/anemone/http.rb +144 -0
- data/lib/anemone/page.rb +337 -0
- data/lib/anemone/page_store.rb +160 -0
- data/lib/anemone/storage.rb +34 -0
- data/lib/anemone/storage/base.rb +75 -0
- data/lib/anemone/storage/exceptions.rb +15 -0
- data/lib/anemone/storage/mongodb.rb +89 -0
- data/lib/anemone/storage/pstore.rb +50 -0
- data/lib/anemone/storage/redis.rb +90 -0
- data/lib/anemone/storage/tokyo_cabinet.rb +57 -0
- data/lib/anemone/tentacle.rb +40 -0
- data/lib/arachni.rb +16 -0
- data/lib/audit_store.rb +346 -0
- data/lib/component_manager.rb +293 -0
- data/lib/component_options.rb +395 -0
- data/lib/exceptions.rb +76 -0
- data/lib/framework.rb +637 -0
- data/lib/http.rb +809 -0
- data/lib/issue.rb +302 -0
- data/lib/module.rb +4 -0
- data/lib/module/auditor.rb +455 -0
- data/lib/module/base.rb +188 -0
- data/lib/module/element_db.rb +158 -0
- data/lib/module/key_filler.rb +87 -0
- data/lib/module/manager.rb +87 -0
- data/lib/module/output.rb +68 -0
- data/lib/module/trainer.rb +240 -0
- data/lib/module/utilities.rb +110 -0
- data/lib/options.rb +547 -0
- data/lib/parser.rb +2 -0
- data/lib/parser/auditable.rb +522 -0
- data/lib/parser/elements.rb +296 -0
- data/lib/parser/page.rb +149 -0
- data/lib/parser/parser.rb +717 -0
- data/lib/plugin.rb +4 -0
- data/lib/plugin/base.rb +110 -0
- data/lib/plugin/manager.rb +162 -0
- data/lib/report.rb +4 -0
- data/lib/report/base.rb +119 -0
- data/lib/report/manager.rb +92 -0
- data/lib/rpc/xml/client/base.rb +71 -0
- data/lib/rpc/xml/client/dispatcher.rb +49 -0
- data/lib/rpc/xml/client/instance.rb +88 -0
- data/lib/rpc/xml/server/base.rb +90 -0
- data/lib/rpc/xml/server/dispatcher.rb +357 -0
- data/lib/rpc/xml/server/framework.rb +206 -0
- data/lib/rpc/xml/server/instance.rb +191 -0
- data/lib/rpc/xml/server/module/manager.rb +46 -0
- data/lib/rpc/xml/server/options.rb +124 -0
- data/lib/rpc/xml/server/output.rb +299 -0
- data/lib/rpc/xml/server/plugin/manager.rb +58 -0
- data/lib/ruby.rb +5 -0
- data/lib/ruby/object.rb +32 -0
- data/lib/ruby/string.rb +74 -0
- data/lib/ruby/xmlrpc/server.rb +27 -0
- data/lib/spider.rb +200 -0
- data/lib/typhoeus/request.rb +91 -0
- data/lib/typhoeus/response.rb +34 -0
- data/lib/ui/cli/cli.rb +744 -0
- data/lib/ui/cli/output.rb +279 -0
- data/lib/ui/web/log.rb +82 -0
- data/lib/ui/web/output_stream.rb +94 -0
- data/lib/ui/web/report_manager.rb +222 -0
- data/lib/ui/web/server.rb +903 -0
- data/lib/ui/web/server/db/placeholder +0 -0
- data/lib/ui/web/server/public/banner.png +0 -0
- data/lib/ui/web/server/public/bodybg-small.png +0 -0
- data/lib/ui/web/server/public/bodybg.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/pbar-ani.gif +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_flat_0_aaaaaa_40x100.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_flat_75_ffffff_40x100.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_55_fbf9ee_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_65_ffffff_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_75_dadada_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_75_e6e6e6_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_95_fef1ec_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_highlight-soft_75_cccccc_1x100.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_222222_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_2e83ff_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_454545_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_888888_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_cd0a0a_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/jquery-ui-1.8.9.custom.css +573 -0
- data/lib/ui/web/server/public/favicon.ico +0 -0
- data/lib/ui/web/server/public/footer.jpg +0 -0
- data/lib/ui/web/server/public/icons/error.png +0 -0
- data/lib/ui/web/server/public/icons/info.png +0 -0
- data/lib/ui/web/server/public/icons/ok.png +0 -0
- data/lib/ui/web/server/public/icons/status.png +0 -0
- data/lib/ui/web/server/public/js/jquery-1.4.4.min.js +167 -0
- data/lib/ui/web/server/public/js/jquery-ui-1.8.9.custom.min.js +781 -0
- data/lib/ui/web/server/public/logo.png +0 -0
- data/lib/ui/web/server/public/nav-left.jpg +0 -0
- data/lib/ui/web/server/public/nav-right.jpg +0 -0
- data/lib/ui/web/server/public/nav-selected-left.jpg +0 -0
- data/lib/ui/web/server/public/nav-selected-right.jpg +0 -0
- data/lib/ui/web/server/public/reports/placeholder +1 -0
- data/lib/ui/web/server/public/sidebar-bottom.jpg +0 -0
- data/lib/ui/web/server/public/sidebar-h4.jpg +0 -0
- data/lib/ui/web/server/public/sidebar-top.jpg +0 -0
- data/lib/ui/web/server/public/spider.png +0 -0
- data/lib/ui/web/server/public/style.css +604 -0
- data/lib/ui/web/server/tmp/placeholder +0 -0
- data/lib/ui/web/server/views/dispatcher.erb +85 -0
- data/lib/ui/web/server/views/dispatcher_error.erb +14 -0
- data/lib/ui/web/server/views/error.erb +1 -0
- data/lib/ui/web/server/views/flash.erb +18 -0
- data/lib/ui/web/server/views/home.erb +14 -0
- data/lib/ui/web/server/views/instance.erb +213 -0
- data/lib/ui/web/server/views/layout.erb +95 -0
- data/lib/ui/web/server/views/log.erb +40 -0
- data/lib/ui/web/server/views/modules.erb +71 -0
- data/lib/ui/web/server/views/options.erb +23 -0
- data/lib/ui/web/server/views/output_results.erb +51 -0
- data/lib/ui/web/server/views/plugins.erb +42 -0
- data/lib/ui/web/server/views/report_formats.erb +30 -0
- data/lib/ui/web/server/views/reports.erb +55 -0
- data/lib/ui/web/server/views/settings.erb +120 -0
- data/lib/ui/web/server/views/welcome.erb +38 -0
- data/lib/ui/xmlrpc/dispatcher_monitor.rb +204 -0
- data/lib/ui/xmlrpc/xmlrpc.rb +843 -0
- data/logs/placeholder +0 -0
- data/metamodules/autothrottle.rb +74 -0
- data/metamodules/timeout_notice.rb +118 -0
- data/metamodules/uniformity.rb +98 -0
- data/modules/audit/code_injection.rb +136 -0
- data/modules/audit/code_injection_timing.rb +115 -0
- data/modules/audit/code_injection_timing/payloads.txt +4 -0
- data/modules/audit/csrf.rb +301 -0
- data/modules/audit/ldapi.rb +103 -0
- data/modules/audit/ldapi/errors.txt +26 -0
- data/modules/audit/os_cmd_injection.rb +103 -0
- data/modules/audit/os_cmd_injection/payloads.txt +2 -0
- data/modules/audit/os_cmd_injection_timing.rb +104 -0
- data/modules/audit/os_cmd_injection_timing/payloads.txt +3 -0
- data/modules/audit/path_traversal.rb +141 -0
- data/modules/audit/response_splitting.rb +105 -0
- data/modules/audit/rfi.rb +193 -0
- data/modules/audit/sqli.rb +120 -0
- data/modules/audit/sqli/regexp_ids.txt +90 -0
- data/modules/audit/sqli_blind_rdiff.rb +321 -0
- data/modules/audit/sqli_blind_timing.rb +103 -0
- data/modules/audit/sqli_blind_timing/payloads.txt +51 -0
- data/modules/audit/trainer.rb +89 -0
- data/modules/audit/unvalidated_redirect.rb +90 -0
- data/modules/audit/xpath.rb +104 -0
- data/modules/audit/xpath/errors.txt +26 -0
- data/modules/audit/xss.rb +99 -0
- data/modules/audit/xss_event.rb +134 -0
- data/modules/audit/xss_path.rb +125 -0
- data/modules/audit/xss_script_tag.rb +112 -0
- data/modules/audit/xss_tag.rb +112 -0
- data/modules/audit/xss_uri.rb +125 -0
- data/modules/recon/allowed_methods.rb +104 -0
- data/modules/recon/backdoors.rb +131 -0
- data/modules/recon/backdoors/filenames.txt +16 -0
- data/modules/recon/backup_files.rb +177 -0
- data/modules/recon/backup_files/extensions.txt +28 -0
- data/modules/recon/common_directories.rb +138 -0
- data/modules/recon/common_directories/directories.txt +265 -0
- data/modules/recon/common_files.rb +138 -0
- data/modules/recon/common_files/filenames.txt +17 -0
- data/modules/recon/directory_listing.rb +171 -0
- data/modules/recon/grep/captcha.rb +62 -0
- data/modules/recon/grep/credit_card.rb +85 -0
- data/modules/recon/grep/cvs_svn_users.rb +73 -0
- data/modules/recon/grep/emails.rb +59 -0
- data/modules/recon/grep/html_objects.rb +53 -0
- data/modules/recon/grep/private_ip.rb +54 -0
- data/modules/recon/grep/ssn.rb +53 -0
- data/modules/recon/htaccess_limit.rb +82 -0
- data/modules/recon/http_put.rb +95 -0
- data/modules/recon/interesting_responses.rb +118 -0
- data/modules/recon/unencrypted_password_forms.rb +119 -0
- data/modules/recon/webdav.rb +126 -0
- data/modules/recon/xst.rb +107 -0
- data/path_extractors/anchors.rb +35 -0
- data/path_extractors/forms.rb +35 -0
- data/path_extractors/frames.rb +38 -0
- data/path_extractors/generic.rb +39 -0
- data/path_extractors/links.rb +35 -0
- data/path_extractors/meta_refresh.rb +39 -0
- data/path_extractors/scripts.rb +37 -0
- data/path_extractors/sitemap.rb +31 -0
- data/plugins/autologin.rb +137 -0
- data/plugins/content_types.rb +90 -0
- data/plugins/cookie_collector.rb +99 -0
- data/plugins/form_dicattack.rb +185 -0
- data/plugins/healthmap.rb +94 -0
- data/plugins/http_dicattack.rb +133 -0
- data/plugins/metamodules.rb +118 -0
- data/plugins/proxy.rb +248 -0
- data/plugins/proxy/server.rb +66 -0
- data/plugins/waf_detector.rb +184 -0
- data/profiles/comprehensive.afp +74 -0
- data/profiles/full.afp +75 -0
- data/reports/afr.rb +59 -0
- data/reports/ap.rb +55 -0
- data/reports/html.rb +179 -0
- data/reports/html/default.erb +967 -0
- data/reports/metareport.rb +139 -0
- data/reports/metareport/arachni_metareport.rb +174 -0
- data/reports/plugin_formatters/html/content_types.rb +82 -0
- data/reports/plugin_formatters/html/cookie_collector.rb +66 -0
- data/reports/plugin_formatters/html/form_dicattack.rb +54 -0
- data/reports/plugin_formatters/html/healthmap.rb +76 -0
- data/reports/plugin_formatters/html/http_dicattack.rb +54 -0
- data/reports/plugin_formatters/html/metaformatters/timeout_notice.rb +65 -0
- data/reports/plugin_formatters/html/metaformatters/uniformity.rb +71 -0
- data/reports/plugin_formatters/html/metamodules.rb +93 -0
- data/reports/plugin_formatters/html/waf_detector.rb +54 -0
- data/reports/plugin_formatters/stdout/content_types.rb +73 -0
- data/reports/plugin_formatters/stdout/cookie_collector.rb +61 -0
- data/reports/plugin_formatters/stdout/form_dicattack.rb +52 -0
- data/reports/plugin_formatters/stdout/healthmap.rb +72 -0
- data/reports/plugin_formatters/stdout/http_dicattack.rb +53 -0
- data/reports/plugin_formatters/stdout/metaformatters/timeout_notice.rb +55 -0
- data/reports/plugin_formatters/stdout/metaformatters/uniformity.rb +68 -0
- data/reports/plugin_formatters/stdout/metamodules.rb +89 -0
- data/reports/plugin_formatters/stdout/waf_detector.rb +48 -0
- data/reports/plugin_formatters/xml/content_types.rb +91 -0
- data/reports/plugin_formatters/xml/cookie_collector.rb +70 -0
- data/reports/plugin_formatters/xml/form_dicattack.rb +57 -0
- data/reports/plugin_formatters/xml/healthmap.rb +82 -0
- data/reports/plugin_formatters/xml/http_dicattack.rb +57 -0
- data/reports/plugin_formatters/xml/metaformatters/timeout_notice.rb +67 -0
- data/reports/plugin_formatters/xml/metaformatters/uniformity.rb +82 -0
- data/reports/plugin_formatters/xml/metamodules.rb +91 -0
- data/reports/plugin_formatters/xml/waf_detector.rb +58 -0
- data/reports/stdout.rb +182 -0
- data/reports/txt.rb +77 -0
- data/reports/xml.rb +231 -0
- data/reports/xml/buffer.rb +98 -0
- metadata +516 -0
data/getoptslong.rb
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
=begin
|
|
2
|
+
Arachni
|
|
3
|
+
Copyright (c) 2010-2011 Tasos "Zapotek" Laskos <tasos.laskos@gmail.com>
|
|
4
|
+
|
|
5
|
+
This is free software; you can copy and distribute and modify
|
|
6
|
+
this program under the term of the GPL v2.0 License
|
|
7
|
+
(See LICENSE file for details)
|
|
8
|
+
|
|
9
|
+
=end
|
|
10
|
+
|
|
11
|
+
require 'getoptlong'
|
|
12
|
+
|
|
13
|
+
# Construct getops struct
|
|
14
|
+
opts = GetoptLong.new(
|
|
15
|
+
[ '--help', '-h', GetoptLong::NO_ARGUMENT ],
|
|
16
|
+
[ '--verbosity', '-v', GetoptLong::NO_ARGUMENT ],
|
|
17
|
+
[ '--only-positives', '-k', GetoptLong::NO_ARGUMENT ],
|
|
18
|
+
[ '--lsmod', GetoptLong::OPTIONAL_ARGUMENT ],
|
|
19
|
+
[ '--lsrep', GetoptLong::OPTIONAL_ARGUMENT ],
|
|
20
|
+
[ '--audit-links', '-g', GetoptLong::NO_ARGUMENT ],
|
|
21
|
+
[ '--audit-forms', '-p', GetoptLong::NO_ARGUMENT ],
|
|
22
|
+
[ '--audit-cookies', '-c', GetoptLong::NO_ARGUMENT ],
|
|
23
|
+
[ '--audit-cookie-jar', GetoptLong::NO_ARGUMENT ],
|
|
24
|
+
[ '--audit-headers', GetoptLong::NO_ARGUMENT ],
|
|
25
|
+
[ '--spider-first', GetoptLong::NO_ARGUMENT ],
|
|
26
|
+
[ '--obey-robots-txt', '-o', GetoptLong::NO_ARGUMENT ],
|
|
27
|
+
[ '--redundant', GetoptLong::REQUIRED_ARGUMENT ],
|
|
28
|
+
[ '--depth', '-d', GetoptLong::REQUIRED_ARGUMENT ],
|
|
29
|
+
[ '--redirect-limit', '-q', GetoptLong::REQUIRED_ARGUMENT ],
|
|
30
|
+
[ '--link-count', '-u', GetoptLong::REQUIRED_ARGUMENT ],
|
|
31
|
+
[ '--mods', '-m', GetoptLong::REQUIRED_ARGUMENT ],
|
|
32
|
+
[ '--report', GetoptLong::REQUIRED_ARGUMENT ],
|
|
33
|
+
[ '--repload', GetoptLong::REQUIRED_ARGUMENT ],
|
|
34
|
+
[ '--authed-by', GetoptLong::REQUIRED_ARGUMENT ],
|
|
35
|
+
[ '--load-profile', GetoptLong::REQUIRED_ARGUMENT ],
|
|
36
|
+
[ '--save-profile', GetoptLong::REQUIRED_ARGUMENT ],
|
|
37
|
+
[ '--show-profile', GetoptLong::NO_ARGUMENT ],
|
|
38
|
+
[ '--proxy', '-z', GetoptLong::REQUIRED_ARGUMENT ],
|
|
39
|
+
[ '--proxy-auth', '-x', GetoptLong::REQUIRED_ARGUMENT ],
|
|
40
|
+
[ '--proxy-type', '-y', GetoptLong::REQUIRED_ARGUMENT ],
|
|
41
|
+
[ '--cookie-jar', '-j', GetoptLong::REQUIRED_ARGUMENT ],
|
|
42
|
+
[ '--user-agent', '-b', GetoptLong::REQUIRED_ARGUMENT ],
|
|
43
|
+
[ '--exclude', '-e', GetoptLong::REQUIRED_ARGUMENT ],
|
|
44
|
+
[ '--include', '-i', GetoptLong::REQUIRED_ARGUMENT ],
|
|
45
|
+
[ '--exclude-cookie', GetoptLong::REQUIRED_ARGUMENT ],
|
|
46
|
+
[ '--http-req-limit', GetoptLong::REQUIRED_ARGUMENT ],
|
|
47
|
+
[ '--follow-subdomains', '-f', GetoptLong::NO_ARGUMENT ],
|
|
48
|
+
[ '--http-harvest-last', '-s', GetoptLong::NO_ARGUMENT ],
|
|
49
|
+
[ '--debug', '-w', GetoptLong::NO_ARGUMENT ],
|
|
50
|
+
[ '--server', GetoptLong::REQUIRED_ARGUMENT ],
|
|
51
|
+
[ '--plugin', GetoptLong::OPTIONAL_ARGUMENT ],
|
|
52
|
+
[ '--lsplug', GetoptLong::OPTIONAL_ARGUMENT ],
|
|
53
|
+
[ '--ssl', GetoptLong::NO_ARGUMENT ],
|
|
54
|
+
[ '--ssl-pkey', GetoptLong::REQUIRED_ARGUMENT ],
|
|
55
|
+
[ '--ssl-cert', GetoptLong::REQUIRED_ARGUMENT ],
|
|
56
|
+
[ '--ssl-ca', GetoptLong::REQUIRED_ARGUMENT ],
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
$:.unshift( File.expand_path( File.dirname( __FILE__ ) ) )
|
|
60
|
+
|
|
61
|
+
require 'lib/options'
|
|
62
|
+
options = Arachni::Options.instance
|
|
63
|
+
|
|
64
|
+
options.dir = Hash.new
|
|
65
|
+
options.dir['root'] = File.dirname( File.expand_path(__FILE__) ) + '/'
|
|
66
|
+
options.dir['modules'] = options.dir['root'] + 'modules/'
|
|
67
|
+
options.dir['reports'] = options.dir['root'] + 'reports/'
|
|
68
|
+
options.dir['plugins'] = options.dir['root'] + 'plugins/'
|
|
69
|
+
options.dir['lib'] = options.dir['root'] + 'lib/'
|
|
70
|
+
|
|
71
|
+
opts.quiet = true
|
|
72
|
+
|
|
73
|
+
# Parses a "name=value,name=value" component option string (the part after
# the ':' in --plugin and --report arguments) into a Hash.
#
# Only the first '=' separates a name from its value, so values may contain
# '=' themselves. A nil or empty string yields an empty Hash.
parse_component_opts = lambda do |opt_str|
    parsed = {}
    opt_str.to_s.split( ',' ).each do |pair|
        name, val = pair.split( '=', 2 )
        parsed[name] = val
    end
    parsed
end

# Copy every switch found on the command line onto the Options singleton.
#
# The strings matched below must be spelled exactly like the long option
# names declared in the GetoptLong table above, otherwise the switch is
# accepted by the parser but silently ignored here.
begin
    opts.each do |opt, arg|

        case opt

        when '--help'
            options.help = true

        when '--only-positives'
            options.only_positives = true

        when '--verbosity'
            options.arachni_verbose = true

        when '--debug'
            options.debug = true

        when '--spider-first'
            options.spider_first = true

        when '--plugin'
            # Format: <plugin>[:<name>=<value>,<name>=<value>...]
            plugin, opt_str = arg.split( ':', 2 )
            options.plugins[plugin] = parse_component_opts.call( opt_str )

        when '--redundant'
            # Format: <regexp>:<count>
            regexp, count = arg.to_s.split( /:/ )
            options.redundant << {
                'regexp' => Regexp.new( regexp ),
                'count'  => Integer( count ),
            }

        # Was spelled '--obey_robots_txt', which never matched the declared
        # '--obey-robots-txt' switch.
        when '--obey-robots-txt'
            options.obey_robots_txt = true

        when '--depth'
            options.depth_limit = arg.to_i

        when '--link-count'
            options.link_count_limit = arg.to_i

        when '--redirect-limit'
            options.redirect_limit = arg.to_i

        when '--lsmod'
            options.lsmod << Regexp.new( arg.to_s )

        when '--lsplug'
            options.lsplug << Regexp.new( arg.to_s )

        when '--lsrep'
            options.lsrep << Regexp.new( arg.to_s )

        when '--http-req-limit'
            options.http_req_limit = arg.to_i

        when '--audit-links'
            options.audit_links = true

        when '--audit-forms'
            options.audit_forms = true

        when '--audit-cookies'
            options.audit_cookies = true

        when '--audit-cookie-jar'
            options.audit_cookie_jar = true

        when '--audit-headers'
            options.audit_headers = true

        when '--mods'
            options.mods = arg.to_s.split( /,/ )

        when '--report'
            # Same format as --plugin; the split is limited to the first ':'
            # so that option values containing ':' are not truncated.
            report, opt_str = arg.split( ':', 2 )
            options.reports[report] = parse_component_opts.call( opt_str )

        when '--repload'
            options.repload = arg

        when '--save-profile'
            options.save_profile = arg

        when '--load-profile'
            options.load_profile << arg

        when '--show-profile'
            options.show_profile = true

        when '--authed-by'
            options.authed_by = arg

        when '--proxy'
            # Format: <address>:<port>
            options.proxy_addr, options.proxy_port =
                arg.to_s.split( /:/ )

        when '--proxy-auth'
            # Format: <user>:<password>
            options.proxy_user, options.proxy_pass =
                arg.to_s.split( /:/ )

        when '--proxy-type'
            options.proxy_type = arg.to_s

        when '--cookie-jar'
            options.cookie_jar = arg.to_s

        when '--user-agent'
            options.user_agent = arg.to_s

        when '--exclude'
            options.exclude << Regexp.new( arg )

        when '--include'
            options.include << Regexp.new( arg )

        when '--exclude-cookie'
            options.exclude_cookies << arg

        when '--follow-subdomains'
            options.follow_subdomains = true

        when '--http-harvest-last'
            options.http_harvest_last = true

        when '--ssl'
            options.ssl = true

        when '--ssl-pkey'
            options.ssl_pkey = arg.to_s

        when '--ssl-cert'
            options.ssl_cert = arg.to_s

        when '--ssl-ca'
            options.ssl_ca = arg.to_s

        when '--server'
            options.server = arg.to_s

        end
    end
rescue Exception => e
    # Any failure while parsing (unknown switch, bad regexp, bad integer...)
    # is printed and terminates the program.
    puts e.inspect
    exit
end
|
|
240
|
+
|
|
241
|
+
options.url = ARGV.shift
|
data/lib/anemone.rb
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
require 'delegate'
|
|
2
|
+
require 'webrick/cookie'
|
|
3
|
+
|
|
4
|
+
# Extends WEBrick cookies with an expiry check.
class WEBrick::Cookie
  # @return [Boolean] true when an expiry time is set and it lies in the past
  def expired?
    return false unless expires

    expires < Time.now
  end
end

module Anemone
  #
  # Hash-like store of WEBrick::Cookie objects keyed by cookie name.
  # All Hash methods are delegated to the internal cookie Hash.
  #
  class CookieStore < DelegateClass(Hash)

    # @param [Hash, nil] cookies optional name => value pairs to seed the store with
    def initialize(cookies = nil)
      @cookies = {}
      if cookies
        cookies.each do |name, value|
          @cookies[name] = WEBrick::Cookie.new(name, value)
        end
      end
      super(@cookies)
    end

    #
    # Parses a Set-Cookie header string and merges the resulting cookies
    # into the store. Any StandardError raised along the way is swallowed,
    # in which case nil is returned.
    #
    def merge!(set_cookie_str)
      parsed = {}
      WEBrick::Cookie.parse_set_cookies(set_cookie_str).each do |cookie|
        parsed[cookie.name] = cookie if cookie
      end
      @cookies.merge!(parsed)
    rescue
      nil
    end

    # @return [String] the non-expired cookies as "name=value" pairs joined by ';'
    def to_s
      live = @cookies.values.reject(&:expired?)
      live.map { |cookie| "#{cookie.name}=#{cookie.value}" }.join(';')
    end

  end
end
|
data/lib/anemone/core.rb
ADDED
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
=begin
|
|
2
|
+
Arachni
|
|
3
|
+
Copyright (c) 2010-2011 Tasos "Zapotek" Laskos <tasos.laskos@gmail.com>
|
|
4
|
+
|
|
5
|
+
This is free software; you can copy and distribute and modify
|
|
6
|
+
this program under the term of the GPL v2.0 License
|
|
7
|
+
(See LICENSE file for details)
|
|
8
|
+
|
|
9
|
+
=end
|
|
10
|
+
|
|
11
|
+
require 'thread'
|
|
12
|
+
require 'robots'
|
|
13
|
+
|
|
14
|
+
opts = Arachni::Options.instance
|
|
15
|
+
require opts.dir['lib'] + 'anemone/tentacle'
|
|
16
|
+
require opts.dir['lib'] + 'anemone/page'
|
|
17
|
+
require opts.dir['lib'] + 'anemone/exceptions'
|
|
18
|
+
require opts.dir['lib'] + 'anemone/page_store'
|
|
19
|
+
require opts.dir['lib'] + 'anemone/storage'
|
|
20
|
+
require opts.dir['lib'] + 'anemone/storage/base'
|
|
21
|
+
|
|
22
|
+
module Anemone
|
|
23
|
+
|
|
24
|
+
VERSION = '0.5.0';
|
|
25
|
+
|
|
26
|
+
#
|
|
27
|
+
# Convenience method to start a crawl
|
|
28
|
+
#
|
|
29
|
+
def Anemone.crawl(urls, options = {}, &block)
|
|
30
|
+
Core.crawl(urls, options, &block)
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
#
|
|
35
|
+
# Overrides Anemone's Core class method skip_link?( link )
|
|
36
|
+
# to support regexp matching to the whole url and enforce redundancy checks.
|
|
37
|
+
# <br/>
|
|
38
|
+
# Messages were also added to inform the user in case of redundant URLs.
|
|
39
|
+
#
|
|
40
|
+
# @author: Tasos "Zapotek" Laskos
|
|
41
|
+
# <tasos.laskos@gmail.com>
|
|
42
|
+
# <zapotek@segfault.gr>
|
|
43
|
+
# @version: 0.1
|
|
44
|
+
#
|
|
45
|
+
class Core
|
|
46
|
+
|
|
47
|
+
include Arachni::UI::Output
|
|
48
|
+
|
|
49
|
+
# PageStore storing all Page objects encountered during the crawl
|
|
50
|
+
attr_reader :pages
|
|
51
|
+
# Hash of options for the crawl
|
|
52
|
+
attr_reader :opts
|
|
53
|
+
|
|
54
|
+
DEFAULT_OPTS = {
|
|
55
|
+
# run 4 Tentacle threads to fetch pages
|
|
56
|
+
:threads => 4,
|
|
57
|
+
# disable verbose output
|
|
58
|
+
:verbose => false,
|
|
59
|
+
# don't throw away the page response body after scanning it for links
|
|
60
|
+
:discard_page_bodies => false,
|
|
61
|
+
# identify self as Anemone/VERSION
|
|
62
|
+
:user_agent => "Anemone/#{Anemone::VERSION}",
|
|
63
|
+
# no delay between requests
|
|
64
|
+
:delay => 0,
|
|
65
|
+
# don't obey the robots exclusion protocol
|
|
66
|
+
:obey_robots_txt => false,
|
|
67
|
+
# by default, don't limit the depth of the crawl
|
|
68
|
+
:depth_limit => false,
|
|
69
|
+
# number of times HTTP redirects will be followed
|
|
70
|
+
:redirect_limit => 5,
|
|
71
|
+
# storage engine defaults to Hash in +process_options+ if none specified
|
|
72
|
+
:storage => nil,
|
|
73
|
+
# Hash of cookie name => value to send with HTTP requests
|
|
74
|
+
:cookies => nil,
|
|
75
|
+
# accept cookies from the server and send them back?
|
|
76
|
+
:accept_cookies => false,
|
|
77
|
+
# skip any link with a query string? e.g. http://foo.com/?u=user
|
|
78
|
+
:skip_query_strings => false
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
# Create setter methods for all options to be called from the crawl block
|
|
82
|
+
DEFAULT_OPTS.keys.each do |key|
|
|
83
|
+
define_method "#{key}=" do |value|
|
|
84
|
+
@opts[key.to_sym] = value
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
#
|
|
89
|
+
# Initialize the crawl with starting *urls* (single URL or Array of URLs)
|
|
90
|
+
# and optional *block*
|
|
91
|
+
#
|
|
92
|
+
def initialize(urls, opts = {})
|
|
93
|
+
@urls = [urls].flatten.map{ |url| url.is_a?(URI) ? url : URI(url) }
|
|
94
|
+
@urls.each{ |url| url.path = '/' if url.path.empty? }
|
|
95
|
+
|
|
96
|
+
@tentacles = []
|
|
97
|
+
@on_every_page_blocks = []
|
|
98
|
+
@on_pages_like_blocks = Hash.new { |hash,key| hash[key] = [] }
|
|
99
|
+
@skip_link_patterns = []
|
|
100
|
+
@after_crawl_blocks = []
|
|
101
|
+
@opts = opts
|
|
102
|
+
|
|
103
|
+
yield self if block_given?
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
#
|
|
107
|
+
# Convenience method to start a new crawl
|
|
108
|
+
#
|
|
109
|
+
def self.crawl(urls, opts = {})
|
|
110
|
+
self.new(urls, opts) do |core|
|
|
111
|
+
yield core if block_given?
|
|
112
|
+
core.run
|
|
113
|
+
end
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
#
|
|
117
|
+
# Add a block to be executed on the PageStore after the crawl
|
|
118
|
+
# is finished
|
|
119
|
+
#
|
|
120
|
+
def after_crawl(&block)
|
|
121
|
+
@after_crawl_blocks << block
|
|
122
|
+
self
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
#
# Register one or more Regexp patterns for URLs which should not be
# followed. Nested arrays are flattened and nil entries are dropped.
#
# @return [self] to allow chaining
#
def skip_links_like(*patterns)
  usable = patterns.flatten.compact
  @skip_link_patterns.push(*usable)
  self
end
|
|
133
|
+
|
|
134
|
+
#
|
|
135
|
+
# Add a block to be executed on every Page as they are encountered
|
|
136
|
+
# during the crawl
|
|
137
|
+
#
|
|
138
|
+
def on_every_page(&block)
|
|
139
|
+
@on_every_page_blocks << block
|
|
140
|
+
self
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
#
# Add a block to be executed on Page objects with a URL matching
# one or more patterns.
#
# Each argument becomes its own key in @on_pages_like_blocks and the block
# is appended to that key's list. Calling without patterns registers nothing.
#
# @return [self] to allow chaining
#
def on_pages_like(*patterns, &block)
  # A splat parameter is always an Array, so no nil guard is needed.
  patterns.each { |pattern| @on_pages_like_blocks[pattern] << block }
  self
end
|
|
155
|
+
|
|
156
|
+
#
|
|
157
|
+
# Specify a block which will select which links to follow on each page.
|
|
158
|
+
# The block should return an Array of URI objects.
|
|
159
|
+
#
|
|
160
|
+
def focus_crawl(&block)
|
|
161
|
+
@focus_crawl_block = block
|
|
162
|
+
self
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
#
# Perform the crawl.
#
# Starts the Tentacle worker threads, feeds them the seed URLs and then
# consumes fetched pages from the page queue until both queues are drained
# and every worker is idle. Returns nil when none of the seed URLs may be
# visited, otherwise self.
#
def run
  process_options

  @urls.delete_if { |url| !visit_link?(url) }
  return if @urls.empty?

  link_queue = Queue.new
  page_queue = Queue.new

  @opts[:threads].times do
    @tentacles << Thread.new { Tentacle.new(link_queue, page_queue, @opts).run }
  end

  @urls.each { |url| link_queue.enq(url) }

  loop do
    page = page_queue.deq
    @pages.touch_key page.url
    puts "#{page.url} Queue: #{link_queue.size}" if @opts[:verbose]
    do_page_blocks page
    page.discard_doc! if @opts[:discard_page_bodies]

    links = links_to_follow page
    links.each { |link| link_queue << [link, page.url.dup, page.depth + 1] }
    @pages.touch_keys links

    @pages[page.url] = page

    # keep going while there is still work queued in either direction
    next unless link_queue.empty? && page_queue.empty?

    # wait until every worker is blocked waiting on the link queue
    Thread.pass until link_queue.num_waiting == @tentacles.size

    # a page may have arrived while we were waiting; if so, process it first
    next unless page_queue.empty?

    # we are done with the crawl, tell the threads to end
    @tentacles.size.times { link_queue << :END }
    break
  end

  @tentacles.each(&:join)
  do_after_crawl_blocks
  self
end
|
|
214
|
+
|
|
215
|
+
private
|
|
216
|
+
|
|
217
|
+
#
# Merges the user supplied options over DEFAULT_OPTS, sets up the page
# store (and the robots.txt checker when requested) and finally freezes
# the options.
#
def process_options
  @opts = DEFAULT_OPTS.merge(@opts)

  # a delay between requests forces a single worker thread
  @opts[:threads] = 1 if @opts[:delay] > 0

  backend = @opts[:storage] || Anemone::Storage.Hash
  @pages  = PageStore.new(Anemone::Storage::Base.new(backend))

  @robots = Robots.new(@opts[:user_agent]) if @opts[:obey_robots_txt]

  freeze_options
end
|
|
226
|
+
|
|
227
|
+
#
# Freeze the opts Hash so that no options can be modified
# once the crawl begins.
#
# The Hash itself, each of its values and (when a cookie Hash was given)
# each cookie value are frozen. Freezing is shallow: objects nested deeper
# inside a value stay mutable.
#
def freeze_options
  @opts.freeze
  @opts.each_value { |value| value.freeze }

  # :cookies is nil unless the caller supplied a Hash; check for that
  # explicitly instead of hiding every possible error behind `rescue nil`.
  cookies = @opts[:cookies]
  cookies.each_value { |value| value.freeze } if cookies.respond_to?(:each_value)
end
|
|
236
|
+
|
|
237
|
+
#
|
|
238
|
+
# Execute the after_crawl blocks
|
|
239
|
+
#
|
|
240
|
+
def do_after_crawl_blocks
|
|
241
|
+
@after_crawl_blocks.each { |block| block.call(@pages) }
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
#
# Execute the on_every_page blocks for *page*, followed by the
# on_pages_like blocks whose patterns match the page's URL.
#
# Differs from stock Anemone in that each @on_pages_like_blocks key is
# handed to matches_pattern? so it may hold more than one regular expression.
#
def do_page_blocks(page)
  @on_every_page_blocks.each { |callback| callback.call(page) }

  @on_pages_like_blocks.each do |patterns, callbacks|
    next unless matches_pattern?( page.url.to_s, patterns )

    callbacks.each { |callback| callback.call(page) }
  end
end
|
|
261
|
+
|
|
262
|
+
#
# Return an Array of links to follow from the given page.
# Candidates come from the focus_crawl() block when one was given,
# otherwise from page.links; only those accepted by visit_link? are kept,
# and copies of them are returned.
#
def links_to_follow(page)
  candidates =
    if @focus_crawl_block
      @focus_crawl_block.call(page)
    else
      page.links
    end

  accepted = candidates.select { |link| visit_link?(link, page) }
  accepted.map(&:dup)
end
|
|
271
|
+
|
|
272
|
+
#
# Returns a true value if *link* has not been visited already,
# and is not excluded by a skip_link pattern...
# and is not excluded because of its query string...
# and is not excluded by robots.txt...
# and is not deeper than the depth limit.
# Returns a false value otherwise.
#
def visit_link?(link, from_page = nil)
  return false if @pages.has_page?(link)
  return false if skip_link?(link)
  return false if skip_query_string?(link)

  permitted = allowed(link)
  return permitted unless permitted

  !too_deep?(from_page)
end
|
|
286
|
+
|
|
287
|
+
#
# Returns whatever @robots.allowed? answers for *link* when the
# +obey_robots_txt+ option is set. Always returns +true+ when we are
# not obeying robots.txt.
#
def allowed(link)
  return true unless @opts[:obey_robots_txt]

  @robots.allowed?(link)
end
|
|
295
|
+
|
|
296
|
+
#
# Returns +true+ if we are over the page depth limit.
# This only works when coming from a page and with the +depth_limit+ option set.
# When either is missing, always returns +false+.
#
def too_deep?(from_page)
  limit = @opts[:depth_limit]
  return false unless from_page && limit

  from_page.depth >= limit
end
|
|
307
|
+
|
|
308
|
+
#
|
|
309
|
+
# Returns +true+ if *link* should not be visited because
|
|
310
|
+
# it has a query string and +skip_query_strings+ is true.
|
|
311
|
+
#
|
|
312
|
+
def skip_query_string?(link)
|
|
313
|
+
@opts[:skip_query_strings] && link.query
|
|
314
|
+
end
|
|
315
|
+
|
|
316
|
+
#
# Returns +true+ if *link* should not be visited because its URL matches a
# skip_link pattern or because the count-down of a matching redundancy rule
# has reached zero.
#
# Every rule in @opts['redundant'] is a Hash holding a 'regexp' and a
# 'count'. A match against a rule whose count is not yet zero is reported
# and decrements that count, so calling this method has side effects.
#
# @param [#to_s] link
#
# @return [Bool]
#
def skip_link?( link )
  url = link.to_s

  # The 'redundant' option is not part of DEFAULT_OPTS, so it is nil unless
  # the caller supplied it; treat that as "no redundancy rules".
  ( @opts['redundant'] || [] ).each do |rule|
    next unless url =~ rule['regexp']

    if rule['count'] == 0
      print_verbose( 'Discarding redundant page: \'' + url + '\'' )
      return true
    end

    print_info( 'Matched redundancy rule: ' +
      rule['regexp'].to_s + ' for page \'' +
      url + '\'' )

    print_info( 'Count-down: ' + rule['count'].to_s )

    rule['count'] -= 1
  end

  @skip_link_patterns.any? { |pattern| url =~ pattern }
end
|
|
349
|
+
|
|
350
|
+
#
# Decides whether or not a url matches any of the regular expressions
# in "patterns".
#
# A single Regexp is accepted as well as an Array of them, because
# on_pages_like( /foo/ ) registers a bare Regexp as the key that is later
# passed in here.
#
# @param  [String]  url
# @param  [Array, Regexp]  patterns  regular expression(s) to try
#
# @return [Bool]
#
def matches_pattern?( url, patterns )
  Array( patterns ).any? { |pattern| url =~ pattern }
end
|
|
368
|
+
|
|
369
|
+
end
|
|
370
|
+
|
|
371
|
+
end
|