arachni 0.2.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/ACKNOWLEDGMENTS.md +14 -0
- data/AUTHORS.md +6 -0
- data/CHANGELOG.md +162 -0
- data/CONTRIBUTORS.md +10 -0
- data/EXPLOITATION.md +429 -0
- data/HACKING.md +101 -0
- data/LICENSE.md +341 -0
- data/README.md +350 -0
- data/Rakefile +86 -0
- data/bin/arachni +22 -0
- data/bin/arachni_web +77 -0
- data/bin/arachni_xmlrpc +21 -0
- data/bin/arachni_xmlrpcd +82 -0
- data/bin/arachni_xmlrpcd_monitor +74 -0
- data/conf/README.webui.yaml.txt +44 -0
- data/conf/webui.yaml +11 -0
- data/external/metasploit/LICENSE +24 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_exec.rb +142 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_path_traversal.rb +113 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_php_eval.rb +150 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_php_include.rb +141 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_sqlmap.rb +92 -0
- data/external/metasploit/plugins/arachni.rb +536 -0
- data/getoptslong.rb +241 -0
- data/lib/anemone.rb +2 -0
- data/lib/anemone/cookie_store.rb +35 -0
- data/lib/anemone/core.rb +371 -0
- data/lib/anemone/exceptions.rb +5 -0
- data/lib/anemone/http.rb +144 -0
- data/lib/anemone/page.rb +337 -0
- data/lib/anemone/page_store.rb +160 -0
- data/lib/anemone/storage.rb +34 -0
- data/lib/anemone/storage/base.rb +75 -0
- data/lib/anemone/storage/exceptions.rb +15 -0
- data/lib/anemone/storage/mongodb.rb +89 -0
- data/lib/anemone/storage/pstore.rb +50 -0
- data/lib/anemone/storage/redis.rb +90 -0
- data/lib/anemone/storage/tokyo_cabinet.rb +57 -0
- data/lib/anemone/tentacle.rb +40 -0
- data/lib/arachni.rb +16 -0
- data/lib/audit_store.rb +346 -0
- data/lib/component_manager.rb +293 -0
- data/lib/component_options.rb +395 -0
- data/lib/exceptions.rb +76 -0
- data/lib/framework.rb +637 -0
- data/lib/http.rb +809 -0
- data/lib/issue.rb +302 -0
- data/lib/module.rb +4 -0
- data/lib/module/auditor.rb +455 -0
- data/lib/module/base.rb +188 -0
- data/lib/module/element_db.rb +158 -0
- data/lib/module/key_filler.rb +87 -0
- data/lib/module/manager.rb +87 -0
- data/lib/module/output.rb +68 -0
- data/lib/module/trainer.rb +240 -0
- data/lib/module/utilities.rb +110 -0
- data/lib/options.rb +547 -0
- data/lib/parser.rb +2 -0
- data/lib/parser/auditable.rb +522 -0
- data/lib/parser/elements.rb +296 -0
- data/lib/parser/page.rb +149 -0
- data/lib/parser/parser.rb +717 -0
- data/lib/plugin.rb +4 -0
- data/lib/plugin/base.rb +110 -0
- data/lib/plugin/manager.rb +162 -0
- data/lib/report.rb +4 -0
- data/lib/report/base.rb +119 -0
- data/lib/report/manager.rb +92 -0
- data/lib/rpc/xml/client/base.rb +71 -0
- data/lib/rpc/xml/client/dispatcher.rb +49 -0
- data/lib/rpc/xml/client/instance.rb +88 -0
- data/lib/rpc/xml/server/base.rb +90 -0
- data/lib/rpc/xml/server/dispatcher.rb +357 -0
- data/lib/rpc/xml/server/framework.rb +206 -0
- data/lib/rpc/xml/server/instance.rb +191 -0
- data/lib/rpc/xml/server/module/manager.rb +46 -0
- data/lib/rpc/xml/server/options.rb +124 -0
- data/lib/rpc/xml/server/output.rb +299 -0
- data/lib/rpc/xml/server/plugin/manager.rb +58 -0
- data/lib/ruby.rb +5 -0
- data/lib/ruby/object.rb +32 -0
- data/lib/ruby/string.rb +74 -0
- data/lib/ruby/xmlrpc/server.rb +27 -0
- data/lib/spider.rb +200 -0
- data/lib/typhoeus/request.rb +91 -0
- data/lib/typhoeus/response.rb +34 -0
- data/lib/ui/cli/cli.rb +744 -0
- data/lib/ui/cli/output.rb +279 -0
- data/lib/ui/web/log.rb +82 -0
- data/lib/ui/web/output_stream.rb +94 -0
- data/lib/ui/web/report_manager.rb +222 -0
- data/lib/ui/web/server.rb +903 -0
- data/lib/ui/web/server/db/placeholder +0 -0
- data/lib/ui/web/server/public/banner.png +0 -0
- data/lib/ui/web/server/public/bodybg-small.png +0 -0
- data/lib/ui/web/server/public/bodybg.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/pbar-ani.gif +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_flat_0_aaaaaa_40x100.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_flat_75_ffffff_40x100.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_55_fbf9ee_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_65_ffffff_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_75_dadada_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_75_e6e6e6_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_95_fef1ec_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_highlight-soft_75_cccccc_1x100.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_222222_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_2e83ff_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_454545_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_888888_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_cd0a0a_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/jquery-ui-1.8.9.custom.css +573 -0
- data/lib/ui/web/server/public/favicon.ico +0 -0
- data/lib/ui/web/server/public/footer.jpg +0 -0
- data/lib/ui/web/server/public/icons/error.png +0 -0
- data/lib/ui/web/server/public/icons/info.png +0 -0
- data/lib/ui/web/server/public/icons/ok.png +0 -0
- data/lib/ui/web/server/public/icons/status.png +0 -0
- data/lib/ui/web/server/public/js/jquery-1.4.4.min.js +167 -0
- data/lib/ui/web/server/public/js/jquery-ui-1.8.9.custom.min.js +781 -0
- data/lib/ui/web/server/public/logo.png +0 -0
- data/lib/ui/web/server/public/nav-left.jpg +0 -0
- data/lib/ui/web/server/public/nav-right.jpg +0 -0
- data/lib/ui/web/server/public/nav-selected-left.jpg +0 -0
- data/lib/ui/web/server/public/nav-selected-right.jpg +0 -0
- data/lib/ui/web/server/public/reports/placeholder +1 -0
- data/lib/ui/web/server/public/sidebar-bottom.jpg +0 -0
- data/lib/ui/web/server/public/sidebar-h4.jpg +0 -0
- data/lib/ui/web/server/public/sidebar-top.jpg +0 -0
- data/lib/ui/web/server/public/spider.png +0 -0
- data/lib/ui/web/server/public/style.css +604 -0
- data/lib/ui/web/server/tmp/placeholder +0 -0
- data/lib/ui/web/server/views/dispatcher.erb +85 -0
- data/lib/ui/web/server/views/dispatcher_error.erb +14 -0
- data/lib/ui/web/server/views/error.erb +1 -0
- data/lib/ui/web/server/views/flash.erb +18 -0
- data/lib/ui/web/server/views/home.erb +14 -0
- data/lib/ui/web/server/views/instance.erb +213 -0
- data/lib/ui/web/server/views/layout.erb +95 -0
- data/lib/ui/web/server/views/log.erb +40 -0
- data/lib/ui/web/server/views/modules.erb +71 -0
- data/lib/ui/web/server/views/options.erb +23 -0
- data/lib/ui/web/server/views/output_results.erb +51 -0
- data/lib/ui/web/server/views/plugins.erb +42 -0
- data/lib/ui/web/server/views/report_formats.erb +30 -0
- data/lib/ui/web/server/views/reports.erb +55 -0
- data/lib/ui/web/server/views/settings.erb +120 -0
- data/lib/ui/web/server/views/welcome.erb +38 -0
- data/lib/ui/xmlrpc/dispatcher_monitor.rb +204 -0
- data/lib/ui/xmlrpc/xmlrpc.rb +843 -0
- data/logs/placeholder +0 -0
- data/metamodules/autothrottle.rb +74 -0
- data/metamodules/timeout_notice.rb +118 -0
- data/metamodules/uniformity.rb +98 -0
- data/modules/audit/code_injection.rb +136 -0
- data/modules/audit/code_injection_timing.rb +115 -0
- data/modules/audit/code_injection_timing/payloads.txt +4 -0
- data/modules/audit/csrf.rb +301 -0
- data/modules/audit/ldapi.rb +103 -0
- data/modules/audit/ldapi/errors.txt +26 -0
- data/modules/audit/os_cmd_injection.rb +103 -0
- data/modules/audit/os_cmd_injection/payloads.txt +2 -0
- data/modules/audit/os_cmd_injection_timing.rb +104 -0
- data/modules/audit/os_cmd_injection_timing/payloads.txt +3 -0
- data/modules/audit/path_traversal.rb +141 -0
- data/modules/audit/response_splitting.rb +105 -0
- data/modules/audit/rfi.rb +193 -0
- data/modules/audit/sqli.rb +120 -0
- data/modules/audit/sqli/regexp_ids.txt +90 -0
- data/modules/audit/sqli_blind_rdiff.rb +321 -0
- data/modules/audit/sqli_blind_timing.rb +103 -0
- data/modules/audit/sqli_blind_timing/payloads.txt +51 -0
- data/modules/audit/trainer.rb +89 -0
- data/modules/audit/unvalidated_redirect.rb +90 -0
- data/modules/audit/xpath.rb +104 -0
- data/modules/audit/xpath/errors.txt +26 -0
- data/modules/audit/xss.rb +99 -0
- data/modules/audit/xss_event.rb +134 -0
- data/modules/audit/xss_path.rb +125 -0
- data/modules/audit/xss_script_tag.rb +112 -0
- data/modules/audit/xss_tag.rb +112 -0
- data/modules/audit/xss_uri.rb +125 -0
- data/modules/recon/allowed_methods.rb +104 -0
- data/modules/recon/backdoors.rb +131 -0
- data/modules/recon/backdoors/filenames.txt +16 -0
- data/modules/recon/backup_files.rb +177 -0
- data/modules/recon/backup_files/extensions.txt +28 -0
- data/modules/recon/common_directories.rb +138 -0
- data/modules/recon/common_directories/directories.txt +265 -0
- data/modules/recon/common_files.rb +138 -0
- data/modules/recon/common_files/filenames.txt +17 -0
- data/modules/recon/directory_listing.rb +171 -0
- data/modules/recon/grep/captcha.rb +62 -0
- data/modules/recon/grep/credit_card.rb +85 -0
- data/modules/recon/grep/cvs_svn_users.rb +73 -0
- data/modules/recon/grep/emails.rb +59 -0
- data/modules/recon/grep/html_objects.rb +53 -0
- data/modules/recon/grep/private_ip.rb +54 -0
- data/modules/recon/grep/ssn.rb +53 -0
- data/modules/recon/htaccess_limit.rb +82 -0
- data/modules/recon/http_put.rb +95 -0
- data/modules/recon/interesting_responses.rb +118 -0
- data/modules/recon/unencrypted_password_forms.rb +119 -0
- data/modules/recon/webdav.rb +126 -0
- data/modules/recon/xst.rb +107 -0
- data/path_extractors/anchors.rb +35 -0
- data/path_extractors/forms.rb +35 -0
- data/path_extractors/frames.rb +38 -0
- data/path_extractors/generic.rb +39 -0
- data/path_extractors/links.rb +35 -0
- data/path_extractors/meta_refresh.rb +39 -0
- data/path_extractors/scripts.rb +37 -0
- data/path_extractors/sitemap.rb +31 -0
- data/plugins/autologin.rb +137 -0
- data/plugins/content_types.rb +90 -0
- data/plugins/cookie_collector.rb +99 -0
- data/plugins/form_dicattack.rb +185 -0
- data/plugins/healthmap.rb +94 -0
- data/plugins/http_dicattack.rb +133 -0
- data/plugins/metamodules.rb +118 -0
- data/plugins/proxy.rb +248 -0
- data/plugins/proxy/server.rb +66 -0
- data/plugins/waf_detector.rb +184 -0
- data/profiles/comprehensive.afp +74 -0
- data/profiles/full.afp +75 -0
- data/reports/afr.rb +59 -0
- data/reports/ap.rb +55 -0
- data/reports/html.rb +179 -0
- data/reports/html/default.erb +967 -0
- data/reports/metareport.rb +139 -0
- data/reports/metareport/arachni_metareport.rb +174 -0
- data/reports/plugin_formatters/html/content_types.rb +82 -0
- data/reports/plugin_formatters/html/cookie_collector.rb +66 -0
- data/reports/plugin_formatters/html/form_dicattack.rb +54 -0
- data/reports/plugin_formatters/html/healthmap.rb +76 -0
- data/reports/plugin_formatters/html/http_dicattack.rb +54 -0
- data/reports/plugin_formatters/html/metaformatters/timeout_notice.rb +65 -0
- data/reports/plugin_formatters/html/metaformatters/uniformity.rb +71 -0
- data/reports/plugin_formatters/html/metamodules.rb +93 -0
- data/reports/plugin_formatters/html/waf_detector.rb +54 -0
- data/reports/plugin_formatters/stdout/content_types.rb +73 -0
- data/reports/plugin_formatters/stdout/cookie_collector.rb +61 -0
- data/reports/plugin_formatters/stdout/form_dicattack.rb +52 -0
- data/reports/plugin_formatters/stdout/healthmap.rb +72 -0
- data/reports/plugin_formatters/stdout/http_dicattack.rb +53 -0
- data/reports/plugin_formatters/stdout/metaformatters/timeout_notice.rb +55 -0
- data/reports/plugin_formatters/stdout/metaformatters/uniformity.rb +68 -0
- data/reports/plugin_formatters/stdout/metamodules.rb +89 -0
- data/reports/plugin_formatters/stdout/waf_detector.rb +48 -0
- data/reports/plugin_formatters/xml/content_types.rb +91 -0
- data/reports/plugin_formatters/xml/cookie_collector.rb +70 -0
- data/reports/plugin_formatters/xml/form_dicattack.rb +57 -0
- data/reports/plugin_formatters/xml/healthmap.rb +82 -0
- data/reports/plugin_formatters/xml/http_dicattack.rb +57 -0
- data/reports/plugin_formatters/xml/metaformatters/timeout_notice.rb +67 -0
- data/reports/plugin_formatters/xml/metaformatters/uniformity.rb +82 -0
- data/reports/plugin_formatters/xml/metamodules.rb +91 -0
- data/reports/plugin_formatters/xml/waf_detector.rb +58 -0
- data/reports/stdout.rb +182 -0
- data/reports/txt.rb +77 -0
- data/reports/xml.rb +231 -0
- data/reports/xml/buffer.rb +98 -0
- metadata +516 -0
@@ -0,0 +1,296 @@
|
|
1
|
+
=begin
|
2
|
+
Arachni
|
3
|
+
Copyright (c) 2010-2011 Tasos "Zapotek" Laskos <tasos.laskos@gmail.com>
|
4
|
+
|
5
|
+
This is free software; you can copy and distribute and modify
|
6
|
+
this program under the term of the GPL v2.0 License
|
7
|
+
(See LICENSE file for details)
|
8
|
+
=end
|
9
|
+
|
10
|
+
module Arachni
|
11
|
+
|
12
|
+
opts = Arachni::Options.instance
|
13
|
+
require opts.dir['lib'] + 'parser/auditable'
|
14
|
+
|
15
|
+
class Parser
|
16
|
+
|
17
|
+
module Element
|
18
|
+
|
19
|
+
#
|
20
|
+
# Base element class.
|
21
|
+
#
|
22
|
+
# Should be extended/implemented by all HTML/HTTP modules.
|
23
|
+
#
|
24
|
+
# @author: Tasos "Zapotek" Laskos
|
25
|
+
# <tasos.laskos@gmail.com>
|
26
|
+
# <zapotek@segfault.gr>
|
27
|
+
# @version: 0.1
|
28
|
+
#
|
29
|
+
# @abstract
|
30
|
+
#
|
31
|
+
#
# Abstract parent of the page elements defined in this file
# (Link, Form, Cookie and Header).
#
# It keeps the URL of the owning page and a private copy of the raw element
# data. Subclasses fill in {#action}, {#method} and {#auditable} and
# override {#simple} and {#type}.
#
# @abstract
#
class Base < Arachni::Element::Auditable

    #
    # URL of the page that owns the element.
    #
    # @return [String]
    #
    attr_accessor :url

    #
    # URL the element points to and which should be audited,
    # e.g. 'href' for links, 'action' for forms.
    #
    # @return [String]
    #
    attr_accessor :action

    #
    # Inputs of the element that are to be audited; assigned by the
    # subclasses' constructors.
    #
    attr_accessor :auditable

    #
    # Relatively 'raw' hash holding the element's attributes, values, etc.
    #
    # @return [Hash]
    #
    attr_accessor :raw

    #
    # Method of the element; should name a method of
    # {Arachni::Module::HTTP} (get, post, cookie, header).
    #
    # NOTE: this accessor shadows Object#method on element instances.
    #
    # @see Arachni::Module::HTTP
    #
    # @return [String]
    #
    attr_accessor :method

    #
    # Stores copies of both arguments, so later changes to the caller's
    # objects do not leak into the element.
    #
    # @param [String] url {#url}
    # @param [Hash] raw {#raw}
    #
    def initialize( url, raw = {} )
        @url = url.dup
        @raw = raw.dup
    end

    #
    # String identifying the element; here simply the string form
    # of {#raw}.
    #
    # @return [String]
    #
    def id
        @raw.to_s
    end

    #
    # Subclasses must return a simple hash representation of themselves.
    # The base implementation returns nil.
    #
    def simple
    end

    #
    # Subclasses must return the element type, one of
    # {Arachni::Module::Auditor::Element}.
    # The base implementation returns nil.
    #
    def type
    end

end
|
106
|
+
|
107
|
+
#
# Link element: audited with GET requests against its 'href'.
#
class Link < Base

    #
    # @param [String] url URL of the page that owns the link
    # @param [Hash] raw raw link data; 'href' becomes {#action} and
    #                       'vars' becomes {#auditable}
    #
    def initialize( url, raw = {} )
        super( url, raw )

        @method    = 'get'
        @action    = @raw['href']
        @auditable = @raw['vars']
    end

    #
    # Performs the request through the auditor's HTTP interface using GET.
    #
    # @param [String] url
    # @param [Hash] opts passed through untouched
    #
    def http_request( url, opts )
        @auditor.http.get( url, opts )
    end

    #
    # @return [Hash] single-pair hash of {#action} => {#auditable}
    #
    def simple
        { @action => @auditable }
    end

    #
    # @return the LINK element type constant
    #
    def type
        Arachni::Module::Auditor::Element::LINK
    end

    #
    # Builds the identifier of one audit of this link. It is made of the
    # auditor's name, the link's path resolved against the audited page's
    # URL, the element type, the sorted input names, the injected value
    # and the timeout.
    #
    # @param [Object] injection_str value being injected
    # @param [Hash] opts only :timeout is read
    #
    # @return [String]
    #
    def audit_id( injection_str, opts = {} )
        input_names = auditable.keys.sort.to_s

        # only the path of the action is used, the query string is dropped
        target = URI( @auditor.page.url ).merge( URI( @action ).path ).to_s

        timeout = opts[:timeout] || ''

        "#{@auditor.class.info[:name]}:#{target}:#{self.type}:" +
            "#{input_names}=#{injection_str}:timeout=#{timeout}"
    end

end
|
142
|
+
|
143
|
+
|
144
|
+
#
# Form element: audited with POST or GET requests, depending on the
# form's method, against its 'action'.
#
class Form < Base

    include Arachni::Module::Utilities

    # Values of opts[:altered] that mark a submission of the form with
    # its original, respectively sample, values.
    FORM_VALUES_ORIGINAL = '__original_values__'
    FORM_VALUES_SAMPLE   = '__sample_values__'

    #
    # @param [String] url URL of the page that owns the form
    # @param [Hash] raw raw form data; 'attrs' holds the form
    #                       attributes and 'auditable' the list of inputs
    #
    def initialize( url, raw = {} )
        super( url, raw )

        # 'attrs' is missing when the form is built from the default
        # (empty) raw hash; don't blow up on it
        attrs = @raw['attrs'] || {}

        @action = attrs['action']
        @method = attrs['method']

        @auditable = simple['auditable'] || {}
    end

    #
    # Submits the form through the auditor's HTTP interface.
    #
    # When opts[:altered] marks a submission with original or sample
    # values, that submission is performed only once (unless
    # opts[:redundant] is set) and opts[:train] is forced to true.
    #
    # NOTE(review): opts[:train] is set on the hash the caller passed in,
    # exactly as before; confirm callers do not reuse that hash.
    #
    # @param [String] url
    # @param [Hash] opts
    #
    # @return the HTTP interface's return value, or nil when the
    #           original/sample submission has already been performed
    #
    def http_request( url, opts )
        altered = opts[:altered]

        values_kind =
            case altered
                when FORM_VALUES_ORIGINAL then 'original'
                when FORM_VALUES_SAMPLE   then 'sample'
            end

        if values_kind
            submission_id = audit_id( altered )

            return if !opts[:redundant] && audited?( submission_id )
            audited( submission_id )

            print_debug( "Submitting form with #{values_kind} values;" +
                ' overriding trainer option.' )
            opts[:train] = true
            print_debug_trainer( opts )
        end

        # anything that is not explicitly 'get' (including a missing
        # method) is submitted with POST
        if @method.to_s.downcase != 'get'
            @auditor.http.post( url, opts )
        else
            @auditor.http.get( url, opts )
        end
    end

    #
    # String identifying the form: its attributes followed by the names
    # of its inputs, skipping names that contain the seed.
    #
    # @return [String]
    #
    def id
        auditable.keys.inject( simple['attrs'].to_s ) do |str, name|
            name.substring?( seed ) ? str : str + name
        end
    end

    #
    # Simple representation of the form.
    #
    # @return [Hash] 'attrs' => form attributes and
    #                   'auditable' => name=>value hash of the named inputs;
    #                   an empty hash when the form has no inputs
    #
    def simple
        form = {}

        return form if !@raw || !@raw['auditable'] || @raw['auditable'].empty?

        form['attrs']     = @raw['attrs']
        form['auditable'] = {}

        @raw['auditable'].each do |item|
            # nameless inputs can't be submitted
            next if !item['name']
            form['auditable'][item['name']] = item['value']
        end

        form
    end

    #
    # @return the FORM element type constant
    #
    def type
        Arachni::Module::Auditor::Element::FORM
    end

end
|
235
|
+
|
236
|
+
#
# Cookie element: audited against the URL of the page that owns it.
#
class Cookie < Base

    #
    # @param [String] url URL of the page that owns the cookie
    # @param [Hash] raw raw cookie data; 'name' and 'value' are read
    #
    def initialize( url, raw = {} )
        super( url, raw )

        @method = 'cookie'
        @action = @url

        @auditable = { @raw['name'] => @raw['value'] }

        # the simple representation keeps the cookie even when it is
        # excluded from the audit below
        @simple = @auditable.dup

        excluded = lambda { |name| Options.instance.exclude_cookies.include?( name ) }
        @auditable.reject! { |name, _value| excluded.call( name ) }
    end

    #
    # Performs the request through the auditor's HTTP interface using
    # its cookie method.
    #
    # @param [String] url
    # @param [Hash] opts passed through untouched
    #
    def http_request( url, opts )
        @auditor.http.cookie( url, opts )
    end

    #
    # @return [Hash] name=>value pair of the cookie, as captured at
    #                   construction time
    #
    def simple
        @simple
    end

    #
    # @return the COOKIE element type constant
    #
    def type
        Arachni::Module::Auditor::Element::COOKIE
    end

end
|
266
|
+
|
267
|
+
#
# Header element: audited against the URL of the page that owns it.
#
class Header < Base

    #
    # @param [String] url URL of the page that owns the header
    # @param [Hash] raw header fields; used as-is as {#auditable}
    #
    def initialize( url, raw = {} )
        super( url, raw )

        @method    = 'header'
        @action    = @url
        @auditable = @raw
    end

    #
    # Performs the request through the auditor's HTTP interface using
    # its header method.
    #
    # @param [String] url
    # @param [Hash] opts passed through untouched
    #
    def http_request( url, opts )
        @auditor.http.header( url, opts )
    end

    #
    # @return [Hash] copy of the auditable header fields
    #
    def simple
        @auditable.dup
    end

    #
    # @return the HEADER element type constant
    #
    def type
        Arachni::Module::Auditor::Element::HEADER
    end

end
|
292
|
+
|
293
|
+
|
294
|
+
end
|
295
|
+
end
|
296
|
+
end
|
data/lib/parser/page.rb
ADDED
@@ -0,0 +1,149 @@
|
|
1
|
+
=begin
|
2
|
+
Arachni
|
3
|
+
Copyright (c) 2010-2011 Tasos "Zapotek" Laskos <tasos.laskos@gmail.com>
|
4
|
+
|
5
|
+
This is free software; you can copy and distribute and modify
|
6
|
+
this program under the term of the GPL v2.0 License
|
7
|
+
(See LICENSE file for details)
|
8
|
+
|
9
|
+
=end
|
10
|
+
|
11
|
+
module Arachni
|
12
|
+
|
13
|
+
class Parser
|
14
|
+
#
|
15
|
+
# Arachni::Page class
|
16
|
+
#
|
17
|
+
# It holds page data like elements, cookies, headers, etc...
|
18
|
+
#
|
19
|
+
# @author: Tasos "Zapotek" Laskos
|
20
|
+
# <tasos.laskos@gmail.com>
|
21
|
+
# <zapotek@segfault.gr>
|
22
|
+
# @version: 0.2
|
23
|
+
#
|
24
|
+
#
# Holds the data of a page: URL, response code, body, headers, elements
# (links, forms, cookies) and the cookiejar.
#
class Page

    #
    # @return [String] url of the page
    #
    attr_accessor :url

    #
    # @return [Fixnum] the HTTP response code
    #
    attr_accessor :code

    #
    # NOTE: this accessor shadows Object#method on page instances.
    #
    # @return [String] the request method that returned the page
    #
    attr_accessor :method

    #
    # @return [Hash] url variables
    #
    attr_accessor :query_vars

    #
    # @return [String] the HTML response
    #
    attr_accessor :html

    #
    # Request headers
    #
    # @return [Array<Arachni::Parser::Element::Header>]
    #
    attr_accessor :headers

    #
    # @return [Hash]
    #
    attr_accessor :response_headers

    #
    # @see Parser#links
    #
    # @return [Array<Arachni::Parser::Element::Link>]
    #
    attr_accessor :links

    #
    # @see Parser#forms
    #
    # @return [Array<Arachni::Parser::Element::Form>]
    #
    attr_accessor :forms

    #
    # @see Parser#cookies
    #
    # @return [Array<Arachni::Parser::Element::Cookie>]
    #
    attr_accessor :cookies

    #
    # Cookies extracted from the supplied cookiejar
    #
    # @return [Hash]
    #
    attr_accessor :cookiejar

    #
    # Assigns every key of 'opts' through the accessor of the same name.
    #
    # @param [Hash] opts attribute name => value
    #
    # @raise [NoMethodError] when a key has no matching accessor
    #
    def initialize( opts = {} )
        opts.each { |name, value| send( "#{name}=", value ) }
    end

    #
    # @return [String] the body of the page, same as {#html}
    #
    def body
        @html
    end

    #
    # Returns the simple representation of every form in {#forms}
    # (attributes plus auditable inputs as a name=>value hash).
    #
    # @return [Array] empty when no forms have been assigned
    #
    def forms_simple( )
        ( @forms || [] ).map { |form| form.simple }
    end

    #
    # Returns the simple representation of every link in {#links}.
    #
    # @return [Array] empty when no links have been assigned
    #
    def links_simple
        ( @links || [] ).map { |link| link.simple }
    end

    #
    # Returns cookies from {#cookies} merged into one name=>value hash;
    # later cookies override earlier ones with the same name.
    #
    # @return [Hash] empty when no cookies have been assigned
    #
    def cookies_simple
        ( @cookies || [] ).inject( {} ) do |merged, cookie|
            merged.merge!( cookie.simple )
        end
    end

end
|
148
|
+
end
|
149
|
+
end
|
@@ -0,0 +1,717 @@
|
|
1
|
+
=begin
|
2
|
+
Arachni
|
3
|
+
Copyright (c) 2010-2011 Tasos "Zapotek" Laskos <tasos.laskos@gmail.com>
|
4
|
+
|
5
|
+
This is free software; you can copy and distribute and modify
|
6
|
+
this program under the term of the GPL v2.0 License
|
7
|
+
(See LICENSE file for details)
|
8
|
+
=end
|
9
|
+
|
10
|
+
module Arachni
|
11
|
+
|
12
|
+
opts = Arachni::Options.instance
|
13
|
+
require opts.dir['lib'] + 'parser/elements'
|
14
|
+
require opts.dir['lib'] + 'parser/page'
|
15
|
+
require opts.dir['lib'] + 'module/utilities'
|
16
|
+
|
17
|
+
#
|
18
|
+
# Analyzer class
|
19
|
+
#
|
20
|
+
# Analyzes HTML code extracting forms, links and cookies
|
21
|
+
# depending on user opts.<br/>
|
22
|
+
#
|
23
|
+
# It grabs <b>all</b> element attributes not just URLs and variables.<br/>
|
24
|
+
# All URLs are converted to absolute and URLs outside the domain are ignored.<br/>
|
25
|
+
#
|
26
|
+
# === Forms
|
27
|
+
# Form analysis uses both regular expressions and the Nokogiri parser<br/>
|
28
|
+
# in order to be able to handle badly written HTML code, such as not closed<br/>
|
29
|
+
# tags and tag overlaps.
|
30
|
+
#
|
31
|
+
# In order to ease audits, in addition to parsing forms into data structures<br/>
|
32
|
+
# like "select" and "option", all auditable inputs are put under the<br/>
|
33
|
+
# "auditable" key.
|
34
|
+
#
|
35
|
+
# === Links
|
36
|
+
# Links are extracted using the Nokogiri parser.
|
37
|
+
#
|
38
|
+
# === Cookies
|
39
|
+
# Cookies are extracted from the HTTP headers and parsed by WEBrick::Cookie
|
40
|
+
#
|
41
|
+
# @author: Tasos "Zapotek" Laskos
|
42
|
+
# <tasos.laskos@gmail.com>
|
43
|
+
# <zapotek@segfault.gr>
|
44
|
+
# @version: 0.2
|
45
|
+
#
|
46
|
+
class Parser
|
47
|
+
|
48
|
+
include Arachni::Module::Utilities
|
49
|
+
|
50
|
+
#
|
51
|
+
# @return [String] the url of the page
|
52
|
+
#
|
53
|
+
attr_accessor :url
|
54
|
+
|
55
|
+
#
|
56
|
+
# Options instance
|
57
|
+
#
|
58
|
+
# @return [Options]
|
59
|
+
#
|
60
|
+
attr_reader :opts
|
61
|
+
|
62
|
+
#
|
63
|
+
# Constructor <br/>
|
64
|
+
# Instantiates Analyzer class with user options.
|
65
|
+
#
|
66
|
+
# @param [Options] opts
|
67
|
+
#
|
68
|
+
#
# Stores the options and pulls the URL, body and headers out of the response.
#
# @param [Options] opts
# @param [Object] res HTTP response; must respond to effective_url,
#                           body and headers_hash
#
def initialize( opts, res )
    @opts = opts

    @url, @html, @response_headers =
        res.effective_url, res.body, res.headers_hash
end
|
75
|
+
|
76
|
+
#
|
77
|
+
# Runs the Analyzer and extracts forms, links and cookies
|
78
|
+
#
|
79
|
+
# @return [Page]
|
80
|
+
#
|
81
|
+
#
# Parses the response and packs the results into a Page.
#
# Responses whose content type is a String not containing 'text' get a Page
# without any elements. For all others, forms and links are extracted only
# when the matching audit option is set.
#
# NOTE(review): the cookiejar is merged into the page cookies twice, once
# before and once after @opts.cookies is refreshed from the cookie jar file.
# The order is kept exactly as it was; confirm whether the double merge
# is intended.
#
# @return [Page]
#
def run

    # non text files won't contain any auditable elements
    content_type = Arachni::HTTP.content_type( @response_headers )
    if content_type.is_a?( String ) && !content_type.substring?( 'text' )
        return Page.new( {
            :url              => @url,
            :query_vars       => link_vars( @url ),
            :html             => @html,
            :headers          => [],
            :response_headers => @response_headers,
            :forms            => [],
            :links            => [],
            :cookies          => [],
            :cookiejar        => []
        } )
    end

    page_cookies = merge_with_cookiejar( cookies.flatten.uniq )

    jar = {}
    if @opts.cookie_jar
        jar = Arachni::HTTP.parse_cookiejar( @opts.cookie_jar )
        @opts.cookies = jar
    end

    # name=>value pairs of the cookies found so far; the jar wins on clashes
    prepared = page_cookies.inject( {} ) do |pairs, cookie|
        pairs.merge!( cookie.simple )
    end
    jar = prepared.merge( jar )

    Page.new( {
        :url              => @url,
        :query_vars       => link_vars( @url ),
        :html             => @html,
        :headers          => headers(),
        :response_headers => @response_headers,
        :forms            => @opts.audit_forms ? forms() : [],
        :links            => @opts.audit_links ? links() : [],
        :cookies          => merge_with_cookiestore( merge_with_cookiejar( page_cookies ) ),
        :cookiejar        => jar
    } )
end
|
124
|
+
|
125
|
+
#
# Lazily parses the HTML body with Nokogiri and caches the document.
#
# @return the parsed document, or nil when there is no body or
#           parsing raised an error
#
def doc
    return @doc if @doc
    return nil if !@html

    begin
        @doc = Nokogiri::HTML( @html )
    rescue StandardError
        # best effort: an unparsable body simply yields no document
        nil
    end
end
|
129
|
+
|
130
|
+
#
# Merges 'cookies' into the parser's cookie store and returns the store.
#
# An empty store simply adopts 'cookies'. Otherwise stored and new cookies
# are combined by name (new values win) and the store is rebuilt from
# the result.
#
# @param [Array<Element::Cookie>] cookies
#
# @return [Array<Element::Cookie>] the updated cookie store
#
def merge_with_cookiestore( cookies )
    @cookiestore ||= []

    return @cookiestore = cookies if @cookiestore.empty?

    pairs = {}
    [ @cookiestore, cookies ].each do |set|
        set.each { |cookie| pairs.merge!( cookie.simple ) }
    end

    @cookiestore = pairs.map do |name, value|
        Element::Cookie.new( @url, { 'name' => name, 'value' => value } )
    end
end
|
160
|
+
|
161
|
+
#
|
162
|
+
# Merges 'cookies' with the cookiejar and returns it as an array
|
163
|
+
#
|
164
|
+
# @param [Array<Hash>] cookies
|
165
|
+
#
|
166
|
+
# @return [Array<Element::Cookie>] the merged cookies
|
167
|
+
#
|
168
|
+
#
# Appends a cookie element for every name=>value pair of @opts.cookies
# to 'cookies'.
#
# The array is modified in place and also returned; it is returned untouched
# when @opts.cookies is not set.
#
# @param [Array<Element::Cookie>] cookies
#
# @return [Array<Element::Cookie>] the same array, with the jar's cookies appended
#
def merge_with_cookiejar( cookies )
    jar = @opts.cookies
    return cookies if !jar

    jar.each_pair do |name, value|
        cookies.push( Element::Cookie.new( @url, { 'name' => name, 'value' => value } ) )
    end

    cookies
end
|
182
|
+
|
183
|
+
|
184
|
+
#
|
185
|
+
# Returns a list of valid auditable HTTP header fields.
|
186
|
+
#
|
187
|
+
# It's more of a placeholder method, it doesn't actually analyze anything.<br/>
|
188
|
+
# It's a long shot that any of these will be vulnerable but better
|
189
|
+
# be safe than sorry.
|
190
|
+
#
|
191
|
+
# @return [Hash] HTTP header fields
|
192
|
+
#
|
193
|
+
#
# Builds the list of auditable HTTP header fields.
#
# Nothing is analyzed here: a fixed set of request headers is used, filled in
# with the user agent and 'authed by' options and the page URL as referer.
#
# @return [Array<Element::Header>] one element per header field
#
def headers( )
    fields = {
        'accept'          => 'text/html,application/xhtml+xml,application' +
            '/xml;q=0.9,*/*;q=0.8',
        'accept-charset'  => 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'accept-language' => 'en-gb,en;q=0.5',
        'accept-encoding' => 'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
        'from'            => @opts.authed_by  || '',
        'user-agent'      => @opts.user_agent || '',
        'referer'         => @url,
        'pragma'          => 'no-cache'
    }

    fields.map do |name, value|
        Element::Header.new( @url, { name => value } )
    end
end
|
212
|
+
|
213
|
+
# TODO: Add support for radio buttons.
|
214
|
+
#
|
215
|
+
# Extracts forms from HTML document
|
216
|
+
#
|
217
|
+
# @see #form_attrs
|
218
|
+
# @see #form_textareas
|
219
|
+
# @see #form_selects
|
220
|
+
# @see #form_inputs
|
221
|
+
# @see #merge_select_with_input
|
222
|
+
#
|
223
|
+
# @param [String] html
|
224
|
+
#
|
225
|
+
# @return [Array<Element::Form>] array of forms
|
226
|
+
#
|
227
|
+
# TODO: Add support for radio buttons.
#
# Extracts the forms of an HTML document.
#
# Properly closed forms are picked up first and cut out of the document;
# whatever follows a remaining '<form ' is then treated as an unclosed form.
# Forms pointing outside the domain and forms without auditable inputs
# are dropped.
#
# @see #form_attrs
# @see #form_textareas
# @see #form_selects
# @see #form_inputs
# @see #merge_select_with_input
#
# @param [String] html document to parse, defaults to the page body;
#                           it is never modified
#
# @return [Array<Element::Form>] array of forms; empty when the
#                                   document could not be scanned
#
def forms( html = nil )

    begin
        # work on a private, unfrozen copy: closed forms are cut out of it
        html = ( html || @html ).dup

        #
        # This imitates Firefox's behavior when it comes to
        # broken/unclosed form tags
        #

        # get properly closed forms
        chunks = html.scan( /<form(.*?)<\/form>/ixm ).flatten

        # now remove them from html...
        chunks.each { |chunk| html.gsub!( chunk, '' ) }

        # and get unclosed forms.
        chunks |= html.scan( /<form (.*)(?!<\/form>)/ixm ).flatten

    rescue StandardError
        # best effort: a body that can't be scanned has no forms
        return []
    end

    parsed = []
    chunks.each do |chunk|

        # a form whose attributes can't be extracted is treated as bare
        attrs = form_attrs( chunk ) || {}

        # forms without an action submit to the page itself
        # NOTE(review): URI.escape is obsolete and gone from Ruby 3.0+
        action = URI.escape( attrs['action'] || @url.to_s ).to_s
        attrs['action'] = to_absolute( action.clone ).to_s

        attrs['method'] = attrs['method'] ? attrs['method'].downcase : 'post'

        url = URI.parse( URI.escape( attrs['action'] ) )
        next if !in_domain?( url )

        raw = { 'attrs' => attrs }
        raw['textarea'] = form_textareas( chunk )
        raw['select']   = form_selects( chunk )
        raw['input']    = form_inputs( chunk )

        # merge the form elements to make auditing easier
        raw['auditable'] = merge_select_with_input(
            raw['input'] | raw['textarea'], raw['select']
        )

        parsed << Element::Form.new( @url, raw )
    end

    parsed.reject { |form| form.auditable.empty? }
end
|
302
|
+
|
303
|
+
#
# Collects the links (anchor tags) of the page.
#
# The href of every anchor is first made absolute via #to_absolute.
# An anchor is dropped when its href could not be resolved, matches an
# exclusion pattern, fails the inclusion patterns or points outside the
# domain of the page. The query-string variables of the surviving links
# are stored under the 'vars' key.
#
# @see #link_vars
#
# @return   [Array<Element::Link>]  of links
#
def links
    elements_by_name( 'a' ).inject( [] ) do |found, anchor|
        href = anchor['href'] = to_absolute( anchor['href'] )

        # the checks run in this exact order and stop at the first hit
        skip = !href || exclude?( href ) || !include?( href ) ||
            !in_domain?( URI.parse( href ) )

        unless skip
            anchor['vars'] = link_vars( href )
            found << Element::Link.new( @url, anchor )
        end

        found
    end
end
|
334
|
+
|
335
|
+
#
# Extracts cookies from the page.
#
# Two sources are consulted:
# * <meta http-equiv="Set-Cookie" content="name=value; ..."> tags of the document
# * the Set-Cookie response header(s) in @response_headers
#
# Cookies from both sources are returned; the header cookies no longer
# overwrite the ones found in meta tags.
#
# @return   [Array<Element::Cookie>]    of cookies, meta-tag cookies first
#
def cookies

    # cookies declared in the HTML itself
    meta_cookies = []
    begin
        doc.search( "//meta[@http-equiv]" ).each do |elem|
            next if elem['http-equiv'].downcase != 'set-cookie'

            # only the leading "name=value" part of the content is used
            k, v = elem['content'].split( ';' )[0].split( '=', 2 )
            meta_cookies << Element::Cookie.new( @url, { 'name' => k, 'value' => v } )
        end
    rescue
        # best effort: keep whatever was collected before the failure
    end

    parsed = []

    # NOTE(review): String#substring? is defined elsewhere in the project;
    # whether this check is case-sensitive cannot be told from here.
    if @response_headers.to_s.substring?( 'set-cookie' )
        begin
            # both spellings of the header name are looked up
            parsed << WEBrick::Cookie.parse_set_cookies( @response_headers['Set-Cookie'].to_s )
            parsed << WEBrick::Cookie.parse_set_cookies( @response_headers['set-cookie'].to_s )
        rescue
            # unparsable header: fall back to the meta-tag cookies only
            return meta_cookies
        end
    end

    header_cookies = parsed.flatten.uniq.map do |cookie|
        attrs = {}

        # turn every instance variable of the parsed cookie into a hash entry
        cookie.instance_variables.each do |var|
            val = cookie.instance_variable_get( var ).to_s.strip.gsub( /[\"\\\[\]]/, '' )

            # values equal to the seed (defined elsewhere) are ignored
            next if val == seed
            attrs[normalize_name( var )] = val
        end

        Element::Cookie.new( @url, attrs )
    end

    ( meta_cookies + header_cookies ).flatten
end
|
392
|
+
|
393
|
+
#
# Extracts variables and their values from the query string of a link.
#
# Everything after the first '?' is treated as the query string and
# each pair is split on its first '=' only, so values that themselves
# contain '=' or '?' are kept intact. Empty pairs (as in "a=1&&b=2") are
# skipped. A variable without a value ("flag" or "flag=") maps to nil.
# Variables whose value equals the seed (defined elsewhere) are ignored.
#
# @see #links
#
# @param    [String]    link
#
# @return   [Hash]    name=>value pairs
#
def link_vars( link )
    return {} if !link

    var_string = link.split( '?', 2 )[1]
    return {} if !var_string

    var_string.split( '&' ).inject( {} ) do |vars, pair|
        if !pair.empty?
            name, value = pair.split( '=', 2 )

            # keep reporting valueless variables as nil
            value = nil if value && value.empty?

            vars[name] = value if value != seed
        end

        vars
    end
end
|
420
|
+
|
421
|
+
#
# Converts relative URL *link* into an absolute URL based on the
# location of the page (@url).
#
# A link that parses and already has a host is returned untouched.
# Otherwise a trailing simple anchor ("#name") is removed, the link is
# escaped and merged with the URL of the page.
#
# @param    [String]    link
#
# @return   [String, nil]   absolute URL, nil if *link* is nil or
#                           the link can not be resolved
#
def to_absolute( link )

    begin
        return link if URI.parse( link ).host
    rescue StandardError
        return nil if link.nil?
        # unparsable as is (e.g. unescaped characters), retry escaped below
    end

    # remove anchor
    #
    # URI.encode/URI.escape were removed in Ruby 3.0, the parser's
    # escape method performs the same escaping
    link = URI::DEFAULT_PARSER.escape( link.to_s.gsub( /#[a-zA-Z0-9_-]*$/, '' ) )

    begin
        absolute = URI.parse( @url.to_s ).merge( URI( link ) )

        absolute.path = '/' if absolute.path.empty?
    rescue StandardError
        return
    end

    absolute.to_s
end
|
456
|
+
|
457
|
+
#
# Checks whether *uri* belongs to the same domain as the page.
#
# When the follow_subdomains option is set the comparison is made on the
# domains returned by #extract_domain, otherwise the hosts of the two
# normalized URLs must be identical.
#
# @param    [URI, String]   uri
#
# @return   [Bool]  +true+ if *uri* is in the same domain as the page,
#                   +false+ otherwise
#
def in_domain?( uri )
    candidate = URI.parse( normalize_url( uri.to_s ) )

    unless @opts.follow_subdomains
        return candidate.host == URI.parse( normalize_url( @url.to_s ) ).host
    end

    extract_domain( candidate ) == extract_domain( URI( @url.to_s ) )
end
|
470
|
+
|
471
|
+
#
# Extracts the domain from a URI object.
#
# The domain is made up of the last 2 labels of the host
# ("www.example.com" => "example.com"). Hosts which can not be
# shortened that way, single-label hosts like "localhost" and
# IPv4 addresses, are returned whole so that two different hosts
# never end up with the same domain.
#
# Multi-label public suffixes (like "co.uk") are not recognised.
#
# @param    [URI]   url
#
# @return   [String, false]     the domain, +false+ if the URI has no host
#
def extract_domain( url )
    host = url.host
    return false if !host

    # an IP address has no registrable domain, compare it as a whole
    return host if host =~ /\A\d{1,3}(\.\d{1,3}){3}\z/

    labels = host.split( '.' )
    return host if labels.length < 2

    labels[-2] + "." + labels[-1]
end
|
488
|
+
|
489
|
+
#
# Checks whether *url* matches any of the exclusion patterns
# in @opts.exclude.
#
# @param    [String, URI]   url
#
# @return   [Bool]  +true+ if at least one pattern matches, +false+ otherwise
#
def exclude?( url )
    url_str = url.to_s
    @opts.exclude.any? { |pattern| url_str =~ pattern }
end
|
497
|
+
|
498
|
+
#
# Checks whether *url* passes the inclusion patterns in @opts.include.
#
# When no inclusion patterns have been set every URL passes.
#
# @param    [String, URI]   url
#
# @return   [Bool]  +true+ if there are no patterns or at least one
#                   of them matches, +false+ otherwise
#
def include?( url )
    patterns = @opts.include
    return true if patterns.empty?

    url_str = url.to_s
    patterns.any? { |pattern| url_str =~ pattern }
end
|
507
|
+
|
508
|
+
|
509
|
+
private
|
510
|
+
|
511
|
+
#
# Merges an array of form inputs with an array of form selects.
#
# The attributes of each select are appended after the inputs.
# The 'value' of a select is set to the value of its first option;
# a select without any options is kept as is instead of raising an error.
#
# The 'attrs' hash of each select is updated in place, the *inputs*
# array itself is not modified.
#
# @see #forms
#
# @param    [Array]  inputs     form inputs
# @param    [Array]  selects    form selects, hashes with 'attrs' and 'options'
#
# @return   [Array]  merged array
#
def merge_select_with_input( inputs, selects )
    merged = inputs.to_a.dup

    selects.each do |select|
        first_option = select['options'] && select['options'][0]

        select['attrs']['value'] = first_option['value'] if first_option
        merged << select['attrs']
    end

    merged
end
|
538
|
+
|
539
|
+
|
540
|
+
#
# Parses the attributes inside the <form ....> tag.
#
# *form* is expected to be the HTML that follows "<form ", the attribute
# string is everything up to its first '>'. If the fragment contains
# no '>' at all the whole fragment is treated as the attribute string
# instead of raising an error.
#
# @see #forms
# @see #attrs_from_tag
#
# @param    [String]    form    HTML code for the form tag
#
# @return   [Hash<String, String>]  first element returned by #attrs_from_tag
#
def form_attrs( form )
    attr_html = form[/(.*?)>/m, 1] || form
    attrs_from_tag( 'form', '<form ' + attr_html + '>' )[0]
end
|
554
|
+
|
555
|
+
|
556
|
+
#
# Extracts HTML select elements, their attributes and their options.
#
# The options of each select are taken from the HTML between its opening
# tag and the closing </select> (or the next <select / the end of the
# HTML if it is never closed), so a select only gets its own options.
#
# @see #forms
# @see #form_selects_options
#
# @param    [String]    html
#
# @return   [Array<Hash>]   array of select elements, each one with
#                           its 'options' and 'attrs'
#
def form_selects( html )
    select_rx = /<select(.*?)>(.*?)(?=<select|<\/select>|\z)/im

    html.scan( select_rx ).map do |attr_html, body|
        {
            'options' => form_selects_options( body ),
            'attrs'   => attrs_from_tag( 'select', '<select ' + attr_html + '/>' )[0]
        }
    end
end
|
583
|
+
|
584
|
+
#
# Extracts the HTML option elements found in the given HTML
# and parses the attributes of each one.
#
# @see #forms
# @see #form_selects
#
# @param    [String]    html
#
# @return   [Array]     array of option elements, one entry per <option ...> tag
#
def form_selects_options( html )
    html.scan( /<option(.*?)>/ixm ).map do |captures|
        attrs_from_tag( 'option', '<option ' + captures[0] + '/>' )[0]
    end
end
|
609
|
+
|
610
|
+
#
# Extracts the HTML textarea elements found in the given HTML
# and parses the attributes of each one.
#
# @see #forms
#
# @param    [String]    html
#
# @return   [Array]     array of textarea elements, one entry per <textarea ...> tag
#
def form_textareas( html )
    html.scan( /<textarea(.*?)>/ixm ).map do |captures|
        attrs_from_tag( 'textarea', '<textarea ' + captures[0] + '/>' )[0]
    end
end
|
632
|
+
|
633
|
+
#
# Extracts the HTML input elements found in the given HTML
# and parses the attributes of each one.
#
# @see #forms
#
# @param    [String]    html    HTML code of the form
#
# @return   [Array]     array of input elements, one entry per <input ...> tag
#
def form_inputs( html )
    html.scan( /<input(.*?)>/ixm ).map do |captures|
        attrs_from_tag( 'input', '<input ' + captures[0] + '/>' )[0]
    end
end
|
655
|
+
|
656
|
+
#
# Gets the attributes of every *tag* element found in the given HTML.
#
# Attribute names are downcased, attributes whose value equals
# the seed (defined elsewhere) are left out.
#
# @param    [String]    tag     tag name (a, form, input)
# @param    [String]    html    HTML code of the tag
#
# @return   [Array<Hash<String, String>>]   one hash of attributes per element
#
def attrs_from_tag( tag, html )
    Nokogiri::HTML( html ).search( tag ).map do |node|
        attrs = {}

        node.each do |attribute|
            attr_name, attr_value = attribute[0], attribute[1]
            attrs[attr_name.downcase] = attr_value if attr_value != seed
        end

        attrs
    end
end
|
681
|
+
|
682
|
+
#
# Extracts elements by name from the HTML document of the page.
#
# Each element is represented by a hash of its own attributes, merged
# with the attributes of its direct children; on a name clash the
# attribute that is read last (that of a child) wins.
#
# Errors raised during the extraction are swallowed and whatever has been
# gathered up to that point is returned.
#
# @param    [String]    name    'form', 'a', 'div', etc.
#
# @return   [Array<Hash <String, String> >]     of elements
#
def elements_by_name( name )
    elements = []

    begin
        doc.search( name ).each_with_index do |node, idx|
            attrs = elements[idx] = {}

            # the element's own attributes...
            node.each { |attribute| attrs[attribute[0]] = attribute[1] }

            # ...followed by the attributes of its children
            node.children.each do |child|
                child.each { |attribute| attrs[attribute[0]] = attribute[1] }
            end
        end
    rescue StandardError
        # best effort, keep the partial results
    end

    elements
end
|
712
|
+
|
713
|
+
#
# Converts *name* to a String and strips all '@' characters from it,
# e.g. the instance variable name :@value becomes "value".
#
# @param    [#to_s]     name
#
# @return   [String]
#
def normalize_name( name )
    name.to_s.delete( '@' )
end
|
716
|
+
end
|
717
|
+
end
|