arachni 0.2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ACKNOWLEDGMENTS.md +14 -0
- data/AUTHORS.md +6 -0
- data/CHANGELOG.md +162 -0
- data/CONTRIBUTORS.md +10 -0
- data/EXPLOITATION.md +429 -0
- data/HACKING.md +101 -0
- data/LICENSE.md +341 -0
- data/README.md +350 -0
- data/Rakefile +86 -0
- data/bin/arachni +22 -0
- data/bin/arachni_web +77 -0
- data/bin/arachni_xmlrpc +21 -0
- data/bin/arachni_xmlrpcd +82 -0
- data/bin/arachni_xmlrpcd_monitor +74 -0
- data/conf/README.webui.yaml.txt +44 -0
- data/conf/webui.yaml +11 -0
- data/external/metasploit/LICENSE +24 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_exec.rb +142 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_path_traversal.rb +113 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_php_eval.rb +150 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_php_include.rb +141 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_sqlmap.rb +92 -0
- data/external/metasploit/plugins/arachni.rb +536 -0
- data/getoptslong.rb +241 -0
- data/lib/anemone.rb +2 -0
- data/lib/anemone/cookie_store.rb +35 -0
- data/lib/anemone/core.rb +371 -0
- data/lib/anemone/exceptions.rb +5 -0
- data/lib/anemone/http.rb +144 -0
- data/lib/anemone/page.rb +337 -0
- data/lib/anemone/page_store.rb +160 -0
- data/lib/anemone/storage.rb +34 -0
- data/lib/anemone/storage/base.rb +75 -0
- data/lib/anemone/storage/exceptions.rb +15 -0
- data/lib/anemone/storage/mongodb.rb +89 -0
- data/lib/anemone/storage/pstore.rb +50 -0
- data/lib/anemone/storage/redis.rb +90 -0
- data/lib/anemone/storage/tokyo_cabinet.rb +57 -0
- data/lib/anemone/tentacle.rb +40 -0
- data/lib/arachni.rb +16 -0
- data/lib/audit_store.rb +346 -0
- data/lib/component_manager.rb +293 -0
- data/lib/component_options.rb +395 -0
- data/lib/exceptions.rb +76 -0
- data/lib/framework.rb +637 -0
- data/lib/http.rb +809 -0
- data/lib/issue.rb +302 -0
- data/lib/module.rb +4 -0
- data/lib/module/auditor.rb +455 -0
- data/lib/module/base.rb +188 -0
- data/lib/module/element_db.rb +158 -0
- data/lib/module/key_filler.rb +87 -0
- data/lib/module/manager.rb +87 -0
- data/lib/module/output.rb +68 -0
- data/lib/module/trainer.rb +240 -0
- data/lib/module/utilities.rb +110 -0
- data/lib/options.rb +547 -0
- data/lib/parser.rb +2 -0
- data/lib/parser/auditable.rb +522 -0
- data/lib/parser/elements.rb +296 -0
- data/lib/parser/page.rb +149 -0
- data/lib/parser/parser.rb +717 -0
- data/lib/plugin.rb +4 -0
- data/lib/plugin/base.rb +110 -0
- data/lib/plugin/manager.rb +162 -0
- data/lib/report.rb +4 -0
- data/lib/report/base.rb +119 -0
- data/lib/report/manager.rb +92 -0
- data/lib/rpc/xml/client/base.rb +71 -0
- data/lib/rpc/xml/client/dispatcher.rb +49 -0
- data/lib/rpc/xml/client/instance.rb +88 -0
- data/lib/rpc/xml/server/base.rb +90 -0
- data/lib/rpc/xml/server/dispatcher.rb +357 -0
- data/lib/rpc/xml/server/framework.rb +206 -0
- data/lib/rpc/xml/server/instance.rb +191 -0
- data/lib/rpc/xml/server/module/manager.rb +46 -0
- data/lib/rpc/xml/server/options.rb +124 -0
- data/lib/rpc/xml/server/output.rb +299 -0
- data/lib/rpc/xml/server/plugin/manager.rb +58 -0
- data/lib/ruby.rb +5 -0
- data/lib/ruby/object.rb +32 -0
- data/lib/ruby/string.rb +74 -0
- data/lib/ruby/xmlrpc/server.rb +27 -0
- data/lib/spider.rb +200 -0
- data/lib/typhoeus/request.rb +91 -0
- data/lib/typhoeus/response.rb +34 -0
- data/lib/ui/cli/cli.rb +744 -0
- data/lib/ui/cli/output.rb +279 -0
- data/lib/ui/web/log.rb +82 -0
- data/lib/ui/web/output_stream.rb +94 -0
- data/lib/ui/web/report_manager.rb +222 -0
- data/lib/ui/web/server.rb +903 -0
- data/lib/ui/web/server/db/placeholder +0 -0
- data/lib/ui/web/server/public/banner.png +0 -0
- data/lib/ui/web/server/public/bodybg-small.png +0 -0
- data/lib/ui/web/server/public/bodybg.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/pbar-ani.gif +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_flat_0_aaaaaa_40x100.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_flat_75_ffffff_40x100.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_55_fbf9ee_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_65_ffffff_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_75_dadada_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_75_e6e6e6_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_95_fef1ec_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_highlight-soft_75_cccccc_1x100.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_222222_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_2e83ff_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_454545_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_888888_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_cd0a0a_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/jquery-ui-1.8.9.custom.css +573 -0
- data/lib/ui/web/server/public/favicon.ico +0 -0
- data/lib/ui/web/server/public/footer.jpg +0 -0
- data/lib/ui/web/server/public/icons/error.png +0 -0
- data/lib/ui/web/server/public/icons/info.png +0 -0
- data/lib/ui/web/server/public/icons/ok.png +0 -0
- data/lib/ui/web/server/public/icons/status.png +0 -0
- data/lib/ui/web/server/public/js/jquery-1.4.4.min.js +167 -0
- data/lib/ui/web/server/public/js/jquery-ui-1.8.9.custom.min.js +781 -0
- data/lib/ui/web/server/public/logo.png +0 -0
- data/lib/ui/web/server/public/nav-left.jpg +0 -0
- data/lib/ui/web/server/public/nav-right.jpg +0 -0
- data/lib/ui/web/server/public/nav-selected-left.jpg +0 -0
- data/lib/ui/web/server/public/nav-selected-right.jpg +0 -0
- data/lib/ui/web/server/public/reports/placeholder +1 -0
- data/lib/ui/web/server/public/sidebar-bottom.jpg +0 -0
- data/lib/ui/web/server/public/sidebar-h4.jpg +0 -0
- data/lib/ui/web/server/public/sidebar-top.jpg +0 -0
- data/lib/ui/web/server/public/spider.png +0 -0
- data/lib/ui/web/server/public/style.css +604 -0
- data/lib/ui/web/server/tmp/placeholder +0 -0
- data/lib/ui/web/server/views/dispatcher.erb +85 -0
- data/lib/ui/web/server/views/dispatcher_error.erb +14 -0
- data/lib/ui/web/server/views/error.erb +1 -0
- data/lib/ui/web/server/views/flash.erb +18 -0
- data/lib/ui/web/server/views/home.erb +14 -0
- data/lib/ui/web/server/views/instance.erb +213 -0
- data/lib/ui/web/server/views/layout.erb +95 -0
- data/lib/ui/web/server/views/log.erb +40 -0
- data/lib/ui/web/server/views/modules.erb +71 -0
- data/lib/ui/web/server/views/options.erb +23 -0
- data/lib/ui/web/server/views/output_results.erb +51 -0
- data/lib/ui/web/server/views/plugins.erb +42 -0
- data/lib/ui/web/server/views/report_formats.erb +30 -0
- data/lib/ui/web/server/views/reports.erb +55 -0
- data/lib/ui/web/server/views/settings.erb +120 -0
- data/lib/ui/web/server/views/welcome.erb +38 -0
- data/lib/ui/xmlrpc/dispatcher_monitor.rb +204 -0
- data/lib/ui/xmlrpc/xmlrpc.rb +843 -0
- data/logs/placeholder +0 -0
- data/metamodules/autothrottle.rb +74 -0
- data/metamodules/timeout_notice.rb +118 -0
- data/metamodules/uniformity.rb +98 -0
- data/modules/audit/code_injection.rb +136 -0
- data/modules/audit/code_injection_timing.rb +115 -0
- data/modules/audit/code_injection_timing/payloads.txt +4 -0
- data/modules/audit/csrf.rb +301 -0
- data/modules/audit/ldapi.rb +103 -0
- data/modules/audit/ldapi/errors.txt +26 -0
- data/modules/audit/os_cmd_injection.rb +103 -0
- data/modules/audit/os_cmd_injection/payloads.txt +2 -0
- data/modules/audit/os_cmd_injection_timing.rb +104 -0
- data/modules/audit/os_cmd_injection_timing/payloads.txt +3 -0
- data/modules/audit/path_traversal.rb +141 -0
- data/modules/audit/response_splitting.rb +105 -0
- data/modules/audit/rfi.rb +193 -0
- data/modules/audit/sqli.rb +120 -0
- data/modules/audit/sqli/regexp_ids.txt +90 -0
- data/modules/audit/sqli_blind_rdiff.rb +321 -0
- data/modules/audit/sqli_blind_timing.rb +103 -0
- data/modules/audit/sqli_blind_timing/payloads.txt +51 -0
- data/modules/audit/trainer.rb +89 -0
- data/modules/audit/unvalidated_redirect.rb +90 -0
- data/modules/audit/xpath.rb +104 -0
- data/modules/audit/xpath/errors.txt +26 -0
- data/modules/audit/xss.rb +99 -0
- data/modules/audit/xss_event.rb +134 -0
- data/modules/audit/xss_path.rb +125 -0
- data/modules/audit/xss_script_tag.rb +112 -0
- data/modules/audit/xss_tag.rb +112 -0
- data/modules/audit/xss_uri.rb +125 -0
- data/modules/recon/allowed_methods.rb +104 -0
- data/modules/recon/backdoors.rb +131 -0
- data/modules/recon/backdoors/filenames.txt +16 -0
- data/modules/recon/backup_files.rb +177 -0
- data/modules/recon/backup_files/extensions.txt +28 -0
- data/modules/recon/common_directories.rb +138 -0
- data/modules/recon/common_directories/directories.txt +265 -0
- data/modules/recon/common_files.rb +138 -0
- data/modules/recon/common_files/filenames.txt +17 -0
- data/modules/recon/directory_listing.rb +171 -0
- data/modules/recon/grep/captcha.rb +62 -0
- data/modules/recon/grep/credit_card.rb +85 -0
- data/modules/recon/grep/cvs_svn_users.rb +73 -0
- data/modules/recon/grep/emails.rb +59 -0
- data/modules/recon/grep/html_objects.rb +53 -0
- data/modules/recon/grep/private_ip.rb +54 -0
- data/modules/recon/grep/ssn.rb +53 -0
- data/modules/recon/htaccess_limit.rb +82 -0
- data/modules/recon/http_put.rb +95 -0
- data/modules/recon/interesting_responses.rb +118 -0
- data/modules/recon/unencrypted_password_forms.rb +119 -0
- data/modules/recon/webdav.rb +126 -0
- data/modules/recon/xst.rb +107 -0
- data/path_extractors/anchors.rb +35 -0
- data/path_extractors/forms.rb +35 -0
- data/path_extractors/frames.rb +38 -0
- data/path_extractors/generic.rb +39 -0
- data/path_extractors/links.rb +35 -0
- data/path_extractors/meta_refresh.rb +39 -0
- data/path_extractors/scripts.rb +37 -0
- data/path_extractors/sitemap.rb +31 -0
- data/plugins/autologin.rb +137 -0
- data/plugins/content_types.rb +90 -0
- data/plugins/cookie_collector.rb +99 -0
- data/plugins/form_dicattack.rb +185 -0
- data/plugins/healthmap.rb +94 -0
- data/plugins/http_dicattack.rb +133 -0
- data/plugins/metamodules.rb +118 -0
- data/plugins/proxy.rb +248 -0
- data/plugins/proxy/server.rb +66 -0
- data/plugins/waf_detector.rb +184 -0
- data/profiles/comprehensive.afp +74 -0
- data/profiles/full.afp +75 -0
- data/reports/afr.rb +59 -0
- data/reports/ap.rb +55 -0
- data/reports/html.rb +179 -0
- data/reports/html/default.erb +967 -0
- data/reports/metareport.rb +139 -0
- data/reports/metareport/arachni_metareport.rb +174 -0
- data/reports/plugin_formatters/html/content_types.rb +82 -0
- data/reports/plugin_formatters/html/cookie_collector.rb +66 -0
- data/reports/plugin_formatters/html/form_dicattack.rb +54 -0
- data/reports/plugin_formatters/html/healthmap.rb +76 -0
- data/reports/plugin_formatters/html/http_dicattack.rb +54 -0
- data/reports/plugin_formatters/html/metaformatters/timeout_notice.rb +65 -0
- data/reports/plugin_formatters/html/metaformatters/uniformity.rb +71 -0
- data/reports/plugin_formatters/html/metamodules.rb +93 -0
- data/reports/plugin_formatters/html/waf_detector.rb +54 -0
- data/reports/plugin_formatters/stdout/content_types.rb +73 -0
- data/reports/plugin_formatters/stdout/cookie_collector.rb +61 -0
- data/reports/plugin_formatters/stdout/form_dicattack.rb +52 -0
- data/reports/plugin_formatters/stdout/healthmap.rb +72 -0
- data/reports/plugin_formatters/stdout/http_dicattack.rb +53 -0
- data/reports/plugin_formatters/stdout/metaformatters/timeout_notice.rb +55 -0
- data/reports/plugin_formatters/stdout/metaformatters/uniformity.rb +68 -0
- data/reports/plugin_formatters/stdout/metamodules.rb +89 -0
- data/reports/plugin_formatters/stdout/waf_detector.rb +48 -0
- data/reports/plugin_formatters/xml/content_types.rb +91 -0
- data/reports/plugin_formatters/xml/cookie_collector.rb +70 -0
- data/reports/plugin_formatters/xml/form_dicattack.rb +57 -0
- data/reports/plugin_formatters/xml/healthmap.rb +82 -0
- data/reports/plugin_formatters/xml/http_dicattack.rb +57 -0
- data/reports/plugin_formatters/xml/metaformatters/timeout_notice.rb +67 -0
- data/reports/plugin_formatters/xml/metaformatters/uniformity.rb +82 -0
- data/reports/plugin_formatters/xml/metamodules.rb +91 -0
- data/reports/plugin_formatters/xml/waf_detector.rb +58 -0
- data/reports/stdout.rb +182 -0
- data/reports/txt.rb +77 -0
- data/reports/xml.rb +231 -0
- data/reports/xml/buffer.rb +98 -0
- metadata +516 -0
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
=begin
|
|
2
|
+
Arachni
|
|
3
|
+
Copyright (c) 2010-2011 Tasos "Zapotek" Laskos <tasos.laskos@gmail.com>
|
|
4
|
+
|
|
5
|
+
This is free software; you can copy and distribute and modify
|
|
6
|
+
this program under the term of the GPL v2.0 License
|
|
7
|
+
(See LICENSE file for details)
|
|
8
|
+
=end
|
|
9
|
+
|
|
10
|
+
module Arachni
|
|
11
|
+
|
|
12
|
+
opts = Arachni::Options.instance
|
|
13
|
+
require opts.dir['lib'] + 'parser/auditable'
|
|
14
|
+
|
|
15
|
+
class Parser
|
|
16
|
+
|
|
17
|
+
module Element
|
|
18
|
+
|
|
19
|
+
#
|
|
20
|
+
# Base element class.
|
|
21
|
+
#
|
|
22
|
+
# Should be extended/implemented by all HTML/HTTP modules.
|
|
23
|
+
#
|
|
24
|
+
# @author: Tasos "Zapotek" Laskos
|
|
25
|
+
# <tasos.laskos@gmail.com>
|
|
26
|
+
# <zapotek@segfault.gr>
|
|
27
|
+
# @version: 0.1
|
|
28
|
+
#
|
|
29
|
+
# @abstract
|
|
30
|
+
#
|
|
31
|
+
class Base < Arachni::Element::Auditable

    #
    # URL of the page that owns the element.
    #
    # @return    [String]
    #
    attr_accessor :url

    #
    # URL the element points to and which should be audited,
    # e.g. 'href' for links, 'action' for forms.
    #
    # @return    [String]
    #
    attr_accessor :action

    # Inputs of the element that can be audited; populated by the subclasses.
    attr_accessor :auditable

    #
    # Relatively 'raw' hash holding the element's attributes, values, etc.
    #
    # @return    [Hash]
    #
    attr_accessor :raw

    #
    # Method of the element, e.g. get, post, cookie, header.
    #
    # NOTE: this accessor is named +method+ and therefore shadows
    # Object#method on instances of this class.
    #
    # @return    [String]
    #
    attr_accessor :method

    #
    # Stores private copies of both arguments so that later changes to the
    # caller's objects do not leak into the element.
    #
    # @param    [String]    url     {#url}
    # @param    [Hash]      raw     {#raw}
    #
    def initialize( url, raw = {} )
        @raw, @url = raw.dup, url.dup
    end

    #
    # String identifying this element; by default the string form of {#raw}.
    #
    # @return    [String]
    #
    def id
        @raw.to_s
    end

    #
    # Placeholder for the simple hash representation of the element;
    # subclasses are expected to override it.
    #
    # @return    [nil]
    #
    def simple
        nil
    end

    #
    # Placeholder for the element type; subclasses are expected to override it.
    #
    # @return    [nil]
    #
    def type
        nil
    end

end
|
|
106
|
+
|
|
107
|
+
class Link < Base

    #
    # @param    [String]    url     URL of the page that owns the link
    # @param    [Hash]      raw     raw link data; 'href' becomes {#action}
    #                               and 'vars' becomes {#auditable}
    #
    def initialize( url, raw = {} )
        super( url, raw )

        @action    = @raw['href']
        @method    = 'get'
        @auditable = @raw['vars']
    end

    #
    # Submits the link by delegating to the auditor's HTTP interface.
    #
    # @param    [String]    url
    # @param    [Hash]      opts    passed through untouched to +http.get+
    #
    def http_request( url, opts )
        @auditor.http.get( url, opts )
    end

    #
    # @return    [Hash]     single-pair hash of {#action} => {#auditable}
    #
    def simple
        { @action => @auditable }
    end

    def type
        Arachni::Module::Auditor::Element::LINK
    end

    #
    # Builds a string identifying an audit of this link, made up of the
    # auditor's name, the resolved URL, the element type, the sorted input
    # names, the injected string and the timeout.
    #
    # @param    [#to_s]     injection_str
    # @param    [Hash]      opts    only +:timeout+ is read
    #
    # @return    [String]
    #
    def audit_id( injection_str, opts = {} )
        # 'vars' may be missing from the raw data (raw defaults to {}),
        # in which case the link simply has no input names.
        vars = ( auditable || {} ).keys.sort.to_s

        # only the path of the action is kept, resolved against the page URL
        url = URI( @auditor.page.url ).merge( URI( @action ).path ).to_s

        timeout = opts[:timeout] || ''

        "#{@auditor.class.info[:name]}:#{url}:#{self.type}:" +
            "#{vars}=#{injection_str.to_s}:timeout=#{timeout}"
    end

end
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class Form < Base

    include Arachni::Module::Utilities

    # Markers found in +opts[:altered]+ when the form is submitted without
    # an injected payload.
    FORM_VALUES_ORIGINAL  = '__original_values__'
    FORM_VALUES_SAMPLE    = '__sample_values__'

    #
    # @param    [String]    url     URL of the page that owns the form
    # @param    [Hash]      raw     raw form data with 'attrs' and 'auditable' keys
    #
    def initialize( url, raw = {} )
        super( url, raw )

        # raw defaults to {}, so 'attrs' may legitimately be missing
        attrs   = @raw['attrs'] || {}
        @action = attrs['action']
        @method = attrs['method']

        @auditable = simple['auditable'] || {}
    end

    #
    # Submits the form via POST, or via GET when the form method is 'get'.
    #
    # Submissions carrying the original or the sample values are performed
    # at most once (unless +opts[:redundant]+ is set) and always have the
    # +:train+ option forced on.
    #
    # @param    [String]    url
    # @param    [Hash]      opts    request options; never modified
    #
    # @return   [Object, nil]   whatever the HTTP interface returns, or nil
    #                           when the submission is skipped as already audited
    #
    def http_request( url, opts )
        # Work on a copy: the forced :train flag must not be written into
        # the caller's hash (presumably reused for later requests -- confirm
        # against the caller).
        curr_opts = opts.dup

        label =
            case curr_opts[:altered]
            when FORM_VALUES_ORIGINAL then 'original'
            when FORM_VALUES_SAMPLE   then 'sample'
            end

        if label
            submission_id = audit_id( curr_opts[:altered] )

            return if !curr_opts[:redundant] && audited?( submission_id )
            audited( submission_id )

            print_debug( "Submitting form with #{label} values;" +
                ' overriding trainer option.' )
            curr_opts[:train] = true
            print_debug_trainer( curr_opts )
        end

        # a form without a method attribute is submitted as a POST
        if @method.to_s.downcase != 'get'
            @auditor.http.post( url, curr_opts )
        else
            @auditor.http.get( url, curr_opts )
        end
    end

    #
    # Identifier made up of the form attributes followed by the names of
    # its inputs, skipping any input whose name contains the seed.
    #
    # @return    [String]
    #
    def id
        form_id = simple['attrs'].to_s

        auditable.each_key do |name|
            next if name.substring?( seed )
            form_id += name
        end

        form_id
    end

    #
    # Simple representation of the form: its attributes under 'attrs' and
    # its named inputs as a name=>value hash under 'auditable'.
    #
    # @return    [Hash]     empty when the raw data holds no auditable inputs
    #
    def simple
        form = {}

        return form if !@raw || !@raw['auditable'] || @raw['auditable'].empty?

        form['attrs']     = @raw['attrs']
        form['auditable'] = {}

        @raw['auditable'].each do |item|
            # nameless inputs cannot be addressed in a request
            next if !item['name']
            form['auditable'][item['name']] = item['value']
        end

        form
    end

    def type
        Arachni::Module::Auditor::Element::FORM
    end

end
|
|
235
|
+
|
|
236
|
+
class Cookie < Base

    #
    # @param    [String]    url     URL of the page that owns the cookie
    # @param    [Hash]      raw     raw cookie data with 'name' and 'value' keys
    #
    def initialize( url, raw = {} )
        super( url, raw )

        @action = @url
        @method = 'cookie'

        # The simple representation always keeps the cookie, while the
        # auditable inputs drop it when its name is in the exclusion list.
        @simple    = { @raw['name'] => @raw['value'] }
        @auditable = @simple.dup

        @auditable.reject! do |name, _value|
            Options.instance.exclude_cookies.include?( name )
        end
    end

    #
    # Submits the cookie by delegating to the auditor's HTTP interface.
    #
    def http_request( url, opts )
        @auditor.http.cookie( url, opts )
    end

    #
    # @return    [Hash]     name=>value pair captured at construction time
    #
    def simple
        @simple
    end

    def type
        Arachni::Module::Auditor::Element::COOKIE
    end

end
|
|
266
|
+
|
|
267
|
+
class Header < Base

    #
    # @param    [String]    url     URL of the page that owns the header
    # @param    [Hash]      raw     header as a name=>value hash; used as-is
    #                               for the auditable inputs
    #
    def initialize( url, raw = {} )
        super( url, raw )

        @auditable = @raw
        @method    = 'header'
        @action    = @url
    end

    #
    # Submits the header by delegating to the auditor's HTTP interface.
    #
    def http_request( url, opts )
        @auditor.http.header( url, opts )
    end

    #
    # @return    [Hash]     copy of the auditable inputs
    #
    def simple
        @auditable.dup
    end

    def type
        Arachni::Module::Auditor::Element::HEADER
    end

end
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
end
|
|
295
|
+
end
|
|
296
|
+
end
|
data/lib/parser/page.rb
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
=begin
|
|
2
|
+
Arachni
|
|
3
|
+
Copyright (c) 2010-2011 Tasos "Zapotek" Laskos <tasos.laskos@gmail.com>
|
|
4
|
+
|
|
5
|
+
This is free software; you can copy and distribute and modify
|
|
6
|
+
this program under the term of the GPL v2.0 License
|
|
7
|
+
(See LICENSE file for details)
|
|
8
|
+
|
|
9
|
+
=end
|
|
10
|
+
|
|
11
|
+
module Arachni
|
|
12
|
+
|
|
13
|
+
class Parser
|
|
14
|
+
#
|
|
15
|
+
# Arachni::Page class
|
|
16
|
+
#
|
|
17
|
+
# It holds page data like elements, cookies, headers, etc...
|
|
18
|
+
#
|
|
19
|
+
# @author: Tasos "Zapotek" Laskos
|
|
20
|
+
# <tasos.laskos@gmail.com>
|
|
21
|
+
# <zapotek@segfault.gr>
|
|
22
|
+
# @version: 0.2
|
|
23
|
+
#
|
|
24
|
+
class Page

    #
    # @return    [String]    url of the page
    #
    attr_accessor :url

    #
    # @return    [Fixnum]    the HTTP response code
    #
    attr_accessor :code

    #
    # NOTE: this accessor is named +method+ and therefore shadows
    # Object#method on instances of this class.
    #
    # @return    [String]    the request method that returned the page
    #
    attr_accessor :method

    #
    # @return    [Hash]    url variables
    #
    attr_accessor :query_vars

    #
    # @return    [String]    the HTML response
    #
    attr_accessor :html

    #
    # Request headers
    #
    # @return    [Array<Arachni::Parser::Element::Header>]
    #
    attr_accessor :headers

    #
    # @return    [Hash]
    #
    attr_accessor :response_headers

    #
    # @see Parser#links
    #
    # @return    [Array<Arachni::Parser::Element::Link>]
    #
    attr_accessor :links

    #
    # @see Parser#forms
    #
    # @return    [Array<Arachni::Parser::Element::Form>]
    #
    attr_accessor :forms

    #
    # @see Parser#cookies
    #
    # @return    [Array<Arachni::Parser::Element::Cookie>]
    #
    attr_accessor :cookies

    #
    # Cookies extracted from the supplied cookiejar
    #
    # @return    [Hash]
    #
    attr_accessor :cookiejar

    #
    # Assigns every key of +opts+ through the writer of the same name, so a
    # key without a matching writer raises NoMethodError.
    #
    # @param    [Hash]    opts    attribute name => value
    #
    def initialize( opts = {} )
        opts.each { |name, value| send( "#{name}=", value ) }
    end

    #
    # @return    [String]    alias of {#html}
    #
    def body
        @html
    end

    #
    # Returns the simple representation of every form in {#forms}.
    #
    # @return    [Array]    empty when no forms have been assigned
    #
    def forms_simple
        ( @forms || [] ).map { |form| form.simple }
    end

    #
    # Returns the simple representation of every link in {#links}.
    #
    # @return    [Array]    empty when no links have been assigned
    #
    def links_simple
        ( @links || [] ).map { |link| link.simple }
    end

    #
    # Returns cookies from {#cookies} merged into one name=>value hash;
    # a later cookie overrides an earlier one of the same name.
    #
    # @return    [Hash]    empty when no cookies have been assigned
    #
    def cookies_simple
        ( @cookies || [] ).inject( {} ) do |merged, cookie|
            merged.merge!( cookie.simple )
        end
    end

end
|
|
148
|
+
end
|
|
149
|
+
end
|
|
@@ -0,0 +1,717 @@
|
|
|
1
|
+
=begin
|
|
2
|
+
Arachni
|
|
3
|
+
Copyright (c) 2010-2011 Tasos "Zapotek" Laskos <tasos.laskos@gmail.com>
|
|
4
|
+
|
|
5
|
+
This is free software; you can copy and distribute and modify
|
|
6
|
+
this program under the term of the GPL v2.0 License
|
|
7
|
+
(See LICENSE file for details)
|
|
8
|
+
=end
|
|
9
|
+
|
|
10
|
+
module Arachni
|
|
11
|
+
|
|
12
|
+
opts = Arachni::Options.instance
|
|
13
|
+
require opts.dir['lib'] + 'parser/elements'
|
|
14
|
+
require opts.dir['lib'] + 'parser/page'
|
|
15
|
+
require opts.dir['lib'] + 'module/utilities'
|
|
16
|
+
|
|
17
|
+
#
|
|
18
|
+
# Analyzer class
|
|
19
|
+
#
|
|
20
|
+
# Analyzes HTML code extracting forms, links and cookies
|
|
21
|
+
# depending on user opts.<br/>
|
|
22
|
+
#
|
|
23
|
+
# It grabs <b>all</b> element attributes not just URLs and variables.<br/>
|
|
24
|
+
# All URLs are converted to absolute and URLs outside the domain are ignored.<br/>
|
|
25
|
+
#
|
|
26
|
+
# === Forms
|
|
27
|
+
# Form analysis uses both regular expressions and the Nokogiri parser<br/>
|
|
28
|
+
# in order to be able to handle badly written HTML code, such as not closed<br/>
|
|
29
|
+
# tags and tag overlaps.
|
|
30
|
+
#
|
|
31
|
+
# In order to ease audits, in addition to parsing forms into data structures<br/>
|
|
32
|
+
# like "select" and "option", all auditable inputs are put under the<br/>
|
|
33
|
+
# "auditable" key.
|
|
34
|
+
#
|
|
35
|
+
# === Links
|
|
36
|
+
# Links are extracted using the Nokogiri parser.
|
|
37
|
+
#
|
|
38
|
+
# === Cookies
|
|
39
|
+
# Cookies are extracted from the HTTP headers and parsed by WEBrick::Cookie
|
|
40
|
+
#
|
|
41
|
+
# @author: Tasos "Zapotek" Laskos
|
|
42
|
+
# <tasos.laskos@gmail.com>
|
|
43
|
+
# <zapotek@segfault.gr>
|
|
44
|
+
# @version: 0.2
|
|
45
|
+
#
|
|
46
|
+
class Parser
|
|
47
|
+
|
|
48
|
+
include Arachni::Module::Utilities
|
|
49
|
+
|
|
50
|
+
#
|
|
51
|
+
# @return [String] the url of the page
|
|
52
|
+
#
|
|
53
|
+
attr_accessor :url
|
|
54
|
+
|
|
55
|
+
#
|
|
56
|
+
# Options instance
|
|
57
|
+
#
|
|
58
|
+
# @return [Options]
|
|
59
|
+
#
|
|
60
|
+
attr_reader :opts
|
|
61
|
+
|
|
62
|
+
#
|
|
63
|
+
# Constructor <br/>
|
|
64
|
+
# Instantiates Analyzer class with user options.
|
|
65
|
+
#
|
|
66
|
+
# @param [Options] opts
|
|
67
|
+
#
|
|
68
|
+
def initialize( opts, res )
    # keep what is needed from the response: final URL, body and headers
    @url              = res.effective_url
    @html             = res.body
    @response_headers = res.headers_hash

    @opts = opts
end
|
|
75
|
+
|
|
76
|
+
#
|
|
77
|
+
# Runs the Analyzer and extracts forms, links and cookies
|
|
78
|
+
#
|
|
79
|
+
# @return [Page]
|
|
80
|
+
#
|
|
81
|
+
def run
    content_type = Arachni::HTTP.content_type( @response_headers )

    # A response counts as text unless it declares a content type that
    # does not contain 'text'.
    is_text = !content_type.is_a?( String ) || content_type.substring?( 'text' )

    unless is_text
        # non text files won't contain any auditable elements
        # NOTE(review): :cookiejar is an Array here but a Hash in the
        # regular case below -- confirm that callers cope with both.
        return Page.new( {
            :url              => @url,
            :query_vars       => link_vars( @url ),
            :html             => @html,
            :headers          => [],
            :response_headers => @response_headers,
            :forms            => [],
            :links            => [],
            :cookies          => [],
            :cookiejar        => []
        } )
    end

    page_cookies = merge_with_cookiejar( cookies.flatten.uniq )

    # when a cookie-jar is configured it is parsed and also stored in the options
    jar = {}
    if @opts.cookie_jar
        jar = @opts.cookies = Arachni::HTTP.parse_cookiejar( @opts.cookie_jar )
    end

    # name=>value view of the cookies; cookie-jar entries take precedence
    from_page = page_cookies.inject( {} ) do |merged, cookie|
        merged.merge!( cookie.simple )
    end
    jar = from_page.merge( jar )

    Page.new( {
        :url              => @url,
        :query_vars       => link_vars( @url ),
        :html             => @html,
        :headers          => headers(),
        :response_headers => @response_headers,
        :forms            => ( @opts.audit_forms ? forms() : [] ),
        :links            => ( @opts.audit_links ? links() : [] ),
        :cookies          => merge_with_cookiestore( merge_with_cookiejar( page_cookies ) ),
        :cookiejar        => jar
    } )
end
|
|
124
|
+
|
|
125
|
+
def doc
    # return the cached document, if any
    return @doc if @doc

    # nothing to parse
    return nil unless @html

    begin
        @doc = Nokogiri::HTML( @html )
    rescue
        # a failed parse is reported as nil instead of an exception
        nil
    end
end
|
|
129
|
+
|
|
130
|
+
def merge_with_cookiestore( cookies )
    @cookiestore ||= []

    # first batch: adopt the given cookies as the store
    if @cookiestore.empty?
        @cookiestore = cookies
        return @cookiestore
    end

    # Flatten stored and new cookies into one name=>value hash; new cookies
    # come last so they override stored ones of the same name.
    combined = {}
    [ @cookiestore, cookies ].each do |batch|
        batch.each { |cookie| combined.merge!( cookie.simple ) }
    end

    # rebuild the store as cookie elements bound to the current URL
    @cookiestore = combined.map do |name, value|
        Element::Cookie.new( @url, { 'name' => name, 'value' => value } )
    end

    @cookiestore
end
|
|
160
|
+
|
|
161
|
+
#
|
|
162
|
+
# Merges 'cookies' with the cookiejar and returns it as an array
|
|
163
|
+
#
|
|
164
|
+
# @param [Array<Hash>] cookies
|
|
165
|
+
#
|
|
166
|
+
# @return [Array<Element::Cookie>] the merged cookies
|
|
167
|
+
#
|
|
168
|
+
def merge_with_cookiejar( cookies )
    jar = @opts.cookies

    # no cookie-jar configured, nothing to add
    return cookies unless jar

    # every jar entry is appended to the given array (in place) as a
    # cookie element bound to the current URL
    jar.each_pair do |name, value|
        cookies << Element::Cookie.new( @url, { 'name' => name, 'value' => value } )
    end

    cookies
end
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
#
|
|
185
|
+
# Returns a list of valid auditable HTTP header fields.
|
|
186
|
+
#
|
|
187
|
+
# It's more of a placeholder method, it doesn't actually analyze anything.<br/>
|
|
188
|
+
# It's a long shot that any of these will be vulnerable but better
|
|
189
|
+
# be safe than sorry.
|
|
190
|
+
#
|
|
191
|
+
# @return [Hash] HTTP header fields
|
|
192
|
+
#
|
|
193
|
+
def headers
    # fixed set of request header fields; only 'from', 'user-agent' and
    # 'referer' depend on the options and the current URL
    fields = {
        'accept'          => 'text/html,application/xhtml+xml,application' +
            '/xml;q=0.9,*/*;q=0.8',
        'accept-charset'  => 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'accept-language' => 'en-gb,en;q=0.5',
        'accept-encoding' => 'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
        'from'            => @opts.authed_by  || '',
        'user-agent'      => @opts.user_agent || '',
        'referer'         => @url,
        'pragma'          => 'no-cache'
    }

    # one header element per field, each holding a single name=>value pair
    fields.map do |name, value|
        Element::Header.new( @url, { name => value } )
    end
end
|
|
212
|
+
|
|
213
|
+
# TODO: Add support for radio buttons.
#
# Extracts forms from HTML document
#
# Properly closed forms are collected first and their markup is removed
# from a working copy of the document; whatever "<form" is left is then
# treated as an unclosed form. This imitates Firefox's behavior when it
# comes to broken/unclosed form tags.
#
# Forms whose (absolute) action is outside the page's domain and forms
# without any auditable input are left out of the result.
#
# @see #form_attrs
# @see #form_textareas
# @see #form_selects
# @see #form_inputs
# @see #merge_select_with_input
#
# @param  [String] html   HTML code to parse, defaults to the HTML of the
#                         page (@html); the passed string is not modified
#
# @return [Array<Element::Form>] array of forms
#
def forms( html = nil )

    begin
        # work on a private copy, gsub! below must not clobber the caller's string
        html = ( html || @html ).dup

        # get properly closed forms
        form_htmls = html.scan( /<form(.*?)<\/form>/ixm ).flatten

        # now remove them from html...
        form_htmls.each { |form| html.gsub!( form, '' ) }

        # and get unclosed forms.
        form_htmls |= html.scan( /<form (.*)(?!<\/form>)/ixm ).flatten

    rescue StandardError
        # nothing usable to parse (e.g. no HTML at all)
        return []
    end

    elements = []
    form_htmls.each do |form|

        raw = {}

        # the tag may be too broken to yield any attributes at all
        attrs = raw['attrs'] = form_attrs( form ) || {}

        # a form without an action submits to the page itself
        action = URI.escape( attrs['action'] || @url.to_s ).to_s
        attrs['action'] = to_absolute( action.clone ).to_s

        attrs['method'] = attrs['method'] ? attrs['method'].downcase : 'post'

        # skip forms that submit to another domain
        next if !in_domain?( URI.parse( URI.escape( attrs['action'] ) ) )

        raw['textarea'] = form_textareas( form )
        raw['select']   = form_selects( form )
        raw['input']    = form_inputs( form )

        # merge the form elements to make auditing easier
        raw['auditable'] = merge_select_with_input( raw['input'] | raw['textarea'],
            raw['select'] )

        elements << Element::Form.new( @url, raw )
    end

    elements.reject { |form| form.auditable.empty? }
end
|
|
302
|
+
|
|
303
|
+
#
# Extracts links from HTML document
#
# Every anchor ('a') element of the page has its href converted to an
# absolute URL; anchors whose href can't be resolved, matches an exclude
# pattern, matches no include pattern or is out of the page's domain
# are skipped.
#
# @see #link_vars
#
# @return [Array<Element::Link>] of links
#
def links
    found = []

    elements_by_name( 'a' ).each do |link|
        href = link['href'] = to_absolute( link['href'] )

        next unless href
        next if exclude?( href )
        next unless include?( href )
        next unless in_domain?( URI.parse( href ) )

        link['vars'] = link_vars( href )
        found << Element::Link.new( @url, link )
    end

    found
end
|
|
334
|
+
|
|
335
|
+
#
# Extracts cookies from the page
#
# Cookies are gathered from two places:
# * <meta http-equiv="Set-Cookie" content="name=value; ..."> elements
#   of the document
# * the Set-Cookie field of the HTTP response headers (@response_headers)
#
# Cookies coming from the headers are appended after the ones found
# in the document.
#
# @return [Array<Element::Cookie>] of cookies
#
def cookies

    cookies_arr = []

    begin
        doc.search( "//meta[@http-equiv]" ).each do |elem|
            next if elem['http-equiv'].downcase != 'set-cookie'

            # only the leading "name=value" part of the content is used
            k, v = elem['content'].split( ';' )[0].split( '=', 2 )
            cookies_arr << Element::Cookie.new( @url, { 'name' => k, 'value' => v } )
        end
    rescue StandardError
        # best effort: a broken document must not prevent header parsing
    end

    # no Set-Cookie in the response headers, we're done
    return cookies_arr if !@response_headers.to_s.substring?( 'set-cookie' )

    header_cookies = []
    begin
        # the casing of the field name is not guaranteed, try both spellings
        header_cookies << WEBrick::Cookie.parse_set_cookies( @response_headers['Set-Cookie'].to_s )
        header_cookies << WEBrick::Cookie.parse_set_cookies( @response_headers['set-cookie'].to_s )
    rescue StandardError
        return cookies_arr
    end

    header_cookies.flatten.uniq.each do |cookie|
        raw = {}

        # turn the instance variables of the parsed cookie into a plain hash
        cookie.instance_variables.each do |var|
            val = cookie.instance_variable_get( var ).to_s.strip.gsub( /[\"\\\[\]]/, '' )

            next if val == seed
            raw[normalize_name( var )] = val
        end

        # append, assigning by index would overwrite the <meta> cookies
        cookies_arr << Element::Cookie.new( @url, raw )
    end

    cookies_arr
end
|
|
392
|
+
|
|
393
|
+
#
# Extracts variables and their values from a link
#
# Everything after the first '?' is treated as the query and split into
# '&' separated pairs; each pair is split on its first '=' only, so
# values may themselves contain '=' or '?'.
# Pairs whose value equals the seed are ignored.
#
# @see #links
#
# @param    [String]    link
#
# @return   [Hash]    name=>value pairs, the value is nil when a
#                       variable has no (or an empty) value
#
def link_vars( link )
    return {} if !link

    var_string = link.split( '?', 2 )[1]
    return {} if !var_string

    var_hash = {}
    var_string.split( '&' ).each do |pair|
        name, value = pair.split( '=', 2 )

        # empty pair (as in "a=1&&b=2"), there's no variable to record
        next if name.nil?

        # "a=" has always been reported with a nil value, keep it that way
        value = nil if value && value.empty?

        next if value == seed
        var_hash[name] = value
    end

    var_hash
end
|
|
420
|
+
|
|
421
|
+
#
# Converts relative URL *link* into an absolute URL based on the
# location of the page
#
# A link that already has a host is returned as is; otherwise its
# trailing anchor is removed and it is merged with the page's URL (@url).
#
# @param    [String]    link
#
# @return   [String, nil]   the absolute URL, or nil if *link* is nil or
#                             can't be resolved
#
def to_absolute( link )
    return nil if link.nil?

    begin
        return link if URI.parse( link ).host
    rescue StandardError
        # not parsable as is, it may still be after being encoded below
    end

    # remove anchor
    link = URI.encode( link.to_s.gsub( /#[a-zA-Z0-9_-]*$/, '' ) )

    begin
        absolute = URI.parse( @url ).merge( URI( link ) )
        absolute.path = '/' if absolute.path.empty?
    rescue StandardError
        return nil
    end

    absolute.to_s
end
|
|
456
|
+
|
|
457
|
+
#
# Returns +true+ if *uri* is in the same domain as the page, returns
# +false+ otherwise
#
# When the follow_subdomains option is set the comparison is made on the
# extracted domains, otherwise the hosts of the normalized URLs must match.
#
# @see #extract_domain
#
# @param    [URI, String]   uri
#
def in_domain?( uri )
    candidate = URI.parse( normalize_url( uri.to_s ) )

    unless @opts.follow_subdomains
        page_host = URI.parse( normalize_url( @url.to_s ) ).host
        return candidate.host == page_host
    end

    extract_domain( candidate ) == extract_domain( URI( @url.to_s ) )
end
|
|
470
|
+
|
|
471
|
+
#
# Extracts the domain from a URI object
#
# The domain is made of the last two labels of the host
# ("www.example.com" => "example.com"); a host with fewer than two labels
# (like "localhost") is returned as is, so that two different
# single-label hosts never compare as equal.
#
# @param   [URI]  url
#
# @return  [String, false]   the domain, or false if *url* has no host
#
def extract_domain( url )
    host = url.host
    return false if !host

    labels = host.split( '.' )

    # nothing to strip from a single-label host
    return host if labels.length < 2

    labels[-2] + "." + labels[-1]
end
|
|
488
|
+
|
|
489
|
+
#
# Returns +true+ if *url* matches any of the exclusion patterns of the
# options (@opts.exclude), +false+ otherwise
#
# @param    [#to_s]   url
#
def exclude?( url )
    @opts.exclude.any? { |pattern| url.to_s =~ pattern }
end
|
|
497
|
+
|
|
498
|
+
#
# Returns +true+ if there are no inclusion patterns in the options
# (@opts.include) or if *url* matches any of them, +false+ otherwise
#
# @param    [#to_s]   url
#
def include?( url )
    patterns = @opts.include
    patterns.empty? || patterns.any? { |pattern| url.to_s =~ pattern }
end
|
|
507
|
+
|
|
508
|
+
|
|
509
|
+
private
|
|
510
|
+
|
|
511
|
+
#
# Merges an array of form inputs with an array of form selects
#
# Each select contributes its attributes, with 'value' set to the value
# of its first option (nil when the select has no options).
# Selects without attributes are skipped.
#
# @see #forms
#
# @param    [Array]  inputs   form inputs, the array itself is left untouched
# @param    [Array]  selects  form selects, each one a hash with 'attrs'
#                               and 'options' (as built by #form_selects)
#
# @return   [Array]  merged array
#
def merge_select_with_input( inputs, selects )

    merged = inputs.dup

    selects.each do |select|
        attrs = select['attrs']
        next if !attrs

        # an empty <select> has no first option to take the value from
        first_option   = ( select['options'] || [] ).first
        attrs['value'] = first_option ? first_option['value'] : nil

        merged << attrs
    end

    merged
end
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
#
# Parses the attributes inside the <form ....> tag
#
# *form* is expected to be what follows "<form" in the document, so
# everything up to the first '>' is the inside of the opening tag.
#
# @see #forms
# @see #attrs_from_tag
#
# @param  [String] form   HTML code for the form tag
#
# @return [Hash<String, String>]   the attributes of the tag, empty if the
#                                    tag is never closed or can't be parsed
#
def form_attrs( form )
    opening = form[/(.*?)>/ixm, 1]

    # no '>' at all, there's no opening tag to speak of
    return {} if opening.nil?

    attrs_from_tag( 'form', '<form ' + opening + '>' )[0] || {}
end
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
#
# Extracts HTML select elements, their attributes and their options
#
# The options of a select are looked for in the HTML between its opening
# tag and the opening tag of the next select (or the end of *html*), so
# each select only gets its own options.
#
# @see #forms
# @see #form_selects_options
#
# @param    [String]    html
#
# @return    [Array]    array of select elements, each one a hash with
#                         'options' and 'attrs'
#
def form_selects( html )
    # keep the whole match data around, the offsets are needed below
    tags = []
    html.scan( /<select(.*?)>/ixm ) { tags << Regexp.last_match }

    elements = []
    tags.each_with_index do |tag, i|
        following = tags[i + 1]
        stop      = following ? following.begin( 0 ) : html.size

        # what lies between this opening tag and the next select
        inner = html[tag.end( 0 )...stop]

        elements << {
            'options' => form_selects_options( inner ),
            'attrs'   => attrs_from_tag( 'select', '<select ' + tag[1] + '/>' )[0]
        }
    end

    elements
end
|
|
583
|
+
|
|
584
|
+
#
# Extracts HTML option elements and their attributes
# from select elements
#
# Every "<option ...>" opening tag found in *html* is rebuilt as a
# self-closed tag and handed to #attrs_from_tag.
#
# @see #forms
# @see #form_selects
#
# @param    [String]    html    HTML of the selects
#
# @return    [Array]    array of option elements
#
def form_selects_options( html )
    html.scan( /<option(.*?)>/ixm ).map do |captures|
        attrs_from_tag( 'option', '<option ' + captures[0] + '/>' )[0]
    end
end
|
|
609
|
+
|
|
610
|
+
#
# Extracts HTML textarea elements and their attributes
# from forms
#
# Only the opening "<textarea ...>" tags are looked at; each one is
# rebuilt as a self-closed tag and handed to #attrs_from_tag.
#
# @see #forms
#
# @param    [String]    html
#
# @return    [Array]    array of textarea elements
#
def form_textareas( html )
    opening_tags = html.scan( /<textarea(.*?)>/ixm )

    opening_tags.collect do |(tag_attrs)|
        attrs_from_tag( 'textarea', "<textarea #{tag_attrs}/>" )[0]
    end
end
|
|
632
|
+
|
|
633
|
+
#
# Parses the attributes of input fields
#
# Each "<input ...>" tag found in *html* is rebuilt as a self-closed tag
# and handed to #attrs_from_tag.
#
# @see #forms
#
# @param  [String] html   HTML code for the form tag
#
# @return [Array<Hash<String, String>>]   one hash of attributes per input
#
def form_inputs( html )
    found = []

    html.scan( /<input(.*?)>/ixm ) do |captures|
        rebuilt = '<input ' + captures.first + '/>'
        found << attrs_from_tag( 'input', rebuilt )[0]
    end

    found
end
|
|
655
|
+
|
|
656
|
+
#
# Gets attributes from HTML code of a tag
#
# *html* is parsed with Nokogiri and every element named *tag* yields a
# hash of its attributes; attribute names are downcased and attributes
# whose value equals the seed are left out.
#
# @param  [String] tag    tag name (a, form, input)
# @param  [String] html   HTML code for the form tag
#
# @return [Array<Hash<String, String>>]
#
def attrs_from_tag( tag, html )
    Nokogiri::HTML( html ).search( tag ).map do |node|
        attrs = {}

        node.each do |attr_name, attr_value|
            attrs[attr_name.downcase] = attr_value unless attr_value == seed
        end

        attrs
    end
end
|
|
681
|
+
|
|
682
|
+
# Extracts elements by name from HTML document
#
# Each matching element yields a hash of its own attributes, merged with
# the attributes of its direct children (a child's attribute replaces
# the element's attribute of the same name).
#
# Should anything go wrong while walking the document, whatever has been
# gathered up to that point is returned.
#
# @param [String] name 'form', 'a', 'div', etc.
#
# @return [Array<Hash <String, String> >] of elements
#
def elements_by_name( name )
    gathered = []

    begin
        doc.search( name ).each do |node|
            attrs = {}
            gathered << attrs

            node.each { |key, value| attrs[key] = value }

            node.children.each do |child|
                child.each { |key, value| attrs[key] = value }
            end
        end
    rescue StandardError
        # best effort, keep what we've got so far
    end

    gathered
end
|
|
712
|
+
|
|
713
|
+
#
# Strips every '@' from *name*, turning the name of an instance
# variable into a plain key
#
# @param    [#to_s]   name
#
# @return   [String]
#
def normalize_name( name )
    name.to_s.delete( '@' )
end
|
|
716
|
+
end
|
|
717
|
+
end
|