arachni 0.2.4 → 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +33 -0
- data/README.md +2 -4
- data/Rakefile +15 -4
- data/bin/arachni +0 -0
- data/bin/arachni_web +0 -0
- data/bin/arachni_web_autostart +0 -0
- data/bin/arachni_xmlrpc +0 -0
- data/bin/arachni_xmlrpcd +0 -0
- data/bin/arachni_xmlrpcd_monitor +0 -0
- data/lib/arachni.rb +1 -1
- data/lib/framework.rb +36 -6
- data/lib/http.rb +12 -5
- data/lib/module/auditor.rb +482 -59
- data/lib/module/base.rb +17 -0
- data/lib/module/manager.rb +26 -2
- data/lib/module/trainer.rb +1 -12
- data/lib/module/utilities.rb +12 -0
- data/lib/parser/auditable.rb +8 -3
- data/lib/parser/elements.rb +11 -0
- data/lib/parser/page.rb +3 -1
- data/lib/parser/parser.rb +130 -18
- data/lib/rpc/xml/server/dispatcher.rb +21 -0
- data/lib/spider.rb +141 -82
- data/lib/ui/cli/cli.rb +2 -3
- data/lib/ui/web/addon_manager.rb +273 -0
- data/lib/ui/web/addons/autodeploy.rb +172 -0
- data/lib/ui/web/addons/autodeploy/lib/manager.rb +291 -0
- data/lib/ui/web/addons/autodeploy/views/index.erb +124 -0
- data/lib/ui/web/addons/sample.rb +78 -0
- data/lib/ui/web/addons/sample/views/index.erb +4 -0
- data/lib/ui/web/addons/scheduler.rb +139 -0
- data/lib/ui/web/addons/scheduler/views/index.erb +131 -0
- data/lib/ui/web/addons/scheduler/views/options.erb +93 -0
- data/lib/ui/web/dispatcher_manager.rb +80 -13
- data/lib/ui/web/instance_manager.rb +87 -0
- data/lib/ui/web/scheduler.rb +166 -0
- data/lib/ui/web/server.rb +142 -202
- data/lib/ui/web/server/public/js/jquery-ui-timepicker.js +985 -0
- data/lib/ui/web/server/public/plugins/sample/style.css +0 -0
- data/lib/ui/web/server/public/style.css +42 -0
- data/lib/ui/web/server/views/addon.erb +15 -0
- data/lib/ui/web/server/views/addons.erb +46 -0
- data/lib/ui/web/server/views/dispatchers.erb +1 -1
- data/lib/ui/web/server/views/instance.erb +9 -11
- data/lib/ui/web/server/views/layout.erb +14 -1
- data/lib/ui/web/server/views/welcome.erb +7 -6
- data/lib/ui/web/utilities.rb +134 -0
- data/modules/audit/code_injection_timing.rb +6 -2
- data/modules/audit/code_injection_timing/payloads.txt +2 -2
- data/modules/audit/os_cmd_injection_timing.rb +7 -3
- data/modules/audit/os_cmd_injection_timing/payloads.txt +1 -1
- data/modules/audit/sqli_blind_rdiff.rb +18 -233
- data/modules/audit/sqli_blind_rdiff/payloads.txt +5 -0
- data/modules/audit/sqli_blind_timing.rb +9 -2
- data/path_extractors/anchors.rb +1 -1
- data/path_extractors/forms.rb +1 -1
- data/path_extractors/frames.rb +1 -1
- data/path_extractors/generic.rb +1 -1
- data/path_extractors/links.rb +1 -1
- data/path_extractors/meta_refresh.rb +1 -1
- data/path_extractors/scripts.rb +1 -1
- data/path_extractors/sitemap.rb +1 -1
- data/plugins/proxy/server.rb +3 -2
- data/plugins/waf_detector.rb +0 -3
- metadata +37 -34
- data/lib/anemone/cookie_store.rb +0 -35
- data/lib/anemone/core.rb +0 -371
- data/lib/anemone/exceptions.rb +0 -5
- data/lib/anemone/http.rb +0 -144
- data/lib/anemone/page.rb +0 -338
- data/lib/anemone/page_store.rb +0 -160
- data/lib/anemone/storage.rb +0 -34
- data/lib/anemone/storage/base.rb +0 -75
- data/lib/anemone/storage/exceptions.rb +0 -15
- data/lib/anemone/storage/mongodb.rb +0 -89
- data/lib/anemone/storage/pstore.rb +0 -50
- data/lib/anemone/storage/redis.rb +0 -90
- data/lib/anemone/storage/tokyo_cabinet.rb +0 -57
- data/lib/anemone/tentacle.rb +0 -40
data/lib/anemone/exceptions.rb
DELETED
data/lib/anemone/http.rb
DELETED
@@ -1,144 +0,0 @@
|
|
1
|
-
=begin
|
2
|
-
Arachni
|
3
|
-
Copyright (c) 2010-2011 Tasos "Zapotek" Laskos <tasos.laskos@gmail.com>
|
4
|
-
|
5
|
-
This is free software; you can copy and distribute and modify
|
6
|
-
this program under the term of the GPL v2.0 License
|
7
|
-
(See LICENSE file for details)
|
8
|
-
|
9
|
-
=end
|
10
|
-
|
11
|
-
require Arachni::Options.instance.dir['lib'] + 'anemone/page'
|
12
|
-
require Arachni::Options.instance.dir['lib'] + 'anemone/cookie_store'
|
13
|
-
|
14
|
-
|
15
|
-
#
|
16
|
-
# Overrides Anemone's HTTP class methods:
|
17
|
-
# o refresh_connection( ): added proxy support
|
18
|
-
# o get_response( ): upped the retry counter to 7 and generalized exception handling
|
19
|
-
#
|
20
|
-
# @author: Tasos "Zapotek" Laskos
|
21
|
-
# <tasos.laskos@gmail.com>
|
22
|
-
# <zapotek@segfault.gr>
|
23
|
-
# @version: 0.1.1
|
24
|
-
#
|
25
|
-
module Anemone
|
26
|
-
|
27
|
-
class HTTP

  include Arachni::UI::Output

  # Fallback for #redirect_limit when no :redirect_limit option is given.
  REDIRECT_LIMIT = 5

  # The CookieStore built from the :cookies option at construction time.
  attr_reader :cookie_store

  #
  # @param [Hash] opts  client options; this class reads the keys
  #                     :cookies, :redirect_limit, :user_agent,
  #                     :accept_cookies and :verbose
  #
  def initialize(opts = {})
    @opts         = opts
    @connections  = {}
    @cookie_store = CookieStore.new(opts[:cookies])
  end

  #
  # Fetches *url* and returns only the last Page produced by #fetch_pages.
  #
  def fetch_page(url, referer = nil, depth = nil)
    fetch_pages(url, referer, depth)[-1]
  end

  #
  # Builds a Page for every response yielded by #get for *url*.
  #
  # If anything raises a StandardError along the way (including the URI
  # conversion), the error is printed when #verbose? is set and a single
  # Page carrying the exception in its :error parameter is returned.
  #
  # @return [Array<Page>]
  #
  def fetch_pages(url, referer = nil, depth = nil)
    url = URI(url) unless url.is_a?(URI)

    collected = []
    get(url, referer) do |response, code, location, redirect_to, response_time|
      page_params = {
        :body          => response.body.dup,
        :code          => code,
        :headers       => response.headers_hash,
        :referer       => referer,
        :depth         => depth,
        :redirect_to   => redirect_to,
        :response_time => response_time
      }
      collected << Page.new(location, page_params)
    end

    collected
  rescue => e
    if verbose?
      puts e.inspect
      puts e.backtrace
    end
    [Page.new(url, :error => e)]
  end

  #
  # The maximum number of redirects to follow: the :redirect_limit option
  # when it is set to a truthy value, REDIRECT_LIMIT otherwise.
  #
  def redirect_limit
    configured = @opts[:redirect_limit]
    configured ? configured : REDIRECT_LIMIT
  end

  #
  # The :user_agent option, or nil if it was not supplied.
  #
  def user_agent
    @opts[:user_agent]
  end

  #
  # The :accept_cookies option (truthy when cookies should be accepted).
  #
  def accept_cookies?
    @opts[:accept_cookies]
  end

  private

  #
  # Requests *url* via #get_response and yields once with:
  # the response, its code, the requested url, an empty string in the
  # redirect-target position, and the response time.
  #
  def get(url, referer = nil)
    response = get_response(url, referer)
    yield response, response.code, url, '', response.time
  end

  #
  # Performs a synchronous GET through Arachni::HTTP with
  # :follow_location enabled, sending a Referer header when *referer*
  # is given, and returns the resulting response object.
  #
  def get_response(url, referer = nil)
    headers = referer ? { 'Referer' => referer.to_s } : {}

    Arachni::HTTP.instance.get(
      url.to_s,
      :headers         => headers,
      :follow_location => true,
      :async           => false,
      :remove_id       => true
    ).response
  end

  #
  # The :verbose option; gates the error output in #fetch_pages.
  #
  def verbose?
    @opts[:verbose]
  end

  #
  # Allowed to connect to the requested url?
  # True when *to_url* has no host or shares its host with *from_url*.
  #
  def allowed?(to_url, from_url)
    target_host = to_url.host
    return true if target_host.nil?

    target_host == from_url.host
  end

end
|
144
|
-
end
|
data/lib/anemone/page.rb
DELETED
@@ -1,338 +0,0 @@
|
|
1
|
-
=begin
|
2
|
-
Arachni
|
3
|
-
Copyright (c) 2010-2011 Tasos "Zapotek" Laskos <tasos.laskos@gmail.com>
|
4
|
-
|
5
|
-
This is free software; you can copy and distribute and modify
|
6
|
-
this program under the term of the GPL v2.0 License
|
7
|
-
(See LICENSE file for details)
|
8
|
-
|
9
|
-
=end
|
10
|
-
|
11
|
-
require 'nokogiri'
|
12
|
-
require Arachni::Options.instance.dir['lib'] + 'nokogiri/xml/node'
|
13
|
-
require 'ostruct'
|
14
|
-
require 'webrick/cookie'
|
15
|
-
|
16
|
-
#
|
17
|
-
# Overrides Anemone's Page class methods:<br/>
|
18
|
-
# o in_domain?( uri ): adding support for subdomain crawling<br/>
|
19
|
-
# o links(): adding support for frame and iframe src URLs<br/>
|
20
|
-
#
|
21
|
-
# @author: Tasos "Zapotek" Laskos
|
22
|
-
# <tasos.laskos@gmail.com>
|
23
|
-
# <zapotek@segfault.gr>
|
24
|
-
# @version: 0.1
|
25
|
-
#
|
26
|
-
module Anemone
|
27
|
-
|
28
|
-
module Extractors
  #
  # Base Spider parser class for modules.
  #
  # The aim of such modules is to extract paths from a webpage for the Spider to follow.
  #
  # Page#run_modules instantiates every available extractor and calls
  # #run on it, so #run is the entry point; by default it delegates to
  # #parse, which is the hook documented for subclasses.
  #
  # @author: Tasos "Zapotek" Laskos
  #                                      <tasos.laskos@gmail.com>
  #                                      <zapotek@segfault.gr>
  # @version: 0.1
  # @abstract
  #
  class Paths

    #
    # This method must be implemented by all modules and must return an array
    # of paths as plain strings.
    #
    # The base implementation finds nothing and returns an empty array
    # (rather than nil) so the result honours the documented return type.
    #
    # @param    [Nokogiri]  doc   Nokogiri document
    #
    # @return   [Array<String>]   paths
    #
    def parse( doc )
      []
    end

    #
    # Entry point invoked by the caller for each extractor.
    #
    # Delegates to #parse so that subclasses implementing only the
    # documented #parse hook still respond to #run; subclasses that
    # define their own #run simply override this.
    #
    # @param    [Nokogiri]  doc   Nokogiri document
    #
    # @return   [Array<String>]   paths
    #
    def run( doc )
      parse( doc )
    end
  end
end
|
56
|
-
|
57
|
-
class Page

  include Arachni::UI::Output

  # The URL of the page
  attr_reader :url
  # The raw HTTP response body of the page
  attr_reader :body
  # Headers of the HTTP response
  attr_reader :headers
  # URL of the page this one redirected to, if any
  attr_reader :redirect_to
  # Exception object, if one was raised during HTTP#fetch_page
  attr_reader :error

  # OpenStruct for user-stored data
  attr_accessor :data
  # Integer response code of the page
  attr_accessor :code
  # Boolean indicating whether or not this page has been visited in PageStore#shortest_paths!
  attr_accessor :visited
  # Depth of this page from the root of the crawl. This is not necessarily the
  # shortest path; use PageStore#shortest_paths! to find that value.
  attr_accessor :depth
  # URL of the page that brought us to this page
  attr_accessor :referer
  # Response time of the request for this page in milliseconds
  attr_accessor :response_time

  #
  # Create a new page
  #
  # @param  [URI]   url     location of the page
  # @param  [Hash]  params  optional data; the keys read here are :code,
  #                         :headers, :aka, :referer, :depth, :redirect_to,
  #                         :response_time, :body and :error
  #
  def initialize(url, params = {})
    @url = url
    @data = OpenStruct.new

    @code = params[:code]
    @headers = params[:headers] || {}
    @headers['content-type'] ||= ['']
    @aliases = Array(params[:aka]).compact
    @referer = params[:referer]
    @depth = params[:depth] || 0
    # Resolved while @body is still unset, so #doc has nothing to parse
    # and a <base href> in the body cannot influence this URL.
    @redirect_to = to_absolute(params[:redirect_to])
    @response_time = params[:response_time]
    @body = params[:body]
    @error = params[:error]

    # a page counts as fetched as soon as it carries a response code
    @fetched = !params[:code].nil?
  end

  #
  # Runs all Spider (path extraction) modules and returns an array of paths
  #
  # The component manager is created once and kept in a class variable.
  # If building it or running an extractor fails, the error is printed
  # and an empty array is returned so that callers can still iterate
  # over the result.
  #
  # @return [Array]   paths
  #
  def run_modules
    opts = Arachni::Options.instance
    require opts.dir['lib'] + 'component_manager'

    lib = opts.dir['root'] + 'path_extractors/'

    begin
      @@manager ||= ::Arachni::ComponentManager.new( lib, Extractors )

      @@manager.available.map { |name|
        @@manager[name].new.run( doc )
      }.flatten.uniq

    # ScriptError covers LoadError/SyntaxError from loading an extractor;
    # Interrupt and SystemExit are deliberately left to propagate.
    rescue StandardError, ScriptError => e
      print_error( e.to_s )
      print_debug_backtrace( e )
      []
    end
  end

  #
  # Returns the directory part of the path of *url* (with a trailing
  # slash) as a URI.
  #
  def dir( url )
    URI( File.dirname( URI( url.to_s ).path ) + '/' )
  end

  #
  # Array of distinct links to follow
  #
  # The result is memoized; paths that are nil, empty, unparsable or
  # outside the page's domain (see #in_domain?) are skipped.
  #
  # @return [Array<URI>]
  #
  def links
    return @links unless @links.nil?
    @links = []
    return @links if !doc

    run_modules( ).each { |path|
      next if path.nil? or path.empty?

      begin
        abs = to_absolute( URI( path ) )
      rescue
        # unparsable path: ignore it and carry on with the next one
        next
      end

      # (forcing a directory listing via #dir is currently disabled)
      @links << abs if in_domain?( abs )
    }

    @links.uniq!
    return @links
  end

  #
  # Nokogiri document for the HTML body
  #
  # Returns nil when Arachni::HTTP.content_type reports a String type
  # for which +substring?( 'text' )+ is false, when there is no body,
  # or when parsing raises.
  #
  def doc
    type = Arachni::HTTP.content_type( @headers )
    return if type.is_a?( String) && !type.substring?( 'text' )

    return @doc if @doc
    @doc = Nokogiri::HTML( @body ) if @body rescue nil
  end

  #
  # Delete the Nokogiri document and response body to conserve memory
  #
  def discard_doc!
    links # force parsing of page links before we trash the document
    @doc = @body = nil
  end

  #
  # Was the page successfully fetched?
  # +true+ if the page was fetched with no error, +false+ otherwise.
  #
  def fetched?
    @fetched
  end

  #
  # Array of cookies received with this page as WEBrick::Cookie objects.
  # An empty array is returned if the Set-Cookie header cannot be parsed.
  #
  def cookies
    WEBrick::Cookie.parse_set_cookies(@headers['Set-Cookie']) rescue []
  end

  #
  # The content-type returned by the HTTP request for this page
  #
  def content_type
    headers['content-type'].first
  end

  #
  # Returns +true+ if the page is a HTML document, returns +false+
  # otherwise.
  #
  def html?
    # the '+' must be escaped to match "application/xhtml+xml" literally
    !!(content_type =~ %r{\A(text/html|application/xhtml\+xml)\b})
  end

  #
  # Returns +true+ if the page is a HTTP redirect, returns +false+
  # otherwise.
  #
  def redirect?
    (300..307).include?(@code)
  end

  #
  # Returns +true+ if the page was not found (returned 404 code),
  # returns +false+ otherwise.
  #
  def not_found?
    404 == @code
  end

  #
  # Converts relative URL *link* into an absolute URL based on the
  # location of the page
  #
  # A <base href> found in the document takes precedence over the page
  # URL as the base of the resolution.
  #
  # @return [URI, nil]  nil when *link* is nil
  #
  def to_absolute(link)
    return nil if link.nil?

    # remove anchor
    link = escape_link(link.to_s.gsub(/#[a-zA-Z0-9_-]*$/,''))

    if url = base
      base_url = URI(url)
    else
      base_url = @url.dup
    end

    relative = URI(link)
    absolute = base_url.merge(relative)

    absolute.path = '/' if absolute.path.empty?

    return absolute
  end

  #
  # The href of the first <base> element of the document, or nil when
  # there is no document, no such element, or the lookup raises.
  #
  def base
    begin
      tmp = doc.search( '//base[@href]' )
      return tmp[0]['href'].dup
    rescue
      return
    end
  end

  #
  # Returns +true+ if *uri* is in the same domain as the page, returns
  # +false+ otherwise.
  #
  # The added code enables optional subdomain crawling.
  #
  def in_domain?( uri )
    if( Arachni::Options.instance.follow_subdomains )
      return extract_domain( uri ) == extract_domain( @url )
    end

    uri.host == @url.host
  end

  #
  # Extracts the domain from a URI object
  #
  # The domain is taken to be the last two dot-separated labels of the
  # host. A host with fewer than two labels (e.g. "localhost") is
  # returned as-is, so that two different single-label hosts are not
  # considered to be the same domain.
  #
  # @param   [URI]   url
  #
  # @return  [String, false]   false when the URI has no host
  #
  def extract_domain( url )
    return false if !url.host

    labels = url.host.split( /\./ )
    return url.host if labels.length < 2

    labels[-2] + "." + labels[-1]
  end

  #
  # State handed to Marshal when the page is dumped.
  #
  def marshal_dump
    [@url, @headers, @data, @body, @links, @code, @visited, @depth, @referer, @redirect_to, @response_time, @fetched]
  end

  #
  # Restores the state produced by #marshal_dump (same field order).
  #
  def marshal_load(ary)
    @url, @headers, @data, @body, @links, @code, @visited, @depth, @referer, @redirect_to, @response_time, @fetched = ary
  end

  #
  # Flattens the page into a Hash with String keys; headers and user
  # data are stored as Marshal dumps, URLs as strings.
  #
  def to_hash
    {'url' => @url.to_s,
     'headers' => Marshal.dump(@headers),
     'data' => Marshal.dump(@data),
     'body' => @body,
     'links' => links.map(&:to_s),
     'code' => @code,
     'visited' => @visited,
     'depth' => @depth,
     'referer' => @referer.to_s,
     'redirect_to' => @redirect_to.to_s,
     'response_time' => @response_time,
     'fetched' => @fetched}
  end

  #
  # Rebuilds a page from a Hash shaped like the one #to_hash returns.
  #
  # NOTE(review): 'headers' and 'data' go through Marshal.load, which
  # must only ever be given data from a trusted store.
  #
  def self.from_hash(hash)
    page = self.new(URI(hash['url']))
    {'@headers' => Marshal.load(hash['headers']),
     '@data' => Marshal.load(hash['data']),
     '@body' => hash['body'],
     '@links' => hash['links'].map { |link| URI(link) },
     '@code' => hash['code'].to_i,
     '@visited' => hash['visited'],
     '@depth' => hash['depth'].to_i,
     '@referer' => hash['referer'],
     '@redirect_to' => URI(hash['redirect_to']),
     '@response_time' => hash['response_time'].to_i,
     '@fetched' => hash['fetched']
    }.each do |var, value|
      page.instance_variable_set(var, value)
    end
    page
  end

  private

  #
  # Percent-encodes *str* for use as a URI.
  #
  # Uses URI.encode where the running Ruby still provides it and falls
  # back to the default parser's escape otherwise, so behaviour is
  # unchanged wherever URI.encode exists.
  #
  def escape_link( str )
    if URI.respond_to?( :encode )
      URI.encode( str )
    else
      URI::DEFAULT_PARSER.escape( str )
    end
  end

end
|
338
|
-
end
|