ronin-web 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +15 -0
- data/Manifest.txt +25 -0
- data/README.txt +74 -0
- data/Rakefile +21 -0
- data/lib/ronin/sessions/web.rb +80 -0
- data/lib/ronin/web/extensions/nokogiri/xml/attr.rb +13 -0
- data/lib/ronin/web/extensions/nokogiri/xml/document.rb +18 -0
- data/lib/ronin/web/extensions/nokogiri/xml/element.rb +23 -0
- data/lib/ronin/web/extensions/nokogiri/xml/node.rb +47 -0
- data/lib/ronin/web/extensions/nokogiri/xml/text.rb +13 -0
- data/lib/ronin/web/extensions/nokogiri/xml.rb +5 -0
- data/lib/ronin/web/extensions/nokogiri.rb +24 -0
- data/lib/ronin/web/extensions.rb +24 -0
- data/lib/ronin/web/server.rb +511 -0
- data/lib/ronin/web/spider.rb +89 -0
- data/lib/ronin/web/version.rb +29 -0
- data/lib/ronin/web/web.rb +305 -0
- data/lib/ronin/web.rb +25 -0
- data/spec/spec_helper.rb +7 -0
- data/spec/web/extensions/nokogiri_spec.rb +38 -0
- data/spec/web/helpers/root/index.html +1 -0
- data/spec/web/helpers/root/test.txt +1 -0
- data/spec/web/helpers/server.rb +2 -0
- data/spec/web/server_spec.rb +142 -0
- data/tasks/spec.rb +9 -0
- metadata +141 -0
@@ -0,0 +1,511 @@
|
|
1
|
+
#
|
2
|
+
#--
|
3
|
+
# Ronin Web - A Ruby library for Ronin that provides support for web
|
4
|
+
# scraping and spidering functionality.
|
5
|
+
#
|
6
|
+
# Copyright (c) 2006-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
7
|
+
#
|
8
|
+
# This program is free software; you can redistribute it and/or modify
|
9
|
+
# it under the terms of the GNU General Public License as published by
|
10
|
+
# the Free Software Foundation; either version 2 of the License, or
|
11
|
+
# (at your option) any later version.
|
12
|
+
#
|
13
|
+
# This program is distributed in the hope that it will be useful,
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
+
# GNU General Public License for more details.
|
17
|
+
#
|
18
|
+
# You should have received a copy of the GNU General Public License
|
19
|
+
# along with this program; if not, write to the Free Software
|
20
|
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
21
|
+
#++
|
22
|
+
#
|
23
|
+
|
24
|
+
require 'uri'
|
25
|
+
require 'cgi'
|
26
|
+
|
27
|
+
# Prefer Mongrel as the HTTP backend and fall back to WEBrick when Mongrel
# is not installed. A failed +require+ raises LoadError, which is a
# ScriptError and therefore is NOT caught by a bare +rescue+ (that only
# catches StandardError), so the exception class must be named explicitly.
begin
  require 'mongrel'
rescue LoadError
  require 'webrick'
end
|
32
|
+
|
33
|
+
require 'rack'
|
34
|
+
|
35
|
+
module Ronin
|
36
|
+
module Web
|
37
|
+
#
# A small Rack-compatible web server which routes requests to blocks.
# Requests are dispatched, in order, to: a matching virtual host, an
# exactly bound path, a path pattern, a mapped directory prefix and
# finally the default route (a "404 Not Found" response unless changed).
#
class Server

  # Default interface to run the Web Server on
  HOST = '0.0.0.0'

  # Default port to run the Web Server on
  PORT = 8080

  # Directory index files
  INDICES = ['index.htm', 'index.html']

  # The host to bind to. Only the writer is generated here; the reader
  # is provided by #host, which doubles as the virtual host DSL method.
  attr_writer :host

  # The port to listen on
  attr_accessor :port

  # The Hash of configurable variables for the server
  attr_reader :config

  #
  # Creates a new Web Server using the given configuration _block_.
  # The _block_ is evaluated in the context of the new server, so the
  # routing methods (bind, map, host, ...) can be called directly.
  #
  # _options_ may contain the following keys:
  # <tt>:host</tt>:: The host to bind to.
  # <tt>:port</tt>:: The port to listen on.
  # <tt>:config</tt>:: A +Hash+ of configurable variables to be used
  #                    in responses.
  #
  def initialize(options={},&block)
    @host = options[:host]
    @port = options[:port]

    @config = {}
    @config.merge!(options[:config]) if options.has_key?(:config)

    @default = method(:not_found)

    @virtual_host_patterns = {}
    @virtual_hosts = {}

    @path_patterns = {}
    @paths = {}
    @directories = {}

    instance_eval(&block) if block
  end

  #
  # Returns the default host that the Web Server will be run on.
  #
  def self.default_host
    @@default_host ||= HOST
  end

  #
  # Sets the default host that the Web Server will run on to the
  # specified _host_.
  #
  def self.default_host=(host)
    @@default_host = host
  end

  #
  # Returns the default port that the Web Server will run on.
  #
  def self.default_port
    @@default_port ||= PORT
  end

  #
  # Sets the default port the Web Server will run on to the specified
  # _port_.
  #
  def self.default_port=(port)
    @@default_port = port
  end

  #
  # The Hash of the servers supported file extensions and their HTTP
  # Content-Types.
  #
  def self.content_types
    @@content_types ||= {}
  end

  #
  # Registers a new content _type_ for the specified file _extensions_.
  # Returns the class, so registrations may be chained.
  #
  #   Server.content_type 'text/xml', ['xml', 'xsl']
  #
  def self.content_type(type,extensions)
    extensions.each { |ext| content_types[ext] = type }

    return self
  end

  #
  # Runs the specified _server_ with the given _options_. Server.run
  # will use Mongrel to run the _server_, if it is installed. Otherwise
  # WEBrick will be used to run the _server_.
  #
  # _options_ can contain the following keys:
  # <tt>:host</tt>:: The host the server will bind to, defaults to
  #                  Server.default_host.
  # <tt>:port</tt>:: The port the server will listen on, defaults to
  #                  Server.default_port.
  #
  def self.run(server,options={})
    rack_options = {
      :Host => (options[:host] || default_host),
      :Port => (options[:port] || default_port)
    }

    if Object.const_defined?('Mongrel')
      Rack::Handler::Mongrel.run(server,rack_options)
    else
      Rack::Handler::WEBrick.run(server,rack_options)
    end
  end

  #
  # Creates a new Web Server object with the given _block_ and starts
  # it using the given _options_.
  #
  def self.start(options={},&block)
    new(options,&block).start
  end

  # Built-in file extension to Content-Type registrations.
  content_type 'text/html', ['html', 'htm', 'xhtml']
  content_type 'text/css', ['css']
  content_type 'image/gif', ['gif']
  content_type 'image/jpeg', ['jpeg', 'jpg']
  content_type 'image/png', ['png']
  content_type 'image/x-icon', ['ico']
  content_type 'text/javascript', ['js']
  content_type 'text/xml', ['xml', 'xsl']
  content_type 'application/rss+xml', ['rss']
  content_type 'application/rdf+xml', ['rdf']
  content_type 'application/pdf', ['pdf']
  content_type 'application/msword', ['doc']
  content_type 'application/zip', ['zip']
  content_type 'text/plain', ['txt', 'conf', 'rb', 'py', 'h', 'c', 'hh', 'cc', 'hpp', 'cpp']

  #
  # Returns the HTTP Content-Type for the specified file _extension_,
  # or a generic unknown type if the extension is not registered.
  #
  #   content_type('html')
  #   # => "text/html"
  #
  def content_type(extension)
    Server.content_types[extension] || 'application/x-unknown-content-type'
  end

  #
  # Returns the HTTP Content-Type for the specified _file_. The file
  # extension is compared case-insensitively.
  #
  #   srv.content_type_for('file.html')
  #   # => "text/html"
  #
  def content_type_for(file)
    ext = File.extname(file).downcase

    # drop the leading '.'; yields nil when the file has no extension
    return content_type(ext[1..-1])
  end

  #
  # Returns the index file contained within the _path_ of the specified
  # directory. If no index file can be found, +nil+ will be returned.
  #
  def index_of(path)
    directory = File.expand_path(path)

    return INDICES.map { |name|
      File.join(directory,name)
    }.find { |index| File.file?(index) }
  end

  #
  # Returns the HTTP 404 Not Found message for the requested path.
  # The path is HTML escaped before being placed in the page.
  #
  def not_found(env)
    path = env['PATH_INFO'].to_s
    body = [
      '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">',
      '<html>',
      '<head>',
      '<title>404 Not Found</title>',
      '</head>',
      '<body>',
      '<h1>Not Found</h1>',
      "<p>The requested URL #{CGI.escapeHTML(path)} was not found on this server.</p>",
      '<hr>',
      '</body>',
      '</html>'
    ].join("\n")

    return response(body, :status => 404, :content_type => 'text/html')
  end

  #
  # Returns the contents of the file at the specified _path_. If the
  # _path_ points to a directory, the directory will be searched for
  # an index file. If no index file can be found or _path_ points to a
  # non-existent file, a "404 Not Found" response will be returned.
  #
  def return_file(path,env)
    return not_found(env) unless File.exist?(path)

    if File.directory?(path)
      path = index_of(path)

      return not_found(env) unless path
    end

    return file_response(path,content_type_for(path))
  end

  #
  # Returns a Rack Response object with the specified _body_, the given
  # _options_ and the given _block_. The _options_ Hash is not modified.
  #
  # _options_ may include the following keys:
  # <tt>:status</tt>:: The HTTP Response status code, defaults to 200.
  #
  # Every other key is converted into a HTTP header name
  # (<tt>:content_type</tt> becomes <tt>Content-Type</tt>).
  #
  #   response("<data>lol</data>", :content_type => 'text/xml')
  #
  def response(body=[],options={},&block)
    status = (options[:status] || 200)
    headers = {}

    options.each do |name,value|
      next if name == :status

      header_name = name.to_s.split('_').map { |word|
        word.capitalize
      }.join('-')

      headers[header_name] = value.to_s
    end

    return Rack::Response.new(body,status,headers,&block)
  end

  #
  # Use the specified _block_ as the default route for all other
  # requests.
  #
  #   default do |env|
  #     [200, {'Content-Type' => 'text/html'}, 'lol train']
  #   end
  #
  def default(&block)
    @default = block
    return self
  end

  #
  # Connects the specified _server_ as a virtual host representing the
  # specified host _name_.
  #
  def connect(name,server)
    @virtual_hosts[name.to_s] = server
  end

  #
  # Returns the server that handles requests for the specified host
  # _name_. Exact names take precedence over host patterns. Returns
  # +nil+ if no virtual host matches.
  #
  def virtual_host(name)
    name = name.to_s

    return @virtual_hosts[name] if @virtual_hosts.has_key?(name)

    @virtual_host_patterns.each do |pattern,server|
      return server if name.match(pattern)
    end

    return nil
  end

  #
  # Registers the specified _block_ to be called when receiving
  # requests to host names which match the specified _pattern_.
  #
  #   hosts_like(/^a[0-9]\./) do
  #     map('/download/') do |env|
  #       ...
  #     end
  #   end
  #
  def hosts_like(pattern,&block)
    @virtual_host_patterns[pattern] = self.class.new(&block)
  end

  #
  # Registers the specified _block_ to be called when receiving
  # requests for paths which match the specified _pattern_.
  #
  #   paths_like(/\.xml$/) do |env|
  #     ...
  #   end
  #
  def paths_like(pattern,&block)
    @path_patterns[pattern] = block
    return self
  end

  #
  # Creates a new Server object using the specified _block_ and
  # connects it as a virtual host representing the specified host
  # _name_.
  #
  #   host('cdn.evil.com') do
  #     ...
  #   end
  #
  # When called without a _name_ and without a _block_, the host the
  # server binds to is returned instead, so that the reader matching
  # the +host=+ writer keeps working.
  #
  def host(name=nil,&block)
    return @host if (name.nil? && block.nil?)

    connect(name,self.class.new(&block))
  end

  #
  # Binds the specified URL _path_ to the given _block_.
  #
  #   bind '/secrets.xml' do |env|
  #     [200, {'Content-Type' => 'text/xml'}, "Made you look."]
  #   end
  #
  def bind(path,&block)
    @paths[path] = block
    return self
  end

  #
  # Binds the specified URL directory _path_ to the given _block_.
  #
  #   map '/downloads' do |env|
  #     response(
  #       "Your somewhere inside the downloads directory",
  #       :content_type => 'text/xml'
  #     )
  #   end
  #
  def map(path,&block)
    @directories[path] = block
    return self
  end

  #
  # Binds the contents of the specified _file_ to the specified URL
  # _path_, using the given _options_.
  #
  # _options_ may include the following keys:
  # <tt>:content_type</tt>:: The Content-Type to serve the file with,
  #                          defaults to the type registered for the
  #                          extension of _file_.
  #
  #   file '/robots.txt', '/path/to/my_robots.txt'
  #
  def file(path,file,options={})
    file = File.expand_path(file)
    type = (options[:content_type] || content_type_for(file))

    bind(path) do |env|
      # checked per request, since the file may appear or vanish later
      if File.file?(file)
        file_response(file,type)
      else
        not_found(env)
      end
    end
  end

  #
  # Mounts the contents of the specified _directory_ to the given
  # prefix _path_.
  #
  #   mount '/download/', '/tmp/files/'
  #
  def mount(path,directory)
    sub_dirs = path.split('/')
    directory = File.expand_path(directory)

    map(path) do |env|
      http_dirs = normalize_path(env['PATH_INFO']).split('/')

      # strip the mount prefix and resolve the rest inside the directory
      sub_path = http_dirs[sub_dirs.length..-1].join('/')

      return_file(File.join(directory,sub_path),env)
    end
  end

  #
  # Starts the server.
  #
  def start
    Server.run(self, :host => @host, :port => @port)
    return self
  end

  #
  # The method which receives all requests.
  #
  def call(env)
    http_host = env['HTTP_HOST']
    http_path = normalize_path(env['PATH_INFO'])

    if http_host
      # A Host header may carry a ":port" suffix; try the full value
      # first and then the bare host name.
      server = (
        virtual_host(http_host) ||
        virtual_host(http_host.to_s.sub(/:\d+\z/,''))
      )

      return server.call(env) if server
    end

    if (block = @paths[http_path])
      return block.call(env)
    end

    @path_patterns.each do |pattern,block|
      return block.call(env) if http_path.match(pattern)
    end

    http_dirs = http_path.split('/')

    # Every matching directory is a prefix of the request path, so the
    # greatest one in String order is also the most specific one.
    sub_dir = @directories.keys.select { |path|
      dirs = path.split('/')

      http_dirs[0...dirs.length] == dirs
    }.max

    if (sub_dir && (block = @directories[sub_dir]))
      return block.call(env)
    end

    return @default.call(env)
  end

  #
  # Routes the specified _url_ to the call method.
  #
  def route(url)
    url = URI(url.to_s)

    return call(
      'HTTP_HOST' => url.host,
      'HTTP_PORT' => url.port,
      'SERVER_PORT' => url.port,
      'PATH_INFO' => url.path,
      'QUERY_STRING' => url.query
    )
  end

  #
  # Routes the specified _path_ to the call method.
  #
  def route_path(path)
    path, query = decode_uri(path.to_s).split('?',2)

    return route(URI::HTTP.build(
      :host => @host,
      :port => @port,
      :path => path,
      :query => query
    ))
  end

  private

  #
  # Returns a response containing the contents of the file at _path_,
  # served with the specified Content-Type. The file is closed as soon
  # as its contents have been read.
  #
  def file_response(path,type)
    contents = File.open(path,'rb') { |io| io.read }

    return response(contents, :content_type => type)
  end

  #
  # Normalizes a request _path_ (collapsing "." and ".." segments),
  # anchored at "/" so that empty or relative paths are never resolved
  # against the working directory of the process.
  #
  def normalize_path(path)
    File.expand_path(path.to_s,'/')
  end

  #
  # Decodes the percent-escapes within _string_. URI.decode is used
  # where it still exists; newer Rubies removed it, so the RFC 2396
  # parser is used there instead.
  #
  def decode_uri(string)
    return URI.decode(string) if URI.respond_to?(:decode)

    return URI::RFC2396_Parser.new.unescape(string)
  end

end
|
510
|
+
end
|
511
|
+
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
#
|
2
|
+
#--
|
3
|
+
# Ronin Web - A Ruby library for Ronin that provides support for web
|
4
|
+
# scraping and spidering functionality.
|
5
|
+
#
|
6
|
+
# Copyright (c) 2006-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
7
|
+
#
|
8
|
+
# This program is free software; you can redistribute it and/or modify
|
9
|
+
# it under the terms of the GNU General Public License as published by
|
10
|
+
# the Free Software Foundation; either version 2 of the License, or
|
11
|
+
# (at your option) any later version.
|
12
|
+
#
|
13
|
+
# This program is distributed in the hope that it will be useful,
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
+
# GNU General Public License for more details.
|
17
|
+
#
|
18
|
+
# You should have received a copy of the GNU General Public License
|
19
|
+
# along with this program; if not, write to the Free Software
|
20
|
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
21
|
+
#++
|
22
|
+
#
|
23
|
+
|
24
|
+
require 'ronin/web/web'
|
25
|
+
|
26
|
+
require 'spidr/agent'
|
27
|
+
|
28
|
+
module Ronin
|
29
|
+
module Web
|
30
|
+
#
# A Spidr::Agent whose class-level constructors are pre-configured with
# the Ronin Web defaults (see Spider.default_options).
#
class Spider < Spidr::Agent

  #
  # Builds a new Spider from the given _options_ layered over
  # Spider.default_options, forwarding the given _block_ to the
  # constructor.
  #
  # _options_ may contain the following keys:
  # <tt>:proxy</tt>:: The proxy to use while spidering. Defaults to
  #                   Web.proxy.
  # <tt>:user_agent</tt>:: The User-Agent string to send. Defaults to
  #                        Web.user_agent.
  # <tt>:referer</tt>:: The referer URL to send.
  # <tt>:delay</tt>:: Duration in seconds to pause between spidering each
  #                   link. Defaults to 0.
  # <tt>:host</tt>:: The host-name to visit.
  # <tt>:hosts</tt>:: An +Array+ of host patterns to visit.
  # <tt>:ignore_hosts</tt>:: An +Array+ of host patterns to not visit.
  # <tt>:ports</tt>:: An +Array+ of port patterns to visit.
  # <tt>:ignore_ports</tt>:: An +Array+ of port patterns to not visit.
  # <tt>:links</tt>:: An +Array+ of link patterns to visit.
  # <tt>:ignore_links</tt>:: An +Array+ of link patterns to not visit.
  # <tt>:exts</tt>:: An +Array+ of File extension patterns to visit.
  # <tt>:ignore_exts</tt>:: An +Array+ of File extension patterns to not
  #                         visit.
  #
  def self.agent(options={},&block)
    merged = default_options.merge(options)

    new(merged,&block)
  end

  #
  # Delegates to the superclass +host+ constructor for the specified
  # host _name_, with the given _options_ layered over
  # Spider.default_options. The _block_ is forwarded unchanged.
  #
  def self.host(name,options={},&block)
    merged = default_options.merge(options)

    super(name,merged,&block)
  end

  #
  # Delegates to the superclass +site+ constructor for the specified
  # _url_, with the given _options_ layered over
  # Spider.default_options. The _block_ is forwarded unchanged.
  #
  def self.site(url,options={},&block)
    merged = default_options.merge(options)

    super(url,merged,&block)
  end

  #
  # Returns the default options for Spider: the proxy and User-Agent
  # configured on Web. A fresh Hash is returned on every call.
  #
  # Visibility keywords such as +protected+ only apply to instance
  # methods, so this class method is public.
  #
  def self.default_options
    {:proxy => Web.proxy, :user_agent => Web.user_agent}
  end

end
|
88
|
+
end
|
89
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
#
|
2
|
+
#--
|
3
|
+
# Ronin Web - A Ruby library for Ronin that provides support for web
|
4
|
+
# scraping and spidering functionality.
|
5
|
+
#
|
6
|
+
# Copyright (c) 2006-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
7
|
+
#
|
8
|
+
# This program is free software; you can redistribute it and/or modify
|
9
|
+
# it under the terms of the GNU General Public License as published by
|
10
|
+
# the Free Software Foundation; either version 2 of the License, or
|
11
|
+
# (at your option) any later version.
|
12
|
+
#
|
13
|
+
# This program is distributed in the hope that it will be useful,
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
+
# GNU General Public License for more details.
|
17
|
+
#
|
18
|
+
# You should have received a copy of the GNU General Public License
|
19
|
+
# along with this program; if not, write to the Free Software
|
20
|
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
21
|
+
#++
|
22
|
+
#
|
23
|
+
|
24
|
+
module Ronin
  module Web
    # Ronin Web Version. Frozen so that the shared constant cannot be
    # modified in place by code that reads it.
    VERSION = '0.1.0'.freeze
  end
end
|