ronin-web 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +15 -0
- data/Manifest.txt +25 -0
- data/README.txt +74 -0
- data/Rakefile +21 -0
- data/lib/ronin/sessions/web.rb +80 -0
- data/lib/ronin/web/extensions/nokogiri/xml/attr.rb +13 -0
- data/lib/ronin/web/extensions/nokogiri/xml/document.rb +18 -0
- data/lib/ronin/web/extensions/nokogiri/xml/element.rb +23 -0
- data/lib/ronin/web/extensions/nokogiri/xml/node.rb +47 -0
- data/lib/ronin/web/extensions/nokogiri/xml/text.rb +13 -0
- data/lib/ronin/web/extensions/nokogiri/xml.rb +5 -0
- data/lib/ronin/web/extensions/nokogiri.rb +24 -0
- data/lib/ronin/web/extensions.rb +24 -0
- data/lib/ronin/web/server.rb +511 -0
- data/lib/ronin/web/spider.rb +89 -0
- data/lib/ronin/web/version.rb +29 -0
- data/lib/ronin/web/web.rb +305 -0
- data/lib/ronin/web.rb +25 -0
- data/spec/spec_helper.rb +7 -0
- data/spec/web/extensions/nokogiri_spec.rb +38 -0
- data/spec/web/helpers/root/index.html +1 -0
- data/spec/web/helpers/root/test.txt +1 -0
- data/spec/web/helpers/server.rb +2 -0
- data/spec/web/server_spec.rb +142 -0
- data/tasks/spec.rb +9 -0
- metadata +141 -0
@@ -0,0 +1,511 @@
|
|
1
|
+
#
|
2
|
+
#--
|
3
|
+
# Ronin Web - A Ruby library for Ronin that provides support for web
|
4
|
+
# scraping and spidering functionality.
|
5
|
+
#
|
6
|
+
# Copyright (c) 2006-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
7
|
+
#
|
8
|
+
# This program is free software; you can redistribute it and/or modify
|
9
|
+
# it under the terms of the GNU General Public License as published by
|
10
|
+
# the Free Software Foundation; either version 2 of the License, or
|
11
|
+
# (at your option) any later version.
|
12
|
+
#
|
13
|
+
# This program is distributed in the hope that it will be useful,
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
+
# GNU General Public License for more details.
|
17
|
+
#
|
18
|
+
# You should have received a copy of the GNU General Public License
|
19
|
+
# along with this program; if not, write to the Free Software
|
20
|
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
21
|
+
#++
|
22
|
+
#
|
23
|
+
|
24
|
+
require 'uri'
|
25
|
+
require 'cgi'
|
26
|
+
|
27
|
+
# Prefer Mongrel when it is installed, otherwise fall back to WEBrick.
# A failed +require+ raises LoadError, which descends from ScriptError and
# is therefore not caught by a bare +rescue+ (that only covers
# StandardError), so the error class has to be named explicitly.
begin
  require 'mongrel'
rescue LoadError
  require 'webrick'
end
|
32
|
+
|
33
|
+
require 'rack'
|
34
|
+
|
35
|
+
module Ronin
|
36
|
+
module Web
|
37
|
+
class Server
|
38
|
+
|
39
|
+
# Default interface to run the Web Server on
|
40
|
+
HOST = '0.0.0.0'

# Port the Web Server listens on when none is configured
PORT = 8080

# File names that are tried, in order, when a directory is requested
INDICES = %w[index.htm index.html]
|
47
|
+
|
48
|
+
# The host to bind to
|
49
|
+
attr_accessor :host

# Port this server instance listens on (readable and writable)
attr_accessor :port

# Hash of configurable variables for the server (read-only accessor)
attr_reader :config
|
56
|
+
|
57
|
+
#
|
58
|
+
# Creates a new Web Server using the given configuration _block_.
|
59
|
+
#
|
60
|
+
# _options_ may contain the following keys:
|
61
|
+
# <tt>:host</tt>:: The host to bind to.
|
62
|
+
# <tt>:port</tt>:: The port to listen on.
|
63
|
+
# <tt>:config</tt>:: A +Hash+ of configurable variables to be used
|
64
|
+
# in responses.
|
65
|
+
#
|
66
|
+
def initialize(options={},&block)
  # The network settings are stored exactly as given; a +nil+ value is
  # replaced by the class-wide default when Server.run is invoked.
  @host, @port = options[:host], options[:port]

  # Copy the caller's variables into a fresh Hash.
  @config = {}
  @config.merge!(options[:config]) if options.has_key?(:config)

  # Requests that match no route get the stock 404 page by default.
  @default = method(:not_found)

  # Routing tables: virtual hosts first, then paths and directories.
  @virtual_hosts = {}
  @virtual_host_patterns = {}

  @paths = {}
  @path_patterns = {}
  @directories = {}

  # The configuration block runs in the context of the new server.
  instance_eval(&block) if block
end
|
86
|
+
|
87
|
+
#
|
88
|
+
# Returns the default host that the Web Server will be run on.
|
89
|
+
#
|
90
|
+
def self.default_host
  # Seeded from HOST the first time it is read.
  @@default_host ||= HOST
end
|
93
|
+
|
94
|
+
#
|
95
|
+
# Sets the default host that the Web Server will run on to the
|
96
|
+
# specified _host_.
|
97
|
+
#
|
98
|
+
def self.default_host=(host)
  # Stored in a class variable, so the value is shared by the whole class.
  @@default_host = host
end
|
101
|
+
|
102
|
+
#
|
103
|
+
# Returns the default port that the Web Server will run on.
|
104
|
+
#
|
105
|
+
def self.default_port
  # Seeded from PORT the first time it is read.
  @@default_port ||= PORT
end
|
108
|
+
|
109
|
+
#
|
110
|
+
# Sets the default port the Web Server will run on to the specified
|
111
|
+
# _port_.
|
112
|
+
#
|
113
|
+
def self.default_port=(port)
  # Stored in a class variable, so the value is shared by the whole class.
  @@default_port = port
end
|
116
|
+
|
117
|
+
#
|
118
|
+
# The Hash of the servers supported file extensions and their HTTP
|
119
|
+
# Content-Types.
|
120
|
+
#
|
121
|
+
def self.content_types
  # Created empty on first access; maps file extension => Content-Type.
  @@content_types ||= {}
end
|
124
|
+
|
125
|
+
#
|
126
|
+
# Registers a new content _type_ for the specified file _extensions_.
|
127
|
+
#
|
128
|
+
# Server.content_type 'text/xml', ['xml', 'xsl']
|
129
|
+
#
|
130
|
+
def self.content_type(type,extensions)
  # Every listed extension is pointed at the same Content-Type.
  extensions.each do |extension|
    Server.content_types[extension] = type
  end

  self
end
|
135
|
+
|
136
|
+
#
|
137
|
+
# Runs the specified _server_ with the given _options_. Server.run
|
138
|
+
# will use Mongrel to run the _server_, if it is installed. Otherwise
|
139
|
+
# WEBrick will be used to run the _server_.
|
140
|
+
#
|
141
|
+
# _options_ can contain the following keys:
|
142
|
+
# <tt>:host</tt>:: The host the server will bind to, defaults to
|
143
|
+
# Server.default_host.
|
144
|
+
# <tt>:port</tt>:: The port the server will listen on, defaults to
|
145
|
+
# Server.default_port.
|
146
|
+
#
|
147
|
+
def self.run(server,options={})
  # Explicit options win; otherwise the class-wide defaults are used.
  rack_options = {
    :Host => (options[:host] || Server.default_host),
    :Port => (options[:port] || Server.default_port)
  }

  # Mongrel is used whenever its top-level constant has been loaded.
  handler = if Object.const_defined?('Mongrel')
              Rack::Handler::Mongrel
            else
              Rack::Handler::WEBrick
            end

  handler.run(server,rack_options)
end
|
159
|
+
|
160
|
+
#
|
161
|
+
# Creates a new Web Server object with the given _block_ and starts
|
162
|
+
# it using the given _options_.
|
163
|
+
#
|
164
|
+
def self.start(options={},&block)
  # Build the server from the block, then hand it straight to #start.
  server = new(options,&block)
  server.start
end
|
167
|
+
|
168
|
+
#
|
169
|
+
# Returns the HTTP Content-Type for the specified file _extension_.
|
170
|
+
#
|
171
|
+
# content_type('html')
|
172
|
+
# # => "text/html"
|
173
|
+
#
|
174
|
+
def content_type(extension)
  registered = Server.content_types[extension]
  return registered if registered

  # Unregistered extensions get a generic placeholder type.
  'application/x-unknown-content-type'
end
|
177
|
+
|
178
|
+
#
|
179
|
+
# Returns the HTTP Content-Type for the specified _file_.
|
180
|
+
#
|
181
|
+
# srv.content_type_for('file.html')
|
182
|
+
# # => "text/html"
|
183
|
+
#
|
184
|
+
def content_type_for(file)
  # File.extname keeps the leading dot (".html"); drop it before the
  # lookup. A name without an extension yields +nil+ here.
  content_type(File.extname(file).downcase[1..-1])
end
|
189
|
+
|
190
|
+
#
|
191
|
+
# Returns the index file contained within the _path_ of the specified
|
192
|
+
# directory. If no index file can be found, +nil+ will be returned.
|
193
|
+
#
|
194
|
+
def index_of(path)
  directory = File.expand_path(path)

  # Candidates are checked in the order INDICES lists them; +find+
  # returns +nil+ when none of them is a regular file.
  candidates = INDICES.map { |name| File.join(directory,name) }
  candidates.find { |candidate| File.file?(candidate) }
end
|
205
|
+
|
206
|
+
#
|
207
|
+
# Returns the HTTP 404 Not Found message for the requested path.
|
208
|
+
#
|
209
|
+
def not_found(env)
  # The requested path is HTML-escaped before it is echoed into the page.
  requested = CGI.escapeHTML(env['PATH_INFO'])

  # NOTE(review): the markup below never closes <head>; left untouched
  # because it is runtime output.
  page = %{<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
<html>
<head>
<title>404 Not Found</title>
<body>
<h1>Not Found</h1>
<p>The requested URL #{requested} was not found on this server.</p>
<hr>
</body>
</html>}

  response(page, :status => 404, :content_type => 'text/html')
end
|
224
|
+
|
225
|
+
#
|
226
|
+
# Returns the contents of the file at the specified _path_. If the
|
227
|
+
# _path_ points to a directory, the directory will be searched for
|
228
|
+
# an index file. If no index file can be found or _path_ points to a
|
229
|
+
# non-existent file, a "404 Not Found" response will be returned.
|
230
|
+
#
|
231
|
+
def return_file(path,env)
  # Serves the file at _path_, or the index file of the directory at
  # _path_. Anything that cannot be resolved to a file yields the 404
  # response built by not_found for _env_.
  #
  # File.exist? replaces File.exists?, which was deprecated and has been
  # removed from current Ruby versions.
  return not_found(env) unless File.exist?(path)

  if File.directory?(path)
    # Directories are only served through one of their index files.
    path = index_of(path)
    return not_found(env) unless path
  end

  # NOTE(review): the File handle is handed to the response and is not
  # closed here - confirm the Rack version in use drains/closes it.
  response(File.new(path), :content_type => content_type_for(path))
end
|
244
|
+
|
245
|
+
#
|
246
|
+
# Returns a Rack Response object with the specified _body_, the given
|
247
|
+
# _options_ and the given _block_.
|
248
|
+
#
|
249
|
+
# _options_ may include the following keys:
|
250
|
+
# <tt>:status</tt>:: The HTTP Response status code, defaults to 200.
|
251
|
+
#
|
252
|
+
# response("<data>lol</data>", :content_type => 'text/xml')
|
253
|
+
#
|
254
|
+
def response(body=[],options={},&block)
  # Builds a Rack::Response from _body_. The <tt>:status</tt> option
  # selects the HTTP status (200 when absent); every other option becomes
  # a header, e.g. <tt>:content_type</tt> => "Content-Type".
  #
  # The _options_ Hash is only read, never modified, so callers can reuse
  # it or pass a frozen Hash.
  status = (options[:status] || 200)
  headers = {}

  options.each do |name,value|
    next if name == :status

    # :content_type -> "Content-Type"
    header_name = name.to_s.split('_').map { |word| word.capitalize }.join('-')
    headers[header_name] = value.to_s
  end

  Rack::Response.new(body,status,headers,&block)
end
|
268
|
+
|
269
|
+
#
|
270
|
+
# Use the specified _block_ as the default route for all other
|
271
|
+
# requests.
|
272
|
+
#
|
273
|
+
# default do |env|
|
274
|
+
# [200, {'Content-Type' => 'text/html'}, 'lol train']
|
275
|
+
# end
|
276
|
+
#
|
277
|
+
def default(&block)
  # The block replaces whatever fallback handler was installed before.
  @default = block

  self
end
|
281
|
+
|
282
|
+
#
|
283
|
+
# Connects the specified _server_ as a virtual host representing the
|
284
|
+
# specified host _name_.
|
285
|
+
#
|
286
|
+
def connect(name,server)
  # Host names are always keyed as Strings; the stored server is returned.
  @virtual_hosts.store(name.to_s,server)
end
|
289
|
+
|
290
|
+
#
|
291
|
+
# Returns the server that handles requests for the specified host
|
292
|
+
# _name_.
|
293
|
+
#
|
294
|
+
def virtual_host(name)
  # Looks up the server registered for the host _name_: exact names
  # first, then the registered patterns. Returns +nil+ when nothing
  # matches.
  #
  # An HTTP Host header may carry a ":port" suffix. The name is therefore
  # tried exactly as given first and, only if that finds nothing, again
  # with a trailing ":<digits>" removed, so a host registered as
  # "cdn.evil.com" also answers requests for "cdn.evil.com:8080".
  name = name.to_s
  candidates = [name, name.sub(/:\d+\z/,'')].uniq

  candidates.each do |candidate|
    if @virtual_hosts.has_key?(candidate)
      return @virtual_hosts[candidate]
    end

    @virtual_host_patterns.each do |pattern,server|
      return server if candidate.match(pattern)
    end
  end

  nil
end
|
307
|
+
|
308
|
+
#
|
309
|
+
# Registers the specified _block_ to be called when receiving
|
310
|
+
# requests to host names which match the specified _pattern_.
|
311
|
+
#
|
312
|
+
# hosts_like(/^a[0-9]\./) do
|
313
|
+
# map('/download/') do |env|
|
314
|
+
# ...
|
315
|
+
# end
|
316
|
+
# end
|
317
|
+
#
|
318
|
+
def hosts_like(pattern,&block)
  # The block configures a separate server of the same class, which then
  # handles every host name matching the pattern.
  sub_server = self.class.new(&block)

  @virtual_host_patterns[pattern] = sub_server
end
|
321
|
+
|
322
|
+
#
|
323
|
+
# Registers the specified _block_ to be called when receiving
|
324
|
+
# requests for paths which match the specified _pattern_.
|
325
|
+
#
|
326
|
+
# paths_like(/\.xml$/) do |env|
|
327
|
+
# ...
|
328
|
+
# end
|
329
|
+
#
|
330
|
+
def paths_like(pattern,&block)
  # A later registration for the same pattern replaces the earlier one.
  @path_patterns.store(pattern,block)

  self
end
|
334
|
+
|
335
|
+
#
|
336
|
+
# Creates a new Server object using the specified _block_ and
|
337
|
+
# connects it as a virtual host representing the specified host
|
338
|
+
# _name_.
|
339
|
+
#
|
340
|
+
# host('cdn.evil.com') do
|
341
|
+
# ...
|
342
|
+
# end
|
343
|
+
#
|
344
|
+
def host(name,&block)
  # The block configures a separate server of the same class, which is
  # then registered under the exact host name.
  sub_server = self.class.new(&block)

  connect(name,sub_server)
end
|
347
|
+
|
348
|
+
#
|
349
|
+
# Binds the specified URL _path_ to the given _block_.
|
350
|
+
#
|
351
|
+
# bind '/secrets.xml' do |env|
|
352
|
+
# [200, {'Content-Type' => 'text/xml'}, "Made you look."]
|
353
|
+
# end
|
354
|
+
#
|
355
|
+
def bind(path,&block)
  # A later binding for the same path replaces the earlier one.
  @paths.store(path,block)

  self
end
|
359
|
+
|
360
|
+
#
|
361
|
+
# Binds the specified URL directory _path_ to the given _block_.
|
362
|
+
#
|
363
|
+
# map '/downloads' do |env|
|
364
|
+
# response(
|
365
|
+
# "You're somewhere inside the downloads directory",
|
366
|
+
# :content_type => 'text/xml'
|
367
|
+
# )
|
368
|
+
# end
|
369
|
+
#
|
370
|
+
def map(path,&block)
  # A later mapping for the same directory replaces the earlier one.
  @directories.store(path,block)

  self
end
|
374
|
+
|
375
|
+
#
|
376
|
+
# Binds the contents of the specified _file_ to the specified URL
|
377
|
+
# _path_, using the given _options_.
|
378
|
+
#
|
379
|
+
# file '/robots.txt', '/path/to/my_robots.txt'
|
380
|
+
#
|
381
|
+
def file(path,file,options={})
  # Binds the URL _path_ to the contents of the local _file_.
  #
  # _options_ may contain:
  # <tt>:content_type</tt>:: The Content-Type to send; defaults to the
  #                          type registered for the file's extension.
  #
  # The Content-Type is resolved once, at bind time, and is actually
  # passed to the response (previously it was computed and then ignored).
  file = File.expand_path(file)
  content_type = (options[:content_type] || content_type_for(file))

  bind(path) do |env|
    # The file is re-checked on every request, so it may appear or
    # disappear while the server is running.
    if File.file?(file)
      response(File.new(file), :content_type => content_type)
    else
      not_found(env)
    end
  end
end
|
393
|
+
|
394
|
+
#
|
395
|
+
# Mounts the contents of the specified _directory_ to the given
|
396
|
+
# prefix _path_.
|
397
|
+
#
|
398
|
+
# mount '/download/', '/tmp/files/'
|
399
|
+
#
|
400
|
+
def mount(path,directory)
  # Serves the files below the local _directory_ under the URL prefix
  # _path_.
  prefix_length = path.split('/').length
  directory = File.expand_path(directory)

  map(path) do |env|
    # Anchor the request path at "/" before normalising it, so an empty
    # or relative PATH_INFO is never resolved against the process's
    # working directory.
    http_path = File.expand_path(File.join('/',env['PATH_INFO'].to_s))

    # Drop the mount prefix; a request shorter than the prefix leaves
    # nothing to append.
    remainder = (http_path.split('/')[prefix_length..-1] || [])
    absolute_path = File.join(directory,remainder.join('/'))

    return_file(absolute_path,env)
  end
end
|
414
|
+
|
415
|
+
#
|
416
|
+
# Starts the server.
|
417
|
+
#
|
418
|
+
def start
  # The instance's own host and port are handed to Server.run.
  run_options = {:host => @host, :port => @port}
  Server.run(self,run_options)

  self
end
|
422
|
+
|
423
|
+
#
|
424
|
+
# The method which receives all requests.
|
425
|
+
#
|
426
|
+
def call(env)
  # Rack entry point. Dispatches the request described by _env_ in this
  # order: virtual host, exact path, path pattern, mapped directory,
  # default handler.
  http_host = env['HTTP_HOST']

  # Anchor the request path at "/" before normalising it, so an empty,
  # missing or relative PATH_INFO is never resolved against the process's
  # working directory. Normalising also drops a trailing slash.
  http_path = File.expand_path(File.join('/',env['PATH_INFO'].to_s))

  # Virtual hosts take precedence over every path-based route.
  if http_host && (server = virtual_host(http_host))
    return server.call(env)
  end

  if (handler = @paths[http_path])
    return handler.call(env)
  end

  @path_patterns.each do |pattern,pattern_handler|
    return pattern_handler.call(env) if http_path.match(pattern)
  end

  http_dirs = http_path.split('/')

  # Of all mapped directories that prefix the request path, the one that
  # sorts last is used.
  sub_dir = @directories.keys.select { |path|
    dirs = path.split('/')

    http_dirs[0...dirs.length] == dirs
  }.sort.last

  if (sub_dir && (handler = @directories[sub_dir]))
    return handler.call(env)
  end

  @default.call(env)
end
|
462
|
+
|
463
|
+
#
|
464
|
+
# Routes the specified _url_ to the call method.
|
465
|
+
#
|
466
|
+
def route(url)
  uri = URI(url.to_s)

  # Minimal request environment assembled from the URL's components.
  env = {
    'HTTP_HOST' => uri.host,
    'HTTP_PORT' => uri.port,
    'SERVER_PORT' => uri.port,
    'PATH_INFO' => uri.path,
    'QUERY_STRING' => uri.query
  }

  call(env)
end
|
477
|
+
|
478
|
+
#
|
479
|
+
# Routes the specified _path_ to the call method.
|
480
|
+
#
|
481
|
+
def route_path(path)
  # Routes _path_ (optionally followed by "?query") as if it had been
  # requested from this server's own host and port.
  raw = path.to_s

  # URI.decode is absent from current Ruby versions; use it where it
  # still exists and fall back to the parser's unescape elsewhere.
  decoded = if URI.respond_to?(:decode)
              URI.decode(raw)
            else
              URI::DEFAULT_PARSER.unescape(raw)
            end

  path, query = decoded.split('?',2)

  route(URI::HTTP.build(
    :host => @host,
    :port => @port,
    :path => path,
    :query => query
  ))
end
|
491
|
+
|
492
|
+
protected
|
493
|
+
|
494
|
+
# Built-in extension => Content-Type registrations.
# Images use the "image/*" media types and Word documents use
# "application/msword"; the former "text/gif", "text/jpeg", "text/png"
# and "application/doc" are not registered media types.
content_type 'text/html', ['html', 'htm', 'xhtml']
content_type 'text/css', ['css']
content_type 'image/gif', ['gif']
content_type 'image/jpeg', ['jpeg', 'jpg']
content_type 'image/png', ['png']
content_type 'image/x-icon', ['ico']
content_type 'text/javascript', ['js']
content_type 'text/xml', ['xml', 'xsl']
content_type 'application/rss+xml', ['rss']
content_type 'application/rdf+xml', ['rdf']
content_type 'application/pdf', ['pdf']
content_type 'application/msword', ['doc']
content_type 'application/zip', ['zip']
content_type 'text/plain', ['txt', 'conf', 'rb', 'py', 'h', 'c', 'hh', 'cc', 'hpp', 'cpp']
|
508
|
+
|
509
|
+
end
|
510
|
+
end
|
511
|
+
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
#
|
2
|
+
#--
|
3
|
+
# Ronin Web - A Ruby library for Ronin that provides support for web
|
4
|
+
# scraping and spidering functionality.
|
5
|
+
#
|
6
|
+
# Copyright (c) 2006-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
7
|
+
#
|
8
|
+
# This program is free software; you can redistribute it and/or modify
|
9
|
+
# it under the terms of the GNU General Public License as published by
|
10
|
+
# the Free Software Foundation; either version 2 of the License, or
|
11
|
+
# (at your option) any later version.
|
12
|
+
#
|
13
|
+
# This program is distributed in the hope that it will be useful,
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
+
# GNU General Public License for more details.
|
17
|
+
#
|
18
|
+
# You should have received a copy of the GNU General Public License
|
19
|
+
# along with this program; if not, write to the Free Software
|
20
|
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
21
|
+
#++
|
22
|
+
#
|
23
|
+
|
24
|
+
require 'ronin/web/web'
|
25
|
+
|
26
|
+
require 'spidr/agent'
|
27
|
+
|
28
|
+
module Ronin
|
29
|
+
module Web
|
30
|
+
class Spider < Spidr::Agent

  #
  # Builds a new Spider from the given _options_ and _block_; the
  # _block_, if any, is forwarded to the constructor.
  #
  # _options_ may contain the following keys:
  # <tt>:proxy</tt>:: The proxy to use while spidering. Defaults to
  #                   Web.proxy.
  # <tt>:user_agent</tt>:: The User-Agent string to send. Defaults to
  #                        Web.user_agent.
  # <tt>:referer</tt>:: The referer URL to send.
  # <tt>:delay</tt>:: Duration in seconds to pause between spidering each
  #                   link. Defaults to 0.
  # <tt>:host</tt>:: The host-name to visit.
  # <tt>:hosts</tt>:: An +Array+ of host patterns to visit.
  # <tt>:ignore_hosts</tt>:: An +Array+ of host patterns to not visit.
  # <tt>:ports</tt>:: An +Array+ of port patterns to visit.
  # <tt>:ignore_ports</tt>:: An +Array+ of port patterns to not visit.
  # <tt>:links</tt>:: An +Array+ of link patterns to visit.
  # <tt>:ignore_links</tt>:: An +Array+ of link patterns to not visit.
  # <tt>:exts</tt>:: An +Array+ of File extension patterns to visit.
  # <tt>:ignore_exts</tt>:: An +Array+ of File extension patterns to not
  #                         visit.
  #
  def self.agent(options={},&block)
    new(default_options.merge(options),&block)
  end

  #
  # Spiders the host _name_ with the given _options_ layered over the
  # Spider defaults. The call, including the _block_, is delegated to the
  # superclass.
  #
  def self.host(name,options={},&block)
    super(name,default_options.merge(options),&block)
  end

  #
  # Spiders the site at _url_ with the given _options_ layered over the
  # Spider defaults. The call, including the _block_, is delegated to the
  # superclass.
  #
  def self.site(url,options={},&block)
    super(url,default_options.merge(options),&block)
  end

  # NOTE(review): +protected+ only affects instance methods, so the class
  # method below remains publicly callable.
  protected

  #
  # Default options for Spider: the proxy and User-Agent configured on
  # Web, read each time the method is called.
  #
  def self.default_options
    {:proxy => Web.proxy, :user_agent => Web.user_agent}
  end

end
|
88
|
+
end
|
89
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
#
|
2
|
+
#--
|
3
|
+
# Ronin Web - A Ruby library for Ronin that provides support for web
|
4
|
+
# scraping and spidering functionality.
|
5
|
+
#
|
6
|
+
# Copyright (c) 2006-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
7
|
+
#
|
8
|
+
# This program is free software; you can redistribute it and/or modify
|
9
|
+
# it under the terms of the GNU General Public License as published by
|
10
|
+
# the Free Software Foundation; either version 2 of the License, or
|
11
|
+
# (at your option) any later version.
|
12
|
+
#
|
13
|
+
# This program is distributed in the hope that it will be useful,
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
+
# GNU General Public License for more details.
|
17
|
+
#
|
18
|
+
# You should have received a copy of the GNU General Public License
|
19
|
+
# along with this program; if not, write to the Free Software
|
20
|
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
21
|
+
#++
|
22
|
+
#
|
23
|
+
|
24
|
+
module Ronin
  module Web
    # Release version of the Ronin Web library
    VERSION = '0.1.0'
  end
end
|