ronin-web 0.3.0.rc1 → 1.0.0.beta1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (148) hide show
  1. checksums.yaml +7 -0
  2. data/.editorconfig +11 -0
  3. data/.github/workflows/ruby.yml +31 -0
  4. data/.gitignore +13 -0
  5. data/.mailmap +1 -0
  6. data/.ruby-version +1 -0
  7. data/COPYING.txt +3 -3
  8. data/ChangeLog.md +115 -70
  9. data/Gemfile +42 -37
  10. data/README.md +159 -145
  11. data/Rakefile +12 -3
  12. data/bin/ronin-web +9 -17
  13. data/data/new/nokogiri.rb.erb +12 -0
  14. data/data/new/server.rb.erb +22 -0
  15. data/data/new/spider.rb.erb +26 -0
  16. data/data/new/webapp/.gitignore +15 -0
  17. data/data/new/webapp/.ruby-version.erb +1 -0
  18. data/data/new/webapp/Dockerfile.erb +11 -0
  19. data/data/new/webapp/Gemfile +6 -0
  20. data/data/new/webapp/app.rb.erb +15 -0
  21. data/data/new/webapp/config.ru +4 -0
  22. data/data/new/webapp/docker-compose.yml.erb +9 -0
  23. data/gemspec.yml +32 -14
  24. data/lib/ronin/web/cli/command.rb +36 -0
  25. data/lib/ronin/web/cli/commands/diff.rb +106 -0
  26. data/lib/ronin/web/cli/commands/html.rb +174 -0
  27. data/lib/ronin/web/cli/commands/irb.rb +56 -0
  28. data/lib/ronin/web/cli/commands/new/nokogiri.rb +85 -0
  29. data/lib/ronin/web/cli/commands/new/server.rb +96 -0
  30. data/lib/ronin/web/cli/commands/new/spider.rb +315 -0
  31. data/lib/ronin/web/cli/commands/new/webapp.rb +123 -0
  32. data/lib/ronin/web/cli/commands/new.rb +64 -0
  33. data/lib/ronin/web/cli/commands/reverse_proxy.rb +215 -0
  34. data/lib/ronin/web/cli/commands/server.rb +155 -0
  35. data/lib/ronin/web/cli/commands/spider.rb +822 -0
  36. data/lib/ronin/web/cli/ruby_shell.rb +50 -0
  37. data/lib/ronin/web/cli.rb +44 -0
  38. data/lib/ronin/web/html.rb +85 -0
  39. data/lib/ronin/web/mechanize.rb +34 -36
  40. data/lib/ronin/web/root.rb +27 -0
  41. data/lib/ronin/web/version.rb +7 -10
  42. data/lib/ronin/web/xml.rb +85 -0
  43. data/lib/ronin/web.rb +372 -13
  44. data/man/ronin-web-diff.1 +41 -0
  45. data/man/ronin-web-diff.1.md +30 -0
  46. data/man/ronin-web-html.1 +89 -0
  47. data/man/ronin-web-html.1.md +66 -0
  48. data/man/ronin-web-irb.1 +31 -0
  49. data/man/ronin-web-irb.1.md +22 -0
  50. data/man/ronin-web-new-nokogiri.1 +41 -0
  51. data/man/ronin-web-new-nokogiri.1.md +30 -0
  52. data/man/ronin-web-new-server.1 +45 -0
  53. data/man/ronin-web-new-server.1.md +33 -0
  54. data/man/ronin-web-new-spider.1 +173 -0
  55. data/man/ronin-web-new-spider.1.md +129 -0
  56. data/man/ronin-web-new-webapp.1 +53 -0
  57. data/man/ronin-web-new-webapp.1.md +39 -0
  58. data/man/ronin-web-new.1 +59 -0
  59. data/man/ronin-web-new.1.md +44 -0
  60. data/man/ronin-web-reverse-proxy.1 +63 -0
  61. data/man/ronin-web-reverse-proxy.1.md +47 -0
  62. data/man/ronin-web-server.1 +59 -0
  63. data/man/ronin-web-server.1.md +43 -0
  64. data/man/ronin-web-spider.1 +225 -0
  65. data/man/ronin-web-spider.1.md +168 -0
  66. data/man/ronin-web.1 +41 -0
  67. data/man/ronin-web.1.md +30 -0
  68. data/ronin-web.gemspec +39 -109
  69. data/spec/cli/ruby_shell_spec.rb +14 -0
  70. data/spec/html_spec.rb +43 -0
  71. data/spec/mechanize_spec.rb +72 -0
  72. data/spec/spec_helper.rb +5 -3
  73. data/spec/web_spec.rb +97 -0
  74. data/spec/xml_spec.rb +42 -0
  75. metadata +236 -224
  76. data/.gemtest +0 -0
  77. data/data/ronin/web/user_agents.yml +0 -247
  78. data/lib/ronin/network/mixins/web.rb +0 -258
  79. data/lib/ronin/web/config.rb +0 -34
  80. data/lib/ronin/web/extensions/nokogiri/xml/attr.rb +0 -47
  81. data/lib/ronin/web/extensions/nokogiri/xml/document.rb +0 -48
  82. data/lib/ronin/web/extensions/nokogiri/xml/element.rb +0 -57
  83. data/lib/ronin/web/extensions/nokogiri/xml/node.rb +0 -86
  84. data/lib/ronin/web/extensions/nokogiri/xml/text.rb +0 -47
  85. data/lib/ronin/web/extensions/nokogiri/xml.rb +0 -27
  86. data/lib/ronin/web/extensions/nokogiri.rb +0 -23
  87. data/lib/ronin/web/extensions.rb +0 -23
  88. data/lib/ronin/web/middleware/base.rb +0 -144
  89. data/lib/ronin/web/middleware/directories.rb +0 -179
  90. data/lib/ronin/web/middleware/files.rb +0 -144
  91. data/lib/ronin/web/middleware/filters/campaign_filter.rb +0 -77
  92. data/lib/ronin/web/middleware/filters/ip_filter.rb +0 -73
  93. data/lib/ronin/web/middleware/filters/path_filter.rb +0 -73
  94. data/lib/ronin/web/middleware/filters/referer_filter.rb +0 -71
  95. data/lib/ronin/web/middleware/filters/user_agent_filter.rb +0 -71
  96. data/lib/ronin/web/middleware/filters/vhost_filter.rb +0 -71
  97. data/lib/ronin/web/middleware/filters.rb +0 -28
  98. data/lib/ronin/web/middleware/helpers.rb +0 -145
  99. data/lib/ronin/web/middleware/proxy.rb +0 -265
  100. data/lib/ronin/web/middleware/proxy_request.rb +0 -262
  101. data/lib/ronin/web/middleware/request.rb +0 -79
  102. data/lib/ronin/web/middleware/response.rb +0 -33
  103. data/lib/ronin/web/middleware/router.rb +0 -167
  104. data/lib/ronin/web/middleware/rule.rb +0 -103
  105. data/lib/ronin/web/middleware.rb +0 -27
  106. data/lib/ronin/web/proxy/app.rb +0 -32
  107. data/lib/ronin/web/proxy/base.rb +0 -46
  108. data/lib/ronin/web/proxy/web.rb +0 -46
  109. data/lib/ronin/web/proxy.rb +0 -25
  110. data/lib/ronin/web/server/app.rb +0 -32
  111. data/lib/ronin/web/server/base.rb +0 -461
  112. data/lib/ronin/web/server/web.rb +0 -66
  113. data/lib/ronin/web/server.rb +0 -25
  114. data/lib/ronin/web/spider.rb +0 -120
  115. data/lib/ronin/web/user_agents.rb +0 -196
  116. data/lib/ronin/web/web.rb +0 -560
  117. data/spec/helpers/output.rb +0 -3
  118. data/spec/web/extensions/nokogiri_spec.rb +0 -38
  119. data/spec/web/helpers/rack_app.rb +0 -24
  120. data/spec/web/helpers/root/test1/index.html +0 -1
  121. data/spec/web/helpers/root/test1/test1.txt +0 -1
  122. data/spec/web/helpers/root/test1.txt +0 -1
  123. data/spec/web/helpers/root/test2/test2.txt +0 -1
  124. data/spec/web/helpers/root/test2.txt +0 -1
  125. data/spec/web/helpers/root/test3/test3.txt +0 -1
  126. data/spec/web/helpers/root/test3.txt +0 -1
  127. data/spec/web/helpers/root.rb +0 -15
  128. data/spec/web/mechanize_spec.rb +0 -62
  129. data/spec/web/middleware/directories_spec.rb +0 -86
  130. data/spec/web/middleware/files_spec.rb +0 -57
  131. data/spec/web/middleware/filters/campaign_filter_spec.rb +0 -30
  132. data/spec/web/middleware/filters/ip_filter_spec.rb +0 -25
  133. data/spec/web/middleware/filters/path_filter_spec.rb +0 -29
  134. data/spec/web/middleware/filters/referer_filter_spec.rb +0 -25
  135. data/spec/web/middleware/filters/user_agent_filter_spec.rb +0 -25
  136. data/spec/web/middleware/filters/vhost_filter_spec.rb +0 -23
  137. data/spec/web/middleware/proxy_spec.rb +0 -67
  138. data/spec/web/middleware/response_spec.rb +0 -20
  139. data/spec/web/middleware/router_spec.rb +0 -65
  140. data/spec/web/middleware/rule_spec.rb +0 -37
  141. data/spec/web/proxy/base_spec.rb +0 -8
  142. data/spec/web/server/base_spec.rb +0 -77
  143. data/spec/web/server/classes/public1/static1.txt +0 -1
  144. data/spec/web/server/classes/public2/static2.txt +0 -1
  145. data/spec/web/server/classes/sub_app.rb +0 -13
  146. data/spec/web/server/classes/test_app.rb +0 -20
  147. data/spec/web/user_agents_spec.rb +0 -56
  148. data/spec/web/web_spec.rb +0 -101
@@ -1,461 +0,0 @@
1
- #
2
- # Ronin Web - A Ruby library for Ronin that provides support for web
3
- # scraping and spidering functionality.
4
- #
5
- # Copyright (c) 2006-2011 Hal Brodigan (postmodern.mod3 at gmail.com)
6
- #
7
- # This file is part of Ronin Web.
8
- #
9
- # Ronin is free software: you can redistribute it and/or modify
10
- # it under the terms of the GNU General Public License as published by
11
- # the Free Software Foundation, either version 3 of the License, or
12
- # (at your option) any later version.
13
- #
14
- # Ronin is distributed in the hope that it will be useful,
15
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
- # GNU General Public License for more details.
18
- #
19
- # You should have received a copy of the GNU General Public License
20
- # along with Ronin. If not, see <http://www.gnu.org/licenses/>.
21
- #
22
-
23
- require 'ronin/web/middleware/helpers'
24
- require 'ronin/web/middleware/files'
25
- require 'ronin/web/middleware/directories'
26
- require 'ronin/web/middleware/router'
27
- require 'ronin/web/middleware/proxy'
28
- require 'ronin/templates/erb'
29
- require 'ronin/ui/output'
30
- require 'ronin/extensions/meta'
31
-
32
- require 'thread'
33
- require 'rack'
34
- require 'sinatra'
35
-
36
- module Ronin
37
- module Web
38
- module Server
39
- #
40
- # The base-class for all Ronin Web Servers. Extends
41
- # [Sinatra::Base](http://rubydoc.info/gems/sinatra/Sinatra/Base)
42
- # with additional helper methods and Rack {Middleware}.
43
- #
44
- class Base < Sinatra::Base
45
-
46
- include Templates::Erb
47
- include UI::Output::Helpers
48
- extend UI::Output::Helpers
49
-
50
- # Default interface to run the Web Server on
51
- DEFAULT_HOST = '0.0.0.0'
52
-
53
- # Default port to run the Web Server on
54
- DEFAULT_PORT = 8000
55
-
56
- set :host, DEFAULT_HOST
57
- set :port, DEFAULT_PORT
58
-
59
- #
60
- # The default Rack Handler to run all web servers with.
61
- #
62
- # @return [String]
63
- # The class name of the Rack Handler to use.
64
- #
65
- # @since 0.2.0
66
- #
67
- # @api public
68
- #
69
- def Base.handler
70
- @@ronin_web_server_handler ||= nil
71
- end
72
-
73
- #
74
- # Sets the default Rack Handler to run all web servers with.
75
- #
76
- # @param [String] name
77
- # The name of the handler.
78
- #
79
- # @return [String]
80
- # The name of the new handler.
81
- #
82
- # @since 0.2.0
83
- #
84
- # @api public
85
- #
86
- def Base.handler=(name)
87
- @@ronin_web_server_handler = name
88
- end
89
-
90
- #
91
- # The list of Rack Handlers to attempt to use with the web server.
92
- #
93
- # @return [Array]
94
- # The names of handler classes.
95
- #
96
- # @since 0.2.0
97
- #
98
- # @api public
99
- #
100
- def self.handlers
101
- handlers = self.server
102
-
103
- if Base.handler
104
- handlers = [Base.handler] + handlers
105
- end
106
-
107
- return handlers
108
- end
109
-
110
- #
111
- # Attempts to load the desired Rack Handler to run the web server
112
- # with.
113
- #
114
- # @return [Rack::Handler]
115
- # The handler class to use to run the web server.
116
- #
117
- # @raise [StandardError]
118
- # None of the handlers could be loaded.
119
- #
120
- # @since 0.2.0
121
- #
122
- # @api semipublic
123
- #
124
- def self.handler_class
125
- self.handlers.find do |name|
126
- begin
127
- return Rack::Handler.get(name)
128
- rescue Gem::LoadError => e
129
- raise(e)
130
- rescue NameError, ::LoadError
131
- next
132
- end
133
- end
134
-
135
- raise(StandardError,"unable to find any Rack handlers")
136
- end
137
-
138
- #
139
- # Run the web server using the Rack Handler returned by
140
- # {handler_class}.
141
- #
142
- # @param [Hash] options Additional options.
143
- #
144
- # @option options [String] :host
145
- # The host the server will listen on.
146
- #
147
- # @option options [Integer] :port
148
- # The port the server will bind to.
149
- #
150
- # @option options [Boolean] :background (false)
151
- # Specifies whether the server will run in the background or run
152
- # in the foreground.
153
- #
154
- # @since 0.2.0
155
- #
156
- # @api public
157
- #
158
- def self.run!(options={})
159
- rack_options = {
160
- :Host => (options[:host] || self.host),
161
- :Port => (options[:port] || self.port)
162
- }
163
-
164
- runner = lambda { |handler,server,options|
165
- print_info "Starting Web Server on #{options[:Host]}:#{options[:Port]}"
166
- print_debug "Using Web Server handler #{handler}"
167
-
168
- handler.run(server,options) do |server|
169
- trap(:INT) do
170
- # Use thin's hard #stop! if available,
171
- # otherwise just #stop
172
- server.respond_to?(:stop!) ? server.stop! : server.stop
173
- end
174
-
175
- set :running, true
176
- end
177
- }
178
-
179
- handler = self.handler_class
180
-
181
- if options[:background]
182
- Thread.new(handler,self,rack_options,&runner)
183
- else
184
- runner.call(handler,self,rack_options)
185
- end
186
-
187
- return self
188
- end
189
-
190
- #
191
- # Route any type of request for a given URL pattern.
192
- #
193
- # @param [String] path
194
- # The URL pattern to handle requests for.
195
- #
196
- # @yield []
197
- # The block that will handle the request.
198
- #
199
- # @example
200
- # any '/submit' do
201
- # puts request.inspect
202
- # end
203
- #
204
- # @since 0.2.0
205
- #
206
- # @api public
207
- #
208
- def self.any(path,options={},&block)
209
- get(path,options,&block)
210
- put(path,options,&block)
211
- post(path,options,&block)
212
- delete(path,options,&block)
213
- end
214
-
215
- #
216
- # Sets the default route.
217
- #
218
- # @yield []
219
- # The block that will handle all other requests.
220
- #
221
- # @example
222
- # default do
223
- # status 200
224
- # content_type :html
225
- #
226
- # %{
227
- # <html>
228
- # <body>
229
- # <center><h1>YOU LOSE THE GAME</h1></center>
230
- # </body>
231
- # </html>
232
- # }
233
- # end
234
- #
235
- # @since 0.2.0
236
- #
237
- # @api public
238
- #
239
- def self.default(&block)
240
- class_def(:default_response,&block)
241
- return self
242
- end
243
-
244
- #
245
- # Hosts the contents of a file.
246
- #
247
- # @param [String] remote_path
248
- # The path the web server will host the file at.
249
- #
250
- # @param [String] local_path
251
- # The path to the local file.
252
- #
253
- # @example
254
- # file '/robots.txt', '/path/to/my_robots.txt'
255
- #
256
- # @see Middleware::Files
257
- #
258
- # @since 0.3.0
259
- #
260
- # @api public
261
- #
262
- def self.file(remote_path,local_path)
263
- use Middleware::Files, :paths => {remote_path => local_path}
264
- end
265
-
266
- #
267
- # Hosts the contents of files.
268
- #
269
- # @yield [files]
270
- # The given block will be passed the files middleware to
271
- # configure.
272
- #
273
- # @yieldparam [Middleware::Files] files
274
- # The files middleware object.
275
- #
276
- # @example
277
- # files do |files|
278
- # files.map '/foo.txt', 'foo.txt'
279
- # files.map /\.exe$/, 'trojan.exe'
280
- # end
281
- #
282
- # @see Middleware::Files
283
- #
284
- # @since 0.3.0
285
- #
286
- # @api public
287
- #
288
- def self.files(&block)
289
- use(Middleware::Files,&block)
290
- end
291
-
292
- #
293
- # Hosts the contents of the directory.
294
- #
295
- # @param [String] remote_path
296
- # The path the web server will host the directory at.
297
- #
298
- # @param [String] local_path
299
- # The path to the local directory.
300
- #
301
- # @example
302
- # directory '/download/', '/tmp/files/'
303
- #
304
- # @see Middleware::Directories
305
- #
306
- # @since 0.2.0
307
- #
308
- # @api public
309
- #
310
- def self.directory(remote_path,local_path)
311
- use Middleware::Directories, :paths => {remote_path => local_path}
312
- end
313
-
314
- #
315
- # Hosts the contents of directories.
316
- #
317
- # @yield [dirs]
318
- # The given block will be passed the directories middleware to
319
- # configure.
320
- #
321
- # @yieldparam [Middleware::Directories] dirs
322
- # The directories middleware object.
323
- #
324
- # @example
325
- # directories do |dirs|
326
- # dirs.map '/downloads', '/tmp/ronin_downloads'
327
- # dirs.map '/images', '/tmp/ronin_images'
328
- # dirs.map '/pdfs', '/tmp/ronin_pdfs'
329
- # end
330
- #
331
- # @see Middleware::Directories
332
- #
333
- # @since 0.3.0
334
- #
335
- # @api public
336
- #
337
- def self.directories(&block)
338
- use(Middleware::Directories,&block)
339
- end
340
-
341
- #
342
- # Hosts the static contents within a given directory.
343
- #
344
- # @param [String] path
345
- # The path to a directory to serve static content from.
346
- #
347
- # @example
348
- # public_dir 'path/to/another/public'
349
- #
350
- # @since 0.2.0
351
- #
352
- # @api public
353
- #
354
- def self.public_dir(path)
355
- self.directory('/',path)
356
- end
357
-
358
- #
359
- # Routes all requests within a given directory into another
360
- # web server.
361
- #
362
- # @param [String, Regexp] dir
363
- # The directory that requests for will be routed from.
364
- #
365
- # @param [#call] server
366
- # The web server to route requests to.
367
- #
368
- # @example
369
- # map '/subapp/', SubApp
370
- #
371
- # @see Middleware::Router
372
- #
373
- # @since 0.2.0
374
- #
375
- # @api public
376
- #
377
- def self.map(dir,server)
378
- use Middleware::Router do |router|
379
- router.draw :path => dir, :to => server
380
- end
381
- end
382
-
383
- #
384
- # Routes requests with a specific Host header to another
385
- # web server.
386
- #
387
- # @param [String, Regexp] name
388
- # The host-name to route requests for.
389
- #
390
- # @param [#call] server
391
- # The web server to route the requests to.
392
- #
393
- # @example
394
- # vhost 'cdn.evil.com', EvilServer
395
- #
396
- # @since 0.3.0
397
- #
398
- # @api public
399
- #
400
- def self.vhost(name,server)
401
- use Middleware::Router do |router|
402
- router.draw :vhost => name, :to => server
403
- end
404
- end
405
-
406
- #
407
- # Proxies requests to a given path.
408
- #
409
- # @param [String] path
410
- # The path to proxy requests for.
411
- #
412
- # @param [Hash] options
413
- # Additional options.
414
- #
415
- # @yield [(response), body]
416
- # If a block is given, it will be passed the optional
417
- # response of the proxied request and the body received
418
- # from the proxied request.
419
- #
420
- # @yieldparam [Net::HTTP::Response] response
421
- # The response.
422
- #
423
- # @yieldparam [String] body
424
- # The body from the response.
425
- #
426
- # @example
427
- # proxy '/login.php' do |response,body|
428
- # body.gsub(/https/,'http')
429
- # end
430
- #
431
- # @see Middleware::Proxy
432
- #
433
- # @since 0.2.0
434
- #
435
- # @api public
436
- #
437
- def self.proxy(path,options={},&block)
438
- use(Middleware::Proxy,options,&block)
439
- end
440
-
441
- protected
442
-
443
- #
444
- # Returns an HTTP 404 response with an empty body.
445
- #
446
- # @since 0.2.0
447
- #
448
- # @api semipublic
449
- #
450
- def default_response
451
- halt 404, ''
452
- end
453
-
454
- enable :sessions
455
-
456
- any('*') { default_response }
457
-
458
- end
459
- end
460
- end
461
- end
@@ -1,66 +0,0 @@
1
- #
2
- # Ronin Web - A Ruby library for Ronin that provides support for web
3
- # scraping and spidering functionality.
4
- #
5
- # Copyright (c) 2006-2011 Hal Brodigan (postmodern.mod3 at gmail.com)
6
- #
7
- # This file is part of Ronin Web.
8
- #
9
- # Ronin is free software: you can redistribute it and/or modify
10
- # it under the terms of the GNU General Public License as published by
11
- # the Free Software Foundation, either version 3 of the License, or
12
- # (at your option) any later version.
13
- #
14
- # Ronin is distributed in the hope that it will be useful,
15
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
- # GNU General Public License for more details.
18
- #
19
- # You should have received a copy of the GNU General Public License
20
- # along with Ronin. If not, see <http://www.gnu.org/licenses/>.
21
- #
22
-
23
- require 'ronin/web/server/app'
24
-
25
- module Ronin
26
- module Web
27
- #
28
- # Returns the Ronin Web Server.
29
- #
30
- # @param [Hash] options
31
- # Additional options.
32
- #
33
- # @yield [server]
34
- # If a block is given, it will be passed the current web server.
35
- #
36
- # @yieldparam [Server::App] server
37
- # The current web server class.
38
- #
39
- # @return [Server::App]
40
- # The current web server class.
41
- #
42
- # @example
43
- # Web.server do
44
- # get '/hello' do
45
- # 'world'
46
- # end
47
- # end
48
- #
49
- # @see Server::Base.run!
50
- #
51
- # @since 0.2.0
52
- #
53
- # @api public
54
- #
55
- def Web.server(options={},&block)
56
- unless class_variable_defined?('@@ronin_web_server')
57
- @@ronin_web_server = Server::App
58
- @@ronin_web_server.run!(options.merge(:background => true))
59
- end
60
-
61
- @@ronin_web_server.class_eval(&block)
62
-
63
- return @@ronin_web_server
64
- end
65
- end
66
- end
@@ -1,25 +0,0 @@
1
- #
2
- # Ronin Web - A Ruby library for Ronin that provides support for web
3
- # scraping and spidering functionality.
4
- #
5
- # Copyright (c) 2006-2011 Hal Brodigan (postmodern.mod3 at gmail.com)
6
- #
7
- # This file is part of Ronin Web.
8
- #
9
- # Ronin is free software: you can redistribute it and/or modify
10
- # it under the terms of the GNU General Public License as published by
11
- # the Free Software Foundation, either version 3 of the License, or
12
- # (at your option) any later version.
13
- #
14
- # Ronin is distributed in the hope that it will be useful,
15
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
- # GNU General Public License for more details.
18
- #
19
- # You should have received a copy of the GNU General Public License
20
- # along with Ronin. If not, see <http://www.gnu.org/licenses/>.
21
- #
22
-
23
- require 'ronin/web/server/base'
24
- require 'ronin/web/server/app'
25
- require 'ronin/web/server/web'
@@ -1,120 +0,0 @@
1
- #
2
- # Ronin Web - A Ruby library for Ronin that provides support for web
3
- # scraping and spidering functionality.
4
- #
5
- # Copyright (c) 2006-2011 Hal Brodigan (postmodern.mod3 at gmail.com)
6
- #
7
- # This file is part of Ronin Web.
8
- #
9
- # Ronin is free software: you can redistribute it and/or modify
10
- # it under the terms of the GNU General Public License as published by
11
- # the Free Software Foundation, either version 3 of the License, or
12
- # (at your option) any later version.
13
- #
14
- # Ronin is distributed in the hope that it will be useful,
15
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
- # GNU General Public License for more details.
18
- #
19
- # You should have received a copy of the GNU General Public License
20
- # along with Ronin. If not, see <http://www.gnu.org/licenses/>.
21
- #
22
-
23
- require 'ronin/web/web'
24
- require 'ronin/ui/output/helpers'
25
-
26
- require 'spidr/agent'
27
-
28
- module Ronin
29
- module Web
30
- #
31
- # Extends [Spidr::Agent](http://rubydoc.info/gems/spidr/Agent) with
32
- # [Ronin::UI::Output::Helpers](http://rubydoc.info/gems/ronin/Ronin/UI/Output/Helpers).
33
- #
34
- class Spider < Spidr::Agent
35
-
36
- include UI::Output::Helpers
37
-
38
- #
39
- # Creates a new Spider object.
40
- #
41
- # @param [Hash] options
42
- # Additional options.
43
- #
44
- # @option options [Hash] :proxy (Web.proxy)
45
- # The proxy to use while spidering.
46
- #
47
- # @option options [String] :user_agent (Web.user_agent)
48
- # The User-Agent string to send.
49
- #
50
- # @option options [String] :referer
51
- # The referer URL to send.
52
- #
53
- # @option options [Integer] :delay (0)
54
- # Duration in seconds to pause between spidering each link.
55
- #
56
- # @option options [Array] :schemes (['http', 'https'])
57
- # The list of acceptable URI schemes to visit.
58
- # The `https` scheme will be ignored if `net/https` cannot be
59
- # loaded.
60
- #
61
- # @option options [String] :host
62
- # The host-name to visit.
63
- #
64
- # @option options [Array<String, Regexp, Proc>] :hosts
65
- # The patterns which match the host-names to visit.
66
- #
67
- # @option options [Array<String, Regexp, Proc>] :ignore_hosts
68
- # The patterns which match the host-names to not visit.
69
- #
70
- # @option options [Array<Integer, Regexp, Proc>] :ports
71
- # The patterns which match the ports to visit.
72
- #
73
- # @option options [Array<Integer, Regexp, Proc>] :ignore_ports
74
- # The patterns which match the ports to not visit.
75
- #
76
- # @option options [Array<String, Regexp, Proc>] :links
77
- # The patterns which match the links to visit.
78
- #
79
- # @option options [Array<String, Regexp, Proc>] :ignore_links
80
- # The patterns which match the links to not visit.
81
- #
82
- # @option options [Array<String, Regexp, Proc>] :exts
83
- # The patterns which match the URI path extensions to visit.
84
- #
85
- # @option options [Array<String, Regexp, Proc>] :ignore_exts
86
- # The patterns which match the URI path extensions to not visit.
87
- #
88
- # @option options [Boolean] :verbose (true)
89
- # Specifies whether every URL will be printed.
90
- #
91
- # @yield [spider]
92
- # If a block is given, it will be passed the newly created spider.
93
- #
94
- # @yieldparam [Spider] spider
95
- # The newly created spider.
96
- #
97
- # @see http://spidr.rubyforge.org/docs/classes/Spidr/Agent.html
98
- #
99
- # @api public
100
- #
101
- def initialize(options={})
102
- options = {
103
- :proxy => Web.proxy,
104
- :user_agent => Web.user_agent
105
- }.merge(options)
106
-
107
- super(options)
108
-
109
- if options.fetch(:verbose,true)
110
- every_url do |url|
111
- print_info("Spidering #{url}")
112
- end
113
- end
114
-
115
- yield self if block_given?
116
- end
117
-
118
- end
119
- end
120
- end