ronin-web 1.0.2 → 2.0.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +3 -2
- data/.gitignore +1 -0
- data/.rubocop.yml +5 -0
- data/ChangeLog.md +46 -1
- data/Gemfile +25 -12
- data/README.md +257 -51
- data/Rakefile +9 -0
- data/data/completions/ronin-web +203 -0
- data/gemspec.yml +18 -5
- data/lib/ronin/web/cli/browser_options.rb +92 -0
- data/lib/ronin/web/cli/browser_shell.rb +448 -0
- data/lib/ronin/web/cli/command.rb +1 -1
- data/lib/ronin/web/cli/commands/browser.rb +373 -0
- data/lib/ronin/web/cli/commands/completion.rb +63 -0
- data/lib/ronin/web/cli/commands/diff.rb +60 -8
- data/lib/ronin/web/cli/commands/html.rb +21 -33
- data/lib/ronin/web/cli/commands/irb.rb +1 -1
- data/lib/ronin/web/cli/commands/new/{webapp.rb → app.rb} +8 -8
- data/lib/ronin/web/cli/commands/new/nokogiri.rb +4 -4
- data/lib/ronin/web/cli/commands/new/server.rb +1 -1
- data/lib/ronin/web/cli/commands/new/spider.rb +1 -1
- data/lib/ronin/web/cli/commands/new.rb +5 -3
- data/lib/ronin/web/cli/commands/reverse_proxy.rb +1 -1
- data/lib/ronin/web/cli/commands/screenshot.rb +186 -0
- data/lib/ronin/web/cli/commands/server.rb +1 -1
- data/lib/ronin/web/cli/commands/session_cookie.rb +265 -0
- data/lib/ronin/web/cli/commands/spider.rb +61 -467
- data/lib/ronin/web/cli/commands/user_agent.rb +177 -0
- data/lib/ronin/web/cli/commands/vulns.rb +463 -0
- data/lib/ronin/web/cli/commands/wordlist.rb +484 -0
- data/lib/ronin/web/cli/commands/xml.rb +149 -0
- data/lib/ronin/web/cli/js_shell.rb +69 -0
- data/lib/ronin/web/cli/ruby_shell.rb +1 -1
- data/lib/ronin/web/cli/spider_options.rb +919 -0
- data/lib/ronin/web/cli.rb +3 -1
- data/lib/ronin/web/html.rb +1 -1
- data/lib/ronin/web/root.rb +1 -1
- data/lib/ronin/web/version.rb +2 -2
- data/lib/ronin/web/xml.rb +1 -1
- data/lib/ronin/web.rb +4 -364
- data/man/ronin-web-browser.1 +92 -0
- data/man/ronin-web-browser.1.md +96 -0
- data/man/ronin-web-completion.1 +76 -0
- data/man/ronin-web-completion.1.md +78 -0
- data/man/ronin-web-diff.1 +14 -21
- data/man/ronin-web-diff.1.md +13 -6
- data/man/ronin-web-html.1 +30 -46
- data/man/ronin-web-html.1.md +27 -17
- data/man/ronin-web-irb.1 +9 -16
- data/man/ronin-web-irb.1.md +6 -2
- data/man/ronin-web-new-app.1.md +39 -0
- data/man/ronin-web-new-nokogiri.1 +9 -20
- data/man/ronin-web-new-nokogiri.1.md +5 -5
- data/man/ronin-web-new-server.1 +11 -23
- data/man/ronin-web-new-server.1.md +5 -5
- data/man/ronin-web-new-spider.1 +44 -88
- data/man/ronin-web-new-spider.1.md +37 -37
- data/man/ronin-web-new.1 +18 -30
- data/man/ronin-web-new.1.md +15 -11
- data/man/ronin-web-reverse-proxy.1 +33 -38
- data/man/ronin-web-reverse-proxy.1.md +20 -14
- data/man/ronin-web-screenshot.1 +56 -0
- data/man/ronin-web-screenshot.1.md +56 -0
- data/man/ronin-web-server.1 +15 -29
- data/man/ronin-web-server.1.md +13 -9
- data/man/ronin-web-session-cookie.1 +38 -0
- data/man/ronin-web-session-cookie.1.md +41 -0
- data/man/ronin-web-spider.1 +121 -130
- data/man/ronin-web-spider.1.md +115 -66
- data/man/ronin-web-user-agent.1 +44 -0
- data/man/ronin-web-user-agent.1.md +46 -0
- data/man/ronin-web-vulns.1 +175 -0
- data/man/ronin-web-vulns.1.md +177 -0
- data/man/ronin-web-wordlist.1 +258 -0
- data/man/ronin-web-wordlist.1.md +263 -0
- data/man/ronin-web-xml.1 +43 -0
- data/man/ronin-web-xml.1.md +46 -0
- data/man/ronin-web.1 +67 -18
- data/man/ronin-web.1.md +55 -4
- data/scripts/setup +58 -0
- metadata +122 -31
- data/lib/ronin/web/mechanize.rb +0 -84
- data/man/ronin-web-new-webapp.1.md +0 -39
- /data/data/new/{webapp → app}/.gitignore +0 -0
- /data/data/new/{webapp → app}/.ruby-version.erb +0 -0
- /data/data/new/{webapp → app}/Dockerfile.erb +0 -0
- /data/data/new/{webapp → app}/Gemfile +0 -0
- /data/data/new/{webapp → app}/app.rb.erb +0 -0
- /data/data/new/{webapp → app}/config.ru +0 -0
- /data/data/new/{webapp → app}/docker-compose.yml.erb +0 -0
@@ -0,0 +1,919 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
#
|
3
|
+
# ronin-web - A collection of useful web helper methods and commands.
|
4
|
+
#
|
5
|
+
# Copyright (c) 2006-2024 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
|
+
#
|
7
|
+
# ronin-web is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU General Public License as published by
|
9
|
+
# the Free Software Foundation, either version 3 of the License, or
|
10
|
+
# (at your option) any later version.
|
11
|
+
#
|
12
|
+
# ronin-web is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU General Public License
|
18
|
+
# along with ronin-web. If not, see <https://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
require 'ronin/web/spider'
|
22
|
+
require 'ronin/support/network/http/user_agents'
|
23
|
+
|
24
|
+
module Ronin
|
25
|
+
module Web
|
26
|
+
class CLI
|
27
|
+
#
|
28
|
+
# Adds options for spidering a website.
|
29
|
+
#
|
30
|
+
# @since 2.0.0
|
31
|
+
#
|
32
|
+
module SpiderOptions
|
33
|
+
#
|
34
|
+
# Adds options for configuring a web spider and spidering a website.
|
35
|
+
#
|
36
|
+
# @param [Class<Command>] command
|
37
|
+
# The command class including {SpiderOptions}.
|
38
|
+
#
|
39
|
+
def self.included(command)
|
40
|
+
command.usage '[options] {--host HOST | --domain DOMAIN | --site URL}'
|
41
|
+
|
42
|
+
command.option :host, value: {
|
43
|
+
type: String,
|
44
|
+
usage: 'HOST'
|
45
|
+
},
|
46
|
+
desc: 'Spiders the specific HOST'
|
47
|
+
|
48
|
+
command.option :domain, value: {
|
49
|
+
type: String,
|
50
|
+
usage: 'DOMAIN'
|
51
|
+
},
|
52
|
+
desc: 'Spiders the whole domain'
|
53
|
+
|
54
|
+
command.option :site, value: {
|
55
|
+
type: String,
|
56
|
+
usage: 'URL'
|
57
|
+
},
|
58
|
+
desc: 'Spiders the website, starting at the URL'
|
59
|
+
|
60
|
+
command.option :open_timeout, value: {
|
61
|
+
type: Integer,
|
62
|
+
usage: 'SECS',
|
63
|
+
default: Spidr.open_timeout
|
64
|
+
},
|
65
|
+
desc: 'Sets the connection open timeout' do |timeout|
|
66
|
+
self.open_timeout = timeout
|
67
|
+
end
|
68
|
+
|
69
|
+
command.option :read_timeout, value: {
|
70
|
+
type: Integer,
|
71
|
+
usage: 'SECS',
|
72
|
+
default: Spidr.read_timeout
|
73
|
+
},
|
74
|
+
desc: 'Sets the read timeout' do |timeout|
|
75
|
+
self.read_timeout = timeout
|
76
|
+
end
|
77
|
+
|
78
|
+
command.option :ssl_timeout, value: {
|
79
|
+
type: Integer,
|
80
|
+
usage: 'SECS',
|
81
|
+
default: Spidr.ssl_timeout
|
82
|
+
},
|
83
|
+
desc: 'Sets the SSL connection timeout' do |timeout|
|
84
|
+
self.ssl_timeout = timeout
|
85
|
+
end
|
86
|
+
|
87
|
+
command.option :continue_timeout, value: {
|
88
|
+
type: Integer,
|
89
|
+
usage: 'SECS',
|
90
|
+
default: Spidr.continue_timeout
|
91
|
+
},
|
92
|
+
desc: 'Sets the continue timeout' do |timeout|
|
93
|
+
self.continue_timeout = timeout
|
94
|
+
end
|
95
|
+
|
96
|
+
command.option :keep_alive_timeout, value: {
|
97
|
+
type: Integer,
|
98
|
+
usage: 'SECS',
|
99
|
+
default: Spidr.keep_alive_timeout
|
100
|
+
},
|
101
|
+
desc: 'Sets the connection keep alive timeout' do |timeout|
|
102
|
+
self.keep_alive_timeout = timeout
|
103
|
+
end
|
104
|
+
|
105
|
+
command.option :proxy, short: '-P',
|
106
|
+
value: {
|
107
|
+
type: String,
|
108
|
+
usage: 'PROXY'
|
109
|
+
},
|
110
|
+
desc: 'Sets the proxy to use' do |proxy|
|
111
|
+
self.proxy = proxy
|
112
|
+
end
|
113
|
+
|
114
|
+
command.option :header, short: '-H',
|
115
|
+
value: {
|
116
|
+
type: /\A[^\s:]+:.*\z/,
|
117
|
+
usage: 'NAME: VALUE'
|
118
|
+
},
|
119
|
+
desc: 'Sets a default header' do |header|
|
120
|
+
name, value = header.split(/:\s*/,2)
|
121
|
+
|
122
|
+
self.default_headers[name] = value
|
123
|
+
end
|
124
|
+
|
125
|
+
command.option :host_header, value: {
|
126
|
+
type: /\A[^\s=]+=[^\s=]+\z/,
|
127
|
+
usage: 'NAME=VALUE'
|
128
|
+
},
|
129
|
+
desc: 'Sets a default header' do |name_value|
|
130
|
+
name, value = name_value.split('=',2)
|
131
|
+
|
132
|
+
self.host_headers[name] = value
|
133
|
+
end
|
134
|
+
|
135
|
+
command.option :user_agent_string, short: '-U',
|
136
|
+
value: {
|
137
|
+
type: String,
|
138
|
+
usage: 'STRING'
|
139
|
+
},
|
140
|
+
desc: 'The User-Agent string to use' do |ua|
|
141
|
+
self.user_agent = ua
|
142
|
+
end
|
143
|
+
|
144
|
+
command.option :user_agent, short: '-u',
|
145
|
+
value: {
|
146
|
+
type: Support::Network::HTTP::UserAgents::ALIASES.transform_keys { |key|
|
147
|
+
key.to_s.tr('_','-')
|
148
|
+
}
|
149
|
+
},
|
150
|
+
desc: 'The User-Agent to use' do |name|
|
151
|
+
self.user_agent = name
|
152
|
+
end
|
153
|
+
|
154
|
+
command.option :referer, short: '-R',
|
155
|
+
value: {
|
156
|
+
type: String,
|
157
|
+
usage: 'URL'
|
158
|
+
},
|
159
|
+
desc: 'Sets the Referer URL' do |referer|
|
160
|
+
self.referer = referer
|
161
|
+
end
|
162
|
+
|
163
|
+
command.option :delay, short: '-d',
|
164
|
+
value: {
|
165
|
+
type: Numeric,
|
166
|
+
usage: 'SECS'
|
167
|
+
},
|
168
|
+
desc: 'Sets the delay in seconds between each request' do |delay|
|
169
|
+
self.delay = delay
|
170
|
+
end
|
171
|
+
|
172
|
+
command.option :limit, short: '-l',
|
173
|
+
value: {
|
174
|
+
type: Integer,
|
175
|
+
usage: 'COUNT'
|
176
|
+
},
|
177
|
+
desc: 'Only spiders up to COUNT pages' do |limit|
|
178
|
+
self.limit = limit
|
179
|
+
end
|
180
|
+
|
181
|
+
command.option :max_depth, short: '-d',
|
182
|
+
value: {
|
183
|
+
type: Integer,
|
184
|
+
usage: 'DEPTH'
|
185
|
+
},
|
186
|
+
desc: 'Only spiders up to max depth' do |depth|
|
187
|
+
self.max_depth = depth
|
188
|
+
end
|
189
|
+
|
190
|
+
command.option :enqueue, value: {
|
191
|
+
type: String,
|
192
|
+
usage: 'URL'
|
193
|
+
},
|
194
|
+
desc: 'Adds the URL to the queue' do |url|
|
195
|
+
self.queue << url
|
196
|
+
end
|
197
|
+
|
198
|
+
command.option :visited, value: {
|
199
|
+
type: String,
|
200
|
+
usage: 'URL'
|
201
|
+
},
|
202
|
+
desc: 'Marks the URL as previously visited' do |url|
|
203
|
+
self.history << url
|
204
|
+
end
|
205
|
+
|
206
|
+
command.option :strip_fragments, desc: 'Enables/disables stripping the fragment component of every URL' do
|
207
|
+
self.strip_fragments = true
|
208
|
+
end
|
209
|
+
|
210
|
+
command.option :strip_query, desc: 'Enables/disables stripping the query component of every URL' do
|
211
|
+
self.strip_query = true
|
212
|
+
end
|
213
|
+
|
214
|
+
command.option :visit_scheme, value: {
|
215
|
+
type: String,
|
216
|
+
usage: 'SCHEME'
|
217
|
+
},
|
218
|
+
desc: 'Visit URLs with the URI scheme' do |scheme|
|
219
|
+
self.visit_schemes << scheme
|
220
|
+
end
|
221
|
+
|
222
|
+
command.option :visit_schemes_like, value: {
|
223
|
+
type: Regexp,
|
224
|
+
usage: '/REGEX/'
|
225
|
+
},
|
226
|
+
desc: 'Visit URLs with URI schemes that match the REGEX' do |regex|
|
227
|
+
self.visit_schemes << regex
|
228
|
+
end
|
229
|
+
|
230
|
+
command.option :ignore_scheme, value: {
|
231
|
+
type: String,
|
232
|
+
usage: 'SCHEME'
|
233
|
+
},
|
234
|
+
desc: 'Ignore the URLs with the URI scheme' do |scheme|
|
235
|
+
self.ignore_schemes << scheme
|
236
|
+
end
|
237
|
+
|
238
|
+
command.option :ignore_schemes_like, value: {
|
239
|
+
type: Regexp,
|
240
|
+
usage: '/REGEX/'
|
241
|
+
},
|
242
|
+
desc: 'Ignore the URLs with URI schemes matching the REGEX' do |regex|
|
243
|
+
self.ignore_schemes << regex
|
244
|
+
end
|
245
|
+
|
246
|
+
command.option :visit_host, value: {
|
247
|
+
type: String,
|
248
|
+
usage: 'HOST'
|
249
|
+
},
|
250
|
+
desc: 'Visit URLs with the matching host name' do |host|
|
251
|
+
self.visit_hosts << host
|
252
|
+
end
|
253
|
+
|
254
|
+
command.option :visit_hosts_like, value: {
|
255
|
+
type: Regexp,
|
256
|
+
usage: '/REGEX/'
|
257
|
+
},
|
258
|
+
desc: 'Visit URLs with hostnames that match the REGEX' do |regex|
|
259
|
+
self.visit_hosts << regex
|
260
|
+
end
|
261
|
+
|
262
|
+
command.option :ignore_host, value: {
|
263
|
+
type: String,
|
264
|
+
usage: 'HOST'
|
265
|
+
},
|
266
|
+
desc: 'Ignore the host name' do |host|
|
267
|
+
self.ignore_hosts << host
|
268
|
+
end
|
269
|
+
|
270
|
+
command.option :ignore_hosts_like, value: {
|
271
|
+
type: Regexp,
|
272
|
+
usage: '/REGEX/'
|
273
|
+
},
|
274
|
+
desc: 'Ignore the host names matching the REGEX' do |regex|
|
275
|
+
self.ignore_hosts << regex
|
276
|
+
end
|
277
|
+
|
278
|
+
command.option :visit_port, value: {
|
279
|
+
type: Integer,
|
280
|
+
usage: 'PORT'
|
281
|
+
},
|
282
|
+
desc: 'Visit URLs with the matching port number' do |port|
|
283
|
+
self.visit_ports << port
|
284
|
+
end
|
285
|
+
|
286
|
+
command.option :visit_ports_like, value: {
|
287
|
+
type: Regexp,
|
288
|
+
usage: '/REGEX/'
|
289
|
+
},
|
290
|
+
desc: 'Visit URLs with port numbers that match the REGEX' do |regex|
|
291
|
+
self.visit_ports << regex
|
292
|
+
end
|
293
|
+
|
294
|
+
command.option :ignore_port, value: {
|
295
|
+
type: Integer,
|
296
|
+
usage: 'PORT'
|
297
|
+
},
|
298
|
+
desc: 'Ignore the port number' do |port|
|
299
|
+
self.ignore_ports << port
|
300
|
+
end
|
301
|
+
|
302
|
+
command.option :ignore_ports_like, value: {
|
303
|
+
type: Regexp,
|
304
|
+
usage: '/REGEX/'
|
305
|
+
},
|
306
|
+
desc: 'Ignore the port numbers matching the REGEXP' do |regex|
|
307
|
+
self.ignore_ports << regex
|
308
|
+
end
|
309
|
+
|
310
|
+
command.option :visit_link, value: {
|
311
|
+
type: String,
|
312
|
+
usage: 'URL'
|
313
|
+
},
|
314
|
+
desc: 'Visit the URL' do |link|
|
315
|
+
self.visit_links << link
|
316
|
+
end
|
317
|
+
|
318
|
+
command.option :visit_links_like, value: {
|
319
|
+
type: Regexp,
|
320
|
+
usage: '/REGEX/'
|
321
|
+
},
|
322
|
+
desc: 'Visit URLs that match the REGEX' do |regex|
|
323
|
+
self.visit_links << regex
|
324
|
+
end
|
325
|
+
|
326
|
+
command.option :ignore_link, value: {
|
327
|
+
type: String,
|
328
|
+
usage: 'URL'
|
329
|
+
},
|
330
|
+
desc: 'Ignore the URL' do |link|
|
331
|
+
self.ignore_links << link
|
332
|
+
end
|
333
|
+
|
334
|
+
command.option :ignore_links_like, value: {
|
335
|
+
type: Regexp,
|
336
|
+
usage: '/REGEX/'
|
337
|
+
},
|
338
|
+
desc: 'Ignore URLs matching the REGEX' do |regex|
|
339
|
+
self.ignore_links << regex
|
340
|
+
end
|
341
|
+
|
342
|
+
command.option :visit_ext, value: {
|
343
|
+
type: String,
|
344
|
+
usage: 'FILE_EXT'
|
345
|
+
},
|
346
|
+
desc: 'Visit URLs with the matching file ext' do |ext|
|
347
|
+
self.visit_exts << ext
|
348
|
+
end
|
349
|
+
|
350
|
+
command.option :visit_exts_like, value: {
|
351
|
+
type: Regexp,
|
352
|
+
usage: '/REGEX/'
|
353
|
+
},
|
354
|
+
desc: 'Visit URLs with file exts that match the REGEX' do |regex|
|
355
|
+
self.visit_exts << regex
|
356
|
+
end
|
357
|
+
|
358
|
+
command.option :ignore_ext, value: {
|
359
|
+
type: String,
|
360
|
+
usage: 'FILE_EXT'
|
361
|
+
},
|
362
|
+
desc: 'Ignore the URLs with the file ext' do |ext|
|
363
|
+
self.ignore_exts << ext
|
364
|
+
end
|
365
|
+
|
366
|
+
command.option :ignore_exts_like, value: {
|
367
|
+
type: Regexp,
|
368
|
+
usage: '/REGEX/'
|
369
|
+
},
|
370
|
+
desc: 'Ignore URLs with file exts matching the REGEX' do |regex|
|
371
|
+
self.ignore_exts << regex
|
372
|
+
end
|
373
|
+
|
374
|
+
command.option :robots, short: '-r',
|
375
|
+
desc: 'Specifies whether to honor robots.txt' do
|
376
|
+
self.robots = true
|
377
|
+
end
|
378
|
+
end
|
379
|
+
|
380
|
+
# Keyword arguments to initialize a new `Spidr::Agent`.
|
381
|
+
#
|
382
|
+
# @return [Hash{Symbol => Object}]
|
383
|
+
#
|
384
|
+
# @since 2.0.0
|
385
|
+
attr_reader :agent_kwargs
|
386
|
+
|
387
|
+
#
|
388
|
+
# Initializes the command.
|
389
|
+
#
|
390
|
+
# @param [Hash{Symbol => Object}] kwargs
|
391
|
+
# Additional keyword arguments.
|
392
|
+
#
|
393
|
+
def initialize(**kwargs)
|
394
|
+
super(**kwargs)
|
395
|
+
|
396
|
+
@agent_kwargs = {}
|
397
|
+
end
|
398
|
+
|
399
|
+
#
|
400
|
+
# Creates a new web spider agent.
|
401
|
+
#
|
402
|
+
# @yield [agent]
|
403
|
+
# The given block will be given the newly created and configured
|
404
|
+
# web spider agent.
|
405
|
+
#
|
406
|
+
# @yieldparam [Ronin::Web::Spider::Agent] agent
|
407
|
+
# The newly created web spider agent.
|
408
|
+
#
|
409
|
+
# @return [Ronin::Web::Spider::Agent]
|
410
|
+
# The newly created web spider agent, after the agent has completed
|
411
|
+
# it's spidering.
|
412
|
+
#
|
413
|
+
def new_agent(&block)
|
414
|
+
if options[:host]
|
415
|
+
Web::Spider.host(options[:host],**agent_kwargs,&block)
|
416
|
+
elsif options[:domain]
|
417
|
+
Web::Spider.domain(options[:domain],**agent_kwargs,&block)
|
418
|
+
elsif options[:site]
|
419
|
+
Web::Spider.site(options[:site],**agent_kwargs,&block)
|
420
|
+
else
|
421
|
+
print_error "must specify --host, --domain, or --site"
|
422
|
+
exit(-1)
|
423
|
+
end
|
424
|
+
end
|
425
|
+
|
426
|
+
#
|
427
|
+
# The open connection timeout.
|
428
|
+
#
|
429
|
+
# @return [Integer, nil]
|
430
|
+
#
|
431
|
+
# @since 2.0.0
|
432
|
+
#
|
433
|
+
def open_timeout
|
434
|
+
@agent_kwargs[:open_timeout]
|
435
|
+
end
|
436
|
+
|
437
|
+
#
|
438
|
+
# Sets the open connection timeout.
|
439
|
+
#
|
440
|
+
# @param [Integer] new_timeout
|
441
|
+
#
|
442
|
+
# @return [Integer]
|
443
|
+
#
|
444
|
+
# @since 2.0.0
|
445
|
+
#
|
446
|
+
def open_timeout=(new_timeout)
|
447
|
+
@agent_kwargs[:open_timeout] = new_timeout
|
448
|
+
end
|
449
|
+
|
450
|
+
#
|
451
|
+
# The read timeout.
|
452
|
+
#
|
453
|
+
# @return [Integer, nil]
|
454
|
+
#
|
455
|
+
# @since 2.0.0
|
456
|
+
#
|
457
|
+
def read_timeout
|
458
|
+
@agent_kwargs[:read_timeout]
|
459
|
+
end
|
460
|
+
|
461
|
+
#
|
462
|
+
# Sets the read timeout.
|
463
|
+
#
|
464
|
+
# @param [Integer] new_timeout
|
465
|
+
#
|
466
|
+
# @return [Integer]
|
467
|
+
#
|
468
|
+
# @since 2.0.0
|
469
|
+
#
|
470
|
+
def read_timeout=(new_timeout)
|
471
|
+
@agent_kwargs[:read_timeout] = new_timeout
|
472
|
+
end
|
473
|
+
|
474
|
+
#
|
475
|
+
# The SSL timeout.
|
476
|
+
#
|
477
|
+
# @return [Integer, nil]
|
478
|
+
#
|
479
|
+
# @since 2.0.0
|
480
|
+
#
|
481
|
+
def ssl_timeout
|
482
|
+
@agent_kwargs[:ssl_timeout]
|
483
|
+
end
|
484
|
+
|
485
|
+
#
|
486
|
+
# Sets the SSL timeout.
|
487
|
+
#
|
488
|
+
# @param [Integer] new_timeout
|
489
|
+
#
|
490
|
+
# @return [Integer]
|
491
|
+
#
|
492
|
+
# @since 2.0.0
|
493
|
+
#
|
494
|
+
def ssl_timeout=(new_timeout)
|
495
|
+
@agent_kwargs[:ssl_timeout] = new_timeout
|
496
|
+
end
|
497
|
+
|
498
|
+
#
|
499
|
+
# The continue timeout.
|
500
|
+
#
|
501
|
+
# @return [Integer, nil]
|
502
|
+
#
|
503
|
+
# @since 2.0.0
|
504
|
+
#
|
505
|
+
def continue_timeout
|
506
|
+
@agent_kwargs[:continue_timeout]
|
507
|
+
end
|
508
|
+
|
509
|
+
#
|
510
|
+
# Sets the continue timeout.
|
511
|
+
#
|
512
|
+
# @param [Integer] new_timeout
|
513
|
+
#
|
514
|
+
# @return [Integer]
|
515
|
+
#
|
516
|
+
# @since 2.0.0
|
517
|
+
#
|
518
|
+
def continue_timeout=(new_timeout)
|
519
|
+
@agent_kwargs[:continue_timeout] = new_timeout
|
520
|
+
end
|
521
|
+
|
522
|
+
#
|
523
|
+
# The `Keep-Alive` timeout.
|
524
|
+
#
|
525
|
+
# @return [Integer, nil]
|
526
|
+
#
|
527
|
+
# @since 2.0.0
|
528
|
+
#
|
529
|
+
def keep_alive_timeout
|
530
|
+
@agent_kwargs[:keep_alive_timeout]
|
531
|
+
end
|
532
|
+
|
533
|
+
#
|
534
|
+
# Sets the `Keep-Alive` timeout.
|
535
|
+
#
|
536
|
+
# @param [Integer] new_timeout
|
537
|
+
#
|
538
|
+
# @return [Integer]
|
539
|
+
#
|
540
|
+
# @since 2.0.0
|
541
|
+
#
|
542
|
+
def keep_alive_timeout=(new_timeout)
|
543
|
+
@agent_kwargs[:keep_alive_timeout] = new_timeout
|
544
|
+
end
|
545
|
+
|
546
|
+
#
|
547
|
+
# The proxy to use for spidering.
|
548
|
+
#
|
549
|
+
# @return [String, nil]
|
550
|
+
#
|
551
|
+
# @since 0.2.0
|
552
|
+
#
|
553
|
+
def proxy
|
554
|
+
@agent_kwargs[:proxy]
|
555
|
+
end
|
556
|
+
|
557
|
+
#
|
558
|
+
# Sets the proxy to use for spidering.
|
559
|
+
#
|
560
|
+
# @param [String] new_proxy
|
561
|
+
# The new proxy URI.
|
562
|
+
#
|
563
|
+
# @return [String]
|
564
|
+
#
|
565
|
+
# @since 2.0.0
|
566
|
+
#
|
567
|
+
def proxy=(new_proxy)
|
568
|
+
@agent_kwargs[:proxy] = new_proxy
|
569
|
+
end
|
570
|
+
|
571
|
+
#
|
572
|
+
# The default headers to send with every request.
|
573
|
+
#
|
574
|
+
# @return [Hash{String => String}]
|
575
|
+
#
|
576
|
+
# @since 2.0.0
|
577
|
+
#
|
578
|
+
def default_headers
|
579
|
+
@agent_kwargs[:default_headers] ||= {}
|
580
|
+
end
|
581
|
+
|
582
|
+
#
|
583
|
+
# The default `Host` headers to send with every request.
|
584
|
+
#
|
585
|
+
# @return [Hash{String => String}]
|
586
|
+
#
|
587
|
+
# @since 2.0.0
|
588
|
+
#
|
589
|
+
def host_headers
|
590
|
+
@agent_kwargs[:host_headers] ||= {}
|
591
|
+
end
|
592
|
+
|
593
|
+
#
|
594
|
+
# Sets the new `User-Agent` header to use for spidering.
|
595
|
+
#
|
596
|
+
# @return [String, nil]
|
597
|
+
#
|
598
|
+
# @since 2.0.0
|
599
|
+
#
|
600
|
+
def user_agent
|
601
|
+
@agent_kwargs[:user_agent]
|
602
|
+
end
|
603
|
+
|
604
|
+
#
|
605
|
+
# Sets the new `User-Agent` header to use for spidering.
|
606
|
+
#
|
607
|
+
# @param [String] new_user_agent
|
608
|
+
#
|
609
|
+
# @return [String]
|
610
|
+
#
|
611
|
+
# @since 2.0.0
|
612
|
+
#
|
613
|
+
def user_agent=(new_user_agent)
|
614
|
+
@agent_kwargs[:user_agent] = new_user_agent
|
615
|
+
end
|
616
|
+
|
617
|
+
#
|
618
|
+
# The `Referer` header to use for spidering.
|
619
|
+
#
|
620
|
+
# @return [String, nil]
|
621
|
+
#
|
622
|
+
# @since 2.0.0
|
623
|
+
#
|
624
|
+
def referer
|
625
|
+
@agent_kwargs[:referer]
|
626
|
+
end
|
627
|
+
|
628
|
+
#
|
629
|
+
# Sets the `Referer` header to use for spidering.
|
630
|
+
#
|
631
|
+
# @param [String] new_referer
|
632
|
+
#
|
633
|
+
# @return [String, nil]
|
634
|
+
#
|
635
|
+
# @since 2.0.0
|
636
|
+
#
|
637
|
+
def referer=(new_referer)
|
638
|
+
@agent_kwargs[:referer] = new_referer
|
639
|
+
end
|
640
|
+
|
641
|
+
#
|
642
|
+
# The amount of seconds to pause between each request.
|
643
|
+
#
|
644
|
+
# @return [Integer, Float, nil]
|
645
|
+
#
|
646
|
+
# @since 2.0.0
|
647
|
+
#
|
648
|
+
def delay
|
649
|
+
@agent_kwargs[:delay]
|
650
|
+
end
|
651
|
+
|
652
|
+
#
|
653
|
+
# Sets the amount of seconds to pause between each request.
|
654
|
+
#
|
655
|
+
# @param [Integer, Float] new_delay
|
656
|
+
#
|
657
|
+
# @return [Integer, Float]
|
658
|
+
#
|
659
|
+
# @since 2.0.0
|
660
|
+
#
|
661
|
+
def delay=(new_delay)
|
662
|
+
@agent_kwargs[:delay] = new_delay
|
663
|
+
end
|
664
|
+
|
665
|
+
#
|
666
|
+
# The limit to how many URLs to visit.
|
667
|
+
#
|
668
|
+
# @return [Integer, nil]
|
669
|
+
#
|
670
|
+
# @since 2.0.0
|
671
|
+
#
|
672
|
+
def limit
|
673
|
+
@agent_kwargs[:limit]
|
674
|
+
end
|
675
|
+
|
676
|
+
#
|
677
|
+
# Sets the limit of how many URLs to visit.
|
678
|
+
#
|
679
|
+
# @param [Integer] new_limit
|
680
|
+
#
|
681
|
+
# @return [Integer]
|
682
|
+
#
|
683
|
+
# @since 2.0.0
|
684
|
+
#
|
685
|
+
def limit=(new_limit)
|
686
|
+
@agent_kwargs[:limit] = new_limit
|
687
|
+
end
|
688
|
+
|
689
|
+
#
|
690
|
+
# The maximum depth to spider.
|
691
|
+
#
|
692
|
+
# @return [Integer, nil]
|
693
|
+
#
|
694
|
+
# @since 2.0.0
|
695
|
+
#
|
696
|
+
def max_depth
|
697
|
+
@agent_kwargs[:max_depth]
|
698
|
+
end
|
699
|
+
|
700
|
+
#
|
701
|
+
# Sets the maximum depth to spider.
|
702
|
+
#
|
703
|
+
# @param [Integer] new_max_depth
|
704
|
+
#
|
705
|
+
# @return [Integer]
|
706
|
+
#
|
707
|
+
# @since 2.0.0
|
708
|
+
#
|
709
|
+
def max_depth=(new_max_depth)
|
710
|
+
@agent_kwargs[:max_depth] = new_max_depth
|
711
|
+
end
|
712
|
+
|
713
|
+
#
|
714
|
+
# The pre-existing queue of URLs to start spidering.
|
715
|
+
#
|
716
|
+
# @return [Array<String>]
|
717
|
+
#
|
718
|
+
# @since 2.0.0
|
719
|
+
#
|
720
|
+
def queue
|
721
|
+
@agent_kwargs[:queue] ||= []
|
722
|
+
end
|
723
|
+
|
724
|
+
#
|
725
|
+
# The pre-existing history of URLs that have already been spidered.
|
726
|
+
#
|
727
|
+
# @return [Array<String>]
|
728
|
+
#
|
729
|
+
# @since 2.0.0
|
730
|
+
#
|
731
|
+
def history
|
732
|
+
@agent_kwargs[:history] ||= []
|
733
|
+
end
|
734
|
+
|
735
|
+
#
|
736
|
+
# Whether to strip the `#fragment` components of links.
|
737
|
+
#
|
738
|
+
# @return [Boolean]
|
739
|
+
#
|
740
|
+
# @since 2.0.0
|
741
|
+
#
|
742
|
+
def strip_fragments
|
743
|
+
@agent_kwargs[:strip_fragments]
|
744
|
+
end
|
745
|
+
|
746
|
+
#
|
747
|
+
# Sets whether to strip the `#fragment` components of links.
|
748
|
+
#
|
749
|
+
# @param [Boolean] new_value
|
750
|
+
#
|
751
|
+
# @return [Boolean]
|
752
|
+
#
|
753
|
+
# @since 2.0.0
|
754
|
+
#
|
755
|
+
def strip_fragments=(new_value)
|
756
|
+
@agent_kwargs[:strip_fragments] = new_value
|
757
|
+
end
|
758
|
+
|
759
|
+
#
|
760
|
+
# Whether to strip the `?query` components of links.
|
761
|
+
#
|
762
|
+
# @return [Boolean]
|
763
|
+
#
|
764
|
+
# @since 2.0.0
|
765
|
+
#
|
766
|
+
def strip_query
|
767
|
+
@agent_kwargs[:strip_query]
|
768
|
+
end
|
769
|
+
|
770
|
+
#
|
771
|
+
# Sets whether to strip the `?query` components of links.
|
772
|
+
#
|
773
|
+
# @param [Boolean] new_value
|
774
|
+
#
|
775
|
+
# @return [Boolean]
|
776
|
+
#
|
777
|
+
# @since 2.0.0
|
778
|
+
#
|
779
|
+
def strip_query=(new_value)
|
780
|
+
@agent_kwargs[:strip_query] = new_value
|
781
|
+
end
|
782
|
+
|
783
|
+
#
|
784
|
+
# The list of URI schemes to allow spidering.
|
785
|
+
#
|
786
|
+
# @return [Array<String>]
|
787
|
+
#
|
788
|
+
# @since 2.0.0
|
789
|
+
#
|
790
|
+
def visit_schemes
|
791
|
+
@agent_kwargs[:schemes] ||= []
|
792
|
+
end
|
793
|
+
|
794
|
+
#
|
795
|
+
# The list of URI hosts to allow spidering.
|
796
|
+
#
|
797
|
+
# @return [Array<String>]
|
798
|
+
#
|
799
|
+
# @since 2.0.0
|
800
|
+
#
|
801
|
+
def visit_hosts
|
802
|
+
@agent_kwargs[:hosts] ||= []
|
803
|
+
end
|
804
|
+
|
805
|
+
#
|
806
|
+
# The list of URI ports to allow spidering.
|
807
|
+
#
|
808
|
+
# @return [Array<Integer>]
|
809
|
+
#
|
810
|
+
# @since 2.0.0
|
811
|
+
#
|
812
|
+
def visit_ports
|
813
|
+
@agent_kwargs[:ports] ||= []
|
814
|
+
end
|
815
|
+
|
816
|
+
#
|
817
|
+
# The list of URI links to allow spidering.
|
818
|
+
#
|
819
|
+
# @return [Array<String>]
|
820
|
+
#
|
821
|
+
# @since 2.0.0
|
822
|
+
#
|
823
|
+
def visit_links
|
824
|
+
@agent_kwargs[:links] ||= []
|
825
|
+
end
|
826
|
+
|
827
|
+
#
|
828
|
+
# The list of URI file extensions to allow spidering.
|
829
|
+
#
|
830
|
+
# @return [Array<String>]
|
831
|
+
#
|
832
|
+
# @since 2.0.0
|
833
|
+
#
|
834
|
+
def visit_exts
|
835
|
+
@agent_kwargs[:exts] ||= []
|
836
|
+
end
|
837
|
+
|
838
|
+
#
|
839
|
+
# The list of URI schemes to ignore while spidering.
|
840
|
+
#
|
841
|
+
# @return [Array<String>]
|
842
|
+
#
|
843
|
+
# @since 2.0.0
|
844
|
+
#
|
845
|
+
def ignore_schemes
|
846
|
+
@agent_kwargs[:ignore_schemes] ||= []
|
847
|
+
end
|
848
|
+
|
849
|
+
#
|
850
|
+
# The list of URI hosts to ignore while spidering.
|
851
|
+
#
|
852
|
+
# @return [Array<String>]
|
853
|
+
#
|
854
|
+
# @since 2.0.0
|
855
|
+
#
|
856
|
+
def ignore_hosts
|
857
|
+
@agent_kwargs[:ignore_hosts] ||= []
|
858
|
+
end
|
859
|
+
|
860
|
+
#
|
861
|
+
# The list of URI ports to ignore while spidering.
|
862
|
+
#
|
863
|
+
# @return [Array<Integer>]
|
864
|
+
#
|
865
|
+
# @since 2.0.0
|
866
|
+
#
|
867
|
+
def ignore_ports
|
868
|
+
@agent_kwargs[:ignore_ports] ||= []
|
869
|
+
end
|
870
|
+
|
871
|
+
#
|
872
|
+
# The list of URI links to ignore while spidering.
|
873
|
+
#
|
874
|
+
# @return [Array<String>]
|
875
|
+
#
|
876
|
+
# @since 2.0.0
|
877
|
+
#
|
878
|
+
def ignore_links
|
879
|
+
@agent_kwargs[:ignore_links] ||= []
|
880
|
+
end
|
881
|
+
|
882
|
+
#
|
883
|
+
# The list of URI file extensions to ignore while spidering.
|
884
|
+
#
|
885
|
+
# @return [Array<String>]
|
886
|
+
#
|
887
|
+
# @since 2.0.0
|
888
|
+
#
|
889
|
+
def ignore_exts
|
890
|
+
@agent_kwargs[:ignore_exts] ||= []
|
891
|
+
end
|
892
|
+
|
893
|
+
#
|
894
|
+
# Whether to honor the `robots.txt` file while spidering.
|
895
|
+
#
|
896
|
+
# @return [Boolean]
|
897
|
+
#
|
898
|
+
# @since 2.0.0
|
899
|
+
#
|
900
|
+
def robots
|
901
|
+
@agent_kwargs[:robots]
|
902
|
+
end
|
903
|
+
|
904
|
+
#
|
905
|
+
# Sets whether to honor the `robots.txt` file while spidering.
|
906
|
+
#
|
907
|
+
# @param [Boolean] new_value
|
908
|
+
#
|
909
|
+
# @return [Boolean]
|
910
|
+
#
|
911
|
+
# @since 2.0.0
|
912
|
+
#
|
913
|
+
def robots=(new_value)
|
914
|
+
@agent_kwargs[:robots] = new_value
|
915
|
+
end
|
916
|
+
end
|
917
|
+
end
|
918
|
+
end
|
919
|
+
end
|