ronin-web 1.0.2 → 2.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +3 -2
  3. data/.gitignore +1 -0
  4. data/.rubocop.yml +5 -0
  5. data/ChangeLog.md +46 -1
  6. data/Gemfile +25 -12
  7. data/README.md +257 -51
  8. data/Rakefile +9 -0
  9. data/data/completions/ronin-web +203 -0
  10. data/gemspec.yml +18 -5
  11. data/lib/ronin/web/cli/browser_options.rb +92 -0
  12. data/lib/ronin/web/cli/browser_shell.rb +448 -0
  13. data/lib/ronin/web/cli/command.rb +1 -1
  14. data/lib/ronin/web/cli/commands/browser.rb +373 -0
  15. data/lib/ronin/web/cli/commands/completion.rb +63 -0
  16. data/lib/ronin/web/cli/commands/diff.rb +60 -8
  17. data/lib/ronin/web/cli/commands/html.rb +21 -33
  18. data/lib/ronin/web/cli/commands/irb.rb +1 -1
  19. data/lib/ronin/web/cli/commands/new/{webapp.rb → app.rb} +8 -8
  20. data/lib/ronin/web/cli/commands/new/nokogiri.rb +4 -4
  21. data/lib/ronin/web/cli/commands/new/server.rb +1 -1
  22. data/lib/ronin/web/cli/commands/new/spider.rb +1 -1
  23. data/lib/ronin/web/cli/commands/new.rb +5 -3
  24. data/lib/ronin/web/cli/commands/reverse_proxy.rb +1 -1
  25. data/lib/ronin/web/cli/commands/screenshot.rb +186 -0
  26. data/lib/ronin/web/cli/commands/server.rb +1 -1
  27. data/lib/ronin/web/cli/commands/session_cookie.rb +265 -0
  28. data/lib/ronin/web/cli/commands/spider.rb +61 -467
  29. data/lib/ronin/web/cli/commands/user_agent.rb +177 -0
  30. data/lib/ronin/web/cli/commands/vulns.rb +463 -0
  31. data/lib/ronin/web/cli/commands/wordlist.rb +484 -0
  32. data/lib/ronin/web/cli/commands/xml.rb +149 -0
  33. data/lib/ronin/web/cli/js_shell.rb +69 -0
  34. data/lib/ronin/web/cli/ruby_shell.rb +1 -1
  35. data/lib/ronin/web/cli/spider_options.rb +919 -0
  36. data/lib/ronin/web/cli.rb +3 -1
  37. data/lib/ronin/web/html.rb +1 -1
  38. data/lib/ronin/web/root.rb +1 -1
  39. data/lib/ronin/web/version.rb +2 -2
  40. data/lib/ronin/web/xml.rb +1 -1
  41. data/lib/ronin/web.rb +4 -364
  42. data/man/ronin-web-browser.1 +92 -0
  43. data/man/ronin-web-browser.1.md +96 -0
  44. data/man/ronin-web-completion.1 +76 -0
  45. data/man/ronin-web-completion.1.md +78 -0
  46. data/man/ronin-web-diff.1 +14 -21
  47. data/man/ronin-web-diff.1.md +13 -6
  48. data/man/ronin-web-html.1 +30 -46
  49. data/man/ronin-web-html.1.md +27 -17
  50. data/man/ronin-web-irb.1 +9 -16
  51. data/man/ronin-web-irb.1.md +6 -2
  52. data/man/ronin-web-new-app.1.md +39 -0
  53. data/man/ronin-web-new-nokogiri.1 +9 -20
  54. data/man/ronin-web-new-nokogiri.1.md +5 -5
  55. data/man/ronin-web-new-server.1 +11 -23
  56. data/man/ronin-web-new-server.1.md +5 -5
  57. data/man/ronin-web-new-spider.1 +44 -88
  58. data/man/ronin-web-new-spider.1.md +37 -37
  59. data/man/ronin-web-new.1 +18 -30
  60. data/man/ronin-web-new.1.md +15 -11
  61. data/man/ronin-web-reverse-proxy.1 +33 -38
  62. data/man/ronin-web-reverse-proxy.1.md +20 -14
  63. data/man/ronin-web-screenshot.1 +56 -0
  64. data/man/ronin-web-screenshot.1.md +56 -0
  65. data/man/ronin-web-server.1 +15 -29
  66. data/man/ronin-web-server.1.md +13 -9
  67. data/man/ronin-web-session-cookie.1 +38 -0
  68. data/man/ronin-web-session-cookie.1.md +41 -0
  69. data/man/ronin-web-spider.1 +121 -130
  70. data/man/ronin-web-spider.1.md +115 -66
  71. data/man/ronin-web-user-agent.1 +44 -0
  72. data/man/ronin-web-user-agent.1.md +46 -0
  73. data/man/ronin-web-vulns.1 +175 -0
  74. data/man/ronin-web-vulns.1.md +177 -0
  75. data/man/ronin-web-wordlist.1 +258 -0
  76. data/man/ronin-web-wordlist.1.md +263 -0
  77. data/man/ronin-web-xml.1 +43 -0
  78. data/man/ronin-web-xml.1.md +46 -0
  79. data/man/ronin-web.1 +67 -18
  80. data/man/ronin-web.1.md +55 -4
  81. data/scripts/setup +58 -0
  82. metadata +122 -31
  83. data/lib/ronin/web/mechanize.rb +0 -84
  84. data/man/ronin-web-new-webapp.1.md +0 -39
  85. /data/data/new/{webapp → app}/.gitignore +0 -0
  86. /data/data/new/{webapp → app}/.ruby-version.erb +0 -0
  87. /data/data/new/{webapp → app}/Dockerfile.erb +0 -0
  88. /data/data/new/{webapp → app}/Gemfile +0 -0
  89. /data/data/new/{webapp → app}/app.rb.erb +0 -0
  90. /data/data/new/{webapp → app}/config.ru +0 -0
  91. /data/data/new/{webapp → app}/docker-compose.yml.erb +0 -0
@@ -0,0 +1,919 @@
1
+ # frozen_string_literal: true
2
+ #
3
+ # ronin-web - A collection of useful web helper methods and commands.
4
+ #
5
+ # Copyright (c) 2006-2024 Hal Brodigan (postmodern.mod3 at gmail.com)
6
+ #
7
+ # ronin-web is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU General Public License as published by
9
+ # the Free Software Foundation, either version 3 of the License, or
10
+ # (at your option) any later version.
11
+ #
12
+ # ronin-web is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU General Public License
18
+ # along with ronin-web. If not, see <https://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ require 'ronin/web/spider'
22
+ require 'ronin/support/network/http/user_agents'
23
+
24
module Ronin
  module Web
    class CLI
      #
      # Adds options for spidering a website.
      #
      # Including this module into a command class registers the spider
      # related options on that class (see {included}) and adds the accessor
      # methods that those options use to populate {#agent_kwargs}, which
      # {#new_agent} then passes to `Ronin::Web::Spider`.
      #
      # @since 2.0.0
      #
      module SpiderOptions
        #
        # Adds options for configuring a web spider and spidering a website.
        #
        # The option blocks call accessor methods on `self`, so they are
        # expected to be evaluated in the context of the command instance.
        #
        # @param [Class<Command>] command
        #   The command class including {SpiderOptions}.
        #
        def self.included(command)
          command.usage '[options] {--host HOST | --domain DOMAIN | --site URL}'

          command.option :host, value: { type: String, usage: 'HOST' },
                                desc: 'Spiders the specific HOST'

          command.option :domain, value: { type: String, usage: 'DOMAIN' },
                                  desc: 'Spiders the whole domain'

          command.option :site, value: { type: String, usage: 'URL' },
                                desc: 'Spiders the website, starting at the URL'

          command.option :open_timeout, value: {
                                          type: Integer,
                                          usage: 'SECS',
                                          default: Spidr.open_timeout
                                        },
                                        desc: 'Sets the connection open timeout' do |timeout|
            self.open_timeout = timeout
          end

          command.option :read_timeout, value: {
                                          type: Integer,
                                          usage: 'SECS',
                                          default: Spidr.read_timeout
                                        },
                                        desc: 'Sets the read timeout' do |timeout|
            self.read_timeout = timeout
          end

          command.option :ssl_timeout, value: {
                                         type: Integer,
                                         usage: 'SECS',
                                         default: Spidr.ssl_timeout
                                       },
                                       desc: 'Sets the SSL connection timeout' do |timeout|
            self.ssl_timeout = timeout
          end

          command.option :continue_timeout, value: {
                                              type: Integer,
                                              usage: 'SECS',
                                              default: Spidr.continue_timeout
                                            },
                                            desc: 'Sets the continue timeout' do |timeout|
            self.continue_timeout = timeout
          end

          command.option :keep_alive_timeout, value: {
                                                type: Integer,
                                                usage: 'SECS',
                                                default: Spidr.keep_alive_timeout
                                              },
                                              desc: 'Sets the connection keep alive timeout' do |timeout|
            self.keep_alive_timeout = timeout
          end

          command.option :proxy, short: '-P',
                                 value: { type: String, usage: 'PROXY' },
                                 desc: 'Sets the proxy to use' do |proxy|
            self.proxy = proxy
          end

          command.option :header, short: '-H',
                                  value: {
                                    type: /\A[^\s:]+:.*\z/,
                                    usage: 'NAME: VALUE'
                                  },
                                  desc: 'Sets a default header' do |header|
            # split on the first ':' only, so the value may itself contain ':'
            name, value = header.split(/:\s*/, 2)

            self.default_headers[name] = value
          end

          # NOTE(review): the description assumes Spidr's `host_headers`
          # semantics (host name => `Host` header value) — confirm against
          # the Spidr::Agent documentation.
          command.option :host_header, value: {
                                         type: /\A[^\s=]+=[^\s=]+\z/,
                                         usage: 'NAME=VALUE'
                                       },
                                       desc: 'Sets the Host header VALUE to use for the host NAME' do |name_value|
            name, value = name_value.split('=', 2)

            self.host_headers[name] = value
          end

          command.option :user_agent_string, short: '-U',
                                             value: { type: String, usage: 'STRING' },
                                             desc: 'The User-Agent string to use' do |ua|
            self.user_agent = ua
          end

          command.option :user_agent, short: '-u',
                                      value: {
                                        # expose the aliases with dashes instead of underscores
                                        type: Support::Network::HTTP::UserAgents::ALIASES.transform_keys { |key|
                                          key.to_s.tr('_', '-')
                                        }
                                      },
                                      desc: 'The User-Agent to use' do |name|
            self.user_agent = name
          end

          command.option :referer, short: '-R',
                                   value: { type: String, usage: 'URL' },
                                   desc: 'Sets the Referer URL' do |referer|
            self.referer = referer
          end

          # No short flag here: `-d` is also registered by --max-depth below.
          # Two options cannot share one short flag, and since --max-depth is
          # defined later it is presumably the one that actually received
          # `-d`; keeping `-d` only on --max-depth avoids advertising a flag
          # that would not reach --delay.
          command.option :delay, value: { type: Numeric, usage: 'SECS' },
                                 desc: 'Sets the delay in seconds between each request' do |delay|
            self.delay = delay
          end

          command.option :limit, short: '-l',
                                 value: { type: Integer, usage: 'COUNT' },
                                 desc: 'Only spiders up to COUNT pages' do |limit|
            self.limit = limit
          end

          command.option :max_depth, short: '-d',
                                     value: { type: Integer, usage: 'DEPTH' },
                                     desc: 'Only spiders up to max depth' do |depth|
            self.max_depth = depth
          end

          command.option :enqueue, value: { type: String, usage: 'URL' },
                                   desc: 'Adds the URL to the queue' do |url|
            self.queue << url
          end

          command.option :visited, value: { type: String, usage: 'URL' },
                                   desc: 'Marks the URL as previously visited' do |url|
            self.history << url
          end

          # These two flags can only switch the behavior on.
          command.option :strip_fragments, desc: 'Enables stripping the fragment component of every URL' do
            self.strip_fragments = true
          end

          command.option :strip_query, desc: 'Enables stripping the query component of every URL' do
            self.strip_query = true
          end

          command.option :visit_scheme, value: { type: String, usage: 'SCHEME' },
                                        desc: 'Visit URLs with the URI scheme' do |scheme|
            self.visit_schemes << scheme
          end

          command.option :visit_schemes_like, value: { type: Regexp, usage: '/REGEX/' },
                                              desc: 'Visit URLs with URI schemes that match the REGEX' do |regex|
            self.visit_schemes << regex
          end

          command.option :ignore_scheme, value: { type: String, usage: 'SCHEME' },
                                         desc: 'Ignore the URLs with the URI scheme' do |scheme|
            self.ignore_schemes << scheme
          end

          command.option :ignore_schemes_like, value: { type: Regexp, usage: '/REGEX/' },
                                               desc: 'Ignore the URLs with URI schemes matching the REGEX' do |regex|
            self.ignore_schemes << regex
          end

          command.option :visit_host, value: { type: String, usage: 'HOST' },
                                      desc: 'Visit URLs with the matching host name' do |host|
            self.visit_hosts << host
          end

          command.option :visit_hosts_like, value: { type: Regexp, usage: '/REGEX/' },
                                            desc: 'Visit URLs with hostnames that match the REGEX' do |regex|
            self.visit_hosts << regex
          end

          command.option :ignore_host, value: { type: String, usage: 'HOST' },
                                       desc: 'Ignore the host name' do |host|
            self.ignore_hosts << host
          end

          command.option :ignore_hosts_like, value: { type: Regexp, usage: '/REGEX/' },
                                             desc: 'Ignore the host names matching the REGEX' do |regex|
            self.ignore_hosts << regex
          end

          command.option :visit_port, value: { type: Integer, usage: 'PORT' },
                                      desc: 'Visit URLs with the matching port number' do |port|
            self.visit_ports << port
          end

          command.option :visit_ports_like, value: { type: Regexp, usage: '/REGEX/' },
                                            desc: 'Visit URLs with port numbers that match the REGEX' do |regex|
            self.visit_ports << regex
          end

          command.option :ignore_port, value: { type: Integer, usage: 'PORT' },
                                       desc: 'Ignore the port number' do |port|
            self.ignore_ports << port
          end

          command.option :ignore_ports_like, value: { type: Regexp, usage: '/REGEX/' },
                                             desc: 'Ignore the port numbers matching the REGEX' do |regex|
            self.ignore_ports << regex
          end

          command.option :visit_link, value: { type: String, usage: 'URL' },
                                      desc: 'Visit the URL' do |link|
            self.visit_links << link
          end

          command.option :visit_links_like, value: { type: Regexp, usage: '/REGEX/' },
                                            desc: 'Visit URLs that match the REGEX' do |regex|
            self.visit_links << regex
          end

          command.option :ignore_link, value: { type: String, usage: 'URL' },
                                       desc: 'Ignore the URL' do |link|
            self.ignore_links << link
          end

          command.option :ignore_links_like, value: { type: Regexp, usage: '/REGEX/' },
                                             desc: 'Ignore URLs matching the REGEX' do |regex|
            self.ignore_links << regex
          end

          command.option :visit_ext, value: { type: String, usage: 'FILE_EXT' },
                                     desc: 'Visit URLs with the matching file ext' do |ext|
            self.visit_exts << ext
          end

          command.option :visit_exts_like, value: { type: Regexp, usage: '/REGEX/' },
                                           desc: 'Visit URLs with file exts that match the REGEX' do |regex|
            self.visit_exts << regex
          end

          command.option :ignore_ext, value: { type: String, usage: 'FILE_EXT' },
                                      desc: 'Ignore the URLs with the file ext' do |ext|
            self.ignore_exts << ext
          end

          command.option :ignore_exts_like, value: { type: Regexp, usage: '/REGEX/' },
                                            desc: 'Ignore URLs with file exts matching the REGEX' do |regex|
            self.ignore_exts << regex
          end

          command.option :robots, short: '-r',
                                  desc: 'Specifies whether to honor robots.txt' do
            self.robots = true
          end
        end

        #
        # Keyword arguments to initialize a new `Spidr::Agent`.
        #
        # Only contains the keys that have been set via the accessor
        # methods below.
        #
        # @return [Hash{Symbol => Object}]
        #
        # @since 2.0.0
        #
        attr_reader :agent_kwargs

        #
        # Initializes the command.
        #
        # @param [Hash{Symbol => Object}] kwargs
        #   Additional keyword arguments, passed through to `super`.
        #
        def initialize(**kwargs)
          super(**kwargs)

          @agent_kwargs = {}
        end

        #
        # Creates a new web spider agent.
        #
        # The agent is created using the first of `options[:host]`,
        # `options[:domain]`, or `options[:site]` that is set. If none of
        # them is set, an error message is printed and the process exits
        # with status `-1`.
        #
        # @yield [agent]
        #   The given block will be given the newly created and configured
        #   web spider agent.
        #
        # @yieldparam [Ronin::Web::Spider::Agent] agent
        #   The newly created web spider agent.
        #
        # @return [Ronin::Web::Spider::Agent]
        #   The newly created web spider agent, after the agent has completed
        #   its spidering.
        #
        def new_agent(&block)
          if options[:host]
            Web::Spider.host(options[:host], **agent_kwargs, &block)
          elsif options[:domain]
            Web::Spider.domain(options[:domain], **agent_kwargs, &block)
          elsif options[:site]
            Web::Spider.site(options[:site], **agent_kwargs, &block)
          else
            print_error "must specify --host, --domain, or --site"
            exit(-1)
          end
        end

        #
        # The open connection timeout.
        #
        # @return [Integer, nil]
        #
        # @since 2.0.0
        #
        def open_timeout
          @agent_kwargs[:open_timeout]
        end

        #
        # Sets the open connection timeout.
        #
        # @param [Integer] new_timeout
        #
        # @return [Integer]
        #
        # @since 2.0.0
        #
        def open_timeout=(new_timeout)
          @agent_kwargs[:open_timeout] = new_timeout
        end

        #
        # The read timeout.
        #
        # @return [Integer, nil]
        #
        # @since 2.0.0
        #
        def read_timeout
          @agent_kwargs[:read_timeout]
        end

        #
        # Sets the read timeout.
        #
        # @param [Integer] new_timeout
        #
        # @return [Integer]
        #
        # @since 2.0.0
        #
        def read_timeout=(new_timeout)
          @agent_kwargs[:read_timeout] = new_timeout
        end

        #
        # The SSL timeout.
        #
        # @return [Integer, nil]
        #
        # @since 2.0.0
        #
        def ssl_timeout
          @agent_kwargs[:ssl_timeout]
        end

        #
        # Sets the SSL timeout.
        #
        # @param [Integer] new_timeout
        #
        # @return [Integer]
        #
        # @since 2.0.0
        #
        def ssl_timeout=(new_timeout)
          @agent_kwargs[:ssl_timeout] = new_timeout
        end

        #
        # The continue timeout.
        #
        # @return [Integer, nil]
        #
        # @since 2.0.0
        #
        def continue_timeout
          @agent_kwargs[:continue_timeout]
        end

        #
        # Sets the continue timeout.
        #
        # @param [Integer] new_timeout
        #
        # @return [Integer]
        #
        # @since 2.0.0
        #
        def continue_timeout=(new_timeout)
          @agent_kwargs[:continue_timeout] = new_timeout
        end

        #
        # The `Keep-Alive` timeout.
        #
        # @return [Integer, nil]
        #
        # @since 2.0.0
        #
        def keep_alive_timeout
          @agent_kwargs[:keep_alive_timeout]
        end

        #
        # Sets the `Keep-Alive` timeout.
        #
        # @param [Integer] new_timeout
        #
        # @return [Integer]
        #
        # @since 2.0.0
        #
        def keep_alive_timeout=(new_timeout)
          @agent_kwargs[:keep_alive_timeout] = new_timeout
        end

        #
        # The proxy to use for spidering.
        #
        # @return [String, nil]
        #
        # @since 2.0.0
        #
        def proxy
          @agent_kwargs[:proxy]
        end

        #
        # Sets the proxy to use for spidering.
        #
        # @param [String] new_proxy
        #   The new proxy URI.
        #
        # @return [String]
        #
        # @since 2.0.0
        #
        def proxy=(new_proxy)
          @agent_kwargs[:proxy] = new_proxy
        end

        #
        # The default headers to send with every request.
        #
        # The Hash is created on first access.
        #
        # @return [Hash{String => String}]
        #
        # @since 2.0.0
        #
        def default_headers
          @agent_kwargs[:default_headers] ||= {}
        end

        #
        # The `Host` header values, keyed by the `NAME` given to the
        # `--host-header NAME=VALUE` option.
        #
        # The Hash is created on first access.
        #
        # @return [Hash{String => String}]
        #
        # @since 2.0.0
        #
        def host_headers
          @agent_kwargs[:host_headers] ||= {}
        end

        #
        # The `User-Agent` header to use for spidering.
        #
        # @return [String, nil]
        #
        # @since 2.0.0
        #
        def user_agent
          @agent_kwargs[:user_agent]
        end

        #
        # Sets the new `User-Agent` header to use for spidering.
        #
        # @param [String] new_user_agent
        #
        # @return [String]
        #
        # @since 2.0.0
        #
        def user_agent=(new_user_agent)
          @agent_kwargs[:user_agent] = new_user_agent
        end

        #
        # The `Referer` header to use for spidering.
        #
        # @return [String, nil]
        #
        # @since 2.0.0
        #
        def referer
          @agent_kwargs[:referer]
        end

        #
        # Sets the `Referer` header to use for spidering.
        #
        # @param [String] new_referer
        #
        # @return [String, nil]
        #
        # @since 2.0.0
        #
        def referer=(new_referer)
          @agent_kwargs[:referer] = new_referer
        end

        #
        # The amount of seconds to pause between each request.
        #
        # @return [Integer, Float, nil]
        #
        # @since 2.0.0
        #
        def delay
          @agent_kwargs[:delay]
        end

        #
        # Sets the amount of seconds to pause between each request.
        #
        # @param [Integer, Float] new_delay
        #
        # @return [Integer, Float]
        #
        # @since 2.0.0
        #
        def delay=(new_delay)
          @agent_kwargs[:delay] = new_delay
        end

        #
        # The limit to how many URLs to visit.
        #
        # @return [Integer, nil]
        #
        # @since 2.0.0
        #
        def limit
          @agent_kwargs[:limit]
        end

        #
        # Sets the limit of how many URLs to visit.
        #
        # @param [Integer] new_limit
        #
        # @return [Integer]
        #
        # @since 2.0.0
        #
        def limit=(new_limit)
          @agent_kwargs[:limit] = new_limit
        end

        #
        # The maximum depth to spider.
        #
        # @return [Integer, nil]
        #
        # @since 2.0.0
        #
        def max_depth
          @agent_kwargs[:max_depth]
        end

        #
        # Sets the maximum depth to spider.
        #
        # @param [Integer] new_max_depth
        #
        # @return [Integer]
        #
        # @since 2.0.0
        #
        def max_depth=(new_max_depth)
          @agent_kwargs[:max_depth] = new_max_depth
        end

        #
        # The pre-existing queue of URLs to start spidering.
        #
        # The Array is created on first access.
        #
        # @return [Array<String>]
        #
        # @since 2.0.0
        #
        def queue
          @agent_kwargs[:queue] ||= []
        end

        #
        # The pre-existing history of URLs that have already been spidered.
        #
        # The Array is created on first access.
        #
        # @return [Array<String>]
        #
        # @since 2.0.0
        #
        def history
          @agent_kwargs[:history] ||= []
        end

        #
        # Whether to strip the `#fragment` components of links.
        #
        # @return [Boolean, nil]
        #   `nil` if the value has not been set.
        #
        # @since 2.0.0
        #
        def strip_fragments
          @agent_kwargs[:strip_fragments]
        end

        #
        # Sets whether to strip the `#fragment` components of links.
        #
        # @param [Boolean] new_value
        #
        # @return [Boolean]
        #
        # @since 2.0.0
        #
        def strip_fragments=(new_value)
          @agent_kwargs[:strip_fragments] = new_value
        end

        #
        # Whether to strip the `?query` components of links.
        #
        # @return [Boolean, nil]
        #   `nil` if the value has not been set.
        #
        # @since 2.0.0
        #
        def strip_query
          @agent_kwargs[:strip_query]
        end

        #
        # Sets whether to strip the `?query` components of links.
        #
        # @param [Boolean] new_value
        #
        # @return [Boolean]
        #
        # @since 2.0.0
        #
        def strip_query=(new_value)
          @agent_kwargs[:strip_query] = new_value
        end

        #
        # The list of URI schemes to allow spidering.
        #
        # Stored under the `:schemes` key of {#agent_kwargs}.
        #
        # @return [Array<String, Regexp>]
        #
        # @since 2.0.0
        #
        def visit_schemes
          @agent_kwargs[:schemes] ||= []
        end

        #
        # The list of URI hosts to allow spidering.
        #
        # Stored under the `:hosts` key of {#agent_kwargs}.
        #
        # @return [Array<String, Regexp>]
        #
        # @since 2.0.0
        #
        def visit_hosts
          @agent_kwargs[:hosts] ||= []
        end

        #
        # The list of URI ports to allow spidering.
        #
        # Stored under the `:ports` key of {#agent_kwargs}.
        #
        # @return [Array<Integer, Regexp>]
        #
        # @since 2.0.0
        #
        def visit_ports
          @agent_kwargs[:ports] ||= []
        end

        #
        # The list of URI links to allow spidering.
        #
        # Stored under the `:links` key of {#agent_kwargs}.
        #
        # @return [Array<String, Regexp>]
        #
        # @since 2.0.0
        #
        def visit_links
          @agent_kwargs[:links] ||= []
        end

        #
        # The list of URI file extensions to allow spidering.
        #
        # Stored under the `:exts` key of {#agent_kwargs}.
        #
        # @return [Array<String, Regexp>]
        #
        # @since 2.0.0
        #
        def visit_exts
          @agent_kwargs[:exts] ||= []
        end

        #
        # The list of URI schemes to ignore while spidering.
        #
        # @return [Array<String, Regexp>]
        #
        # @since 2.0.0
        #
        def ignore_schemes
          @agent_kwargs[:ignore_schemes] ||= []
        end

        #
        # The list of URI hosts to ignore while spidering.
        #
        # @return [Array<String, Regexp>]
        #
        # @since 2.0.0
        #
        def ignore_hosts
          @agent_kwargs[:ignore_hosts] ||= []
        end

        #
        # The list of URI ports to ignore while spidering.
        #
        # @return [Array<Integer, Regexp>]
        #
        # @since 2.0.0
        #
        def ignore_ports
          @agent_kwargs[:ignore_ports] ||= []
        end

        #
        # The list of URI links to ignore while spidering.
        #
        # @return [Array<String, Regexp>]
        #
        # @since 2.0.0
        #
        def ignore_links
          @agent_kwargs[:ignore_links] ||= []
        end

        #
        # The list of URI file extensions to ignore while spidering.
        #
        # @return [Array<String, Regexp>]
        #
        # @since 2.0.0
        #
        def ignore_exts
          @agent_kwargs[:ignore_exts] ||= []
        end

        #
        # Whether to honor the `robots.txt` file while spidering.
        #
        # @return [Boolean, nil]
        #   `nil` if the value has not been set.
        #
        # @since 2.0.0
        #
        def robots
          @agent_kwargs[:robots]
        end

        #
        # Sets whether to honor the `robots.txt` file while spidering.
        #
        # @param [Boolean] new_value
        #
        # @return [Boolean]
        #
        # @since 2.0.0
        #
        def robots=(new_value)
          @agent_kwargs[:robots] = new_value
        end
      end
    end
  end
end