ronin-web 1.0.2 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +3 -2
  3. data/.gitignore +1 -0
  4. data/.rubocop.yml +5 -0
  5. data/.ruby-version +1 -1
  6. data/ChangeLog.md +46 -1
  7. data/Gemfile +25 -12
  8. data/README.md +257 -51
  9. data/Rakefile +9 -0
  10. data/data/completions/ronin-web +203 -0
  11. data/gemspec.yml +18 -5
  12. data/lib/ronin/web/cli/browser_options.rb +92 -0
  13. data/lib/ronin/web/cli/browser_shell.rb +448 -0
  14. data/lib/ronin/web/cli/command.rb +1 -1
  15. data/lib/ronin/web/cli/commands/browser.rb +373 -0
  16. data/lib/ronin/web/cli/commands/completion.rb +63 -0
  17. data/lib/ronin/web/cli/commands/diff.rb +60 -8
  18. data/lib/ronin/web/cli/commands/html.rb +21 -33
  19. data/lib/ronin/web/cli/commands/irb.rb +1 -1
  20. data/lib/ronin/web/cli/commands/new/{webapp.rb → app.rb} +8 -8
  21. data/lib/ronin/web/cli/commands/new/nokogiri.rb +4 -4
  22. data/lib/ronin/web/cli/commands/new/server.rb +1 -1
  23. data/lib/ronin/web/cli/commands/new/spider.rb +1 -1
  24. data/lib/ronin/web/cli/commands/new.rb +5 -3
  25. data/lib/ronin/web/cli/commands/reverse_proxy.rb +1 -1
  26. data/lib/ronin/web/cli/commands/screenshot.rb +186 -0
  27. data/lib/ronin/web/cli/commands/server.rb +1 -1
  28. data/lib/ronin/web/cli/commands/session_cookie.rb +265 -0
  29. data/lib/ronin/web/cli/commands/spider.rb +61 -467
  30. data/lib/ronin/web/cli/commands/user_agent.rb +177 -0
  31. data/lib/ronin/web/cli/commands/vulns.rb +463 -0
  32. data/lib/ronin/web/cli/commands/wordlist.rb +484 -0
  33. data/lib/ronin/web/cli/commands/xml.rb +149 -0
  34. data/lib/ronin/web/cli/js_shell.rb +69 -0
  35. data/lib/ronin/web/cli/ruby_shell.rb +1 -1
  36. data/lib/ronin/web/cli/spider_options.rb +919 -0
  37. data/lib/ronin/web/cli.rb +3 -1
  38. data/lib/ronin/web/html.rb +1 -1
  39. data/lib/ronin/web/root.rb +1 -1
  40. data/lib/ronin/web/version.rb +2 -2
  41. data/lib/ronin/web/xml.rb +1 -1
  42. data/lib/ronin/web.rb +4 -364
  43. data/man/ronin-web-browser.1 +92 -0
  44. data/man/ronin-web-browser.1.md +96 -0
  45. data/man/ronin-web-completion.1 +76 -0
  46. data/man/ronin-web-completion.1.md +78 -0
  47. data/man/ronin-web-diff.1 +14 -21
  48. data/man/ronin-web-diff.1.md +13 -6
  49. data/man/ronin-web-html.1 +30 -46
  50. data/man/ronin-web-html.1.md +27 -17
  51. data/man/ronin-web-irb.1 +9 -16
  52. data/man/ronin-web-irb.1.md +6 -2
  53. data/man/ronin-web-new-app.1.md +39 -0
  54. data/man/ronin-web-new-nokogiri.1 +9 -20
  55. data/man/ronin-web-new-nokogiri.1.md +5 -5
  56. data/man/ronin-web-new-server.1 +11 -23
  57. data/man/ronin-web-new-server.1.md +5 -5
  58. data/man/ronin-web-new-spider.1 +44 -88
  59. data/man/ronin-web-new-spider.1.md +37 -37
  60. data/man/ronin-web-new.1 +18 -30
  61. data/man/ronin-web-new.1.md +15 -11
  62. data/man/ronin-web-reverse-proxy.1 +33 -38
  63. data/man/ronin-web-reverse-proxy.1.md +20 -14
  64. data/man/ronin-web-screenshot.1 +56 -0
  65. data/man/ronin-web-screenshot.1.md +56 -0
  66. data/man/ronin-web-server.1 +15 -29
  67. data/man/ronin-web-server.1.md +13 -9
  68. data/man/ronin-web-session-cookie.1 +38 -0
  69. data/man/ronin-web-session-cookie.1.md +41 -0
  70. data/man/ronin-web-spider.1 +121 -130
  71. data/man/ronin-web-spider.1.md +115 -66
  72. data/man/ronin-web-user-agent.1 +44 -0
  73. data/man/ronin-web-user-agent.1.md +46 -0
  74. data/man/ronin-web-vulns.1 +175 -0
  75. data/man/ronin-web-vulns.1.md +177 -0
  76. data/man/ronin-web-wordlist.1 +258 -0
  77. data/man/ronin-web-wordlist.1.md +263 -0
  78. data/man/ronin-web-xml.1 +43 -0
  79. data/man/ronin-web-xml.1.md +46 -0
  80. data/man/ronin-web.1 +67 -18
  81. data/man/ronin-web.1.md +55 -4
  82. data/scripts/setup +58 -0
  83. metadata +121 -30
  84. data/lib/ronin/web/mechanize.rb +0 -84
  85. data/man/ronin-web-new-webapp.1.md +0 -39
  86. /data/data/new/{webapp → app}/.gitignore +0 -0
  87. /data/data/new/{webapp → app}/.ruby-version.erb +0 -0
  88. /data/data/new/{webapp → app}/Dockerfile.erb +0 -0
  89. /data/data/new/{webapp → app}/Gemfile +0 -0
  90. /data/data/new/{webapp → app}/app.rb.erb +0 -0
  91. /data/data/new/{webapp → app}/config.ru +0 -0
  92. /data/data/new/{webapp → app}/docker-compose.yml.erb +0 -0
@@ -0,0 +1,919 @@
1
+ # frozen_string_literal: true
2
+ #
3
+ # ronin-web - A collection of useful web helper methods and commands.
4
+ #
5
+ # Copyright (c) 2006-2024 Hal Brodigan (postmodern.mod3 at gmail.com)
6
+ #
7
+ # ronin-web is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU General Public License as published by
9
+ # the Free Software Foundation, either version 3 of the License, or
10
+ # (at your option) any later version.
11
+ #
12
+ # ronin-web is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU General Public License
18
+ # along with ronin-web. If not, see <https://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ require 'ronin/web/spider'
22
+ require 'ronin/support/network/http/user_agents'
23
+
24
module Ronin
  module Web
    class CLI
      #
      # Adds options for spidering a website.
      #
      # Every option that configures the spider stores its value in
      # {#agent_kwargs}, which {#new_agent} then passes to the `Web::Spider`
      # constructor methods as keyword arguments.
      #
      # @since 2.0.0
      #
      module SpiderOptions
        #
        # Adds options for configuring a web spider and spidering a website.
        #
        # The option blocks assign through `self`, so they are presumably
        # evaluated in the context of the command instance.
        #
        # @param [Class<Command>] command
        #   The command class including {SpiderOptions}.
        #
        def self.included(command)
          command.usage '[options] {--host HOST | --domain DOMAIN | --site URL}'

          command.option :host,
                         value: {
                           type:  String,
                           usage: 'HOST'
                         },
                         desc: 'Spiders the specific HOST'

          command.option :domain,
                         value: {
                           type:  String,
                           usage: 'DOMAIN'
                         },
                         desc: 'Spiders the whole domain'

          command.option :site,
                         value: {
                           type:  String,
                           usage: 'URL'
                         },
                         desc: 'Spiders the website, starting at the URL'

          command.option :open_timeout,
                         value: {
                           type:    Integer,
                           usage:   'SECS',
                           default: Spidr.open_timeout
                         },
                         desc: 'Sets the connection open timeout' do |timeout|
            self.open_timeout = timeout
          end

          command.option :read_timeout,
                         value: {
                           type:    Integer,
                           usage:   'SECS',
                           default: Spidr.read_timeout
                         },
                         desc: 'Sets the read timeout' do |timeout|
            self.read_timeout = timeout
          end

          command.option :ssl_timeout,
                         value: {
                           type:    Integer,
                           usage:   'SECS',
                           default: Spidr.ssl_timeout
                         },
                         desc: 'Sets the SSL connection timeout' do |timeout|
            self.ssl_timeout = timeout
          end

          command.option :continue_timeout,
                         value: {
                           type:    Integer,
                           usage:   'SECS',
                           default: Spidr.continue_timeout
                         },
                         desc: 'Sets the continue timeout' do |timeout|
            self.continue_timeout = timeout
          end

          command.option :keep_alive_timeout,
                         value: {
                           type:    Integer,
                           usage:   'SECS',
                           default: Spidr.keep_alive_timeout
                         },
                         desc: 'Sets the connection keep alive timeout' do |timeout|
            self.keep_alive_timeout = timeout
          end

          command.option :proxy,
                         short: '-P',
                         value: {
                           type:  String,
                           usage: 'PROXY'
                         },
                         desc: 'Sets the proxy to use' do |proxy|
            self.proxy = proxy
          end

          command.option :header,
                         short: '-H',
                         value: {
                           type:  /\A[^\s:]+:.*\z/,
                           usage: 'NAME: VALUE'
                         },
                         desc: 'Sets a default header' do |header|
            # split on the first ':' only, so that values may contain ':'
            name, value = header.split(/:\s*/,2)

            self.default_headers[name] = value
          end

          command.option :host_header,
                         value: {
                           type:  /\A[^\s=]+=[^\s=]+\z/,
                           usage: 'NAME=VALUE'
                         },
                         desc: 'Sets the Host header to use for a host name' do |name_value|
            name, value = name_value.split('=',2)

            self.host_headers[name] = value
          end

          command.option :user_agent_string,
                         short: '-U',
                         value: {
                           type:  String,
                           usage: 'STRING'
                         },
                         desc: 'The User-Agent string to use' do |ua|
            self.user_agent = ua
          end

          command.option :user_agent,
                         short: '-u',
                         value: {
                           # expose the alias names in dashed form
                           # (ex: `chrome_linux` -> `chrome-linux`)
                           type: Support::Network::HTTP::UserAgents::ALIASES.transform_keys { |key|
                             key.to_s.tr('_','-')
                           }
                         },
                         desc: 'The User-Agent to use' do |name|
            self.user_agent = name
          end

          command.option :referer,
                         short: '-R',
                         value: {
                           type:  String,
                           usage: 'URL'
                         },
                         desc: 'Sets the Referer URL' do |referer|
            self.referer = referer
          end

          # NOTE: --delay deliberately has no short flag. It previously
          # declared `-d`, which --max-depth (defined below) also declares;
          # OptionParser resolves a repeated short switch to the last
          # definition, so `-d` has always meant --max-depth.
          command.option :delay,
                         value: {
                           type:  Numeric,
                           usage: 'SECS'
                         },
                         desc: 'Sets the delay in seconds between each request' do |delay|
            self.delay = delay
          end

          command.option :limit,
                         short: '-l',
                         value: {
                           type:  Integer,
                           usage: 'COUNT'
                         },
                         desc: 'Only spiders up to COUNT pages' do |limit|
            self.limit = limit
          end

          command.option :max_depth,
                         short: '-d',
                         value: {
                           type:  Integer,
                           usage: 'DEPTH'
                         },
                         desc: 'Only spiders up to max depth' do |depth|
            self.max_depth = depth
          end

          command.option :enqueue,
                         value: {
                           type:  String,
                           usage: 'URL'
                         },
                         desc: 'Adds the URL to the queue' do |url|
            self.queue << url
          end

          command.option :visited,
                         value: {
                           type:  String,
                           usage: 'URL'
                         },
                         desc: 'Marks the URL as previously visited' do |url|
            self.history << url
          end

          command.option :strip_fragments, desc: 'Enables/disables stripping the fragment component of every URL' do
            self.strip_fragments = true
          end

          command.option :strip_query, desc: 'Enables/disables stripping the query component of every URL' do
            self.strip_query = true
          end

          command.option :visit_scheme,
                         value: {
                           type:  String,
                           usage: 'SCHEME'
                         },
                         desc: 'Visit URLs with the URI scheme' do |scheme|
            self.visit_schemes << scheme
          end

          command.option :visit_schemes_like,
                         value: {
                           type:  Regexp,
                           usage: '/REGEX/'
                         },
                         desc: 'Visit URLs with URI schemes that match the REGEX' do |regex|
            self.visit_schemes << regex
          end

          command.option :ignore_scheme,
                         value: {
                           type:  String,
                           usage: 'SCHEME'
                         },
                         desc: 'Ignore the URLs with the URI scheme' do |scheme|
            self.ignore_schemes << scheme
          end

          command.option :ignore_schemes_like,
                         value: {
                           type:  Regexp,
                           usage: '/REGEX/'
                         },
                         desc: 'Ignore the URLs with URI schemes matching the REGEX' do |regex|
            self.ignore_schemes << regex
          end

          command.option :visit_host,
                         value: {
                           type:  String,
                           usage: 'HOST'
                         },
                         desc: 'Visit URLs with the matching host name' do |host|
            self.visit_hosts << host
          end

          command.option :visit_hosts_like,
                         value: {
                           type:  Regexp,
                           usage: '/REGEX/'
                         },
                         desc: 'Visit URLs with hostnames that match the REGEX' do |regex|
            self.visit_hosts << regex
          end

          command.option :ignore_host,
                         value: {
                           type:  String,
                           usage: 'HOST'
                         },
                         desc: 'Ignore the host name' do |host|
            self.ignore_hosts << host
          end

          command.option :ignore_hosts_like,
                         value: {
                           type:  Regexp,
                           usage: '/REGEX/'
                         },
                         desc: 'Ignore the host names matching the REGEX' do |regex|
            self.ignore_hosts << regex
          end

          command.option :visit_port,
                         value: {
                           type:  Integer,
                           usage: 'PORT'
                         },
                         desc: 'Visit URLs with the matching port number' do |port|
            self.visit_ports << port
          end

          command.option :visit_ports_like,
                         value: {
                           type:  Regexp,
                           usage: '/REGEX/'
                         },
                         desc: 'Visit URLs with port numbers that match the REGEX' do |regex|
            self.visit_ports << regex
          end

          command.option :ignore_port,
                         value: {
                           type:  Integer,
                           usage: 'PORT'
                         },
                         desc: 'Ignore the port number' do |port|
            self.ignore_ports << port
          end

          command.option :ignore_ports_like,
                         value: {
                           type:  Regexp,
                           usage: '/REGEX/'
                         },
                         desc: 'Ignore the port numbers matching the REGEX' do |regex|
            self.ignore_ports << regex
          end

          command.option :visit_link,
                         value: {
                           type:  String,
                           usage: 'URL'
                         },
                         desc: 'Visit the URL' do |link|
            self.visit_links << link
          end

          command.option :visit_links_like,
                         value: {
                           type:  Regexp,
                           usage: '/REGEX/'
                         },
                         desc: 'Visit URLs that match the REGEX' do |regex|
            self.visit_links << regex
          end

          command.option :ignore_link,
                         value: {
                           type:  String,
                           usage: 'URL'
                         },
                         desc: 'Ignore the URL' do |link|
            self.ignore_links << link
          end

          command.option :ignore_links_like,
                         value: {
                           type:  Regexp,
                           usage: '/REGEX/'
                         },
                         desc: 'Ignore URLs matching the REGEX' do |regex|
            self.ignore_links << regex
          end

          command.option :visit_ext,
                         value: {
                           type:  String,
                           usage: 'FILE_EXT'
                         },
                         desc: 'Visit URLs with the matching file ext' do |ext|
            self.visit_exts << ext
          end

          command.option :visit_exts_like,
                         value: {
                           type:  Regexp,
                           usage: '/REGEX/'
                         },
                         desc: 'Visit URLs with file exts that match the REGEX' do |regex|
            self.visit_exts << regex
          end

          command.option :ignore_ext,
                         value: {
                           type:  String,
                           usage: 'FILE_EXT'
                         },
                         desc: 'Ignore the URLs with the file ext' do |ext|
            self.ignore_exts << ext
          end

          command.option :ignore_exts_like,
                         value: {
                           type:  Regexp,
                           usage: '/REGEX/'
                         },
                         desc: 'Ignore URLs with file exts matching the REGEX' do |regex|
            self.ignore_exts << regex
          end

          command.option :robots,
                         short: '-r',
                         desc:  'Specifies whether to honor robots.txt' do
            self.robots = true
          end
        end

        # Keyword arguments to initialize a new `Spidr::Agent`.
        #
        # @return [Hash{Symbol => Object}]
        #
        # @since 2.0.0
        attr_reader :agent_kwargs

        #
        # Initializes the command.
        #
        # @param [Hash{Symbol => Object}] kwargs
        #   Additional keyword arguments.
        #
        def initialize(**kwargs)
          super(**kwargs)

          @agent_kwargs = {}
        end

        #
        # Creates a new web spider agent.
        #
        # The spidering scope is chosen from the `--host`, `--domain`, or
        # `--site` option, checked in that order. If none of them were given,
        # an error is printed and the process exits.
        #
        # @yield [agent]
        #   The given block will be given the newly created and configured
        #   web spider agent.
        #
        # @yieldparam [Ronin::Web::Spider::Agent] agent
        #   The newly created web spider agent.
        #
        # @return [Ronin::Web::Spider::Agent]
        #   The newly created web spider agent, after the agent has completed
        #   its spidering.
        #
        def new_agent(&block)
          if options[:host]
            Web::Spider.host(options[:host],**agent_kwargs,&block)
          elsif options[:domain]
            Web::Spider.domain(options[:domain],**agent_kwargs,&block)
          elsif options[:site]
            Web::Spider.site(options[:site],**agent_kwargs,&block)
          else
            print_error "must specify --host, --domain, or --site"
            exit(-1)
          end
        end

        #
        # The open connection timeout.
        #
        # @return [Integer, nil]
        #
        # @since 2.0.0
        #
        def open_timeout
          @agent_kwargs[:open_timeout]
        end

        #
        # Sets the open connection timeout.
        #
        # @param [Integer] new_timeout
        #
        # @return [Integer]
        #
        # @since 2.0.0
        #
        def open_timeout=(new_timeout)
          @agent_kwargs[:open_timeout] = new_timeout
        end

        #
        # The read timeout.
        #
        # @return [Integer, nil]
        #
        # @since 2.0.0
        #
        def read_timeout
          @agent_kwargs[:read_timeout]
        end

        #
        # Sets the read timeout.
        #
        # @param [Integer] new_timeout
        #
        # @return [Integer]
        #
        # @since 2.0.0
        #
        def read_timeout=(new_timeout)
          @agent_kwargs[:read_timeout] = new_timeout
        end

        #
        # The SSL timeout.
        #
        # @return [Integer, nil]
        #
        # @since 2.0.0
        #
        def ssl_timeout
          @agent_kwargs[:ssl_timeout]
        end

        #
        # Sets the SSL timeout.
        #
        # @param [Integer] new_timeout
        #
        # @return [Integer]
        #
        # @since 2.0.0
        #
        def ssl_timeout=(new_timeout)
          @agent_kwargs[:ssl_timeout] = new_timeout
        end

        #
        # The continue timeout.
        #
        # @return [Integer, nil]
        #
        # @since 2.0.0
        #
        def continue_timeout
          @agent_kwargs[:continue_timeout]
        end

        #
        # Sets the continue timeout.
        #
        # @param [Integer] new_timeout
        #
        # @return [Integer]
        #
        # @since 2.0.0
        #
        def continue_timeout=(new_timeout)
          @agent_kwargs[:continue_timeout] = new_timeout
        end

        #
        # The `Keep-Alive` timeout.
        #
        # @return [Integer, nil]
        #
        # @since 2.0.0
        #
        def keep_alive_timeout
          @agent_kwargs[:keep_alive_timeout]
        end

        #
        # Sets the `Keep-Alive` timeout.
        #
        # @param [Integer] new_timeout
        #
        # @return [Integer]
        #
        # @since 2.0.0
        #
        def keep_alive_timeout=(new_timeout)
          @agent_kwargs[:keep_alive_timeout] = new_timeout
        end

        #
        # The proxy to use for spidering.
        #
        # @return [String, nil]
        #
        # @since 2.0.0
        #
        def proxy
          @agent_kwargs[:proxy]
        end

        #
        # Sets the proxy to use for spidering.
        #
        # @param [String] new_proxy
        #   The new proxy URI.
        #
        # @return [String]
        #
        # @since 2.0.0
        #
        def proxy=(new_proxy)
          @agent_kwargs[:proxy] = new_proxy
        end

        #
        # The default headers to send with every request.
        #
        # The Hash is lazily created on first access.
        #
        # @return [Hash{String => String}]
        #
        # @since 2.0.0
        #
        def default_headers
          @agent_kwargs[:default_headers] ||= {}
        end

        #
        # The `Host` header values, keyed by the `NAME` part of each
        # `--host-header NAME=VALUE` option.
        #
        # The Hash is lazily created on first access.
        #
        # @return [Hash{String => String}]
        #
        # @since 2.0.0
        #
        def host_headers
          @agent_kwargs[:host_headers] ||= {}
        end

        #
        # The `User-Agent` header to use for spidering.
        #
        # @return [String, nil]
        #
        # @since 2.0.0
        #
        def user_agent
          @agent_kwargs[:user_agent]
        end

        #
        # Sets the new `User-Agent` header to use for spidering.
        #
        # @param [String] new_user_agent
        #
        # @return [String]
        #
        # @since 2.0.0
        #
        def user_agent=(new_user_agent)
          @agent_kwargs[:user_agent] = new_user_agent
        end

        #
        # The `Referer` header to use for spidering.
        #
        # @return [String, nil]
        #
        # @since 2.0.0
        #
        def referer
          @agent_kwargs[:referer]
        end

        #
        # Sets the `Referer` header to use for spidering.
        #
        # @param [String] new_referer
        #
        # @return [String, nil]
        #
        # @since 2.0.0
        #
        def referer=(new_referer)
          @agent_kwargs[:referer] = new_referer
        end

        #
        # The amount of seconds to pause between each request.
        #
        # @return [Integer, Float, nil]
        #
        # @since 2.0.0
        #
        def delay
          @agent_kwargs[:delay]
        end

        #
        # Sets the amount of seconds to pause between each request.
        #
        # @param [Integer, Float] new_delay
        #
        # @return [Integer, Float]
        #
        # @since 2.0.0
        #
        def delay=(new_delay)
          @agent_kwargs[:delay] = new_delay
        end

        #
        # The limit to how many URLs to visit.
        #
        # @return [Integer, nil]
        #
        # @since 2.0.0
        #
        def limit
          @agent_kwargs[:limit]
        end

        #
        # Sets the limit of how many URLs to visit.
        #
        # @param [Integer] new_limit
        #
        # @return [Integer]
        #
        # @since 2.0.0
        #
        def limit=(new_limit)
          @agent_kwargs[:limit] = new_limit
        end

        #
        # The maximum depth to spider.
        #
        # @return [Integer, nil]
        #
        # @since 2.0.0
        #
        def max_depth
          @agent_kwargs[:max_depth]
        end

        #
        # Sets the maximum depth to spider.
        #
        # @param [Integer] new_max_depth
        #
        # @return [Integer]
        #
        # @since 2.0.0
        #
        def max_depth=(new_max_depth)
          @agent_kwargs[:max_depth] = new_max_depth
        end

        #
        # The pre-existing queue of URLs to start spidering.
        #
        # The Array is lazily created on first access.
        #
        # @return [Array<String>]
        #
        # @since 2.0.0
        #
        def queue
          @agent_kwargs[:queue] ||= []
        end

        #
        # The pre-existing history of URLs that have already been spidered.
        #
        # The Array is lazily created on first access.
        #
        # @return [Array<String>]
        #
        # @since 2.0.0
        #
        def history
          @agent_kwargs[:history] ||= []
        end

        #
        # Whether to strip the `#fragment` components of links.
        #
        # @return [Boolean, nil]
        #   `nil` if the option was never set.
        #
        # @since 2.0.0
        #
        def strip_fragments
          @agent_kwargs[:strip_fragments]
        end

        #
        # Sets whether to strip the `#fragment` components of links.
        #
        # @param [Boolean] new_value
        #
        # @return [Boolean]
        #
        # @since 2.0.0
        #
        def strip_fragments=(new_value)
          @agent_kwargs[:strip_fragments] = new_value
        end

        #
        # Whether to strip the `?query` components of links.
        #
        # @return [Boolean, nil]
        #   `nil` if the option was never set.
        #
        # @since 2.0.0
        #
        def strip_query
          @agent_kwargs[:strip_query]
        end

        #
        # Sets whether to strip the `?query` components of links.
        #
        # @param [Boolean] new_value
        #
        # @return [Boolean]
        #
        # @since 2.0.0
        #
        def strip_query=(new_value)
          @agent_kwargs[:strip_query] = new_value
        end

        #
        # The list of URI schemes to allow spidering.
        #
        # Stored under the `:schemes` key of {#agent_kwargs}.
        #
        # @return [Array<String, Regexp>]
        #
        # @since 2.0.0
        #
        def visit_schemes
          @agent_kwargs[:schemes] ||= []
        end

        #
        # The list of URI hosts to allow spidering.
        #
        # Stored under the `:hosts` key of {#agent_kwargs}.
        #
        # @return [Array<String, Regexp>]
        #
        # @since 2.0.0
        #
        def visit_hosts
          @agent_kwargs[:hosts] ||= []
        end

        #
        # The list of URI ports to allow spidering.
        #
        # Stored under the `:ports` key of {#agent_kwargs}.
        #
        # @return [Array<Integer, Regexp>]
        #
        # @since 2.0.0
        #
        def visit_ports
          @agent_kwargs[:ports] ||= []
        end

        #
        # The list of URI links to allow spidering.
        #
        # Stored under the `:links` key of {#agent_kwargs}.
        #
        # @return [Array<String, Regexp>]
        #
        # @since 2.0.0
        #
        def visit_links
          @agent_kwargs[:links] ||= []
        end

        #
        # The list of URI file extensions to allow spidering.
        #
        # Stored under the `:exts` key of {#agent_kwargs}.
        #
        # @return [Array<String, Regexp>]
        #
        # @since 2.0.0
        #
        def visit_exts
          @agent_kwargs[:exts] ||= []
        end

        #
        # The list of URI schemes to ignore while spidering.
        #
        # @return [Array<String, Regexp>]
        #
        # @since 2.0.0
        #
        def ignore_schemes
          @agent_kwargs[:ignore_schemes] ||= []
        end

        #
        # The list of URI hosts to ignore while spidering.
        #
        # @return [Array<String, Regexp>]
        #
        # @since 2.0.0
        #
        def ignore_hosts
          @agent_kwargs[:ignore_hosts] ||= []
        end

        #
        # The list of URI ports to ignore while spidering.
        #
        # @return [Array<Integer, Regexp>]
        #
        # @since 2.0.0
        #
        def ignore_ports
          @agent_kwargs[:ignore_ports] ||= []
        end

        #
        # The list of URI links to ignore while spidering.
        #
        # @return [Array<String, Regexp>]
        #
        # @since 2.0.0
        #
        def ignore_links
          @agent_kwargs[:ignore_links] ||= []
        end

        #
        # The list of URI file extensions to ignore while spidering.
        #
        # @return [Array<String, Regexp>]
        #
        # @since 2.0.0
        #
        def ignore_exts
          @agent_kwargs[:ignore_exts] ||= []
        end

        #
        # Whether to honor the `robots.txt` file while spidering.
        #
        # @return [Boolean, nil]
        #   `nil` if the option was never set.
        #
        # @since 2.0.0
        #
        def robots
          @agent_kwargs[:robots]
        end

        #
        # Sets whether to honor the `robots.txt` file while spidering.
        #
        # @param [Boolean] new_value
        #
        # @return [Boolean]
        #
        # @since 2.0.0
        #
        def robots=(new_value)
          @agent_kwargs[:robots] = new_value
        end
      end
    end
  end
end