ronin-web 0.3.0.rc1 → 1.0.0.beta1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.editorconfig +11 -0
- data/.github/workflows/ruby.yml +31 -0
- data/.gitignore +13 -0
- data/.mailmap +1 -0
- data/.ruby-version +1 -0
- data/COPYING.txt +3 -3
- data/ChangeLog.md +115 -70
- data/Gemfile +42 -37
- data/README.md +159 -145
- data/Rakefile +12 -3
- data/bin/ronin-web +9 -17
- data/data/new/nokogiri.rb.erb +12 -0
- data/data/new/server.rb.erb +22 -0
- data/data/new/spider.rb.erb +26 -0
- data/data/new/webapp/.gitignore +15 -0
- data/data/new/webapp/.ruby-version.erb +1 -0
- data/data/new/webapp/Dockerfile.erb +11 -0
- data/data/new/webapp/Gemfile +6 -0
- data/data/new/webapp/app.rb.erb +15 -0
- data/data/new/webapp/config.ru +4 -0
- data/data/new/webapp/docker-compose.yml.erb +9 -0
- data/gemspec.yml +32 -14
- data/lib/ronin/web/cli/command.rb +36 -0
- data/lib/ronin/web/cli/commands/diff.rb +106 -0
- data/lib/ronin/web/cli/commands/html.rb +174 -0
- data/lib/ronin/web/cli/commands/irb.rb +56 -0
- data/lib/ronin/web/cli/commands/new/nokogiri.rb +85 -0
- data/lib/ronin/web/cli/commands/new/server.rb +96 -0
- data/lib/ronin/web/cli/commands/new/spider.rb +315 -0
- data/lib/ronin/web/cli/commands/new/webapp.rb +123 -0
- data/lib/ronin/web/cli/commands/new.rb +64 -0
- data/lib/ronin/web/cli/commands/reverse_proxy.rb +215 -0
- data/lib/ronin/web/cli/commands/server.rb +155 -0
- data/lib/ronin/web/cli/commands/spider.rb +822 -0
- data/lib/ronin/web/cli/ruby_shell.rb +50 -0
- data/lib/ronin/web/cli.rb +44 -0
- data/lib/ronin/web/html.rb +85 -0
- data/lib/ronin/web/mechanize.rb +34 -36
- data/lib/ronin/web/root.rb +27 -0
- data/lib/ronin/web/version.rb +7 -10
- data/lib/ronin/web/xml.rb +85 -0
- data/lib/ronin/web.rb +372 -13
- data/man/ronin-web-diff.1 +41 -0
- data/man/ronin-web-diff.1.md +30 -0
- data/man/ronin-web-html.1 +89 -0
- data/man/ronin-web-html.1.md +66 -0
- data/man/ronin-web-irb.1 +31 -0
- data/man/ronin-web-irb.1.md +22 -0
- data/man/ronin-web-new-nokogiri.1 +41 -0
- data/man/ronin-web-new-nokogiri.1.md +30 -0
- data/man/ronin-web-new-server.1 +45 -0
- data/man/ronin-web-new-server.1.md +33 -0
- data/man/ronin-web-new-spider.1 +173 -0
- data/man/ronin-web-new-spider.1.md +129 -0
- data/man/ronin-web-new-webapp.1 +53 -0
- data/man/ronin-web-new-webapp.1.md +39 -0
- data/man/ronin-web-new.1 +59 -0
- data/man/ronin-web-new.1.md +44 -0
- data/man/ronin-web-reverse-proxy.1 +63 -0
- data/man/ronin-web-reverse-proxy.1.md +47 -0
- data/man/ronin-web-server.1 +59 -0
- data/man/ronin-web-server.1.md +43 -0
- data/man/ronin-web-spider.1 +225 -0
- data/man/ronin-web-spider.1.md +168 -0
- data/man/ronin-web.1 +41 -0
- data/man/ronin-web.1.md +30 -0
- data/ronin-web.gemspec +39 -109
- data/spec/cli/ruby_shell_spec.rb +14 -0
- data/spec/html_spec.rb +43 -0
- data/spec/mechanize_spec.rb +72 -0
- data/spec/spec_helper.rb +5 -3
- data/spec/web_spec.rb +97 -0
- data/spec/xml_spec.rb +42 -0
- metadata +236 -224
- data/.gemtest +0 -0
- data/data/ronin/web/user_agents.yml +0 -247
- data/lib/ronin/network/mixins/web.rb +0 -258
- data/lib/ronin/web/config.rb +0 -34
- data/lib/ronin/web/extensions/nokogiri/xml/attr.rb +0 -47
- data/lib/ronin/web/extensions/nokogiri/xml/document.rb +0 -48
- data/lib/ronin/web/extensions/nokogiri/xml/element.rb +0 -57
- data/lib/ronin/web/extensions/nokogiri/xml/node.rb +0 -86
- data/lib/ronin/web/extensions/nokogiri/xml/text.rb +0 -47
- data/lib/ronin/web/extensions/nokogiri/xml.rb +0 -27
- data/lib/ronin/web/extensions/nokogiri.rb +0 -23
- data/lib/ronin/web/extensions.rb +0 -23
- data/lib/ronin/web/middleware/base.rb +0 -144
- data/lib/ronin/web/middleware/directories.rb +0 -179
- data/lib/ronin/web/middleware/files.rb +0 -144
- data/lib/ronin/web/middleware/filters/campaign_filter.rb +0 -77
- data/lib/ronin/web/middleware/filters/ip_filter.rb +0 -73
- data/lib/ronin/web/middleware/filters/path_filter.rb +0 -73
- data/lib/ronin/web/middleware/filters/referer_filter.rb +0 -71
- data/lib/ronin/web/middleware/filters/user_agent_filter.rb +0 -71
- data/lib/ronin/web/middleware/filters/vhost_filter.rb +0 -71
- data/lib/ronin/web/middleware/filters.rb +0 -28
- data/lib/ronin/web/middleware/helpers.rb +0 -145
- data/lib/ronin/web/middleware/proxy.rb +0 -265
- data/lib/ronin/web/middleware/proxy_request.rb +0 -262
- data/lib/ronin/web/middleware/request.rb +0 -79
- data/lib/ronin/web/middleware/response.rb +0 -33
- data/lib/ronin/web/middleware/router.rb +0 -167
- data/lib/ronin/web/middleware/rule.rb +0 -103
- data/lib/ronin/web/middleware.rb +0 -27
- data/lib/ronin/web/proxy/app.rb +0 -32
- data/lib/ronin/web/proxy/base.rb +0 -46
- data/lib/ronin/web/proxy/web.rb +0 -46
- data/lib/ronin/web/proxy.rb +0 -25
- data/lib/ronin/web/server/app.rb +0 -32
- data/lib/ronin/web/server/base.rb +0 -461
- data/lib/ronin/web/server/web.rb +0 -66
- data/lib/ronin/web/server.rb +0 -25
- data/lib/ronin/web/spider.rb +0 -120
- data/lib/ronin/web/user_agents.rb +0 -196
- data/lib/ronin/web/web.rb +0 -560
- data/spec/helpers/output.rb +0 -3
- data/spec/web/extensions/nokogiri_spec.rb +0 -38
- data/spec/web/helpers/rack_app.rb +0 -24
- data/spec/web/helpers/root/test1/index.html +0 -1
- data/spec/web/helpers/root/test1/test1.txt +0 -1
- data/spec/web/helpers/root/test1.txt +0 -1
- data/spec/web/helpers/root/test2/test2.txt +0 -1
- data/spec/web/helpers/root/test2.txt +0 -1
- data/spec/web/helpers/root/test3/test3.txt +0 -1
- data/spec/web/helpers/root/test3.txt +0 -1
- data/spec/web/helpers/root.rb +0 -15
- data/spec/web/mechanize_spec.rb +0 -62
- data/spec/web/middleware/directories_spec.rb +0 -86
- data/spec/web/middleware/files_spec.rb +0 -57
- data/spec/web/middleware/filters/campaign_filter_spec.rb +0 -30
- data/spec/web/middleware/filters/ip_filter_spec.rb +0 -25
- data/spec/web/middleware/filters/path_filter_spec.rb +0 -29
- data/spec/web/middleware/filters/referer_filter_spec.rb +0 -25
- data/spec/web/middleware/filters/user_agent_filter_spec.rb +0 -25
- data/spec/web/middleware/filters/vhost_filter_spec.rb +0 -23
- data/spec/web/middleware/proxy_spec.rb +0 -67
- data/spec/web/middleware/response_spec.rb +0 -20
- data/spec/web/middleware/router_spec.rb +0 -65
- data/spec/web/middleware/rule_spec.rb +0 -37
- data/spec/web/proxy/base_spec.rb +0 -8
- data/spec/web/server/base_spec.rb +0 -77
- data/spec/web/server/classes/public1/static1.txt +0 -1
- data/spec/web/server/classes/public2/static2.txt +0 -1
- data/spec/web/server/classes/sub_app.rb +0 -13
- data/spec/web/server/classes/test_app.rb +0 -20
- data/spec/web/user_agents_spec.rb +0 -56
- data/spec/web/web_spec.rb +0 -101
@@ -0,0 +1,822 @@
|
|
1
|
+
#
|
2
|
+
# ronin-web - A collection of useful web helper methods and commands.
|
3
|
+
#
|
4
|
+
# Copyright (c) 2006-2022 Hal Brodigan (postmodern.mod3 at gmail.com)
|
5
|
+
#
|
6
|
+
# ronin-web is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation, either version 3 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# ronin-web is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with ronin-web. If not, see <https://www.gnu.org/licenses/>.
|
18
|
+
#
|
19
|
+
|
20
|
+
require 'ronin/web/cli/command'
|
21
|
+
require 'ronin/web/spider'
|
22
|
+
require 'ronin/web/spider/archive'
|
23
|
+
require 'ronin/web/spider/git_archive'
|
24
|
+
require 'ronin/support/network/http/user_agents'
|
25
|
+
|
26
|
+
require 'command_kit/colors'
|
27
|
+
require 'command_kit/printing/indent'
|
28
|
+
require 'command_kit/options/verbose'
|
29
|
+
|
30
|
+
module Ronin
|
31
|
+
module Web
|
32
|
+
class CLI
|
33
|
+
module Commands
|
34
|
+
#
|
35
|
+
# Spiders a website.
|
36
|
+
#
|
37
|
+
# ## Usage
|
38
|
+
#
|
39
|
+
# ronin-web spider [options] {--host HOST | --domain DOMAIN | --site URL}
|
40
|
+
#
|
41
|
+
# ## Options
|
42
|
+
#
|
43
|
+
# -v, --verbose Enables verbose output
|
44
|
+
# --open-timeout SECS Sets the connection open timeout
|
45
|
+
# --read-timeout SECS Sets the read timeout
|
46
|
+
# --ssl-timeout SECS Sets the SSL connection timeout
|
47
|
+
# --continue-timeout SECS Sets the continue timeout
|
48
|
+
# --keep-alive-timeout SECS Sets the connection keep alive timeout
|
49
|
+
# -P, --proxy PROXY Sets the proxy to use.
|
50
|
+
# -H, --header NAME: VALUE Sets a default header
|
51
|
+
# --host-header NAME=VALUE Sets a default header
|
52
|
+
# -u chrome_linux|chrome_macos|chrome_windows|chrome_iphone|chrome_ipad|chrome_android|firefox_linux|firefox_macos|firefox_windows|firefox_iphone|firefox_ipad|firefox_android|safari_macos|safari_iphone|safari_ipad|edge,
|
53
|
+
# --user-agent The User-Agent to use
|
54
|
+
# -U, --user-agent-string STRING The User-Agent string to use
|
55
|
+
# -R, --referer URL Sets the Referer URL
|
56
|
+
# --delay SECS Sets the delay in seconds between each request
|
57
|
+
# -l, --limit COUNT Only spiders up to COUNT pages
|
58
|
+
# -d, --max-depth DEPTH Only spiders up to max depth
|
59
|
+
# --enqueue URL Adds the URL to the queue
|
60
|
+
# --visited URL Marks the URL as previously visited
|
61
|
+
# --strip-fragments Enables/disables stripping the fragment component of every URL
|
62
|
+
# --strip-query Enables/disables stripping the query component of every URL
|
63
|
+
# --visit-host HOST Visit URLs with the matching host name
|
64
|
+
# --visit-hosts-like /REGEX/ Visit URLs with hostnames that match the REGEX
|
65
|
+
# --ignore-host HOST Ignore the host name
|
66
|
+
# --ignore-hosts-like /REGEX/ Ignore the host names matching the REGEX
|
67
|
+
# --visit-port PORT Visit URLs with the matching port number
|
68
|
+
# --visit-ports-like /REGEX/ Visit URLs with port numbers that match the REGEX
|
69
|
+
# --ignore-port PORT Ignore the port number
|
70
|
+
# --ignore-ports-like /REGEX/ Ignore the port numbers matching the REGEXP
|
71
|
+
# --visit-link URL Visit the URL
|
72
|
+
# --visit-links-like /REGEX/ Visit URLs that match the REGEX
|
73
|
+
# --ignore-link URL Ignore the URL
|
74
|
+
# --ignore-links-like /REGEX/ Ignore URLs matching the REGEX
|
75
|
+
# --visit-ext FILE_EXT Visit URLs with the matching file ext
|
76
|
+
# --visit-exts-like /REGEX/ Visit URLs with file exts that match the REGEX
|
77
|
+
# --ignore-ext FILE_EXT Ignore the URLs with the file ext
|
78
|
+
# --ignore-exts-like /REGEX/ Ignore URLs with file exts matching the REGEX
|
79
|
+
# -r, --robots Specifies whether to honor robots.txt
|
80
|
+
# --host HOST Spiders the specific HOST
|
81
|
+
# --domain DOMAIN Spiders the whole domain
|
82
|
+
# --site URL Spiders the website, starting at the URL
|
83
|
+
# --print-status Print the status codes for each URL
|
84
|
+
# --print-headers Print response headers for each URL
|
85
|
+
# --print-header NAME Prints a specific header
|
86
|
+
# --archive DIR Archive every visited page to the DIR
|
87
|
+
# --git-archive DIR Archive every visited page to the git repository
|
88
|
+
# -X, --xpath XPATH Evaluates the XPath on each HTML page
|
89
|
+
# -C, --css-path XPATH Evaluates the CSS-path on each HTML page
|
90
|
+
# -h, --help Print help information
|
91
|
+
#
|
92
|
+
# ## Examples
|
93
|
+
#
|
94
|
+
# ronin-web spider --host scanme.nmap.org
|
95
|
+
# ronin-web spider --domain nmap.org
|
96
|
+
# ronin-web spider --site https://scanme.nmap.org/
|
97
|
+
#
|
98
|
+
class Spider < Command
|
99
|
+
|
100
|
+
include CommandKit::Colors
|
101
|
+
include CommandKit::Printing::Indent
|
102
|
+
include CommandKit::Options::Verbose
|
103
|
+
|
104
|
+
usage '[options] {--host HOST | --domain DOMAIN | --site URL}'
|
105
|
+
|
106
|
+
option :open_timeout, value: {
|
107
|
+
type: Integer,
|
108
|
+
usage: 'SECS',
|
109
|
+
default: Spidr.open_timeout
|
110
|
+
},
|
111
|
+
desc: 'Sets the connection open timeout'
|
112
|
+
|
113
|
+
option :read_timeout, value: {
|
114
|
+
type: Integer,
|
115
|
+
usage: 'SECS',
|
116
|
+
default: Spidr.read_timeout
|
117
|
+
},
|
118
|
+
desc: 'Sets the read timeout'
|
119
|
+
|
120
|
+
option :ssl_timeout, value: {
|
121
|
+
type: Integer,
|
122
|
+
usage: 'SECS',
|
123
|
+
default: Spidr.ssl_timeout
|
124
|
+
},
|
125
|
+
desc: 'Sets the SSL connection timeout'
|
126
|
+
|
127
|
+
option :continue_timeout, value: {
|
128
|
+
type: Integer,
|
129
|
+
usage: 'SECS',
|
130
|
+
default: Spidr.continue_timeout
|
131
|
+
},
|
132
|
+
desc: 'Sets the continue timeout'
|
133
|
+
|
134
|
+
option :keep_alive_timeout, value: {
|
135
|
+
type: Integer,
|
136
|
+
usage: 'SECS',
|
137
|
+
default: Spidr.keep_alive_timeout
|
138
|
+
},
|
139
|
+
desc: 'Sets the connection keep alive timeout'
|
140
|
+
|
141
|
+
option :proxy, short: '-P',
|
142
|
+
value: {
|
143
|
+
type: String,
|
144
|
+
usage: 'PROXY'
|
145
|
+
},
|
146
|
+
desc: 'Sets the proxy to use'
|
147
|
+
|
148
|
+
option :header, short: '-H',
|
149
|
+
value: {
|
150
|
+
type: /\A[^\s:]+:.*\z/,
|
151
|
+
usage: 'NAME: VALUE'
|
152
|
+
},
|
153
|
+
desc: 'Sets a default header' do |header|
|
154
|
+
name, value = header.split(/:\s*/,2)
|
155
|
+
|
156
|
+
@default_headers[name] = value
|
157
|
+
end
|
158
|
+
|
159
|
+
option :host_header, value: {
|
160
|
+
type: /\A[^\s=]+=[^\s=]+\z/,
|
161
|
+
usage: 'NAME=VALUE'
|
162
|
+
},
|
163
|
+
desc: 'Sets a default header' do |name_value|
|
164
|
+
name, value = name_value.split('=',2)
|
165
|
+
|
166
|
+
@host_headers[name] = value
|
167
|
+
end
|
168
|
+
|
169
|
+
option :user_agent, value: {
|
170
|
+
type: String,
|
171
|
+
usage: 'USER-AGENT'
|
172
|
+
},
|
173
|
+
desc: 'Sets the User-Agent string'
|
174
|
+
|
175
|
+
option :user_agent_string, short: '-U',
|
176
|
+
value: {
|
177
|
+
type: String,
|
178
|
+
usage: 'STRING'
|
179
|
+
},
|
180
|
+
desc: 'The User-Agent string to use' do |ua|
|
181
|
+
@user_agent = ua
|
182
|
+
end
|
183
|
+
|
184
|
+
option :user_agent, short: '-u',
|
185
|
+
value: {
|
186
|
+
type: Support::Network::HTTP::UserAgents::ALIASES.keys
|
187
|
+
},
|
188
|
+
desc: 'The User-Agent to use' do |name|
|
189
|
+
@user_agent = name
|
190
|
+
end
|
191
|
+
|
192
|
+
option :referer, short: '-R',
|
193
|
+
value: {
|
194
|
+
type: String,
|
195
|
+
usage: 'URL'
|
196
|
+
},
|
197
|
+
desc: 'Sets the Referer URL'
|
198
|
+
|
199
|
+
# Fix: `--delay` previously claimed `short: '-d'`, which collided with
# `option :max_depth, short: '-d'` defined below. The usage text in the
# class header documents `--delay SECS` with no short flag and reserves
# `-d` for `--max-depth`, so the duplicate short flag is removed here.
option :delay, value: {
                 type: Numeric,
                 usage: 'SECS'
               },
               desc: 'Sets the delay in seconds between each request'
|
205
|
+
|
206
|
+
option :limit, short: '-l',
|
207
|
+
value: {
|
208
|
+
type: Integer,
|
209
|
+
usage: 'COUNT'
|
210
|
+
},
|
211
|
+
desc: 'Only spiders up to COUNT pages'
|
212
|
+
|
213
|
+
option :max_depth, short: '-d',
|
214
|
+
value: {
|
215
|
+
type: Integer,
|
216
|
+
usage: 'DEPTH',
|
217
|
+
},
|
218
|
+
desc: 'Only spiders up to max depth'
|
219
|
+
|
220
|
+
option :enqueue, value: {
|
221
|
+
type: String,
|
222
|
+
usage: 'URL'
|
223
|
+
},
|
224
|
+
desc: 'Adds the URL to the queue' do |url|
|
225
|
+
@queue << url
|
226
|
+
end
|
227
|
+
|
228
|
+
option :visited, value: {
|
229
|
+
type: String,
|
230
|
+
usage: 'URL'
|
231
|
+
},
|
232
|
+
desc: 'Marks the URL as previously visited' do |url|
|
233
|
+
@history << url
|
234
|
+
end
|
235
|
+
|
236
|
+
option :strip_fragments, desc: 'Enables/disables stripping the fragment component of every URL'
|
237
|
+
|
238
|
+
option :strip_query, desc: 'Enables/disables stripping the query component of every URL'
|
239
|
+
|
240
|
+
option :visit_host, value: {
|
241
|
+
type: String,
|
242
|
+
usage: 'HOST'
|
243
|
+
},
|
244
|
+
desc: 'Visit URLs with the matching host name' do |host|
|
245
|
+
@visit_hosts << host
|
246
|
+
end
|
247
|
+
|
248
|
+
option :visit_hosts_like, value: {
|
249
|
+
type: Regexp,
|
250
|
+
usage: '/REGEX/'
|
251
|
+
},
|
252
|
+
desc: 'Visit URLs with hostnames that match the REGEX' do |regex|
|
253
|
+
@visit_hosts << regex
|
254
|
+
end
|
255
|
+
|
256
|
+
option :ignore_host, value: {
|
257
|
+
type: String,
|
258
|
+
usage: 'HOST'
|
259
|
+
},
|
260
|
+
desc: 'Ignore the host name' do |host|
|
261
|
+
@ignore_hosts << host
|
262
|
+
end
|
263
|
+
|
264
|
+
option :ignore_hosts_like, value: {
|
265
|
+
type: Regexp,
|
266
|
+
usage: '/REGEX/'
|
267
|
+
},
|
268
|
+
desc: 'Ignore the host names matching the REGEX' do |regex|
|
269
|
+
@ignore_hosts << regex
|
270
|
+
end
|
271
|
+
|
272
|
+
option :visit_port, value: {
|
273
|
+
type: Integer,
|
274
|
+
usage: 'PORT'
|
275
|
+
},
|
276
|
+
desc: 'Visit URLs with the matching port number' do |port|
|
277
|
+
@visit_ports << port
|
278
|
+
end
|
279
|
+
|
280
|
+
option :visit_ports_like, value: {
|
281
|
+
type: Regexp,
|
282
|
+
usage: '/REGEX/'
|
283
|
+
},
|
284
|
+
desc: 'Visit URLs with port numbers that match the REGEX' do |regex|
|
285
|
+
@visit_ports << regex
|
286
|
+
end
|
287
|
+
|
288
|
+
option :ignore_port, value: {
|
289
|
+
type: Integer,
|
290
|
+
usage: 'PORT'
|
291
|
+
},
|
292
|
+
desc: 'Ignore the port number' do |port|
|
293
|
+
@ignore_ports << port
|
294
|
+
end
|
295
|
+
|
296
|
+
option :ignore_ports_like, value: {
|
297
|
+
type: Regexp,
|
298
|
+
usage: '/REGEX/'
|
299
|
+
},
|
300
|
+
desc: 'Ignore the port numbers matching the REGEXP' do |regex|
|
301
|
+
@ignore_ports << regex
|
302
|
+
end
|
303
|
+
|
304
|
+
option :visit_link, value: {
|
305
|
+
type: String,
|
306
|
+
usage: 'URL'
|
307
|
+
},
|
308
|
+
desc: 'Visit the URL' do |link|
|
309
|
+
@visit_links << link
|
310
|
+
end
|
311
|
+
|
312
|
+
option :visit_links_like, value: {
|
313
|
+
type: Regexp,
|
314
|
+
usage: '/REGEX/'
|
315
|
+
},
|
316
|
+
desc: 'Visit URLs that match the REGEX' do |regex|
|
317
|
+
@visit_links << regex
|
318
|
+
end
|
319
|
+
|
320
|
+
option :ignore_link, value: {
|
321
|
+
type: String,
|
322
|
+
usage: 'URL'
|
323
|
+
},
|
324
|
+
desc: 'Ignore the URL' do |link|
|
325
|
+
@ignore_links << link
|
326
|
+
end
|
327
|
+
|
328
|
+
option :ignore_links_like, value: {
|
329
|
+
type: Regexp,
|
330
|
+
usage: '/REGEX/'
|
331
|
+
},
|
332
|
+
desc: 'Ignore URLs matching the REGEX' do |regex|
|
333
|
+
@ignore_links << regex
|
334
|
+
end
|
335
|
+
|
336
|
+
option :visit_ext, value: {
|
337
|
+
type: String,
|
338
|
+
usage: 'FILE_EXT'
|
339
|
+
},
|
340
|
+
desc: 'Visit URLs with the matching file ext' do |ext|
|
341
|
+
@visit_exts << ext
|
342
|
+
end
|
343
|
+
|
344
|
+
option :visit_exts_like, value: {
|
345
|
+
type: Regexp,
|
346
|
+
usage: '/REGEX/'
|
347
|
+
},
|
348
|
+
desc: 'Visit URLs with file exts that match the REGEX' do |regex|
|
349
|
+
@visit_exts << regex
|
350
|
+
end
|
351
|
+
|
352
|
+
option :ignore_ext, value: {
|
353
|
+
type: String,
|
354
|
+
usage: 'FILE_EXT'
|
355
|
+
},
|
356
|
+
desc: 'Ignore the URLs with the file ext' do |ext|
|
357
|
+
@ignore_exts << ext
|
358
|
+
end
|
359
|
+
|
360
|
+
option :ignore_exts_like, value: {
|
361
|
+
type: Regexp,
|
362
|
+
usage: '/REGEX/'
|
363
|
+
},
|
364
|
+
desc: 'Ignore URLs with file exts matching the REGEX' do |regex|
|
365
|
+
@ignore_exts << regex
|
366
|
+
end
|
367
|
+
|
368
|
+
option :robots, short: '-r',
|
369
|
+
desc: 'Specifies whether to honor robots.txt'
|
370
|
+
|
371
|
+
option :host, value: {
|
372
|
+
type: String,
|
373
|
+
usage: 'HOST'
|
374
|
+
},
|
375
|
+
desc: 'Spiders the specific HOST'
|
376
|
+
|
377
|
+
option :domain, value: {
|
378
|
+
type: String,
|
379
|
+
usage: 'DOMAIN',
|
380
|
+
},
|
381
|
+
desc: 'Spiders the whole domain'
|
382
|
+
|
383
|
+
option :site, value: {
|
384
|
+
type: String,
|
385
|
+
usage: 'URL'
|
386
|
+
},
|
387
|
+
desc: 'Spiders the website, starting at the URL'
|
388
|
+
|
389
|
+
option :print_verbose, desc: 'Print the status codes for each URL'
|
390
|
+
|
391
|
+
option :print_headers, desc: 'Print response headers for each URL'
|
392
|
+
|
393
|
+
option :print_header, value: {
|
394
|
+
type: String,
|
395
|
+
usage: 'NAME'
|
396
|
+
},
|
397
|
+
desc: 'Prints a specific header'
|
398
|
+
|
399
|
+
option :archive, value: {
|
400
|
+
type: String,
|
401
|
+
usage: 'DIR'
|
402
|
+
},
|
403
|
+
desc: 'Archive every visited page to the DIR'
|
404
|
+
|
405
|
+
option :git_archive, value: {
|
406
|
+
type: String,
|
407
|
+
usage: 'DIR'
|
408
|
+
},
|
409
|
+
desc: 'Archive every visited page to the git repository'
|
410
|
+
|
411
|
+
option :xpath, short: '-X',
|
412
|
+
value: {
|
413
|
+
type: String,
|
414
|
+
usage: 'XPATH'
|
415
|
+
},
|
416
|
+
desc: 'Evaluates the XPath on each HTML page'
|
417
|
+
|
418
|
+
option :css_path, short: '-C',
|
419
|
+
value: {
|
420
|
+
type: String,
|
421
|
+
usage: 'XPATH'
|
422
|
+
},
|
423
|
+
desc: 'Evaluates the CSS-path on each HTML page'
|
424
|
+
|
425
|
+
option :print_hosts, desc: 'Print all discovered hostnames'
|
426
|
+
|
427
|
+
option :print_certs, desc: 'Print all encountered SSL/TLS certificates'
|
428
|
+
|
429
|
+
option :save_certs, desc: 'Saves all encountered SSL/TLS certificates'
|
430
|
+
|
431
|
+
option :print_js_strings, desc: 'Print all JavaScript strings'
|
432
|
+
|
433
|
+
option :print_html_comments, desc: 'Print HTML comments'
|
434
|
+
|
435
|
+
option :print_js_comments, desc: 'Print JavaScript comments'
|
436
|
+
|
437
|
+
option :print_comments, desc: 'Print all HTML and JavaScript comments'
|
438
|
+
|
439
|
+
description 'Spiders a website'
|
440
|
+
|
441
|
+
examples [
|
442
|
+
"--host scanme.nmap.org",
|
443
|
+
"--domain nmap.org",
|
444
|
+
"--site https://scanme.nmap.org/"
|
445
|
+
]
|
446
|
+
|
447
|
+
man_page 'ronin-web-spider.1'
|
448
|
+
|
449
|
+
# The default HTTP headers to send with every request.
|
450
|
+
#
|
451
|
+
# @return [Hash{String => String}]
|
452
|
+
attr_reader :default_headers
|
453
|
+
|
454
|
+
# The mapping of custom `Host` headers.
|
455
|
+
#
|
456
|
+
# @return [Hash{String => String}]
|
457
|
+
attr_reader :host_headers
|
458
|
+
|
459
|
+
# The pre-existing queue of URLs to start spidering with.
|
460
|
+
#
|
461
|
+
# @return [Array<String>]
|
462
|
+
attr_reader :queue
|
463
|
+
|
464
|
+
# The pre-existing list of previously visited URLs to start spidering with.
|
465
|
+
#
|
466
|
+
# @return [Array<String>]
|
467
|
+
attr_reader :history
|
468
|
+
|
469
|
+
# The schemes to visit.
|
470
|
+
#
|
471
|
+
# @return [Array<String>]
|
472
|
+
attr_reader :visit_schemes
|
473
|
+
|
474
|
+
# The hosts to visit.
|
475
|
+
#
|
476
|
+
# @return [Array<String, Regexp>]
|
477
|
+
attr_reader :visit_hosts
|
478
|
+
|
479
|
+
# The port numbers to visit.
|
480
|
+
#
|
481
|
+
# @return [Array<Integer, Regexp>]
|
482
|
+
attr_reader :visit_ports
|
483
|
+
|
484
|
+
# The links to visit.
|
485
|
+
#
|
486
|
+
# @return [Array<String, Regexp>]
|
487
|
+
attr_reader :visit_links
|
488
|
+
|
489
|
+
# The URL file extensions to visit.
|
490
|
+
#
|
491
|
+
# @return [Array<String, Regexp>]
|
492
|
+
attr_reader :visit_exts
|
493
|
+
|
494
|
+
# The hosts to ignore.
|
495
|
+
#
|
496
|
+
# @return [Array<String, Regexp>]
|
497
|
+
attr_reader :ignore_hosts
|
498
|
+
|
499
|
+
# The port numbers to ignore.
|
500
|
+
#
|
501
|
+
# @return [Array<Integer, Regexp>]
|
502
|
+
attr_reader :ignore_ports
|
503
|
+
|
504
|
+
# The links to ignore.
|
505
|
+
#
|
506
|
+
# @return [Array<String, Regexp>]
|
507
|
+
attr_reader :ignore_links
|
508
|
+
|
509
|
+
# The URL file extensions to ignore.
|
510
|
+
#
|
511
|
+
# @return [Array<String, Regexp>]
|
512
|
+
attr_reader :ignore_exts
|
513
|
+
|
514
|
+
#
|
515
|
+
# Initializes the spider command.
|
516
|
+
#
|
517
|
+
# @param [Hash{Symbol => Object}] kwargs
|
518
|
+
# Additional keyword arguments.
|
519
|
+
#
|
520
|
+
#
# Initializes the spider command.
#
# @param [Hash{Symbol => Object}] kwargs
#   Additional keyword arguments passed through to the parent command.
#
def initialize(**kwargs)
  super(**kwargs)

  # populated by the -H/--header and --host-header option blocks
  @default_headers = {}
  @host_headers    = {}

  # Fix: @user_agent is read by #agent_kwargs (and assigned by the
  # -u/-U option blocks) but was never initialized here, relying on
  # Ruby's implicit nil for undefined instance variables and emitting
  # warnings under `ruby -W`.
  @user_agent = nil

  # populated by the --enqueue and --visited option blocks
  @queue   = []
  @history = []

  # URL components to visit (populated by the --visit-* option blocks)
  @visit_schemes = []
  @visit_hosts   = []
  @visit_ports   = []
  @visit_links   = []
  @visit_exts    = []

  # URL components to skip (populated by the --ignore-* option blocks)
  @ignore_hosts = []
  @ignore_ports = []
  @ignore_links = []
  @ignore_exts  = []
end
|
540
|
+
|
541
|
+
#
|
542
|
+
# Runs the `ronin-web spider` command.
|
543
|
+
#
|
544
|
+
#
# Runs the `ronin-web spider` command: builds the agent, wires up the
# requested per-page callbacks, spiders, then prints any summaries.
#
def run
  # Select an archive backend, if archiving was requested; nil otherwise.
  archive = if options[:archive]
              Web::Spider::Archive.open(options[:archive])
            elsif options[:git_archive]
              Web::Spider::GitArchive.open(options[:git_archive])
            end

  agent = new_agent do |agent|
    agent.every_page { |page| print_page(page) }

    agent.every_failed_url { |url| print_verbose "failed to request #{url}" }

    agent.every_host { |host| print_verbose "spidering new host #{host}" } if options[:print_hosts]

    if options[:print_certs]
      agent.every_cert do |cert|
        print_verbose "encountered new certificate for #{cert.subject.common_name}"
      end
    end

    agent.every_js_string    { |string|  print_content string  } if options[:print_js_strings]
    agent.every_html_comment { |comment| print_content comment } if options[:print_html_comments]
    agent.every_js_comment   { |comment| print_content comment } if options[:print_js_comments]
    agent.every_comment      { |comment| print_content comment } if options[:print_comments]

    # only archive pages that returned 2xx responses
    agent.every_ok_page { |page| archive.write(page.url,page.body) } if archive
  end

  # commit all archived pages in a single git commit
  archive.commit "Updated #{Time.now}" if options[:git_archive]

  if options[:print_hosts]
    puts
    puts "Spidered the following hosts:"
    puts

    indent do
      agent.visited_hosts.each { |host| puts host }
    end
  end

  if options[:print_certs]
    puts
    puts "Discovered the following certs:"
    puts

    agent.collected_certs.each do |cert|
      puts cert
      puts
    end
  end
end
|
630
|
+
|
631
|
+
#
|
632
|
+
# Creates a new web spider agent.
|
633
|
+
#
|
634
|
+
# @yield [agent]
|
635
|
+
# The given block will be given the newly created and configured
|
636
|
+
# web spider agent.
|
637
|
+
#
|
638
|
+
# @yieldparam [Ronin::Web::Spider::Agent] agent
|
639
|
+
# The newly created web spider agent.
|
640
|
+
#
|
641
|
+
# @return [Ronin::Web::Spider::Agent]
|
642
|
+
# The newly created web spider agent, after the agent has completed
|
643
|
+
# its spidering.
|
644
|
+
#
|
645
|
+
#
# Creates a new web spider agent scoped to the requested target.
#
# @yield [agent]
#   The given block will be given the newly created and configured
#   web spider agent.
#
# @yieldparam [Ronin::Web::Spider::Agent] agent
#   The newly created web spider agent.
#
# @return [Ronin::Web::Spider::Agent]
#   The agent, after it has finished spidering.
#
def new_agent(&block)
  if (host = options[:host])
    Web::Spider.host(host,**agent_kwargs,&block)
  elsif (domain = options[:domain])
    Web::Spider.domain(domain,**agent_kwargs,&block)
  elsif (site = options[:site])
    Web::Spider.site(site,**agent_kwargs,&block)
  else
    # exactly one of --host, --domain, --site is required
    print_error "must specify --host, --domain, or --site"
    exit(-1)
  end
end
|
657
|
+
|
658
|
+
#
|
659
|
+
# Builds keyword arguments for `Ronin::Web::Spider::Agent#initialize`.
|
660
|
+
#
|
661
|
+
# @return [Hash{Symbol => Object}]
|
662
|
+
# The keyword arguments for `Ronin::Web::Spider::Agent#initialize`.
|
663
|
+
#
|
664
|
+
#
# Builds keyword arguments for `Ronin::Web::Spider::Agent#initialize`,
# including only the settings that were actually given on the
# command-line (so the agent's own defaults apply otherwise).
#
# @return [Hash{Symbol => Object}]
#   The keyword arguments for `Ronin::Web::Spider::Agent#initialize`.
#
def agent_kwargs
  kwargs = {}

  kwargs[:proxy]           = options[:proxy]   if options[:proxy]
  kwargs[:default_headers] = @default_headers  unless @default_headers.empty?
  kwargs[:host_headers]    = @host_headers     unless @host_headers.empty?
  kwargs[:user_agent]      = @user_agent       if @user_agent
  kwargs[:referer]         = options[:referer] if options[:referer]

  kwargs[:delay]     = options[:delay]     if options[:delay]
  kwargs[:limit]     = options[:limit]     if options[:limit]
  kwargs[:max_depth] = options[:max_depth] if options[:max_depth]

  kwargs[:queue]   = @queue   unless @queue.empty?
  kwargs[:history] = @history unless @history.empty?

  # these flags are tri-state (unset / true / false), so key presence
  # must be checked rather than truthiness
  kwargs[:strip_fragments] = options[:strip_fragments] if options.has_key?(:strip_fragments)
  kwargs[:strip_query]     = options[:strip_query]     if options.has_key?(:strip_query)

  kwargs[:schemes] = @visit_schemes unless @visit_schemes.empty?
  kwargs[:hosts]   = @visit_hosts   unless @visit_hosts.empty?
  kwargs[:ports]   = @visit_ports   unless @visit_ports.empty?
  kwargs[:links]   = @visit_links   unless @visit_links.empty?
  kwargs[:exts]    = @visit_exts    unless @visit_exts.empty?

  kwargs[:ignore_hosts] = @ignore_hosts unless @ignore_hosts.empty?
  kwargs[:ignore_ports] = @ignore_ports unless @ignore_ports.empty?
  kwargs[:ignore_links] = @ignore_links unless @ignore_links.empty?
  kwargs[:ignore_exts]  = @ignore_exts  unless @ignore_exts.empty?

  kwargs[:robots] = options[:robots] if options.has_key?(:robots)

  kwargs
end
|
709
|
+
|
710
|
+
#
|
711
|
+
# Prints the status of a page.
|
712
|
+
#
|
713
|
+
# @param [Spidr::Page] page
|
714
|
+
# A spidered page.
|
715
|
+
#
|
716
|
+
#
# Prints the color-coded HTTP status code of a page (no newline, so the
# URL can be printed on the same line).
#
# Fix: this method was originally named `print_verbose`, the same name
# as the verbose-message printer defined later in this class. Ruby's
# later definition silently shadowed this one, so status codes were
# never printed. Renamed to `print_status`, matching the
# `--print-status` flag documented in the class header and man page.
#
# @param [Spidr::Page] page
#   A spidered page.
#
def print_status(page)
  if page.code < 300
    print "#{colors.bright_green(page.code)} "
  elsif page.code < 400
    print "#{colors.bright_yellow(page.code)} "
  elsif page.code < 500
    print "#{colors.bright_red(page.code)} "
  else
    print "#{colors.bold(colors.bright_red(page.code))} "
  end
end
|
727
|
+
|
728
|
+
#
|
729
|
+
# Prints the URL for a page.
|
730
|
+
#
|
731
|
+
# @param [Spidr::Page] page
|
732
|
+
# A spidered page.
|
733
|
+
#
|
734
|
+
#
# Prints the URL of a page, colored by its HTTP status code
# (green 2xx, yellow 3xx, red 4xx, bold red 5xx).
#
# @param [Spidr::Page] page
#   A spidered page.
#
def print_url(page)
  url     = page.url
  colored = if page.code < 300
              colors.green(url)
            elsif page.code < 400
              colors.yellow(url)
            elsif page.code < 500
              colors.red(url)
            else
              colors.bold(colors.red(url))
            end

  puts "#{colored} "
end
|
745
|
+
|
746
|
+
#
|
747
|
+
# Prints a page.
|
748
|
+
#
|
749
|
+
# @param [Spidr::Page] page
|
750
|
+
# A spidered page.
|
751
|
+
#
|
752
|
+
#
# Prints a spidered page: optional status code, its URL, then any
# requested headers and XPath/CSS-path query results.
#
# @param [Spidr::Page] page
#   A spidered page.
#
def print_page(page)
  # NOTE(review): `print_verbose` is defined twice in this class; the
  # later message-printing definition wins, so this call does NOT print
  # the page's status code as intended — confirm and rename upstream.
  print_verbose(page) if options[:print_verbose]
  print_url(page)

  if options[:print_headers]
    print_headers(page)
  elsif options[:print_header]
    header = page.response[options[:print_header]]
    print_content(header) if header
  end

  if options[:xpath] || options[:css_path]
    print_query(page)
  end
end
|
766
|
+
|
767
|
+
#
|
768
|
+
# Prints the headers of a page.
|
769
|
+
#
|
770
|
+
# @param [Spidr::Page] page
|
771
|
+
# A spidered page.
|
772
|
+
#
|
773
|
+
#
# Prints every response header of a page, one `Name: value` line each.
#
# @param [Spidr::Page] page
#   A spidered page.
#
def print_headers(page)
  page.response.each_capitalized do |header_name,header_value|
    line = "#{header_name}: #{header_value}"

    print_content(line)
  end
end
|
778
|
+
|
779
|
+
#
|
780
|
+
# Prints the XPath or CSS-path query result for the page.
|
781
|
+
#
|
782
|
+
# @param [Spidr::Page] page
|
783
|
+
# A spidered page.
|
784
|
+
#
|
785
|
+
#
# Prints the XPath or CSS-path query result for an HTML page.
# Non-HTML pages are skipped.
#
# @param [Spidr::Page] page
#   A spidered page.
#
def print_query(page)
  return unless page.html?

  # page.doc is only touched inside a branch so that no parse is
  # triggered when neither query option was given
  if (xpath = options[:xpath])
    print_content page.doc.xpath(xpath)
  elsif (css_path = options[:css_path])
    print_content page.doc.css(css_path)
  end
end
|
794
|
+
|
795
|
+
#
|
796
|
+
# Prints an information message.
|
797
|
+
#
|
798
|
+
# @param [String] message
|
799
|
+
#
|
800
|
+
#
# Prints an informational message in yellow, but only when verbose
# output (-v/--verbose) is enabled.
#
# @param [String] message
#
def print_verbose(message)
  puts colors.yellow("* #{message}") if verbose?
end
|
805
|
+
|
806
|
+
#
|
807
|
+
# Print content from a page.
|
808
|
+
#
|
809
|
+
# @param [#to_s] content
|
810
|
+
# The content to print.
|
811
|
+
#
|
812
|
+
#
# Prints content extracted from a page, indenting every line.
#
# @param [#to_s] content
#   The content to print.
#
def print_content(content)
  content.to_s.each_line { |line| puts " #{line}" }
end
|
817
|
+
|
818
|
+
end
|
819
|
+
end
|
820
|
+
end
|
821
|
+
end
|
822
|
+
end
|