browser_crawler 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.rspec +2 -0
- data/.rubocop.yml +10 -0
- data/.travis.yml +29 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +277 -0
- data/Rakefile +7 -0
- data/bin/console +10 -0
- data/bin/crawl +51 -0
- data/bin/setup +8 -0
- data/browser_crawler.gemspec +47 -0
- data/lib/browser_crawler.rb +12 -0
- data/lib/browser_crawler/dsl/js_helpers.rb +13 -0
- data/lib/browser_crawler/dsl/sign_in.rb +37 -0
- data/lib/browser_crawler/engine.rb +156 -0
- data/lib/browser_crawler/engine_utilities/crawl_manager.rb +100 -0
- data/lib/browser_crawler/engine_utilities/inspect_page_process.rb +74 -0
- data/lib/browser_crawler/engine_utilities/link_inspector.rb +31 -0
- data/lib/browser_crawler/engine_utilities/link_scanner.rb +38 -0
- data/lib/browser_crawler/engine_utilities/page_inspector.rb +65 -0
- data/lib/browser_crawler/errors/invalid_hooks_type.rb +12 -0
- data/lib/browser_crawler/followups/screenshots_indexer.rb +40 -0
- data/lib/browser_crawler/followups/templates/index.html.erb +69 -0
- data/lib/browser_crawler/followups/wraith_integrator.rb +41 -0
- data/lib/browser_crawler/hooks_container.rb +31 -0
- data/lib/browser_crawler/hooks_operator.rb +44 -0
- data/lib/browser_crawler/options.rb +86 -0
- data/lib/browser_crawler/report_factory.rb +22 -0
- data/lib/browser_crawler/reports/csv_report.rb +75 -0
- data/lib/browser_crawler/reports/store.rb +114 -0
- data/lib/browser_crawler/reports/yaml_report.rb +15 -0
- data/lib/browser_crawler/screenshot_operator.rb +47 -0
- data/lib/browser_crawler/support/capybara.rb +20 -0
- data/lib/browser_crawler/url_tools.rb +32 -0
- data/lib/browser_crawler/version.rb +3 -0
- metadata +244 -0
| @@ -0,0 +1,31 @@ | |
| 1 | 
            +
            require_relative '../url_tools'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module BrowserCrawler
              module EngineUtilities
                # Wraps a raw link and answers questions about it relative to the
                # crawled host: whether it parsed into a usable URI, whether it points
                # at `host_name`, and what its full URL is.
                #
                # Parsing and URL building are delegated to UrlTools; `uri` may be nil
                # (every predicate below guards against that case).
                class LinkInspector
                  attr_reader :raw_link, :host_name, :uri

                  # @param raw_link [String] the link as it was found
                  # @param host_name [String] host that counts as "internal"
                  def initialize(raw_link:, host_name:)
                    @raw_link = raw_link
                    @host_name = host_name
                    @uri = UrlTools.uri(url: raw_link)
                  end

                  # @return [Boolean] true when the link does not point at `host_name`
                  def external_url?
                    !internal_url?
                  end

                  # @return [Boolean] true when the link parsed and carries both a host
                  #   and a scheme
                  def link_valid?
                    # `||=` would re-evaluate on every call for a false result, so the
                    # memo is keyed on whether the ivar was ever assigned.
                    return @link_valid if defined?(@link_valid)

                    @link_valid = !uri.nil? && !uri.host.nil? && !uri.scheme.nil?
                  end

                  # @return [Boolean] true when the link parsed and its host equals
                  #   `host_name`
                  def internal_url?
                    return @internal_url if defined?(@internal_url)

                    @internal_url = !uri.nil? && uri.host == host_name
                  end

                  # @return the value produced by UrlTools.full_url for `uri` (memoized)
                  def full_url
                    @full_url ||= UrlTools.full_url(uri: uri)
                  end
                end
              end
            end
         | 
| @@ -0,0 +1,38 @@ | |
| 1 | 
            +
            module BrowserCrawler
              module EngineUtilities
                # Extracts the links of a page, but only when the inspected link is
                # internal; for any other link an empty list is returned.
                class LinkScanner
                  include Capybara::DSL
                  include HooksOperator

                  attr_reader :link_inspector

                  # @param link_inspector [#internal_url?] inspector of the link whose
                  #   page is going to be scanned
                  def initialize(link_inspector:)
                    @link_inspector = link_inspector
                  end

                  # @param page [#all] object answering `all('a')` with elements that
                  #   support `['href']`
                  # @return [Array] non-blank links of the page, or [] for a
                  #   non-internal link
                  def scan(page:)
                    return [] unless link_inspector.internal_url?

                    get_page_links(page: page)
                  end

                  private

                  # Collects the links of the page and drops the blank ones.
                  def get_page_links(page:)
                    found_links = link_matcher(page: page)
                    remove_blank_links(found_links)
                  end

                  # Keeps only the links which are neither nil nor empty.
                  def remove_blank_links(links)
                    links.select { |link| !link.nil? && !link.empty? }
                  end

                  # Returns an Array of links taken from the page.
                  # If a `:scan_rules` hook is registered, it is executed instead of the
                  # default behaviour (the `href` of every `a` element).
                  def link_matcher(page:)
                    exchange_on_hooks(type: :scan_rules) do
                      page.all('a').map { |anchor| anchor['href'] }
                    end
                  end
                end
              end
            end
         | 
| @@ -0,0 +1,65 @@ | |
| 1 | 
            +
            require_relative '../url_tools'
         | 
| 2 | 
            +
            require_relative '../hooks_operator'
         | 
| 3 | 
            +
            require_relative 'link_scanner'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            module BrowserCrawler
              module EngineUtilities
                # Visits the page behind an inspected link, scans it for further links
                # and records the outcome of the visit in the report store.
                class PageInspector
                  include Capybara::DSL
                  include HooksOperator

                  attr_accessor :link_inspector,
                                :link_scanner,
                                :capybara_session,
                                :scan_result,
                                :report_store

                  # @param link_inspector inspector of the link to visit
                  # @param capybara_session session used for scanning, status code and
                  #   screenshots
                  # @param report_store store receiving `record_page_visit`
                  def initialize(link_inspector:, capybara_session:, report_store:)
                    @scan_result = []
                    @link_inspector = link_inspector
                    @capybara_session = capybara_session
                    @report_store = report_store
                    @link_scanner = LinkScanner.new(link_inspector: link_inspector)
                  end

                  # Points Capybara.app_host at the link's scheme/host/port, visits the
                  # full URL and stores the scanned links in `scan_result`. The scan is
                  # wrapped in the `:each` hooks.
                  def visit_page
                    target = link_inspector.uri
                    Capybara.app_host = "#{target.scheme}://#{target.host}:#{target.port}"

                    visit(link_inspector.full_url)

                    with_hooks_for(type: :each) { @scan_result = scanning }
                  end

                  # Records the visit in the report store; a screenshot is taken first
                  # when the given operator asks for it.
                  #
                  # @param screenshot_operator [nil, #save_screenshots?] optional
                  def save_to_report(screenshot_operator: nil)
                    screenshot_file = save_screenshot(screenshot_operator)

                    visit_details = {
                      page: link_inspector.full_url,
                      extracted_links: scan_result,
                      screenshot_filename: screenshot_file,
                      external: link_inspector.external_url?,
                      code: capybara_session.status_code
                    }
                    report_store.record_page_visit(**visit_details)
                  end

                  # Intentionally empty.
                  def before_page_scan; end

                  private

                  # Links reported by the scanner, or [] when it returns nothing.
                  def scanning
                    links = link_scanner.scan(page: capybara_session)
                    links || []
                  end

                  # Saves a screenshot of the current page to the path chosen by the
                  # operator. Returns nil when there is no operator or it declines.
                  def save_screenshot(screenshot_operator)
                    return nil if screenshot_operator.nil? || !screenshot_operator.save_screenshots?

                    destination = screenshot_operator.file_path(url: capybara_session.current_url)
                    capybara_session.save_screenshot(destination)
                  end
                end
              end
            end
         | 
| @@ -0,0 +1,12 @@ | |
| 1 | 
            +
            module BrowserCrawler
              module Errors
                # Raised when a hook is registered with a type that is not listed in
                # HooksContainer::VALID_TYPES.
                class InvalidHooksType < StandardError
                  # @param invalid_type the rejected hook type; it is interpolated into
                  #   the error message together with the list of valid types
                  def initialize(invalid_type:)
                    allowed_types = HooksContainer::VALID_TYPES.join(', ')

                    super(
                      "Passed hooks type `#{invalid_type}` is invalid." \
                      ' A type has to apply one of the follow values:' \
                      " #{allowed_types}"
                    )
                  end
                end
              end
            end
         | 
| @@ -0,0 +1,40 @@ | |
| 1 | 
            +
            require 'erb'
         | 
| 2 | 
            +
            module BrowserCrawler
              module Followups
                # Indexes screenshots captured by the crawler: renders an index.html
                # listing the screenshot files found in a directory.
                # An ERB template can be provided; it receives the list of file names
                # in a local variable called `files`.
                class ScreenshotsIndexer
                  # @param template [String, nil] ERB source; when nil (or omitted) the
                  #   bundled templates/index.html.erb is read instead
                  def initialize(template: nil)
                    @template = template || File.read(default_template_file)
                  end

                  # Produces index.html with links to the screenshots found in `path`.
                  #
                  # @param path [String] directory holding the screenshots; index.html
                  #   is written into the same directory
                  # @param file_mask [String] glob used to pick the files to index
                  # @return [String] path of the written index.html
                  def index_directory(path, file_mask: '*.png')
                    # Sorted explicitly so the gallery order does not depend on the
                    # platform's directory listing order.
                    files = Dir[File.join(path, file_mask)].map { |file| File.basename(file) }.sort
                    index_path = File.join(path, 'index.html')
                    File.write(index_path, render_index(files: files))
                    index_path
                  end

                  # NOTE(review): this method looks unfinished. It sorts the report pages
                  # by key and then feeds the bare keys to `Hash[]`, which expects
                  # key/value pairs; nothing is rendered or written. Behaviour is kept
                  # as it was - confirm the intended result before relying on it.
                  def index_report(report)
                    sorted_pages = Hash[report.pages.sort_by { |(k, _v)| k }]
                    Hash[sorted_pages.map { |(k, _v)| k }]
                  end

                  private

                  # Location of the template shipped next to this file.
                  def default_template_file
                    File.join(__dir__, 'templates/index.html.erb')
                  end

                  # Renders the template; `files` is visible to it through the binding.
                  def render_index(files:)
                    ERB.new(@template).result(binding)
                  end
                end
              end
            end
         | 
| @@ -0,0 +1,69 @@ | |
| 1 | 
            +
            <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
            <html>
            <head>
              <%# Gallery of captured screenshots. Expects a local `files`: a list of image file names relative to this page. %>
              <title>Screenshots</title>

              <style type="text/css">
                body {
                  color: #eee;
                  margin-top: 20px;
                  font-family: Arial, "Helvetica Neue", Helvetica, sans-serif;
                }

                a {
                  color: #FFF;
                }

                a:hover {
                  color: yellow;
                  text-decoration: underline;
                }

                .thumbnails {
                  overflow: scroll;
                  height: 150px;
                }

                .thumbnails img {
                  height: 80px;
                  border: 4px solid #555;
                  padding: 1px;
                  margin: 0 10px 10px 0;
                }

                .thumbnails img:hover {
                  border: 4px solid #00ccff;
                  cursor: pointer;
                }

                .preview img {
                  border: 4px solid #444;
                  padding: 1px;
                  width: 800px;
                }
              </style>

            </head>
            <body>

            <div class="gallery" align="center">
              <h2>Screenshots captured</h2>

              <br/>

              <div class="thumbnails">
                <%# File names are HTML-escaped so characters such as quotes or ampersands cannot break the attribute. %>
                <% files.each_with_index do |img, index| %>
                    <img onmouseover="preview.src=img<%= index %>.src" name="img<%= index %>" src="<%= ERB::Util.html_escape(img) %>" alt=""/>
                <% end %>
              </div>
              <br/>

              <div class="preview" align="center">
                <img name="preview" src="<%= ERB::Util.html_escape(files.first) %>" alt=""/>
              </div>

            </div>


            </body>
            </html>
         | 
| @@ -0,0 +1,41 @@ | |
| 1 | 
            +
            require 'yaml'
         | 
| 2 | 
            +
            require 'active_support/core_ext/string'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            module BrowserCrawler
              module Followups
                # Updates the `paths` section of a Wraith config file with the pages
                # listed in a crawl report.
                class WraithIntegrator
                  # @param report [#pages, String] either an object responding to
                  #   `pages`, or a YAML document whose top-level `pages` entry (String
                  #   or Symbol key) maps page URLs to their data
                  def initialize(report:)
                    @report = report.respond_to?(:pages) ? report : parse_report(report)
                  end

                  # Rewrites the `paths` entry of the given Wraith config file in place.
                  #
                  # @param wraith_config_file [String] path to the YAML config
                  # @param path_suffix [String, nil] appended to every page path
                  def update_config(wraith_config_file, path_suffix: nil)
                    # An empty config file loads as nil/false; start from an empty Hash.
                    config = YAML.safe_load(File.read(wraith_config_file)) || {}
                    config['paths'] = paths(with_suffix: path_suffix)
                    File.write(wraith_config_file, config.to_yaml)
                  end

                  # @return [Hash] sorted hash of page_name => path pair values appended with optional suffix.
                  #     Page name equals to path which makes it easy to navigate the page from the Wraith gallery.
                  def paths(with_suffix: nil)
                    sorted_pages.map { |name, path| [name, "#{path}#{with_suffix}"] }.to_h
                  end

                  # @return [Hash] parameterized page path => page path, one entry per
                  #   report page (pages sharing a path collapse into one entry)
                  def named_pages
                    report_pages.each_with_object({}) do |(page_url, _links), names|
                      page_path = URI(page_url.to_s).path
                      names[page_path.parameterize] = page_path
                    end
                  end

                  # @return [Hash] `named_pages` ordered by page name
                  def sorted_pages
                    named_pages.sort_by { |(name, _path)| name }.to_h
                  end

                  private

                  # Pages of the report, whichever form the report was given in.
                  # A report object is asked for `pages`; a parsed YAML report is a Hash
                  # keyed by :pages.
                  def report_pages
                    @report.respond_to?(:pages) ? @report.pages : @report[:pages]
                  end

                  # Parses a YAML report and symbolizes its top-level keys using only
                  # the standard library.
                  def parse_report(yaml)
                    load_report_yaml(yaml).each_with_object({}) do |(key, value), symbolized|
                      symbolized[key.respond_to?(:to_sym) ? key.to_sym : key] = value
                    end
                  end

                  # Loads YAML allowing Symbols. Newer Psych versions take the permitted
                  # classes as a keyword (the positional form raises on Psych 4); the
                  # positional call is kept for versions that lack the keyword.
                  def load_report_yaml(yaml)
                    if YAML.method(:safe_load).parameters.include?(%i[key permitted_classes])
                      YAML.safe_load(yaml, permitted_classes: [Symbol])
                    else
                      YAML.safe_load(yaml, [Symbol])
                    end
                  end
                end
              end
            end
         | 
| @@ -0,0 +1,31 @@ | |
| 1 | 
            +
            require 'singleton'
         | 
| 2 | 
            +
            require_relative 'errors/invalid_hooks_type'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            module BrowserCrawler
              # Process-wide (Singleton) registry of crawler hooks.
              #
              # Hooks are stored as `hooks_container[method][type]`, an Array per hook
              # type. Reading a `method` key that was not used before creates an entry
              # with an empty Array for every valid type.
              class HooksContainer
                include Singleton

                VALID_TYPES = %i[each all unvisited_links scan_rules].freeze

                def initialize
                  reset
                end

                # Drops every registered hook.
                def reset
                  @hooks_container = Hash.new do |container, method|
                    container[method] = VALID_TYPES.map { |type| [type, []] }.to_h
                  end
                end

                attr_reader :hooks_container

                # Registers a hook.
                #
                # @param method [Symbol] group the hook belongs to (HooksOperator reads
                #   :before, :after and :run_only_one)
                # @param type [Symbol, String] one of VALID_TYPES; Strings are accepted
                #   and converted to Symbols before validation
                # @param hook [Proc, nil] stored as given
                # @raise [Errors::InvalidHooksType] when the type is not valid
                def add_hook(method: :run_only_one, type:, hook: nil)
                  # Normalize first: validating the raw value would reject String types
                  # that the `to_sym` conversion is meant to support.
                  hook_type = type.respond_to?(:to_sym) ? type.to_sym : type

                  unless VALID_TYPES.include?(hook_type)
                    raise Errors::InvalidHooksType.new(invalid_type: type)
                  end

                  @hooks_container[method][hook_type] << hook
                end
              end
            end
         | 
| @@ -0,0 +1,44 @@ | |
| 1 | 
            +
            module BrowserCrawler
              # Mixin that runs the hooks registered in HooksContainer in the context of
              # the including object (hooks are executed with `instance_exec`).
              module HooksOperator
                # Runs the :before hooks of the given type, yields, then runs the :after
                # hooks of that type.
                #
                # @return the result of running the :after hooks (not the block's value)
                def with_hooks_for(type:)
                  run_before_hooks(type: type)
                  yield
                  run_after_hooks(type: type)
                end

                # Executes the first :run_only_one hook registered for the type; when
                # none is registered, executes the given block instead.
                #
                # @return the value of whichever was executed, nil when neither exists
                def exchange_on_hooks(type:, &default_block)
                  container = BrowserCrawler::HooksContainer.instance.hooks_container
                  custom_hooks = container[:run_only_one][type]

                  unless custom_hooks.nil? || custom_hooks.empty?
                    return instance_exec(&custom_hooks[0])
                  end

                  instance_exec(&default_block) if block_given?
                end

                private

                # Runs the :before hooks of the type, if that type is known.
                def run_before_hooks(type:)
                  registered = BrowserCrawler::HooksContainer.instance.hooks_container[:before][type]
                  run_hooks(registered) if registered
                end

                # Runs the :after hooks of the type, if that type is known.
                def run_after_hooks(type:)
                  registered = BrowserCrawler::HooksContainer.instance.hooks_container[:after][type]
                  run_hooks(registered) if registered
                end

                # Executes every hook, in order, with `self` as the receiver.
                def run_hooks(hooks)
                  hooks.each { |hook| instance_exec(&hook) }
                end
              end
            end
         | 
| @@ -0,0 +1,86 @@ | |
| 1 | 
            +
            require 'optionparser'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module BrowserCrawler
              # Command-line options of the `crawl` executable.
              module Options
                module_function

                # @return [Hash] values used for every option not given on the command line
                def default_options
                  {
                    report_folder: 'tmp',
                    report_format: 'yaml',
                    window_width: 1024,
                    window_height: 768
                  }
                end

                # Parses the command line into an options Hash merged over the defaults.
                # Prints the usage and exits when nothing at all was given.
                #
                # @param argv [Array<String>] arguments to parse; consumed in place
                #   (defaults to ARGV, as before)
                # @return [Hash] `default_options` overridden by the parsed values
                def parse_args(argv = ARGV)
                  options = {}
                  parser = OptionParser.new do |opts|
                    opts.on_tail

                    opts.banner = 'Site crawler. Usage example: crawl http://localhost:3000'

                    # '--url URL' makes the switch take an argument; the previous
                    # '[--url] URL' was read as description text, turning -U into a
                    # plain flag and leaving --url undefined.
                    opts.on('-U', '--url URL', 'Crawls the site starting from the url specified. E.g. http://localhost:3000/welcome.') do |v|
                      options[:url] = v
                    end

                    opts.on('-u', '--user USERNAME', 'The authentication user name (optional).') do |v|
                      options[:username] = v
                    end

                    opts.on('-p', '--password PASSWORD', 'The authentication password (optional).') do |v|
                      options[:password] = v
                    end

                    opts.on('-n', '--max_pages NUM', 'The maximum number of pages to visit.') do |v|
                      options[:max_pages] = v.to_i
                    end

                    opts.on('-w', '--window_size WxH', 'Browser window size. Default 1024x768') do |v|
                      # Integers, to match the type of the defaults.
                      options[:window_width], options[:window_height] = v.split('x').map(&:to_i)
                    end

                    opts.on('-r', '--report FOLDER', 'The folder path to save report to. '\
                                                     'Default: tmp') do |v|
                      options[:report_folder] = v
                    end

                    opts.on('-f', '--report_format TYPE', 'The report type to save result  '\
                                                     'Default: yaml') do |v|
                      options[:report_format] = v
                    end

                    opts.on('-s', '--screenshots_path PATH',
                            'If specified along with the url, screenshots are captured visiting each page.'\
                            ' Otherwise used to generate a screenshots index based on files caprured previously. ') do |v|
                      options[:screenshots_path] = v
                    end

                    opts.on('-t', '--template FILENAME',
                            'Specify the template used for indexing.'\
                            '  Default: followups/templates/index.html.erb') do |v|
                      options[:index_template] = v
                    end

                    opts.on('-c', '--wraith_config FILENAME',
                            'Update config "paths" section with the pages extracted.') do |v|
                      options[:wraith_config] = v
                    end

                    opts.on('-h', '--help', 'Show this help message and exit.') do
                      puts opts
                      # Exit as the description promises; otherwise the usage is printed
                      # a second time below, or the crawl starts despite -h.
                      exit
                    end
                  end
                  parser.parse!(argv)

                  # A remaining positional argument is taken as the start url.
                  options[:url] = argv.pop unless argv.empty?

                  if options.empty?
                    puts parser
                    exit
                  end

                  default_options.merge(options)
                end
              end
            end
         |