kimurai 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +56 -1
- data/README.md +183 -69
- data/kimurai.gemspec +1 -1
- data/lib/kimurai/base.rb +96 -36
- data/lib/kimurai/base/{simple_saver.rb → saver.rb} +25 -17
- data/lib/kimurai/base/storage.rb +91 -0
- data/lib/kimurai/browser_builder.rb +6 -0
- data/lib/kimurai/browser_builder/mechanize_builder.rb +22 -18
- data/lib/kimurai/browser_builder/poltergeist_phantomjs_builder.rb +25 -20
- data/lib/kimurai/browser_builder/selenium_chrome_builder.rb +21 -23
- data/lib/kimurai/browser_builder/selenium_firefox_builder.rb +22 -18
- data/lib/kimurai/capybara_ext/mechanize/driver.rb +1 -1
- data/lib/kimurai/capybara_ext/session.rb +47 -7
- data/lib/kimurai/cli.rb +2 -1
- data/lib/kimurai/pipeline.rb +6 -2
- data/lib/kimurai/template/Gemfile +8 -0
- data/lib/kimurai/template/spiders/application_spider.rb +50 -35
- data/lib/kimurai/version.rb +1 -1
- metadata +5 -5
- data/lib/kimurai/base/uniq_checker.rb +0 -22
| @@ -3,38 +3,46 @@ require 'csv' | |
| 3 3 |  | 
| 4 4 | 
             
            module Kimurai
         | 
| 5 5 | 
             
              class Base
         | 
| 6 | 
            -
                class  | 
| 7 | 
            -
                   | 
| 6 | 
            +
                class Saver
         | 
| 7 | 
            +
                  attr_reader :format, :path, :position, :append
         | 
| 8 | 
            +
             | 
| 9 | 
            +
                  def initialize(path, format:, position: true, append: false)
         | 
| 10 | 
            +
                    unless %i(json pretty_json jsonlines csv).include?(format)
         | 
| 11 | 
            +
                      raise "SimpleSaver: wrong type of format: #{format}"
         | 
| 12 | 
            +
                    end
         | 
| 13 | 
            +
             | 
| 14 | 
            +
                    @path = path
         | 
| 15 | 
            +
                    @format = format
         | 
| 16 | 
            +
                    @position = position
         | 
| 8 17 | 
             
                    @index = 0
         | 
| 18 | 
            +
                    @append = append
         | 
| 9 19 | 
             
                    @mutex = Mutex.new
         | 
| 10 20 | 
             
                  end
         | 
| 11 21 |  | 
| 12 | 
            -
                  def save( | 
| 22 | 
            +
                  def save(item)
         | 
| 13 23 | 
             
                    @mutex.synchronize do
         | 
| 14 24 | 
             
                      @index += 1
         | 
| 15 25 | 
             
                      item[:position] = @index if position
         | 
| 16 26 |  | 
| 17 27 | 
             
                      case format
         | 
| 18 28 | 
             
                      when :json
         | 
| 19 | 
            -
                        save_to_json(item | 
| 29 | 
            +
                        save_to_json(item)
         | 
| 20 30 | 
             
                      when :pretty_json
         | 
| 21 | 
            -
                        save_to_pretty_json(item | 
| 31 | 
            +
                        save_to_pretty_json(item)
         | 
| 22 32 | 
             
                      when :jsonlines
         | 
| 23 | 
            -
                        save_to_jsonlines(item | 
| 33 | 
            +
                        save_to_jsonlines(item)
         | 
| 24 34 | 
             
                      when :csv
         | 
| 25 | 
            -
                        save_to_csv(item | 
| 26 | 
            -
                      else
         | 
| 27 | 
            -
                        raise "SimpleSaver: wrong type of format: #{format}"
         | 
| 35 | 
            +
                        save_to_csv(item)
         | 
| 28 36 | 
             
                      end
         | 
| 29 37 | 
             
                    end
         | 
| 30 38 | 
             
                  end
         | 
| 31 39 |  | 
| 32 40 | 
             
                  private
         | 
| 33 41 |  | 
| 34 | 
            -
                  def save_to_json(item | 
| 42 | 
            +
                  def save_to_json(item)
         | 
| 35 43 | 
             
                    data = JSON.generate([item])
         | 
| 36 44 |  | 
| 37 | 
            -
                    if @index > 1
         | 
| 45 | 
            +
                    if append || @index > 1
         | 
| 38 46 | 
             
                      file_content = File.read(path).sub(/\}\]\Z/, "\}\,")
         | 
| 39 47 | 
             
                      File.open(path, "w") do |f|
         | 
| 40 48 | 
             
                        f.write(file_content + data.sub(/\A\[/, ""))
         | 
| @@ -44,10 +52,10 @@ module Kimurai | |
| 44 52 | 
             
                    end
         | 
| 45 53 | 
             
                  end
         | 
| 46 54 |  | 
| 47 | 
            -
                  def save_to_pretty_json(item | 
| 55 | 
            +
                  def save_to_pretty_json(item)
         | 
| 48 56 | 
             
                    data = JSON.pretty_generate([item])
         | 
| 49 57 |  | 
| 50 | 
            -
                    if @index > 1
         | 
| 58 | 
            +
                    if append || @index > 1
         | 
| 51 59 | 
             
                      file_content = File.read(path).sub(/\}\n\]\Z/, "\}\,\n")
         | 
| 52 60 | 
             
                      File.open(path, "w") do |f|
         | 
| 53 61 | 
             
                        f.write(file_content + data.sub(/\A\[\n/, ""))
         | 
| @@ -57,20 +65,20 @@ module Kimurai | |
| 57 65 | 
             
                    end
         | 
| 58 66 | 
             
                  end
         | 
| 59 67 |  | 
| 60 | 
            -
                  def save_to_jsonlines(item | 
| 68 | 
            +
                  def save_to_jsonlines(item)
         | 
| 61 69 | 
             
                    data = JSON.generate(item)
         | 
| 62 70 |  | 
| 63 | 
            -
                    if @index > 1
         | 
| 71 | 
            +
                    if append || @index > 1
         | 
| 64 72 | 
             
                      File.open(path, "a") { |file| file.write("\n" + data) }
         | 
| 65 73 | 
             
                    else
         | 
| 66 74 | 
             
                      File.open(path, "w") { |file| file.write(data) }
         | 
| 67 75 | 
             
                    end
         | 
| 68 76 | 
             
                  end
         | 
| 69 77 |  | 
| 70 | 
            -
                  def save_to_csv(item | 
| 78 | 
            +
                  def save_to_csv(item)
         | 
| 71 79 | 
             
                    data = flatten_hash(item)
         | 
| 72 80 |  | 
| 73 | 
            -
                    if @index > 1
         | 
| 81 | 
            +
                    if append || @index > 1
         | 
| 74 82 | 
             
                      CSV.open(path, "a+", force_quotes: true) do |csv|
         | 
| 75 83 | 
             
                        csv << data.values
         | 
| 76 84 | 
             
                      end
         | 
| @@ -0,0 +1,91 @@ | |
require 'pstore'

module Kimurai
  class Base
    # Store of values grouped by scope (any hash key, e.g. a symbol).
    #
    # When a +path+ is given the data is kept in a PStore file at that path;
    # otherwise a plain in-memory Hash is used. Every public operation runs
    # inside the instance mutex, and file-backed operations additionally run
    # inside a PStore transaction.
    class Storage
      attr_reader :database, :path

      # @param path [String, nil] location of the PStore file, or nil to keep
      #   everything in memory.
      def initialize(path = nil)
        @path = path
        @mutex = Mutex.new
        @database = path ? PStore.new(path) : {}
      end

      # @param scope [Object, nil] scope to read, or nil for the whole store.
      # @return [Array, Hash, PStore] the values saved under +scope+ (an empty
      #   array when the scope is unknown), or the underlying database object
      #   itself when no scope is given.
      def all(scope = nil)
        with_database { scope ? database.fetch(scope, []) : database }
      end

      # @return [Boolean] whether +value+ is already saved under +scope+.
      #   The scope is created (empty) if it did not exist yet.
      def include?(scope, value)
        with_database { scope_values(scope).include?(value) }
      end

      # Saves +value+ under +scope+ unless it is already there.
      #
      # @return [Array, nil] the scope's values after the push, or nil when
      #   the value was already present.
      def add(scope, value)
        with_database do
          values = scope_values(scope)
          values.push(value) unless values.include?(value)
        end
      end

      ###

      # Checks and records in one step: returns false if +value+ is already
      # saved under +scope+, otherwise saves it and returns true.
      #
      # @return [Boolean]
      def unique?(scope, value)
        with_database do
          values = scope_values(scope)

          if values.include?(value)
            false
          else
            values.push(value)
            true
          end
        end
      end

      ###

      # Removes every scope from the store.
      def clear!
        @mutex.synchronize do
          if path
            database.transaction do
              database.roots.each { |key| database.delete(key) }
            end
          else
            # Assign the instance variable: a bare `database = {}` would only
            # create a local variable and leave the stored data untouched.
            @database = {}
          end
        end
      end

      # Deletes the PStore file, if the storage is file-backed and the file
      # exists. Does nothing for in-memory storage.
      def delete!
        @mutex.synchronize do
          # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
          File.delete(path) if path && File.exist?(path)
        end
      end

      private

      # Runs the block under the mutex; for file-backed storage the block is
      # additionally wrapped in a PStore transaction. Returns the block's value.
      def with_database(&block)
        @mutex.synchronize do
          path ? database.transaction(&block) : yield
        end
      end

      # Returns the array stored under +scope+, creating it when missing.
      # Must be called from within #with_database.
      def scope_values(scope)
        database[scope] ||= []
      end
    end
  end
end
| @@ -13,6 +13,12 @@ module Kimurai | |
| 13 13 | 
             
                    raise "BrowserBuilder: wrong name of engine, available engines: #{AVAILABLE_ENGINES.join(', ')}"
         | 
| 14 14 | 
             
                  end
         | 
| 15 15 |  | 
| 16 | 
            +
                  if config[:browser].present?
         | 
| 17 | 
            +
                    raise "++++++ BrowserBuilder: browser option is depricated. Now all sub-options inside " \
         | 
| 18 | 
            +
                      "`browser` should be placed right into `@config` hash, without `browser` parent key.\n" \
         | 
| 19 | 
            +
                      "See more here: https://github.com/vifreefly/kimuraframework/blob/master/CHANGELOG.md#breaking-changes-110 ++++++"
         | 
| 20 | 
            +
                  end
         | 
| 21 | 
            +
             | 
| 16 22 | 
             
                  case engine
         | 
| 17 23 | 
             
                  when :mechanize
         | 
| 18 24 | 
             
                    require_relative 'browser_builder/mechanize_builder'
         | 
| @@ -29,6 +29,10 @@ module Kimurai | |
| 29 29 | 
             
                    @browser.spider = spider
         | 
| 30 30 | 
             
                    logger.debug "BrowserBuilder (mechanize): created browser instance"
         | 
| 31 31 |  | 
| 32 | 
            +
                    if @config[:extensions].present?
         | 
| 33 | 
            +
                      logger.error "BrowserBuilder (mechanize): `extensions` option not supported, skipped"
         | 
| 34 | 
            +
                    end
         | 
| 35 | 
            +
             | 
| 32 36 | 
             
                    # Proxy
         | 
| 33 37 | 
             
                    if proxy = @config[:proxy].presence
         | 
| 34 38 | 
             
                      proxy_string = (proxy.class == Proc ? proxy.call : proxy).strip
         | 
| @@ -63,7 +67,7 @@ module Kimurai | |
| 63 67 | 
             
                      user_agent_string = (user_agent.class == Proc ? user_agent.call : user_agent).strip
         | 
| 64 68 |  | 
| 65 69 | 
             
                      @browser.driver.add_header("User-Agent", user_agent_string)
         | 
| 66 | 
            -
                      logger.debug "BrowserBuilder (mechanize): enabled custom  | 
| 70 | 
            +
                      logger.debug "BrowserBuilder (mechanize): enabled custom user_agent"
         | 
| 67 71 | 
             
                    end
         | 
| 68 72 |  | 
| 69 73 | 
             
                    # Cookies
         | 
| @@ -77,59 +81,59 @@ module Kimurai | |
| 77 81 |  | 
| 78 82 | 
             
                    # Browser instance options
         | 
| 79 83 | 
             
                    # retry_request_errors
         | 
| 80 | 
            -
                    if errors = @config | 
| 84 | 
            +
                    if errors = @config[:retry_request_errors].presence
         | 
| 81 85 | 
             
                      @browser.config.retry_request_errors = errors
         | 
| 82 | 
            -
                      logger.debug "BrowserBuilder (mechanize): enabled  | 
| 86 | 
            +
                      logger.debug "BrowserBuilder (mechanize): enabled retry_request_errors"
         | 
| 83 87 | 
             
                    end
         | 
| 84 88 |  | 
| 85 89 | 
             
                    # restart_if
         | 
| 86 | 
            -
                    if @config | 
| 87 | 
            -
                      logger.warn "BrowserBuilder (mechanize):  | 
| 90 | 
            +
                    if @config[:restart_if].present?
         | 
| 91 | 
            +
                      logger.warn "BrowserBuilder (mechanize): restart_if options not supported by Mechanize, skipped"
         | 
| 88 92 | 
             
                    end
         | 
| 89 93 |  | 
| 90 94 | 
             
                    # before_request clear_cookies
         | 
| 91 | 
            -
                    if @config.dig(: | 
| 95 | 
            +
                    if @config.dig(:before_request, :clear_cookies)
         | 
| 92 96 | 
             
                      @browser.config.before_request[:clear_cookies] = true
         | 
| 93 | 
            -
                      logger.debug "BrowserBuilder (mechanize): enabled  | 
| 97 | 
            +
                      logger.debug "BrowserBuilder (mechanize): enabled before_request.clear_cookies"
         | 
| 94 98 | 
             
                    end
         | 
| 95 99 |  | 
| 96 100 | 
             
                    # before_request clear_and_set_cookies
         | 
| 97 | 
            -
                    if @config.dig(: | 
| 101 | 
            +
                    if @config.dig(:before_request, :clear_and_set_cookies)
         | 
| 98 102 | 
             
                      if cookies = @config[:cookies].presence
         | 
| 99 103 | 
             
                        @browser.config.cookies = cookies
         | 
| 100 104 | 
             
                        @browser.config.before_request[:clear_and_set_cookies] = true
         | 
| 101 | 
            -
                        logger.debug "BrowserBuilder (mechanize): enabled  | 
| 105 | 
            +
                        logger.debug "BrowserBuilder (mechanize): enabled before_request.clear_and_set_cookies"
         | 
| 102 106 | 
             
                      else
         | 
| 103 | 
            -
                        logger.error "BrowserBuilder (mechanize):  | 
| 107 | 
            +
                        logger.error "BrowserBuilder (mechanize): cookies should be present to enable before_request.clear_and_set_cookies, skipped"
         | 
| 104 108 | 
             
                      end
         | 
| 105 109 | 
             
                    end
         | 
| 106 110 |  | 
| 107 111 | 
             
                    # before_request change_user_agent
         | 
| 108 | 
            -
                    if @config.dig(: | 
| 112 | 
            +
                    if @config.dig(:before_request, :change_user_agent)
         | 
| 109 113 | 
             
                      if @config[:user_agent].present? && @config[:user_agent].class == Proc
         | 
| 110 114 | 
             
                        @browser.config.user_agent = @config[:user_agent]
         | 
| 111 115 | 
             
                        @browser.config.before_request[:change_user_agent] = true
         | 
| 112 | 
            -
                        logger.debug "BrowserBuilder (mechanize): enabled  | 
| 116 | 
            +
                        logger.debug "BrowserBuilder (mechanize): enabled before_request.change_user_agent"
         | 
| 113 117 | 
             
                      else
         | 
| 114 | 
            -
                        logger.error "BrowserBuilder (mechanize):  | 
| 118 | 
            +
                        logger.error "BrowserBuilder (mechanize): user_agent should be present and has lambda format to enable before_request.change_user_agent, skipped"
         | 
| 115 119 | 
             
                      end
         | 
| 116 120 | 
             
                    end
         | 
| 117 121 |  | 
| 118 122 | 
             
                    # before_request change_proxy
         | 
| 119 | 
            -
                    if @config.dig(: | 
| 123 | 
            +
                    if @config.dig(:before_request, :change_proxy)
         | 
| 120 124 | 
             
                      if @config[:proxy].present? && @config[:proxy].class == Proc
         | 
| 121 125 | 
             
                        @browser.config.proxy = @config[:proxy]
         | 
| 122 126 | 
             
                        @browser.config.before_request[:change_proxy] = true
         | 
| 123 | 
            -
                        logger.debug "BrowserBuilder (mechanize): enabled  | 
| 127 | 
            +
                        logger.debug "BrowserBuilder (mechanize): enabled before_request.change_proxy"
         | 
| 124 128 | 
             
                      else
         | 
| 125 | 
            -
                        logger.error "BrowserBuilder (mechanize):  | 
| 129 | 
            +
                        logger.error "BrowserBuilder (mechanize): proxy should be present and has lambda format to enable before_request.change_proxy, skipped"
         | 
| 126 130 | 
             
                      end
         | 
| 127 131 | 
             
                    end
         | 
| 128 132 |  | 
| 129 133 | 
             
                    # before_request delay
         | 
| 130 | 
            -
                    if delay = @config.dig(: | 
| 134 | 
            +
                    if delay = @config.dig(:before_request, :delay).presence
         | 
| 131 135 | 
             
                      @browser.config.before_request[:delay] = delay
         | 
| 132 | 
            -
                      logger.debug "BrowserBuilder (mechanize): enabled  | 
| 136 | 
            +
                      logger.debug "BrowserBuilder (mechanize): enabled before_request.delay"
         | 
| 133 137 | 
             
                    end
         | 
| 134 138 |  | 
| 135 139 | 
             
                    # return Capybara session instance
         | 
| @@ -23,6 +23,11 @@ module Kimurai | |
| 23 23 | 
             
                        js_errors: false, debug: false, inspector: false, phantomjs_options: []
         | 
| 24 24 | 
             
                      }
         | 
| 25 25 |  | 
| 26 | 
            +
                      if extensions = @config[:extensions].presence
         | 
| 27 | 
            +
                        driver_options[:extensions] = extensions
         | 
| 28 | 
            +
                        logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled extensions"
         | 
| 29 | 
            +
                      end
         | 
| 30 | 
            +
             | 
| 26 31 | 
             
                      # Window size
         | 
| 27 32 | 
             
                      if size = @config[:window_size].presence
         | 
| 28 33 | 
             
                        driver_options[:window_size] = size
         | 
| @@ -73,7 +78,7 @@ module Kimurai | |
| 73 78 | 
             
                      user_agent_string = (user_agent.class == Proc ? user_agent.call : user_agent).strip
         | 
| 74 79 |  | 
| 75 80 | 
             
                      @browser.driver.add_header("User-Agent", user_agent_string)
         | 
| 76 | 
            -
                      logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled custom  | 
| 81 | 
            +
                      logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled custom user_agent"
         | 
| 77 82 | 
             
                    end
         | 
| 78 83 |  | 
| 79 84 | 
             
                    # Cookies
         | 
| @@ -87,65 +92,65 @@ module Kimurai | |
| 87 92 |  | 
| 88 93 | 
             
                    # Browser instance options
         | 
| 89 94 | 
             
                    # retry_request_errors
         | 
| 90 | 
            -
                    if errors = @config | 
| 95 | 
            +
                    if errors = @config[:retry_request_errors].presence
         | 
| 91 96 | 
             
                      @browser.config.retry_request_errors = errors
         | 
| 92 | 
            -
                      logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled  | 
| 97 | 
            +
                      logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled retry_request_errors"
         | 
| 93 98 | 
             
                    end
         | 
| 94 99 |  | 
| 95 100 | 
             
                    # restart_if
         | 
| 96 | 
            -
                    if requests_limit = @config.dig(: | 
| 101 | 
            +
                    if requests_limit = @config.dig(:restart_if, :requests_limit).presence
         | 
| 97 102 | 
             
                      @browser.config.restart_if[:requests_limit] = requests_limit
         | 
| 98 | 
            -
                      logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled  | 
| 103 | 
            +
                      logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled restart_if.requests_limit >= #{requests_limit}"
         | 
| 99 104 | 
             
                    end
         | 
| 100 105 |  | 
| 101 | 
            -
                    if memory_limit = @config.dig(: | 
| 106 | 
            +
                    if memory_limit = @config.dig(:restart_if, :memory_limit).presence
         | 
| 102 107 | 
             
                      @browser.config.restart_if[:memory_limit] = memory_limit
         | 
| 103 | 
            -
                      logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled  | 
| 108 | 
            +
                      logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled restart_if.memory_limit >= #{memory_limit}"
         | 
| 104 109 | 
             
                    end
         | 
| 105 110 |  | 
| 106 111 | 
             
                    # before_request clear_cookies
         | 
| 107 | 
            -
                    if @config.dig(: | 
| 112 | 
            +
                    if @config.dig(:before_request, :clear_cookies)
         | 
| 108 113 | 
             
                      @browser.config.before_request[:clear_cookies] = true
         | 
| 109 | 
            -
                      logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled  | 
| 114 | 
            +
                      logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled before_request.clear_cookies"
         | 
| 110 115 | 
             
                    end
         | 
| 111 116 |  | 
| 112 117 | 
             
                    # before_request clear_and_set_cookies
         | 
| 113 | 
            -
                    if @config.dig(: | 
| 118 | 
            +
                    if @config.dig(:before_request, :clear_and_set_cookies)
         | 
| 114 119 | 
             
                      if cookies = @config[:cookies].presence
         | 
| 115 120 | 
             
                        @browser.config.cookies = cookies
         | 
| 116 121 | 
             
                        @browser.config.before_request[:clear_and_set_cookies] = true
         | 
| 117 | 
            -
                        logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled  | 
| 122 | 
            +
                        logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled before_request.clear_and_set_cookies"
         | 
| 118 123 | 
             
                      else
         | 
| 119 | 
            -
                        logger.error "BrowserBuilder (poltergeist_phantomjs):  | 
| 124 | 
            +
                        logger.error "BrowserBuilder (poltergeist_phantomjs): cookies should be present to enable before_request.clear_and_set_cookies, skipped"
         | 
| 120 125 | 
             
                      end
         | 
| 121 126 | 
             
                    end
         | 
| 122 127 |  | 
| 123 128 | 
             
                    # before_request change_user_agent
         | 
| 124 | 
            -
                    if @config.dig(: | 
| 129 | 
            +
                    if @config.dig(:before_request, :change_user_agent)
         | 
| 125 130 | 
             
                      if @config[:user_agent].present? && @config[:user_agent].class == Proc
         | 
| 126 131 | 
             
                        @browser.config.user_agent = @config[:user_agent]
         | 
| 127 132 | 
             
                        @browser.config.before_request[:change_user_agent] = true
         | 
| 128 | 
            -
                        logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled  | 
| 133 | 
            +
                        logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled before_request.change_user_agent"
         | 
| 129 134 | 
             
                      else
         | 
| 130 | 
            -
                        logger.error "BrowserBuilder (poltergeist_phantomjs):  | 
| 135 | 
            +
                        logger.error "BrowserBuilder (poltergeist_phantomjs): user_agent should be present and has lambda format to enable before_request.change_user_agent, skipped"
         | 
| 131 136 | 
             
                      end
         | 
| 132 137 | 
             
                    end
         | 
| 133 138 |  | 
| 134 139 | 
             
                    # before_request change_proxy
         | 
| 135 | 
            -
                    if @config.dig(: | 
| 140 | 
            +
                    if @config.dig(:before_request, :change_proxy)
         | 
| 136 141 | 
             
                      if @config[:proxy].present? && @config[:proxy].class == Proc
         | 
| 137 142 | 
             
                        @browser.config.proxy = @config[:proxy]
         | 
| 138 143 | 
             
                        @browser.config.before_request[:change_proxy] = true
         | 
| 139 | 
            -
                        logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled  | 
| 144 | 
            +
                        logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled before_request.change_proxy"
         | 
| 140 145 | 
             
                      else
         | 
| 141 | 
            -
                        logger.error "BrowserBuilder (poltergeist_phantomjs):  | 
| 146 | 
            +
                        logger.error "BrowserBuilder (poltergeist_phantomjs): proxy should be present and has lambda format to enable before_request.change_proxy, skipped"
         | 
| 142 147 | 
             
                      end
         | 
| 143 148 | 
             
                    end
         | 
| 144 149 |  | 
| 145 150 | 
             
                    # before_request delay
         | 
| 146 | 
            -
                    if delay = @config.dig(: | 
| 151 | 
            +
                    if delay = @config.dig(:before_request, :delay).presence
         | 
| 147 152 | 
             
                      @browser.config.before_request[:delay] = delay
         | 
| 148 | 
            -
                      logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled  | 
| 153 | 
            +
                      logger.debug "BrowserBuilder (poltergeist_phantomjs): enabled before_request.delay"
         | 
| 149 154 | 
             
                    end
         | 
| 150 155 |  | 
| 151 156 | 
             
                    # return Capybara session instance
         | 
| @@ -75,7 +75,7 @@ module Kimurai | |
| 75 75 | 
             
                      if user_agent = @config[:user_agent].presence
         | 
| 76 76 | 
             
                        user_agent_string = (user_agent.class == Proc ? user_agent.call : user_agent).strip
         | 
| 77 77 | 
             
                        driver_options.args << "--user-agent='#{user_agent_string}'"
         | 
| 78 | 
            -
                        logger.debug "BrowserBuilder (selenium_chrome): enabled custom  | 
| 78 | 
            +
                        logger.debug "BrowserBuilder (selenium_chrome): enabled custom user_agent"
         | 
| 79 79 | 
             
                      end
         | 
| 80 80 |  | 
| 81 81 | 
             
                      # Headless mode
         | 
| @@ -107,11 +107,9 @@ module Kimurai | |
| 107 107 | 
             
                    @browser.spider = spider
         | 
| 108 108 | 
             
                    logger.debug "BrowserBuilder (selenium_chrome): created browser instance"
         | 
| 109 109 |  | 
| 110 | 
            -
                     | 
| 111 | 
            -
             | 
| 112 | 
            -
                     | 
| 113 | 
            -
                    #   logger.debug "BrowserBuilder (selenium_chrome): enabled window_size"
         | 
| 114 | 
            -
                    # end
         | 
| 110 | 
            +
                    if @config[:extensions].present?
         | 
| 111 | 
            +
                      logger.error "BrowserBuilder (selenium_chrome): `extensions` option not supported by Selenium, skipped"
         | 
| 112 | 
            +
                    end
         | 
| 115 113 |  | 
| 116 114 | 
             
                    # Cookies
         | 
| 117 115 | 
             
                    if cookies = @config[:cookies].presence
         | 
| @@ -121,53 +119,53 @@ module Kimurai | |
| 121 119 |  | 
| 122 120 | 
             
                    # Browser instance options
         | 
| 123 121 | 
             
                    # retry_request_errors
         | 
| 124 | 
            -
                    if errors = @config | 
| 122 | 
            +
                    if errors = @config[:retry_request_errors].presence
         | 
| 125 123 | 
             
                      @browser.config.retry_request_errors = errors
         | 
| 126 | 
            -
                      logger.debug "BrowserBuilder (selenium_chrome): enabled  | 
| 124 | 
            +
                      logger.debug "BrowserBuilder (selenium_chrome): enabled retry_request_errors"
         | 
| 127 125 | 
             
                    end
         | 
| 128 126 |  | 
| 129 127 | 
             
                    # restart_if
         | 
| 130 | 
            -
                    if requests_limit = @config.dig(: | 
| 128 | 
            +
                    if requests_limit = @config.dig(:restart_if, :requests_limit).presence
         | 
| 131 129 | 
             
                      @browser.config.restart_if[:requests_limit] = requests_limit
         | 
| 132 | 
            -
                      logger.debug "BrowserBuilder (selenium_chrome): enabled  | 
| 130 | 
            +
                      logger.debug "BrowserBuilder (selenium_chrome): enabled restart_if.requests_limit >= #{requests_limit}"
         | 
| 133 131 | 
             
                    end
         | 
| 134 132 |  | 
| 135 | 
            -
                    if memory_limit = @config.dig(: | 
| 133 | 
            +
                    if memory_limit = @config.dig(:restart_if, :memory_limit).presence
         | 
| 136 134 | 
             
                      @browser.config.restart_if[:memory_limit] = memory_limit
         | 
| 137 | 
            -
                      logger.debug "BrowserBuilder (selenium_chrome): enabled  | 
| 135 | 
            +
                      logger.debug "BrowserBuilder (selenium_chrome): enabled restart_if.memory_limit >= #{memory_limit}"
         | 
| 138 136 | 
             
                    end
         | 
| 139 137 |  | 
| 140 138 | 
             
                    # before_request clear_cookies
         | 
| 141 | 
            -
                    if @config.dig(: | 
| 139 | 
            +
                    if @config.dig(:before_request, :clear_cookies)
         | 
| 142 140 | 
             
                      @browser.config.before_request[:clear_cookies] = true
         | 
| 143 | 
            -
                      logger.debug "BrowserBuilder (selenium_chrome): enabled  | 
| 141 | 
            +
                      logger.debug "BrowserBuilder (selenium_chrome): enabled before_request.clear_cookies"
         | 
| 144 142 | 
             
                    end
         | 
| 145 143 |  | 
| 146 144 | 
             
                    # before_request clear_and_set_cookies
         | 
| 147 | 
            -
                    if @config.dig(: | 
| 145 | 
            +
                    if @config.dig(:before_request, :clear_and_set_cookies)
         | 
| 148 146 | 
             
                      if cookies = @config[:cookies].presence
         | 
| 149 147 | 
             
                        @browser.config.cookies = cookies
         | 
| 150 148 | 
             
                        @browser.config.before_request[:clear_and_set_cookies] = true
         | 
| 151 | 
            -
                        logger.debug "BrowserBuilder (selenium_chrome): enabled  | 
| 149 | 
            +
                        logger.debug "BrowserBuilder (selenium_chrome): enabled before_request.clear_and_set_cookies"
         | 
| 152 150 | 
             
                      else
         | 
| 153 | 
            -
                        logger.error "BrowserBuilder (selenium_chrome):  | 
| 151 | 
            +
                        logger.error "BrowserBuilder (selenium_chrome): cookies should be present to enable before_request.clear_and_set_cookies, skipped"
         | 
| 154 152 | 
             
                      end
         | 
| 155 153 | 
             
                    end
         | 
| 156 154 |  | 
| 157 155 | 
             
                    # before_request change_user_agent
         | 
| 158 | 
            -
                    if @config.dig(: | 
| 159 | 
            -
                      logger.error "BrowserBuilder (selenium_chrome):  | 
| 156 | 
            +
                    if @config.dig(:before_request, :change_user_agent)
         | 
| 157 | 
            +
                      logger.error "BrowserBuilder (selenium_chrome): before_request.change_user_agent option not supported by Selenium, skipped"
         | 
| 160 158 | 
             
                    end
         | 
| 161 159 |  | 
| 162 160 | 
             
                    # before_request change_proxy
         | 
| 163 | 
            -
                    if @config.dig(: | 
| 164 | 
            -
                      logger.error "BrowserBuilder (selenium_chrome):  | 
| 161 | 
            +
                    if @config.dig(:before_request, :change_proxy)
         | 
| 162 | 
            +
                      logger.error "BrowserBuilder (selenium_chrome): before_request.change_proxy option not supported by Selenium, skipped"
         | 
| 165 163 | 
             
                    end
         | 
| 166 164 |  | 
| 167 165 | 
             
                    # before_request delay
         | 
| 168 | 
            -
                    if delay = @config.dig(: | 
| 166 | 
            +
                    if delay = @config.dig(:before_request, :delay).presence
         | 
| 169 167 | 
             
                      @browser.config.before_request[:delay] = delay
         | 
| 170 | 
            -
                      logger.debug "BrowserBuilder (selenium_chrome): enabled  | 
| 168 | 
            +
                      logger.debug "BrowserBuilder (selenium_chrome): enabled before_request.delay"
         | 
| 171 169 | 
             
                    end
         | 
| 172 170 |  | 
| 173 171 | 
             
                    # return Capybara session instance
         |