scruber 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/scruber/version.rb +1 -1
- data/scruber.gemspec +1 -1
- data/spec/core/extensions/csv_output_spec.rb +44 -0
- data/spec/core/extensions/dict.csv +4 -0
- data/spec/core/extensions/log_spec.rb +25 -0
- data/spec/core/extensions/loop_spec.rb +26 -0
- data/spec/core/extensions/parser_aliases_spec.rb +89 -0
- data/spec/core/extensions/queue_aliases_spec.rb +72 -0
- data/spec/core/extensions/seed_spec.rb +44 -0
- data/spec/fetcher.rb +27 -0
- data/spec/helpers/dictionary_reader/dict.csv +4 -0
- data/spec/helpers/dictionary_reader/dict.xml +5 -0
- data/spec/helpers/dictionary_reader/dict_records.xml +5 -0
- data/spec/helpers/dictionary_reader/dictionary_reader_csv_spec.rb +36 -0
- data/spec/helpers/dictionary_reader/dictionary_reader_xml_spec.rb +46 -0
- data/spec/helpers/fetcher_agent_adapters/abstract_adapter_spec.rb +46 -0
- data/spec/helpers/fetcher_agent_adapters/memory_spec.rb +45 -0
- data/spec/helpers/proxy_rotator/proxy_rotator_proxy_spec.rb +21 -0
- data/spec/helpers/proxy_rotator/proxy_rotator_spec.rb +118 -0
- data/spec/helpers/user_agent_rotator/user_agent_rotator_spec.rb +145 -0
- data/spec/helpers/user_agent_rotator/user_agent_rotator_user_agent_spec.rb +40 -0
- data/spec/helpers/user_agent_rotator/user_agents.xml +6 -0
- data/spec/queue_adapter/memory_spec.rb +15 -0
- data/spec/queue_spec.rb +27 -0
- data/spec/scruber_spec.rb +198 -0
- data/spec/spec_helper.rb +36 -0
- data/spec/support/queue/queue_adapter.rb +171 -0
- metadata +26 -1
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: b845332207b108efa91983b4721cf7631120ea36
         | 
| 4 | 
            +
              data.tar.gz: 4c9d931ccdbf777c9d469d7cf2697f00acfe94dc
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: b7c9bc638e7f168401bfd15de02746b691c5477563404db66810f894e7a1925821064b096d47c065209223856782e787bdc3f6272a179068aa0fdfcb6c14994d
         | 
| 7 | 
            +
              data.tar.gz: f4c73d7e94e795b32c647285f320cf9e25b9f6efa0c528aaa3138ab99c263627f9ad47088851126d2df07421e633037fa3e8f434d7303a91ad350804d9a64903
         | 
    
        data/lib/scruber/version.rb
    CHANGED
    
    
    
        data/scruber.gemspec
    CHANGED
    
    | @@ -24,7 +24,7 @@ Gem::Specification.new do |spec| | |
| 24 24 | 
             
              end
         | 
| 25 25 |  | 
| 26 26 | 
             
              spec.files         = `git ls-files -z`.split("\x0").reject do |f|
         | 
| 27 | 
            -
                f.match(%r{^(test|spec|features)/}) | 
| 27 | 
            +
                f.match(%r{^(test|features)/})
         | 
| 28 28 | 
             
              end
         | 
| 29 29 | 
             
              spec.bindir        = "exe"
         | 
| 30 30 | 
             
              spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
         | 
| @@ -0,0 +1,44 @@ | |
| 1 | 
            +
            require "spec_helper"
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            RSpec.describe Scruber::Core::Extensions::CsvOutput do
         | 
| 4 | 
            +
             | 
| 5 | 
            +
              describe "register" do
         | 
| 6 | 
            +
                it "should extend Scruber::CsvOutput with csv_file and csv_out method" do
         | 
| 7 | 
            +
                  described_class.register
         | 
| 8 | 
            +
             | 
| 9 | 
            +
                  expect(Scruber::Core::Crawler.method_defined?(:csv_file)).to be_truthy
         | 
| 10 | 
            +
                  expect(Scruber::Core::Crawler.method_defined?(:csv_out)).to be_truthy
         | 
| 11 | 
            +
                  expect(Scruber::Core::Crawler._registered_method_missings.keys.include?(/\Acsv_(\w+)_file\Z/)).to be_truthy
         | 
| 12 | 
            +
                  expect(Scruber::Core::Crawler.new(:sample).respond_to?(:csv_products_file)).to be_truthy
         | 
| 13 | 
            +
                end
         | 
| 14 | 
            +
              end
         | 
| 15 | 
            +
             | 
| 16 | 
            +
              describe "csv_file" do
         | 
| 17 | 
            +
                it "should create csv_file and write output" do
         | 
| 18 | 
            +
                  described_class.register
         | 
| 19 | 
            +
                  csv_file_name = File.join(File.expand_path(File.dirname(__FILE__)), 'test.csv')
         | 
| 20 | 
            +
             | 
| 21 | 
            +
                  Scruber.run :sample do
         | 
| 22 | 
            +
                    csv_file csv_file_name, col_sep: '|'
         | 
| 23 | 
            +
                    csv_out [1,2,3]
         | 
| 24 | 
            +
                  end
         | 
| 25 | 
            +
                  expect(File.exists?(csv_file_name)).to be_truthy
         | 
| 26 | 
            +
                  expect(File.open(csv_file_name, 'r'){|f| f.read }.strip).to eq('1|2|3')
         | 
| 27 | 
            +
                  File.delete(csv_file_name) if File.exists?(csv_file_name)
         | 
| 28 | 
            +
                end
         | 
| 29 | 
            +
              end
         | 
| 30 | 
            +
             | 
| 31 | 
            +
              describe "csv_{pattern}_file" do
         | 
| 32 | 
            +
                it "should register file and write output" do
         | 
| 33 | 
            +
                  described_class.register
         | 
| 34 | 
            +
                  csv_file_name = File.join(File.expand_path(File.dirname(__FILE__)), 'products.csv')
         | 
| 35 | 
            +
                  Scruber.run :sample do
         | 
| 36 | 
            +
                    csv_products_file csv_file_name, col_sep: '|'
         | 
| 37 | 
            +
                    csv_products_out [1,2,3]
         | 
| 38 | 
            +
                  end
         | 
| 39 | 
            +
                  expect(File.exists?(csv_file_name)).to be_truthy
         | 
| 40 | 
            +
                  expect(File.open(csv_file_name, 'r'){|f| f.read }.strip).to eq('1|2|3')
         | 
| 41 | 
            +
                  File.delete(csv_file_name) if File.exists?(csv_file_name)
         | 
| 42 | 
            +
                end
         | 
| 43 | 
            +
              end
         | 
| 44 | 
            +
            end
         | 
| @@ -0,0 +1,25 @@ | |
| 1 | 
            +
            require "spec_helper"
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            RSpec.describe Scruber::Core::Extensions::Log do
         | 
| 4 | 
            +
             | 
| 5 | 
            +
              describe "register" do
         | 
| 6 | 
            +
                it "should extend Scruber::Core with log method" do
         | 
| 7 | 
            +
                  described_class.register
         | 
| 8 | 
            +
             | 
| 9 | 
            +
                  expect(Scruber::Core::Crawler.method_defined?(:log)).to be_truthy
         | 
| 10 | 
            +
                end
         | 
| 11 | 
            +
              end
         | 
| 12 | 
            +
             | 
| 13 | 
            +
              describe "#log" do
         | 
| 14 | 
            +
                let(:log_file) { Pathname.new(File.expand_path('../log.txt', __FILE__)) }
         | 
| 15 | 
            +
                before { Scruber.logger = Logger.new(log_file)  }
         | 
| 16 | 
            +
                after{ (File.delete(log_file) rescue nil) }
         | 
| 17 | 
            +
             | 
| 18 | 
            +
                it "should write log to file" do
         | 
| 19 | 
            +
                  Scruber.run :sample, silent: true do
         | 
| 20 | 
            +
                    log "Seeding"
         | 
| 21 | 
            +
                  end
         | 
| 22 | 
            +
                  expect(File.open(log_file){|f| f.read}).to match(/Seeding/)
         | 
| 23 | 
            +
                end
         | 
| 24 | 
            +
              end
         | 
| 25 | 
            +
            end
         | 
| @@ -0,0 +1,26 @@ | |
| 1 | 
            +
            require "spec_helper"
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            RSpec.describe Scruber::Core::Extensions::Loop do
         | 
| 4 | 
            +
             | 
| 5 | 
            +
              describe "register" do
         | 
| 6 | 
            +
                it "should extend Scruber::Core with loop method" do
         | 
| 7 | 
            +
                  described_class.register
         | 
| 8 | 
            +
             | 
| 9 | 
            +
                  expect(Scruber::Core::Crawler.method_defined?(:loop)).to be_truthy
         | 
| 10 | 
            +
                end
         | 
| 11 | 
            +
             | 
| 12 | 
            +
                it "should add dictionary and read info" do
         | 
| 13 | 
            +
                  Scruber::Core::Extensions::Loop.register
         | 
| 14 | 
            +
                  $zip_codes = []
         | 
| 15 | 
            +
                  Scruber.run :sample do
         | 
| 16 | 
            +
                    add_dictionary :zip_codes_usa, File.expand_path(File.dirname(__FILE__))+'/dict.csv', :csv
         | 
| 17 | 
            +
                    seed do
         | 
| 18 | 
            +
                      loop :zip_codes_usa, state: 'NY' do |row|
         | 
| 19 | 
            +
                        $zip_codes.push row['zip']
         | 
| 20 | 
            +
                      end
         | 
| 21 | 
            +
                    end
         | 
| 22 | 
            +
                  end
         | 
| 23 | 
            +
                  expect($zip_codes).to eq(['10001', '10002'])
         | 
| 24 | 
            +
                end
         | 
| 25 | 
            +
              end
         | 
| 26 | 
            +
            end
         | 
| @@ -0,0 +1,89 @@ | |
| 1 | 
            +
            require "spec_helper"
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            RSpec.describe Scruber::Core::Extensions::ParserAliases do
         | 
| 4 | 
            +
             | 
| 5 | 
            +
              describe "register" do
         | 
| 6 | 
            +
                it "should extend Crawler with parse and parse_* methods" do
         | 
| 7 | 
            +
                  described_class.register
         | 
| 8 | 
            +
             | 
| 9 | 
            +
                  expect(Scruber::Core::Crawler.method_defined?(:parse)).to be_truthy
         | 
| 10 | 
            +
                  expect(Scruber::Core::Crawler._registered_method_missings.keys.include?(/\Aparse_(\w+)\Z/)).to be_truthy
         | 
| 11 | 
            +
                  expect(Scruber::Core::Crawler.new(:sample).respond_to?(:parse_product)).to be_truthy
         | 
| 12 | 
            +
                end
         | 
| 13 | 
            +
              end
         | 
| 14 | 
            +
             | 
| 15 | 
            +
              describe "#parse" do
         | 
| 16 | 
            +
                context "without format" do
         | 
| 17 | 
            +
                  it "should register parser" do
         | 
| 18 | 
            +
                    described_class.register
         | 
| 19 | 
            +
             | 
| 20 | 
            +
                    stub_request(:get, "http://example.com").to_return(body: 'Example Domain')
         | 
| 21 | 
            +
             | 
| 22 | 
            +
                    Scruber.run :sample do
         | 
| 23 | 
            +
                      get "http://example.com"
         | 
| 24 | 
            +
                      
         | 
| 25 | 
            +
                      parse do |page|
         | 
| 26 | 
            +
                        $page = page
         | 
| 27 | 
            +
                      end
         | 
| 28 | 
            +
                    end
         | 
| 29 | 
            +
                    expect($page.url).to eq("http://example.com")
         | 
| 30 | 
            +
                    expect($page.page_type.to_s).to eq("seed")
         | 
| 31 | 
            +
                  end
         | 
| 32 | 
            +
             | 
| 33 | 
            +
                  it "should register parser with custom page_type" do
         | 
| 34 | 
            +
                    described_class.register
         | 
| 35 | 
            +
             | 
| 36 | 
            +
                    stub_request(:post, "http://example.com").to_return(body: 'Example Domain')
         | 
| 37 | 
            +
             | 
| 38 | 
            +
                    Scruber.run :sample do
         | 
| 39 | 
            +
                      post_product "http://example.com"
         | 
| 40 | 
            +
                      
         | 
| 41 | 
            +
                      parse_product do |page|
         | 
| 42 | 
            +
                        $page = page
         | 
| 43 | 
            +
                      end
         | 
| 44 | 
            +
                    end
         | 
| 45 | 
            +
                    expect($page.url).to eq("http://example.com")
         | 
| 46 | 
            +
                    expect($page.method.to_s).to eq("post")
         | 
| 47 | 
            +
                    expect($page.page_type.to_s).to eq("product")
         | 
| 48 | 
            +
                  end
         | 
| 49 | 
            +
                end
         | 
| 50 | 
            +
             | 
| 51 | 
            +
                context "with format" do
         | 
| 52 | 
            +
                  it "should register parser" do
         | 
| 53 | 
            +
                    described_class.register
         | 
| 54 | 
            +
             | 
| 55 | 
            +
                    stub_request(:get, "http://example.com").to_return(body: '<div><span>Example Domain</span></div>')
         | 
| 56 | 
            +
             | 
| 57 | 
            +
                    Scruber.run :sample do
         | 
| 58 | 
            +
                      get "http://example.com"
         | 
| 59 | 
            +
                      
         | 
| 60 | 
            +
                      parse :html do |page,doc|
         | 
| 61 | 
            +
                        $page = page
         | 
| 62 | 
            +
                        $doc = doc
         | 
| 63 | 
            +
                      end
         | 
| 64 | 
            +
                    end
         | 
| 65 | 
            +
                    expect($doc.at('span').text).to eq("Example Domain")
         | 
| 66 | 
            +
                    expect($page.page_type.to_s).to eq("seed")
         | 
| 67 | 
            +
                    expect($page.method.to_s).to eq("get")
         | 
| 68 | 
            +
                  end
         | 
| 69 | 
            +
             | 
| 70 | 
            +
                  it "should register parser with custom page_type" do
         | 
| 71 | 
            +
                    described_class.register
         | 
| 72 | 
            +
             | 
| 73 | 
            +
                    stub_request(:post, "http://example.com").to_return(body: '<div><span>Example Post</span></div>')
         | 
| 74 | 
            +
             | 
| 75 | 
            +
                    Scruber.run :sample do
         | 
| 76 | 
            +
                      post_product "http://example.com"
         | 
| 77 | 
            +
                      
         | 
| 78 | 
            +
                      parse_product :html do |page,doc|
         | 
| 79 | 
            +
                        $page = page
         | 
| 80 | 
            +
                        $doc = doc
         | 
| 81 | 
            +
                      end
         | 
| 82 | 
            +
                    end
         | 
| 83 | 
            +
                    expect($doc.at('span').text).to eq("Example Post")
         | 
| 84 | 
            +
                    expect($page.method.to_s).to eq("post")
         | 
| 85 | 
            +
                    expect($page.page_type.to_s).to eq("product")
         | 
| 86 | 
            +
                  end
         | 
| 87 | 
            +
                end
         | 
| 88 | 
            +
              end
         | 
| 89 | 
            +
            end
         | 
| @@ -0,0 +1,72 @@ | |
| 1 | 
            +
            require "spec_helper"
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            RSpec.describe Scruber::Core::Extensions::QueueAliases do
         | 
| 4 | 
            +
             | 
| 5 | 
            +
              describe "register" do
         | 
| 6 | 
            +
                it "should extend Crawler with get,post,head and (get|post|head)_* methods" do
         | 
| 7 | 
            +
                  described_class.register
         | 
| 8 | 
            +
             | 
| 9 | 
            +
                  expect(Scruber::Core::Crawler.method_defined?(:get)).to be_truthy
         | 
| 10 | 
            +
                  expect(Scruber::Core::Crawler.method_defined?(:head)).to be_truthy
         | 
| 11 | 
            +
                  expect(Scruber::Core::Crawler.method_defined?(:post)).to be_truthy
         | 
| 12 | 
            +
                  expect(Scruber::Core::Crawler._registered_method_missings.keys.include?(/\A(get|post|head)_(\w+)\Z/)).to be_truthy
         | 
| 13 | 
            +
                  expect(Scruber::Core::Crawler.new(:sample).respond_to?(:get_product)).to be_truthy
         | 
| 14 | 
            +
                end
         | 
| 15 | 
            +
              end
         | 
| 16 | 
            +
             | 
| 17 | 
            +
              describe "#get,#post" do
         | 
| 18 | 
            +
                context "without options" do
         | 
| 19 | 
            +
                  it "should add page to queue" do
         | 
| 20 | 
            +
                    described_class.register
         | 
| 21 | 
            +
             | 
| 22 | 
            +
                    Scruber.run :sample do
         | 
| 23 | 
            +
                      get "http://example.com"
         | 
| 24 | 
            +
                      $page = queue.fetch_pending
         | 
| 25 | 
            +
                    end
         | 
| 26 | 
            +
                    expect($page.url).to eq("http://example.com")
         | 
| 27 | 
            +
                    expect($page.method.to_s).to eq("get")
         | 
| 28 | 
            +
                    expect($page.page_type.to_s).to eq("seed")
         | 
| 29 | 
            +
                  end
         | 
| 30 | 
            +
             | 
| 31 | 
            +
                  it "should add page to queue" do
         | 
| 32 | 
            +
                    described_class.register
         | 
| 33 | 
            +
             | 
| 34 | 
            +
                    Scruber.run :sample do
         | 
| 35 | 
            +
                      post_product "http://example.com"
         | 
| 36 | 
            +
                      $page = queue.fetch_pending
         | 
| 37 | 
            +
                    end
         | 
| 38 | 
            +
                    expect($page.url).to eq("http://example.com")
         | 
| 39 | 
            +
                    expect($page.method.to_s).to eq("post")
         | 
| 40 | 
            +
                    expect($page.page_type).to eq("product")
         | 
| 41 | 
            +
                  end
         | 
| 42 | 
            +
                end
         | 
| 43 | 
            +
             | 
| 44 | 
            +
                context "with options" do
         | 
| 45 | 
            +
                  it "should add page to queue" do
         | 
| 46 | 
            +
                    described_class.register
         | 
| 47 | 
            +
             | 
| 48 | 
            +
                    Scruber.run :sample do
         | 
| 49 | 
            +
                      get "http://example.com", user_agent: 'Agent 1'
         | 
| 50 | 
            +
                      $page = queue.fetch_pending
         | 
| 51 | 
            +
                    end
         | 
| 52 | 
            +
                    expect($page.url).to eq("http://example.com")
         | 
| 53 | 
            +
                    expect($page.method.to_s).to eq("get")
         | 
| 54 | 
            +
                    expect($page.page_type.to_s).to eq("seed")
         | 
| 55 | 
            +
                    expect($page.user_agent).to eq('Agent 1')
         | 
| 56 | 
            +
                  end
         | 
| 57 | 
            +
             | 
| 58 | 
            +
                  it "should add page to queue" do
         | 
| 59 | 
            +
                    described_class.register
         | 
| 60 | 
            +
             | 
| 61 | 
            +
                    Scruber.run :sample do
         | 
| 62 | 
            +
                      post_product "http://example.com", user_agent: 'Agent 1'
         | 
| 63 | 
            +
                      $page = queue.fetch_pending
         | 
| 64 | 
            +
                    end
         | 
| 65 | 
            +
                    expect($page.url).to eq("http://example.com")
         | 
| 66 | 
            +
                    expect($page.method.to_s).to eq("post")
         | 
| 67 | 
            +
                    expect($page.page_type).to eq("product")
         | 
| 68 | 
            +
                    expect($page.user_agent).to eq('Agent 1')
         | 
| 69 | 
            +
                  end
         | 
| 70 | 
            +
                end
         | 
| 71 | 
            +
              end
         | 
| 72 | 
            +
            end
         | 
| @@ -0,0 +1,44 @@ | |
| 1 | 
            +
            require "spec_helper"
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            RSpec.describe Scruber::Core::Extensions::Seed do
         | 
| 4 | 
            +
             | 
| 5 | 
            +
              describe "register" do
         | 
| 6 | 
            +
                it "should extend Scruber::Core with seed method" do
         | 
| 7 | 
            +
                  described_class.register
         | 
| 8 | 
            +
             | 
| 9 | 
            +
                  expect(Scruber::Core::Crawler.method_defined?(:seed)).to be_truthy
         | 
| 10 | 
            +
                end
         | 
| 11 | 
            +
              end
         | 
| 12 | 
            +
             | 
| 13 | 
            +
              before do
         | 
| 14 | 
            +
                stub_request(:get, "http://example.com").to_return(body: '<div><a>Main</a></div>')
         | 
| 15 | 
            +
                stub_request(:get, "http://example.com/contacts").to_return(body: '<div><a>Contacts</a></div>')
         | 
| 16 | 
            +
              end
         | 
| 17 | 
            +
              
         | 
| 18 | 
            +
              it "should execute seed block" do
         | 
| 19 | 
            +
                $queue_size = 0
         | 
| 20 | 
            +
                Scruber.run :sample do
         | 
| 21 | 
            +
                  seed do
         | 
| 22 | 
            +
                    get 'http://example.com'
         | 
| 23 | 
            +
                  end
         | 
| 24 | 
            +
                  $queue_size = queue.size
         | 
| 25 | 
            +
                end
         | 
| 26 | 
            +
                expect($queue_size).to eq(1)
         | 
| 27 | 
            +
              end
         | 
| 28 | 
            +
             | 
| 29 | 
            +
              it "should not execute seed block" do
         | 
| 30 | 
            +
                $queue_size = 0
         | 
| 31 | 
            +
                Scruber.run :sample do
         | 
| 32 | 
            +
                  seed do
         | 
| 33 | 
            +
                    get 'http://example.com'
         | 
| 34 | 
            +
                  end
         | 
| 35 | 
            +
                  seed do
         | 
| 36 | 
            +
                    get 'http://example.com/contacts'
         | 
| 37 | 
            +
                  end
         | 
| 38 | 
            +
                  $queue_size = queue.size
         | 
| 39 | 
            +
                  $page = queue.fetch_pending
         | 
| 40 | 
            +
                end
         | 
| 41 | 
            +
                expect($queue_size).to eq(1)
         | 
| 42 | 
            +
                expect($page.url).to eq("http://example.com")
         | 
| 43 | 
            +
              end
         | 
| 44 | 
            +
            end
         | 
    
        data/spec/fetcher.rb
    ADDED
    
    | @@ -0,0 +1,27 @@ | |
| 1 | 
            +
            require "spec_helper"
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            RSpec.describe Scruber::Fetcher do
         | 
| 4 | 
            +
             | 
| 5 | 
            +
              describe "add_adapter" do
         | 
| 6 | 
            +
                it "should raise error" do
         | 
| 7 | 
            +
                  expect{ described_class.add_adapter(:obj, Object) }.to raise_error(NoMethodError)
         | 
| 8 | 
            +
                end
         | 
| 9 | 
            +
             | 
| 10 | 
            +
                it "should add new adapter and return added class" do
         | 
| 11 | 
            +
                  expect(described_class.add_adapter(:typhoeus_fetcher, Scruber::FetcherAdapters::TyphoeusFetcher)).to eq(Scruber::FetcherAdapters::TyphoeusFetcher)
         | 
| 12 | 
            +
                  expect(described_class._adapters.keys).to include(:typhoeus_fetcher)
         | 
| 13 | 
            +
                end
         | 
| 14 | 
            +
              end
         | 
| 15 | 
            +
             | 
| 16 | 
            +
              describe "adapter" do
         | 
| 17 | 
            +
                it "should return default adapter" do
         | 
| 18 | 
            +
                  expect(described_class.adapter).to eq(Scruber::FetcherAdapters::TyphoeusFetcher)
         | 
| 19 | 
            +
                end
         | 
| 20 | 
            +
              end
         | 
| 21 | 
            +
             | 
| 22 | 
            +
              describe "new" do
         | 
| 23 | 
            +
                it "should return instance of default adapter" do
         | 
| 24 | 
            +
                  expect(described_class.new).to be_a(Scruber::FetcherAdapters::TyphoeusFetcher)
         | 
| 25 | 
            +
                end
         | 
| 26 | 
            +
              end
         | 
| 27 | 
            +
            end
         | 
| @@ -0,0 +1,36 @@ | |
| 1 | 
            +
            require "spec_helper"
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            RSpec.describe Scruber::Helpers::DictionaryReader::Csv do
         | 
| 4 | 
            +
             | 
| 5 | 
            +
              describe "register" do
         | 
| 6 | 
            +
                it "should correctly read first element" do
         | 
| 7 | 
            +
                  cl = described_class.new(File.expand_path(File.dirname(__FILE__))+'/dict.csv')
         | 
| 8 | 
            +
             | 
| 9 | 
            +
                  result = nil
         | 
| 10 | 
            +
                  cl.read do |obj|
         | 
| 11 | 
            +
                    result = obj
         | 
| 12 | 
            +
                  end
         | 
| 13 | 
            +
                  expect(result.sort).to eq({"r10"=>"true", "country"=>"US", "state"=>"NY", "postal_code"=>"10002"}.sort)
         | 
| 14 | 
            +
                end
         | 
| 15 | 
            +
             | 
| 16 | 
            +
                it "should read 3 elements total" do
         | 
| 17 | 
            +
                  cl = described_class.new(File.expand_path(File.dirname(__FILE__))+'/dict.csv')
         | 
| 18 | 
            +
             | 
| 19 | 
            +
                  count = 0
         | 
| 20 | 
            +
                  cl.read do |obj|
         | 
| 21 | 
            +
                    count += 1
         | 
| 22 | 
            +
                  end
         | 
| 23 | 
            +
                  expect(count).to eq(3)
         | 
| 24 | 
            +
                end
         | 
| 25 | 
            +
             | 
| 26 | 
            +
                it "should read 1 elements with state=WI" do
         | 
| 27 | 
            +
                  cl = described_class.new(File.expand_path(File.dirname(__FILE__))+'/dict.csv')
         | 
| 28 | 
            +
             | 
| 29 | 
            +
                  results = []
         | 
| 30 | 
            +
                  cl.read({state: 'WI'}) do |obj|
         | 
| 31 | 
            +
                    results.push obj.sort
         | 
| 32 | 
            +
                  end
         | 
| 33 | 
            +
                  expect(results).to eq([{"r10"=>"false", "country"=>"US", "state"=>"WI", "postal_code"=>"54914"}.sort])
         | 
| 34 | 
            +
                end
         | 
| 35 | 
            +
              end
         | 
| 36 | 
            +
            end
         | 
| @@ -0,0 +1,46 @@ | |
| 1 | 
            +
            require "spec_helper"
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            RSpec.describe Scruber::Helpers::DictionaryReader::Xml do
         | 
| 4 | 
            +
             | 
| 5 | 
            +
              describe "register" do
         | 
| 6 | 
            +
                it "should correctly read first element" do
         | 
| 7 | 
            +
                  cl = described_class.new(File.expand_path(File.dirname(__FILE__))+'/dict.xml')
         | 
| 8 | 
            +
             | 
| 9 | 
            +
                  result = nil
         | 
| 10 | 
            +
                  cl.read do |obj|
         | 
| 11 | 
            +
                    result = obj
         | 
| 12 | 
            +
                  end
         | 
| 13 | 
            +
                  expect(result.sort).to eq({"r10"=>"true", "country"=>"US", "state"=>"NY", "postal_code"=>"10002"}.sort)
         | 
| 14 | 
            +
                end
         | 
| 15 | 
            +
             | 
| 16 | 
            +
                it "should correctly read first element with different selector" do
         | 
| 17 | 
            +
                  cl = described_class.new(File.expand_path(File.dirname(__FILE__))+'/dict_records.xml')
         | 
| 18 | 
            +
             | 
| 19 | 
            +
                  result = nil
         | 
| 20 | 
            +
                  cl.read(selector: 'record') do |obj|
         | 
| 21 | 
            +
                    result = obj
         | 
| 22 | 
            +
                  end
         | 
| 23 | 
            +
                  expect(result.sort).to eq({"r10"=>"true", "country"=>"US", "state"=>"NY", "postal_code"=>"10002"}.sort)
         | 
| 24 | 
            +
                end
         | 
| 25 | 
            +
             | 
| 26 | 
            +
                it "should read 3 elements total" do
         | 
| 27 | 
            +
                  cl = described_class.new(File.expand_path(File.dirname(__FILE__))+'/dict.xml')
         | 
| 28 | 
            +
             | 
| 29 | 
            +
                  count = 0
         | 
| 30 | 
            +
                  cl.read do |obj|
         | 
| 31 | 
            +
                    count += 1
         | 
| 32 | 
            +
                  end
         | 
| 33 | 
            +
                  expect(count).to eq(3)
         | 
| 34 | 
            +
                end
         | 
| 35 | 
            +
             | 
| 36 | 
            +
                it "should read 1 elements with state=WI" do
         | 
| 37 | 
            +
                  cl = described_class.new(File.expand_path(File.dirname(__FILE__))+'/dict.xml')
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                  results = []
         | 
| 40 | 
            +
                  cl.read({state: 'WI'}) do |obj|
         | 
| 41 | 
            +
                    results.push obj.sort
         | 
| 42 | 
            +
                  end
         | 
| 43 | 
            +
                  expect(results).to eq([{"r10"=>"false", "country"=>"US", "state"=>"WI", "postal_code"=>"54914"}.sort])
         | 
| 44 | 
            +
                end
         | 
| 45 | 
            +
              end
         | 
| 46 | 
            +
            end
         |