email_crawler 0.0.6 → 0.0.7

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a260b02f463c94ff01de5957eb1ec7ea95e8e150
4
- data.tar.gz: aac88c2198ed95902045c7ee2fc874bf6d5b65f4
3
+ metadata.gz: c66a31ba29e57a09952acc7e1816093ba8aaf613
4
+ data.tar.gz: 094c99616e62b5c92d892faa02d44e322b44c2b2
5
5
  SHA512:
6
- metadata.gz: 00f1003d1e385527d0bcceae8fdddd042e06b074f4f1d3447b1f79506fcc173aa8d7ed750a5698738a5c59a28c0e8306e4f9ea4859bc318c5697f36d6ce0b3c1
7
- data.tar.gz: c53d9f2e6cede921ec98c1ce42986d628e4394124514cca1aaf0b0a2fc174d966e20abed152368545714b4e11da724e12b294f491b10c48b8d1fa5dd1d930e21
6
+ metadata.gz: 387b8ef9db7125a79ed935801b07b5cfb3175895c43fdbfcc922135b150ce6535fa3508b565245905568f5ac0241d9fa57c2f50452046185984bbd96d7c40e15
7
+ data.tar.gz: 9e51f77812283e4d02ecb8130d19030d17e19716be3cad0521f6ff2f7dfe6d271134fa5fd9c4f52e422b194fba94675292b2a15bac411943e0beb7cb6abb4560
@@ -49,6 +49,8 @@ module EmailCrawler
49
49
  @logger.error "Giving up grabbing link for '#{@url}' after #{retries} retries"
50
50
  break
51
51
  end
52
+ rescue URI::InvalidComponentError => err
53
+ @logger.warn err.inspect
52
54
  else
53
55
  retries = 0
54
56
  end
@@ -1,3 +1,3 @@
1
1
  module EmailCrawler
2
- VERSION = "0.0.6"
2
+ VERSION = "0.0.7"
3
3
  end
@@ -4,10 +4,12 @@ require File.expand_path("lib/email_crawler")
4
4
 
5
5
  module EmailCrawler
6
6
  describe Scraper do
7
- subject { Scraper.new("google.de") }
7
+ let(:max_results) { 10 }
8
+
9
+ subject { Scraper.new("google.de", max_results) }
8
10
 
9
11
  it "returns the top 10 URLs for a given search term/expression" do
10
- subject.top_ten_urls_for("berlin tours").length.must_equal 10
12
+ subject.search_result_urls_for("berlin tours").length.must_equal max_results
11
13
  end
12
14
  end
13
15
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: email_crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cristian Rasch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-03-08 00:00:00.000000000 Z
11
+ date: 2014-03-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize