email_crawler 0.0.7 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/email_crawler/version.rb +1 -1
- data/lib/email_crawler.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aadbe92dd2c7670e25d389a5008badaae8334fc0
|
4
|
+
data.tar.gz: 265adbfb7bb28397ff93af922ddd670044b2f2a6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0c2eb98801a94251434c1c009a357552813c0f3734d999f5b29c740f5419756561d97ef5b30c22472c1eaacf0cc37bfde835e47502b516d4a45232cdff8a846b
|
7
|
+
data.tar.gz: 607749a5ce4831fcbd929721f09191f3e2d4f10d93485fc41c92577b5250c2d480b1b1138493ce18519e30969a91ab0e101c7300f31a59ef0482b598589b7935
|
data/README.md
CHANGED
@@ -40,7 +40,7 @@ email-crawler --query "berlin walks" --max-results 250
|
|
40
40
|
email-crawler --query "berlin walks" --max-links 250
|
41
41
|
```
|
42
42
|
|
43
|
-
* Specify how many threads to use when searching for links and email addresses (defaults to
|
43
|
+
* Specify how many threads to use when searching for links and email addresses (defaults to 50)
|
44
44
|
|
45
45
|
```bash
|
46
46
|
email-crawler --query "berlin walks" --concurrency 25
|
data/lib/email_crawler.rb
CHANGED
@@ -12,7 +12,7 @@ require_relative "email_crawler/email_scanner"
|
|
12
12
|
|
13
13
|
module EmailCrawler
|
14
14
|
class Runner
|
15
|
-
MAX_CONCURRENCY =
|
15
|
+
MAX_CONCURRENCY = 50
|
16
16
|
|
17
17
|
attr_writer :max_results, :max_links, :max_concurrency
|
18
18
|
|
@@ -62,7 +62,7 @@ module EmailCrawler
|
|
62
62
|
|
63
63
|
links_by_url.each { |arr| queue.push(arr) }
|
64
64
|
emails_by_url = ThreadSafe::Hash.new
|
65
|
-
threads = (1..[links_by_url.length,
|
65
|
+
threads = (1..[links_by_url.length, @max_concurrency].min).map do |i|
|
66
66
|
Thread.new(i) do |i|
|
67
67
|
arr = begin
|
68
68
|
queue.pop(true)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: email_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.7
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Cristian Rasch
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-03-
|
11
|
+
date: 2014-03-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|