linkedincrawler 0.0.10 → 0.0.11

Sign up to get free protection for your applications and access to all of the features.
Files changed (3) — hide / show
  1. checksums.yaml +4 -4
  2. data/lib/linkedincrawler.rb +16 -20
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ad1998e603add9f4dc968ade37d9eec959f0135f
4
- data.tar.gz: fbf79531d15ddd6584ca4a4e34b10d1ce33ce6d1
3
+ metadata.gz: 64048a6ad5a246281c7546bd04105db4a4d2d8de
4
+ data.tar.gz: e933a51855e800a285b90e39e8129a146aa17e5e
5
5
  SHA512:
6
- metadata.gz: cdaf3cdd5ced3d72ef91cbc151493616e2f1ae7258ff5e2018a35363128f1778f6c4350394b1e53518c7f9748af861c2eb259f882d5b7ab2c4b11c6b53390c01
7
- data.tar.gz: 8eab640479ba0f4294e5eaf169b9fabe26f64f11aec04450e1ea2b0245fa3d9e0c51d62d0a586ec39b65870b5a7294df7f853ee39785f656c89f5f0458625d44
6
+ metadata.gz: 4b533610302c151219fe9d5f3fb1c1364b58bacb696c9b2d837c7a826bd735482de5328a64cfe8c97cf6888375317726dd7b70f70229c9148bde0cbc825e4d2f
7
+ data.tar.gz: f4c8663a5622ca92047039e14ddf1de69dab44e7541481ea37069316aa9f4fd78048ec84da2d9ac48a58ce5344c8cdfe03ce6a97ea13434ae5e60315566501ea
@@ -6,44 +6,38 @@ require 'selenium-webdriver'
6
6
  require 'pry'
7
7
 
8
8
  class LinkedinCrawler
9
- def initialize(search_terms, retry_limit, proxy_list, request_time)
9
+ def initialize(search_terms, retry_limit, requests, requests_google)
10
10
  @search_terms = search_terms
11
11
  @output = Array.new
12
+
12
13
  @retry_limit = retry_limit
13
14
  @retry_count = 0
14
- @proxy_list = proxy_list
15
- @requests = RequestManager.new(@proxy_list, request_time, 5)
15
+
16
+ @requests = requests
17
+ @requests_google = requests_google
16
18
  end
17
19
 
18
20
  # Run search terms and get results
19
21
  def search
20
22
  # Run Google search
21
- g = GeneralScraper.new("site:linkedin.com/pub -site:linkedin.com/pub/dir/", @search_terms, nil)
22
- # begin
23
- urls = g.getURLs
24
- # rescue # Search again if it didn't work the first time
25
- # search
26
- #end
23
+ g = GeneralScraper.new("site:linkedin.com/pub -site:linkedin.com/pub/dir/", @search_terms, @requests_google)
24
+ urls = g.getURLs
27
25
 
28
- # Search again if it didn't run
29
- # if urls.length == 0 || urls.empty?
30
- # search
31
- # else
32
- # Scrape each resulting LinkedIn page
33
- JSON.parse(urls).each do |profile|
34
- if profile.include?(".linkedin.") && !profile.include?("/search")
35
- scrape(profile)
36
- end
26
+ # Scrape each resulting LinkedIn page
27
+ JSON.parse(urls).each do |profile|
28
+ if check_right_page(profile)
29
+ scrape(profile)
37
30
  end
38
- # end
31
+ end
39
32
 
40
- # Close all the browsers
33
+ # Close all the browsers when done
41
34
  @requests.close_all_browsers
42
35
  end
43
36
 
44
37
  # Check that it is actually a LinkedIn profile page
45
38
  def check_right_page(profile_url)
46
39
  return !profile_url.include?("www.google") &&
40
+ profile_url.include?(".linkedin.") &&
47
41
  !profile_url.include?("linkedin.com/pub/dir") &&
48
42
  !profile_url.include?("/search") &&
49
43
  @retry_count < @retry_limit
@@ -65,6 +59,8 @@ class LinkedinCrawler
65
59
  @requests.restart_browser
66
60
  @retry_count += 1
67
61
  scrape(profile_url)
62
+ else
63
+ @retry_count = 0
68
64
  end
69
65
  end
70
66
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkedincrawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.10
4
+ version: 0.0.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-03 00:00:00.000000000 Z
11
+ date: 2015-11-23 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Crawls public LinkedIn profiles via Google
14
14
  email: shidash@shidash.com