linkedincrawler 0.0.10 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3):
  1. checksums.yaml +4 -4
  2. data/lib/linkedincrawler.rb +16 -20
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ad1998e603add9f4dc968ade37d9eec959f0135f
4
- data.tar.gz: fbf79531d15ddd6584ca4a4e34b10d1ce33ce6d1
3
+ metadata.gz: 64048a6ad5a246281c7546bd04105db4a4d2d8de
4
+ data.tar.gz: e933a51855e800a285b90e39e8129a146aa17e5e
5
5
  SHA512:
6
- metadata.gz: cdaf3cdd5ced3d72ef91cbc151493616e2f1ae7258ff5e2018a35363128f1778f6c4350394b1e53518c7f9748af861c2eb259f882d5b7ab2c4b11c6b53390c01
7
- data.tar.gz: 8eab640479ba0f4294e5eaf169b9fabe26f64f11aec04450e1ea2b0245fa3d9e0c51d62d0a586ec39b65870b5a7294df7f853ee39785f656c89f5f0458625d44
6
+ metadata.gz: 4b533610302c151219fe9d5f3fb1c1364b58bacb696c9b2d837c7a826bd735482de5328a64cfe8c97cf6888375317726dd7b70f70229c9148bde0cbc825e4d2f
7
+ data.tar.gz: f4c8663a5622ca92047039e14ddf1de69dab44e7541481ea37069316aa9f4fd78048ec84da2d9ac48a58ce5344c8cdfe03ce6a97ea13434ae5e60315566501ea
@@ -6,44 +6,38 @@ require 'selenium-webdriver'
6
6
  require 'pry'
7
7
 
8
8
  class LinkedinCrawler
9
- def initialize(search_terms, retry_limit, proxy_list, request_time)
9
+ def initialize(search_terms, retry_limit, requests, requests_google)
10
10
  @search_terms = search_terms
11
11
  @output = Array.new
12
+
12
13
  @retry_limit = retry_limit
13
14
  @retry_count = 0
14
- @proxy_list = proxy_list
15
- @requests = RequestManager.new(@proxy_list, request_time, 5)
15
+
16
+ @requests = requests
17
+ @requests_google = requests_google
16
18
  end
17
19
 
18
20
  # Run search terms and get results
19
21
  def search
20
22
  # Run Google search
21
- g = GeneralScraper.new("site:linkedin.com/pub -site:linkedin.com/pub/dir/", @search_terms, nil)
22
- # begin
23
- urls = g.getURLs
24
- # rescue # Search again if it didn't work the first time
25
- # search
26
- #end
23
+ g = GeneralScraper.new("site:linkedin.com/pub -site:linkedin.com/pub/dir/", @search_terms, @requests_google)
24
+ urls = g.getURLs
27
25
 
28
- # Search again if it didn't run
29
- # if urls.length == 0 || urls.empty?
30
- # search
31
- # else
32
- # Scrape each resulting LinkedIn page
33
- JSON.parse(urls).each do |profile|
34
- if profile.include?(".linkedin.") && !profile.include?("/search")
35
- scrape(profile)
36
- end
26
+ # Scrape each resulting LinkedIn page
27
+ JSON.parse(urls).each do |profile|
28
+ if check_right_page(profile)
29
+ scrape(profile)
37
30
  end
38
- # end
31
+ end
39
32
 
40
- # Close all the browsers
33
+ # Close all the browsers when done
41
34
  @requests.close_all_browsers
42
35
  end
43
36
 
44
37
  # Check that it is actually a LinkedIn profile page
45
38
  def check_right_page(profile_url)
46
39
  return !profile_url.include?("www.google") &&
40
+ profile_url.include?(".linkedin.") &&
47
41
  !profile_url.include?("linkedin.com/pub/dir") &&
48
42
  !profile_url.include?("/search") &&
49
43
  @retry_count < @retry_limit
@@ -65,6 +59,8 @@ class LinkedinCrawler
65
59
  @requests.restart_browser
66
60
  @retry_count += 1
67
61
  scrape(profile_url)
62
+ else
63
+ @retry_count = 0
68
64
  end
69
65
  end
70
66
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkedincrawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.10
4
+ version: 0.0.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-03 00:00:00.000000000 Z
11
+ date: 2015-11-23 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Crawls public LinkedIn profiles via Google
14
14
  email: shidash@shidash.com