linkedincrawler 0.0.13 → 0.0.14

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/linkedincrawler.rb +24 -10
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b0d3e42277b83b79704ceaa5d7b07a0a1996b5d2
4
- data.tar.gz: c2189d0a40e02f8f0e6e085720cc60335cdb2cef
3
+ metadata.gz: 9cab546fac1482b262f2435257af20d645b7a2ca
4
+ data.tar.gz: 8a18d774dbaf1791b13a104b85bd96edd3400dc9
5
5
  SHA512:
6
- metadata.gz: db465540122c5ec210afacec2c3445827542596c2db78271a3eef97e18dbd9795e6198c8cf7d0632838c77cbf197fbf127709f3cbd3902aacd4f2cdb3c04a616
7
- data.tar.gz: f5e34bd6e608e8c675029c353c7379ac2b5af1433ac9047f1cf8708c0a0adfe72a145ec99333ab71d01f3150a95ca15667f7f69052d1147c02b8a6ca6a670ccc
6
+ metadata.gz: fb059768cdaf5204cfef18ca7fc8ec8670c0ed4bed5346fc984ab57a44eb6d7fa988e8e2b584920b1f500a20257196fcf45bedaafdf86197887e3ad232fe398c
7
+ data.tar.gz: e6b09adb74fec01250746172e372e82b5277badda83c8156d8c60049945d36bdafcc528e5bf69f2063c219cfc3508c771138d363a0226742ac45aa25a0eb00e7
@@ -44,25 +44,39 @@ class LinkedinCrawler
44
44
  @retry_count < @retry_limit
45
45
  end
46
46
 
47
+ # Add the parsed profile to output, reset the retry count, and continue
48
+ def save_and_continue(parsed_profile)
49
+ @output += parsed_profile if parsed_profile != nil && !parsed_profile.empty?
50
+ @retry_count = 0
51
+ end
52
+
53
+ # Check if profile parsed successfully
54
+ def profile_parsing_failed?(parsed_profile)
55
+ return (parsed_profile == nil) || parsed_profile.empty? || parsed_profile.first["parsing_failed"]
56
+ end
57
+
47
58
  # Scrape each page
48
59
  def scrape(profile_url)
49
60
  # Get profile page
50
61
  profile_html = @requests.get_page(profile_url)
51
62
 
52
- # Parse profile and add to output
53
- begin
54
- l = LinkedinParser.new(profile_html, profile_url, {timestamp: Time.now, search_terms: @search_terms})
55
- @output += JSON.parse(l.results_by_job)
56
- @retry_count = 0
57
- rescue
58
- # If proxy doesn't work, try another a few times
59
- if check_right_page(profile_url)
63
+ # Parse profile
64
+ l = LinkedinParser.new(profile_html, profile_url, {timestamp: Time.now, search_terms: @search_terms})
65
+ parsed_profile = JSON.parse(l.results_by_job)
66
+
67
+ # Check if it failed or succeeded
68
+ if profile_parsing_failed?(parsed_profile)
69
+ # Handle something wrong- restart in case it is blocked and rescrape
70
+ if @retry_count < @retry_limit
60
71
  @requests.restart_browser
61
72
  @retry_count += 1
62
73
  scrape(profile_url)
63
- else
64
- @retry_count = 0
74
+ else # Just save it and move on
75
+ save_and_continue(parsed_profile)
65
76
  end
77
+
78
+ else # It succeeded!
79
+ save_and_continue(parsed_profile)
66
80
  end
67
81
  end
68
82
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkedincrawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.13
4
+ version: 0.0.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-05 00:00:00.000000000 Z
11
+ date: 2015-12-06 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Crawls public LinkedIn profiles via Google
14
14
  email: shidash@shidash.com