linkedincrawler 0.0.13 → 0.0.14
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/linkedincrawler.rb +24 -10
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9cab546fac1482b262f2435257af20d645b7a2ca
|
4
|
+
data.tar.gz: 8a18d774dbaf1791b13a104b85bd96edd3400dc9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fb059768cdaf5204cfef18ca7fc8ec8670c0ed4bed5346fc984ab57a44eb6d7fa988e8e2b584920b1f500a20257196fcf45bedaafdf86197887e3ad232fe398c
|
7
|
+
data.tar.gz: e6b09adb74fec01250746172e372e82b5277badda83c8156d8c60049945d36bdafcc528e5bf69f2063c219cfc3508c771138d363a0226742ac45aa25a0eb00e7
|
data/lib/linkedincrawler.rb
CHANGED
@@ -44,25 +44,39 @@ class LinkedinCrawler
|
|
44
44
|
@retry_count < @retry_limit
|
45
45
|
end
|
46
46
|
|
47
|
+
# Add the parsed profile to output, reset the retry count, and continue
|
48
|
+
def save_and_continue(parsed_profile)
|
49
|
+
@output += parsed_profile if parsed_profile != nil && !parsed_profile.empty?
|
50
|
+
@retry_count = 0
|
51
|
+
end
|
52
|
+
|
53
|
+
# Check if profile parsed successfully
|
54
|
+
def profile_parsing_failed?(parsed_profile)
|
55
|
+
return (parsed_profile == nil) || parsed_profile.empty? || parsed_profile.first["parsing_failed"]
|
56
|
+
end
|
57
|
+
|
47
58
|
# Scrape each page
|
48
59
|
def scrape(profile_url)
|
49
60
|
# Get profile page
|
50
61
|
profile_html = @requests.get_page(profile_url)
|
51
62
|
|
52
|
-
# Parse profile
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
#
|
59
|
-
if
|
63
|
+
# Parse profile
|
64
|
+
l = LinkedinParser.new(profile_html, profile_url, {timestamp: Time.now, search_terms: @search_terms})
|
65
|
+
parsed_profile = JSON.parse(l.results_by_job)
|
66
|
+
|
67
|
+
# Check if it failed or succeeded
|
68
|
+
if profile_parsing_failed?(parsed_profile)
|
69
|
+
# Handle something wrong- restart in case it is blocked and rescrape
|
70
|
+
if @retry_count < @retry_limit
|
60
71
|
@requests.restart_browser
|
61
72
|
@retry_count += 1
|
62
73
|
scrape(profile_url)
|
63
|
-
else
|
64
|
-
|
74
|
+
else # Just save it and move on
|
75
|
+
save_and_continue(parsed_profile)
|
65
76
|
end
|
77
|
+
|
78
|
+
else # It succeeded!
|
79
|
+
save_and_continue(parsed_profile)
|
66
80
|
end
|
67
81
|
end
|
68
82
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkedincrawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.13
|
4
|
+
version: 0.0.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- M. C. McGrath
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-12-
|
11
|
+
date: 2015-12-06 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Crawls public LinkedIn profiles via Google
|
14
14
|
email: shidash@shidash.com
|