linkedincrawler 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/linkedincrawler.rb +18 -5
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 99433021962017d0dc2ef40342127cce06abac54
|
4
|
+
data.tar.gz: 4a0a0a5441e6a9bcdb76ce69b9fb1be8f3647619
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ec59a52e5c73cdf9f3d91bd4e7936a2957c7671ee92d5566f06ba64ae230380ecfcb00aa52093bed3c84b5909309f1b1f41d3b40d78968b66a116c8215f149ba
|
7
|
+
data.tar.gz: 1bafd3d56a9b3ad9422a25ef2118cb610aaa229eb899c63a070fa46c8c933b65052402f8191246421fbaa8983910591a3fbb38bce704d48f0c6e1457a8b7f549
|
data/lib/linkedincrawler.rb
CHANGED
@@ -19,11 +19,21 @@ class LinkedinCrawler
|
|
19
19
|
def search
|
20
20
|
# Run Google search
|
21
21
|
g = GeneralScraper.new("site:linkedin.com/pub -site:linkedin.com/pub/dir/", @search_terms, @proxy_list)
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
22
|
+
begin
|
23
|
+
urls = g.getURLs
|
24
|
+
rescue # Search again if it didn't work the first time
|
25
|
+
search
|
26
|
+
end
|
27
|
+
|
28
|
+
# Search again if it didn't run
|
29
|
+
if urls.length == 0 || urls.empty?
|
30
|
+
search
|
31
|
+
else
|
32
|
+
# Scrape each resulting LinkedIn page
|
33
|
+
JSON.parse(urls).each do |profile|
|
34
|
+
if profile.include?(".linkedin.") && !profile.include?("/search")
|
35
|
+
scrape(profile)
|
36
|
+
end
|
27
37
|
end
|
28
38
|
end
|
29
39
|
|
@@ -64,3 +74,6 @@ class LinkedinCrawler
|
|
64
74
|
JSON.pretty_generate(@output)
|
65
75
|
end
|
66
76
|
end
|
77
|
+
c = LinkedinCrawler.new("remotely piloted aircraft", 5, "/home/shidash/proxylist2", [3, 5])
|
78
|
+
c.search
|
79
|
+
File.write("test.json", c.gen_json)
|