linkedincrawler 0.0.14 → 0.0.15
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/linkedincrawler.rb +9 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d133e662b783dcf61594013b375ddec17cb22102
|
4
|
+
data.tar.gz: 4c477ee0f284c4bc13171d25c747cf9f2b42c090
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 47a121927417ce0d195d5be681d533f35c88613710f104d540e032bc9539ebd25840163eda54c75a63caeb27924f1fc65fd767042e5e18664d4c0fd13809e01e
|
7
|
+
data.tar.gz: 87f53246bb4d98c67683525ea32095729a453502e0426f87268edd949809fea93ea27da0f288598a1698202b433e7a75f660c64820a2bb66b2605a56ad8d563c
|
data/lib/linkedincrawler.rb
CHANGED
@@ -6,7 +6,7 @@ require 'selenium-webdriver'
|
|
6
6
|
require 'pry'
|
7
7
|
|
8
8
|
class LinkedinCrawler
|
9
|
-
def initialize(search_terms, retry_limit, requests, requests_google, solver_details)
|
9
|
+
def initialize(search_terms, retry_limit, requests, requests_google, requests_google2, solver_details)
|
10
10
|
@search_terms = search_terms
|
11
11
|
@output = Array.new
|
12
12
|
|
@@ -15,6 +15,7 @@ class LinkedinCrawler
|
|
15
15
|
|
16
16
|
@requests = requests
|
17
17
|
@requests_google = requests_google
|
18
|
+
@requests_google2 = requests_google2
|
18
19
|
@solver_details = solver_details
|
19
20
|
end
|
20
21
|
|
@@ -23,9 +24,13 @@ class LinkedinCrawler
|
|
23
24
|
# Run Google search
|
24
25
|
g = GeneralScraper.new("site:linkedin.com/pub -site:linkedin.com/pub/dir/", @search_terms, @requests_google, @solver_details)
|
25
26
|
urls = g.getURLs
|
26
|
-
|
27
|
+
|
28
|
+
# Look for new LI urls
|
29
|
+
g2 = GeneralScraper.new("site:linkedin.com/in", @search_terms, @requests_google2, @solver_details)
|
30
|
+
urls = JSON.parse(urls) + JSON.parse(g2.getURLs)
|
31
|
+
|
27
32
|
# Scrape each resulting LinkedIn page
|
28
|
-
|
33
|
+
urls.each do |profile|
|
29
34
|
if check_right_page(profile)
|
30
35
|
scrape(profile)
|
31
36
|
end
|
@@ -85,3 +90,4 @@ class LinkedinCrawler
|
|
85
90
|
JSON.pretty_generate(@output)
|
86
91
|
end
|
87
92
|
end
|
93
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkedincrawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.14
|
4
|
+
version: 0.0.15
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- M. C. McGrath
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-03-16 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Crawls public LinkedIn profiles via Google
|
14
14
|
email: shidash@shidash.com
|