linkedindata 0.0.21 → 0.0.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/linkedin.rb +9 -6
- data/lib/linkedindata.rb +11 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5565d9fa31827b9a1d1abfff886dfd04510dc536
|
4
|
+
data.tar.gz: eb75c5bb0c6b838eb848ec6aed32ab4f6b446421
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 23b1e31326ad48ba8ed6d532a02511ce340f6c3585467854ecbdc00299eb3a0b873a18880d930e5cc72db3e235ac0db7d17c1a52b9692b3151387a9a80b29455
|
7
|
+
data.tar.gz: 3390991833fc205b43cedbb92964611e647ac0a0cf052ff48ddc79f4a7faa5f6f14f2216661adb6bf08f83123c0feb9ad071fee2ef96d8e6476c21049a04f5d6
|
data/lib/linkedin.rb
CHANGED
@@ -6,7 +6,7 @@ module Linkedin
|
|
6
6
|
include ProxyManager
|
7
7
|
include GetRelated
|
8
8
|
|
9
|
-
def initialize(url, curhops, proxylist, usedproxies, use_proxies_li)
|
9
|
+
def initialize(url, driver, curhops, proxylist, usedproxies, use_proxies_li)
|
10
10
|
@linkedin_url = url
|
11
11
|
@curhops = curhops
|
12
12
|
@proxylist = proxylist
|
@@ -19,20 +19,23 @@ module Linkedin
|
|
19
19
|
"timestamp",
|
20
20
|
"degree",
|
21
21
|
"pic_path")
|
22
|
-
|
23
|
-
|
22
|
+
|
23
|
+
# Get page
|
24
|
+
@driver = driver
|
25
|
+
@page = Nokogiri::HTML(getPage(url, @driver, nil, 5, use_proxies_li).page_source)
|
26
|
+
sleep(10)
|
24
27
|
end
|
25
28
|
|
26
29
|
|
27
|
-
def self.get_profile(url, curhops, proxylist, usedproxies, use_proxies_li)
|
28
|
-
Linkedin::Profile.new(url, curhops, proxylist, usedproxies, use_proxies_li)
|
30
|
+
def self.get_profile(url, driver, curhops, proxylist, usedproxies, use_proxies_li)
|
31
|
+
Linkedin::Profile.new(url, driver, curhops, proxylist, usedproxies, use_proxies_li)
|
29
32
|
rescue => e
|
30
33
|
puts e
|
31
34
|
end
|
32
35
|
|
33
36
|
# Gets "people also viewed list" from profile sidebar
|
34
37
|
def related_people
|
35
|
-
@related_people ||= getList(
|
38
|
+
@related_people ||= getList(@page)
|
36
39
|
end
|
37
40
|
|
38
41
|
# Similar to linkedin_url
|
data/lib/linkedindata.rb
CHANGED
@@ -28,17 +28,26 @@ class LinkedinData
|
|
28
28
|
# Searches for profiles on Google
|
29
29
|
def search(search_terms)
|
30
30
|
g = GeneralScraper.new("site:linkedin.com/pub", search_terms, @proxy_list_path, @use_proxy)
|
31
|
+
gen_driver
|
32
|
+
|
31
33
|
JSON.parse(g.getURLs).each do |profile|
|
32
34
|
scrape(profile, 0)
|
33
35
|
end
|
34
36
|
end
|
35
37
|
|
38
|
+
# Generate driver for searches
|
39
|
+
def gen_driver
|
40
|
+
profile = Selenium::WebDriver::Firefox::Profile.new
|
41
|
+
profile['intl.accept_languages'] = 'en'
|
42
|
+
@driver = Selenium::WebDriver.for :firefox, profile: profile
|
43
|
+
end
|
44
|
+
|
36
45
|
# Scrapes and parses individual profile
|
37
46
|
def scrape(url, curhops)
|
38
47
|
# Download profile and rescue on error
|
39
48
|
begin
|
40
49
|
url.gsub!("https", "http")
|
41
|
-
profile = Linkedin::Profile.get_profile(url, curhops, @proxylist, @usedproxies, @use_proxy_li)
|
50
|
+
profile = Linkedin::Profile.get_profile(url, @driver, curhops, @proxylist, @usedproxies, @use_proxy_li)
|
42
51
|
|
43
52
|
# Parse profile if returned and add to output
|
44
53
|
@output.concat(parseResume(profile)) if profile
|
@@ -73,6 +82,7 @@ class LinkedinData
|
|
73
82
|
def prepareResults
|
74
83
|
getRelatedProfiles
|
75
84
|
deleteDuplicatePics
|
85
|
+
@driver.close
|
76
86
|
return JSON.pretty_generate(relScore(showAllKeys(@output)))
|
77
87
|
end
|
78
88
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkedindata
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.21
|
4
|
+
version: 0.0.22
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- M. C. McGrath
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-09-10 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Scrapes all LinkedIn profiles including terms you specify.
|
14
14
|
email: shidash@shidash.com
|