linkedinparser 0.0.7 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/causes.rb +50 -0
- data/lib/certifications.rb +60 -0
- data/lib/education.rb +58 -0
- data/lib/groups.rb +37 -0
- data/lib/languages.rb +37 -0
- data/lib/linkedinparser.rb +18 -27
- data/lib/personal_info.rb +54 -4
- data/lib/picture.rb +2 -1
- data/lib/related_people.rb +49 -0
- data/lib/utilities.rb +9 -0
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1bc27e7c376c9641f15b6dd2454318d2082fed25
|
4
|
+
data.tar.gz: 77872a929f8844da3afc4c8bea62a1a8ddec729f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7c64448f45ef78f600251bdf43f56e4cda01d35528f7d0877d46b78cf1061a17407742d3d99d0732456d43f50505efcd8d9473873ac290cec04a7c8e5127a978
|
7
|
+
data.tar.gz: cb5649581361d23fa8357d04c336f45801eb8259b067c65ffc927d45ad8d6b2386dabb65751093178fff9fc650611690b497fd07de22d527fe3c9b6a24c1f966
|
data/lib/causes.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
load 'utilities.rb'
|
3
|
+
|
4
|
+
class Causes
|
5
|
+
include Utilities
|
6
|
+
def initialize(html)
|
7
|
+
@html = html
|
8
|
+
parse_causes
|
9
|
+
end
|
10
|
+
|
11
|
+
# Get list of causes
|
12
|
+
def get_causes
|
13
|
+
return @cause_hash
|
14
|
+
end
|
15
|
+
|
16
|
+
def parse_causes
|
17
|
+
volunteering = @html.css("#volunteering")
|
18
|
+
if !is_empty?(volunteering)
|
19
|
+
@cause_hash = Hash.new
|
20
|
+
@cause_hash[:volunteer_opportunities] = volunteer_opportunities(volunteering)
|
21
|
+
@cause_hash[:supported_causes] = supported_causes(volunteering)
|
22
|
+
@cause_hash[:supported_organizations] = supported_organizations(volunteering)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
# Get opportunities they are looking for
|
27
|
+
def volunteer_opportunities(volunteering)
|
28
|
+
section = volunteering.css(".opportunities").css("li")
|
29
|
+
return make_list(section) if !is_empty?(section)
|
30
|
+
end
|
31
|
+
|
32
|
+
# Get causes they support
|
33
|
+
def supported_causes(volunteering)
|
34
|
+
section = get_right_section("Causes", volunteering.css(".extra-section"))
|
35
|
+
return make_list(section.css("li")) if !is_empty?(section)
|
36
|
+
end
|
37
|
+
|
38
|
+
# Get organizations they support
|
39
|
+
def supported_organizations(volunteering)
|
40
|
+
section = get_right_section("Organizations", volunteering.css(".extra-section"))
|
41
|
+
return make_list(section.css("li")) if !is_empty?(section)
|
42
|
+
end
|
43
|
+
|
44
|
+
def get_right_section(look_for, sections)
|
45
|
+
sections.each do |section|
|
46
|
+
return section if section.css("h4").text.include?(look_for)
|
47
|
+
end
|
48
|
+
return nil
|
49
|
+
end
|
50
|
+
end
|
data/lib/certifications.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
load 'utilities.rb'
|
3
|
+
|
4
|
+
class Certifications
|
5
|
+
include Utilities
|
6
|
+
def initialize(html)
|
7
|
+
@html = html
|
8
|
+
parse_certifications
|
9
|
+
end
|
10
|
+
|
11
|
+
# Get list of certifications
|
12
|
+
def get_certifications
|
13
|
+
return @certificate_list
|
14
|
+
end
|
15
|
+
|
16
|
+
def parse_certifications
|
17
|
+
certifications = @html.css(".certifications").css("li")
|
18
|
+
|
19
|
+
@certificate_list = Array.new
|
20
|
+
certifications.each do |certificate|
|
21
|
+
@certificate_list.push({
|
22
|
+
certificate_name: certificate_name(certificate),
|
23
|
+
certificate_authority: certificate_authority(certificate),
|
24
|
+
license_num: license_num(certificate),
|
25
|
+
certificate_start: certificate_start(certificate),
|
26
|
+
certificate_end: certificate_end(certificate)
|
27
|
+
})
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
# Name of certification
|
32
|
+
def certificate_name(certificate)
|
33
|
+
cert_name = certificate.css("h4")
|
34
|
+
return cert_name.text if !is_empty?(cert_name)
|
35
|
+
end
|
36
|
+
|
37
|
+
# Issuing authority
|
38
|
+
def certificate_authority(certificate)
|
39
|
+
cert_auth = certificate.css("h5")
|
40
|
+
return cert_auth.text.split(", ")[0] if !is_empty?(cert_auth)
|
41
|
+
end
|
42
|
+
|
43
|
+
# License Number
|
44
|
+
def license_num(certificate)
|
45
|
+
cert_num = certificate.css("h5")
|
46
|
+
return cert_num.text.split(", ")[1] if !is_empty?(cert_num)
|
47
|
+
end
|
48
|
+
|
49
|
+
# Start date for certificate
|
50
|
+
def certificate_start(certificate)
|
51
|
+
cert_start = certificate.css(".date-range").css("time")
|
52
|
+
return cert_start[0].text if !is_empty?(cert_start[0])
|
53
|
+
end
|
54
|
+
|
55
|
+
# Expiry date for certificate
|
56
|
+
def certificate_end(certificate)
|
57
|
+
cert_end = certificate.css(".date-range").css("time")
|
58
|
+
return cert_end[1].text if !is_empty?(cert_end[1])
|
59
|
+
end
|
60
|
+
end
|
data/lib/education.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
load 'utilities.rb'
|
3
|
+
|
4
|
+
class Education
|
5
|
+
include Utilities
|
6
|
+
def initialize(html)
|
7
|
+
@html = html
|
8
|
+
parse_education
|
9
|
+
end
|
10
|
+
|
11
|
+
# Get list of degrees
|
12
|
+
def get_education
|
13
|
+
return @degree_list
|
14
|
+
end
|
15
|
+
|
16
|
+
def parse_education
|
17
|
+
schools = @html.css(".schools").css(".school")
|
18
|
+
|
19
|
+
@degree_list = Array.new
|
20
|
+
schools.each do |school|
|
21
|
+
@degree_list.push({
|
22
|
+
school_name: school_name(school),
|
23
|
+
education_desc: education_desc(school),
|
24
|
+
education_degree: education_degree(school),
|
25
|
+
degree_start_date: degree_start_date(school),
|
26
|
+
degree_end_date: degree_end_date(school)
|
27
|
+
})
|
28
|
+
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
# Get the name of the school
|
33
|
+
def school_name(school)
|
34
|
+
return school.css("h4").text
|
35
|
+
end
|
36
|
+
|
37
|
+
# Get the description
|
38
|
+
def education_desc(school)
|
39
|
+
return school.css(".description").text
|
40
|
+
end
|
41
|
+
|
42
|
+
# Get the degree info
|
43
|
+
def education_degree(school)
|
44
|
+
return school.css("h5").text
|
45
|
+
end
|
46
|
+
|
47
|
+
# Get the start date for the degree
|
48
|
+
def degree_start_date(school)
|
49
|
+
start_date = school.css(".date-range").css("time")
|
50
|
+
return start_date[0].text if !is_empty?(start_date[0])
|
51
|
+
end
|
52
|
+
|
53
|
+
# Get the end date for the degree
|
54
|
+
def degree_end_date(school)
|
55
|
+
end_date = school.css(".date-range").css("time")
|
56
|
+
return end_date[1].text if !is_empty?(end_date[1])
|
57
|
+
end
|
58
|
+
end
|
data/lib/groups.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
load 'utilities.rb'
|
3
|
+
|
4
|
+
class Groups
|
5
|
+
include Utilities
|
6
|
+
def initialize(html)
|
7
|
+
@html = html
|
8
|
+
parse_groups
|
9
|
+
end
|
10
|
+
|
11
|
+
# Get list of groups
|
12
|
+
def get_groups
|
13
|
+
return @group_list
|
14
|
+
end
|
15
|
+
|
16
|
+
def parse_groups
|
17
|
+
groups = @html.css('#groups').css('.group').css('.item-title')
|
18
|
+
|
19
|
+
@group_list = Array.new
|
20
|
+
groups.each do |group|
|
21
|
+
@group_list.push({
|
22
|
+
group_name: group_name(group),
|
23
|
+
group_link: group_link(group)
|
24
|
+
})
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# Get group name
|
29
|
+
def group_name(group)
|
30
|
+
return group.text
|
31
|
+
end
|
32
|
+
|
33
|
+
# Get group link
|
34
|
+
def group_link(group)
|
35
|
+
return group.css("a")[0]["href"]
|
36
|
+
end
|
37
|
+
end
|
data/lib/languages.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
load 'utilities.rb'
|
3
|
+
|
4
|
+
class Languages
|
5
|
+
include Utilities
|
6
|
+
def initialize(html)
|
7
|
+
@html = html
|
8
|
+
parse_languages
|
9
|
+
end
|
10
|
+
|
11
|
+
# Get list of languages
|
12
|
+
def get_languages
|
13
|
+
return @language_list
|
14
|
+
end
|
15
|
+
|
16
|
+
def parse_languages
|
17
|
+
languages = @html.css("#languages").css("li")
|
18
|
+
|
19
|
+
@language_list = Array.new
|
20
|
+
languages.each do |l|
|
21
|
+
@language_list.push({
|
22
|
+
language: language(l),
|
23
|
+
proficiency: proficiency(l)
|
24
|
+
})
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# Language name
|
29
|
+
def language(language_name)
|
30
|
+
language_name.css("h4").text
|
31
|
+
end
|
32
|
+
|
33
|
+
# Get proficiency
|
34
|
+
def proficiency(language_name)
|
35
|
+
language_name.css(".proficiency").text
|
36
|
+
end
|
37
|
+
end
|
data/lib/linkedinparser.rb
CHANGED
@@ -14,12 +14,25 @@ class LinkedinParser
|
|
14
14
|
|
15
15
|
def parse
|
16
16
|
# Get details about the person
|
17
|
-
|
18
|
-
|
17
|
+
begin
|
18
|
+
p = PersonalInfo.new(@profile, @profile_url)
|
19
|
+
@personal_info = p.get_personal_info
|
20
|
+
@personal_info.merge!({parsing_failed: false})
|
21
|
+
rescue # Handle failed parsing
|
22
|
+
@personal_info = {
|
23
|
+
profile_url: @profile_url,
|
24
|
+
full_html: @profile,
|
25
|
+
parsing_failed: true
|
26
|
+
}
|
27
|
+
end
|
19
28
|
|
20
29
|
# Get job info
|
21
|
-
|
22
|
-
|
30
|
+
begin
|
31
|
+
j = Jobs.new(@profile)
|
32
|
+
@job_info = j.get_jobs
|
33
|
+
rescue # Handle failed job parsing
|
34
|
+
@job_info = {job_parsing_failed: true}
|
35
|
+
end
|
23
36
|
end
|
24
37
|
|
25
38
|
# Return results with new item for each job
|
@@ -38,27 +51,5 @@ class LinkedinParser
|
|
38
51
|
output[:jobs] = @job_info
|
39
52
|
output.merge!(@crawler_fields)
|
40
53
|
JSON.pretty_generate(output)
|
41
|
-
end
|
42
|
-
|
43
|
-
# TODO: Fields to add to parser-
|
44
|
-
# Organizations
|
45
|
-
# Education
|
46
|
-
# Projects
|
47
|
-
# Related people
|
48
|
-
# Languages
|
49
|
-
# Certifications
|
50
|
-
# Groups
|
54
|
+
end
|
51
55
|
end
|
52
|
-
|
53
|
-
# Test:
|
54
|
-
#profile = Selenium::WebDriver::Firefox::Profile.new
|
55
|
-
#profile['intl.accept_languages'] = 'en'
|
56
|
-
#profile["javascript.enabled"] = false
|
57
|
-
#driver = Selenium::WebDriver.for :firefox, profile: profile
|
58
|
-
#url = "https://www.linkedin.com/pub/christopher-mcclellan/5b/a09/ba9"
|
59
|
-
#url = "https://www.linkedin.com/pub/maryann-holmes/2b/770/3b2"
|
60
|
-
|
61
|
-
#url = "https://www.linkedin.com/pub/kenneth-chamberlin/32/8bb/b22"
|
62
|
-
#driver.navigate.to url
|
63
|
-
#l = LinkedinParser.new(driver.page_source, url, {timestamp: Time.now})
|
64
|
-
#puts l.results_by_job
|
data/lib/personal_info.rb
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
load 'picture.rb'
|
2
2
|
load 'utilities.rb'
|
3
|
+
load 'education.rb'
|
4
|
+
load 'groups.rb'
|
5
|
+
load 'languages.rb'
|
6
|
+
load 'related_people.rb'
|
7
|
+
load 'certifications.rb'
|
8
|
+
load 'causes.rb'
|
3
9
|
|
4
10
|
class PersonalInfo
|
5
11
|
include Utilities
|
@@ -23,6 +29,12 @@ class PersonalInfo
|
|
23
29
|
summary: summary,
|
24
30
|
current_title: title,
|
25
31
|
interests: interests,
|
32
|
+
education: education,
|
33
|
+
groups: groups,
|
34
|
+
causes: causes,
|
35
|
+
certifications: certifications,
|
36
|
+
languages: languages,
|
37
|
+
related_people: related_people,
|
26
38
|
number_of_connections: number_of_connections,
|
27
39
|
picture: p.picture,
|
28
40
|
pic_path: p.pic_path,
|
@@ -36,7 +48,9 @@ class PersonalInfo
|
|
36
48
|
|
37
49
|
# Get the full name of the person
|
38
50
|
def full_name
|
39
|
-
@html.css(".profile-overview").css('h1')
|
51
|
+
name = @html.css(".profile-overview").css('h1')
|
52
|
+
name = @html.css(".profile-overview-content").css('h1') if is_empty?(name)
|
53
|
+
return name.text
|
40
54
|
end
|
41
55
|
|
42
56
|
# Get first part of name
|
@@ -49,6 +63,42 @@ class PersonalInfo
|
|
49
63
|
full_name.split(" ", 2).last.strip
|
50
64
|
end
|
51
65
|
|
66
|
+
# Get education info
|
67
|
+
def education
|
68
|
+
e = Education.new(@html)
|
69
|
+
return e.get_education
|
70
|
+
end
|
71
|
+
|
72
|
+
# Get a list of groups they are in
|
73
|
+
def groups
|
74
|
+
g = Groups.new(@html)
|
75
|
+
return g.get_groups
|
76
|
+
end
|
77
|
+
|
78
|
+
# Get causes they care about
|
79
|
+
def causes
|
80
|
+
c = Causes.new(@html)
|
81
|
+
return c.get_causes
|
82
|
+
end
|
83
|
+
|
84
|
+
# Get the person's certifications
|
85
|
+
def certifications
|
86
|
+
c = Certifications.new(@html)
|
87
|
+
return c.get_certifications
|
88
|
+
end
|
89
|
+
|
90
|
+
# Get a list of languages they speak
|
91
|
+
def languages
|
92
|
+
l = Languages.new(@html)
|
93
|
+
return l.get_languages
|
94
|
+
end
|
95
|
+
|
96
|
+
# Get the people also viewed list from the side
|
97
|
+
def related_people
|
98
|
+
r = RelatedPeople.new(@html)
|
99
|
+
return r.get_related
|
100
|
+
end
|
101
|
+
|
52
102
|
# Get list of skills
|
53
103
|
def skills
|
54
104
|
skill_list = Array.new
|
@@ -71,12 +121,12 @@ class PersonalInfo
|
|
71
121
|
|
72
122
|
# Get town
|
73
123
|
def location
|
74
|
-
full_location.split(",").first.strip
|
124
|
+
full_location.split(",").first.strip if !full_location.empty?
|
75
125
|
end
|
76
126
|
|
77
127
|
# Get country/state
|
78
128
|
def area
|
79
|
-
full_location.split(",").last.strip
|
129
|
+
full_location.split(",").last.strip if !full_location.empty?
|
80
130
|
end
|
81
131
|
|
82
132
|
# Get the industry the person works in (2 different formats)
|
@@ -90,7 +140,7 @@ class PersonalInfo
|
|
90
140
|
def summary
|
91
141
|
summary = @html.css('#summary').css('.description')
|
92
142
|
summary = @html.css('.summary').first if is_empty?(summary)
|
93
|
-
return summary.text
|
143
|
+
return summary.text if summary
|
94
144
|
end
|
95
145
|
|
96
146
|
# Get the overall/current title
|
data/lib/picture.rb
CHANGED
data/lib/related_people.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
load 'utilities.rb'
|
3
|
+
|
4
|
+
class RelatedPeople
|
5
|
+
include Utilities
|
6
|
+
def initialize(html)
|
7
|
+
@html = html
|
8
|
+
parse_related
|
9
|
+
end
|
10
|
+
|
11
|
+
# Get list of related people
|
12
|
+
def get_related
|
13
|
+
return @related_people_list
|
14
|
+
end
|
15
|
+
|
16
|
+
def parse_related
|
17
|
+
related_people = @html.css(".insights").css(".browse-map").css(".profile-card")
|
18
|
+
|
19
|
+
@related_people_list = Array.new
|
20
|
+
related_people.each do |person|
|
21
|
+
@related_people_list.push({
|
22
|
+
related_name: related_name(person),
|
23
|
+
related_link: related_link(person),
|
24
|
+
related_person_company: related_person_company(person),
|
25
|
+
related_person_title: related_person_title(person)
|
26
|
+
})
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
# Get name of related person
|
31
|
+
def related_name(person)
|
32
|
+
return person.css("h4").text
|
33
|
+
end
|
34
|
+
|
35
|
+
# Get link to related person's profile
|
36
|
+
def related_link(person)
|
37
|
+
return person.css("h4").css("a")[0]["href"]
|
38
|
+
end
|
39
|
+
|
40
|
+
# Get related person's company
|
41
|
+
def related_person_company(person)
|
42
|
+
return person.css(".headline").text.split(" at ")[1]
|
43
|
+
end
|
44
|
+
|
45
|
+
# Get title of related person
|
46
|
+
def related_person_title(person)
|
47
|
+
return person.css(".headline").text.split(" at ")[0]
|
48
|
+
end
|
49
|
+
end
|
data/lib/utilities.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkedinparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.7
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- M. C. McGrath
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-06 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Parses public LinkedIn profiles
|
14
14
|
email: shidash@shidash.com
|
@@ -16,10 +16,16 @@ executables: []
|
|
16
16
|
extensions: []
|
17
17
|
extra_rdoc_files: []
|
18
18
|
files:
|
19
|
+
- lib/causes.rb
|
20
|
+
- lib/certifications.rb
|
21
|
+
- lib/education.rb
|
22
|
+
- lib/groups.rb
|
19
23
|
- lib/jobs.rb
|
24
|
+
- lib/languages.rb
|
20
25
|
- lib/linkedinparser.rb
|
21
26
|
- lib/personal_info.rb
|
22
27
|
- lib/picture.rb
|
28
|
+
- lib/related_people.rb
|
23
29
|
- lib/utilities.rb
|
24
30
|
homepage: https://github.com/TransparencyToolkit/linkedinparser
|
25
31
|
licenses:
|