linkedinparser 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/causes.rb +50 -0
- data/lib/certifications.rb +60 -0
- data/lib/education.rb +58 -0
- data/lib/groups.rb +37 -0
- data/lib/languages.rb +37 -0
- data/lib/linkedinparser.rb +18 -27
- data/lib/personal_info.rb +54 -4
- data/lib/picture.rb +2 -1
- data/lib/related_people.rb +49 -0
- data/lib/utilities.rb +9 -0
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1bc27e7c376c9641f15b6dd2454318d2082fed25
|
4
|
+
data.tar.gz: 77872a929f8844da3afc4c8bea62a1a8ddec729f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7c64448f45ef78f600251bdf43f56e4cda01d35528f7d0877d46b78cf1061a17407742d3d99d0732456d43f50505efcd8d9473873ac290cec04a7c8e5127a978
|
7
|
+
data.tar.gz: cb5649581361d23fa8357d04c336f45801eb8259b067c65ffc927d45ad8d6b2386dabb65751093178fff9fc650611690b497fd07de22d527fe3c9b6a24c1f966
|
data/lib/causes.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
load 'utilities.rb'
|
3
|
+
|
4
|
+
class Causes
  include Utilities

  # html - parsed profile page; it only has to respond to #css
  #        (presumably a Nokogiri document -- confirm against the caller).
  # Parsing runs eagerly, so get_causes is a plain read afterwards.
  def initialize(html)
    @html = html
    parse_causes
  end

  # Get list of causes
  # Returns the hash filled in by parse_causes, or nil when parse_causes
  # found the "#volunteering" selection to be empty and built nothing.
  def get_causes
    @cause_hash
  end

  # Fill @cause_hash from the "#volunteering" part of the page.
  # Nothing is assigned when is_empty? (from Utilities) reports that
  # selection as empty.
  def parse_causes
    volunteer_section = @html.css("#volunteering")
    return if is_empty?(volunteer_section)

    @cause_hash = {}
    @cause_hash[:volunteer_opportunities] = volunteer_opportunities(volunteer_section)
    @cause_hash[:supported_causes] = supported_causes(volunteer_section)
    @cause_hash[:supported_organizations] = supported_organizations(volunteer_section)
  end

  # Get opportunities they are looking for
  # Returns make_list over the <li> items under ".opportunities", or nil
  # when there are none.
  def volunteer_opportunities(volunteering)
    items = volunteering.css(".opportunities").css("li")
    make_list(items) unless is_empty?(items)
  end

  # Get causes they support
  # Looks for the ".extra-section" whose heading mentions "Causes".
  def supported_causes(volunteering)
    matching = get_right_section("Causes", volunteering.css(".extra-section"))
    make_list(matching.css("li")) unless is_empty?(matching)
  end

  # Get organizations they support
  # Looks for the ".extra-section" whose heading mentions "Organizations".
  def supported_organizations(volunteering)
    matching = get_right_section("Organizations", volunteering.css(".extra-section"))
    make_list(matching.css("li")) unless is_empty?(matching)
  end

  # Pick the first entry of sections whose <h4> text contains look_for.
  # Returns nil when no entry matches.
  def get_right_section(look_for, sections)
    sections.find { |candidate| candidate.css("h4").text.include?(look_for) }
  end
end
|
data/lib/certifications.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
load 'utilities.rb'
|
3
|
+
|
4
|
+
class Certifications
  include Utilities

  # html - parsed profile page; it only has to respond to #css
  #        (presumably a Nokogiri document -- confirm against the caller).
  # The certification list is built immediately.
  def initialize(html)
    @html = html
    parse_certifications
  end

  # Get list of certifications
  # Returns an array with one hash per <li> found under ".certifications".
  def get_certifications
    @certificate_list
  end

  # Rebuild @certificate_list from the page, one hash per certification item.
  def parse_certifications
    entries = @html.css(".certifications").css("li")

    @certificate_list = []
    entries.each do |entry|
      @certificate_list << {
        certificate_name: certificate_name(entry),
        certificate_authority: certificate_authority(entry),
        license_num: license_num(entry),
        certificate_start: certificate_start(entry),
        certificate_end: certificate_end(entry)
      }
    end
  end

  # Name of certification
  # Text of the item's <h4>, or nil when is_empty? says there is none.
  def certificate_name(certificate)
    heading = certificate.css("h4")
    heading.text unless is_empty?(heading)
  end

  # Issuing authority
  # The part of the <h5> text before the first ", ".
  def certificate_authority(certificate)
    subheading = certificate.css("h5")
    subheading.text.split(", ").first unless is_empty?(subheading)
  end

  # License Number
  # The second ", "-separated part of the <h5> text; nil if there is no
  # second part.
  def license_num(certificate)
    subheading = certificate.css("h5")
    subheading.text.split(", ")[1] unless is_empty?(subheading)
  end

  # Start date for certificate
  # Text of the first <time> inside ".date-range", if present.
  def certificate_start(certificate)
    certificate_time_text(certificate, 0)
  end

  # Expiry date for certificate
  # Text of the second <time> inside ".date-range", if present.
  def certificate_end(certificate)
    certificate_time_text(certificate, 1)
  end

  private

  # Text of the index-th <time> under ".date-range", or nil when is_empty?
  # reports that node as missing.
  def certificate_time_text(certificate, index)
    time_node = certificate.css(".date-range").css("time")[index]
    time_node.text unless is_empty?(time_node)
  end
end
|
data/lib/education.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
load 'utilities.rb'
|
3
|
+
|
4
|
+
class Education
  include Utilities

  # html - parsed profile page; it only has to respond to #css
  #        (presumably a Nokogiri document -- confirm against the caller).
  # The degree list is built immediately.
  def initialize(html)
    @html = html
    parse_education
  end

  # Get list of degrees
  # Returns an array with one hash per ".school" element under ".schools".
  def get_education
    @degree_list
  end

  # Rebuild @degree_list from the page, one hash per school entry.
  def parse_education
    entries = @html.css(".schools").css(".school")

    @degree_list = []
    entries.each do |entry|
      @degree_list << {
        school_name: school_name(entry),
        education_desc: education_desc(entry),
        education_degree: education_degree(entry),
        degree_start_date: degree_start_date(entry),
        degree_end_date: degree_end_date(entry)
      }
    end
  end

  # Get the name of the school (text of the entry's <h4>)
  def school_name(school)
    school.css("h4").text
  end

  # Get the description (text of the entry's ".description")
  def education_desc(school)
    school.css(".description").text
  end

  # Get the degree info (text of the entry's <h5>)
  def education_degree(school)
    school.css("h5").text
  end

  # Get the start date for the degree
  # Text of the first <time> inside ".date-range", if present.
  def degree_start_date(school)
    degree_time_text(school, 0)
  end

  # Get the end date for the degree
  # Text of the second <time> inside ".date-range", if present.
  def degree_end_date(school)
    degree_time_text(school, 1)
  end

  private

  # Text of the index-th <time> under ".date-range", or nil when is_empty?
  # reports that node as missing.
  def degree_time_text(school, index)
    time_node = school.css(".date-range").css("time")[index]
    time_node.text unless is_empty?(time_node)
  end
end
|
data/lib/groups.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
load 'utilities.rb'
|
3
|
+
|
4
|
+
class Groups
  include Utilities

  # html - parsed profile page; it only has to respond to #css
  #        (presumably a Nokogiri document -- confirm against the caller).
  # The group list is built immediately.
  def initialize(html)
    @html = html
    parse_groups
  end

  # Get list of groups
  # Returns an array of { group_name:, group_link: } hashes, one per
  # ".item-title" found under "#groups .group".
  def get_groups
    @group_list
  end

  # Rebuild @group_list from the page.
  def parse_groups
    groups = @html.css('#groups').css('.group').css('.item-title')

    @group_list = []
    groups.each do |group|
      @group_list << {
        group_name: group_name(group),
        group_link: group_link(group)
      }
    end
  end

  # Get group name (the title element's text)
  def group_name(group)
    group.text
  end

  # Get group link
  # Returns the href of the first <a> inside the title, or nil when the
  # title has no anchor. Previously a title without an anchor raised
  # NoMethodError on nil, which aborted construction of the whole object
  # over one unlinked group.
  def group_link(group)
    anchor = group.css("a").first
    anchor["href"] if anchor
  end
end
|
data/lib/languages.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
load 'utilities.rb'
|
3
|
+
|
4
|
+
class Languages
  include Utilities

  # html - parsed profile page; it only has to respond to #css
  #        (presumably a Nokogiri document -- confirm against the caller).
  # The language list is built immediately.
  def initialize(html)
    @html = html
    parse_languages
  end

  # Get list of languages
  # Returns an array of { language:, proficiency: } hashes, one per <li>
  # found under "#languages".
  def get_languages
    @language_list
  end

  # Rebuild @language_list from the page.
  def parse_languages
    entries = @html.css("#languages").css("li")

    @language_list = []
    entries.each do |entry|
      @language_list << {
        language: language(entry),
        proficiency: proficiency(entry)
      }
    end
  end

  # Language name (text of the entry's <h4>)
  def language(language_name)
    heading = language_name.css("h4")
    heading.text
  end

  # Get proficiency (text of the entry's ".proficiency")
  def proficiency(language_name)
    level = language_name.css(".proficiency")
    level.text
  end
end
|
data/lib/linkedinparser.rb
CHANGED
@@ -14,12 +14,25 @@ class LinkedinParser
|
|
14
14
|
|
15
15
|
def parse
|
16
16
|
# Get details about the person
|
17
|
-
|
18
|
-
|
17
|
+
begin
|
18
|
+
p = PersonalInfo.new(@profile, @profile_url)
|
19
|
+
@personal_info = p.get_personal_info
|
20
|
+
@personal_info.merge!({parsing_failed: false})
|
21
|
+
rescue # Handle failed parsing
|
22
|
+
@personal_info = {
|
23
|
+
profile_url: @profile_url,
|
24
|
+
full_html: @profile,
|
25
|
+
parsing_failed: true
|
26
|
+
}
|
27
|
+
end
|
19
28
|
|
20
29
|
# Get job info
|
21
|
-
|
22
|
-
|
30
|
+
begin
|
31
|
+
j = Jobs.new(@profile)
|
32
|
+
@job_info = j.get_jobs
|
33
|
+
rescue # Handle failed job parsing
|
34
|
+
@job_info = {job_parsing_failed: true}
|
35
|
+
end
|
23
36
|
end
|
24
37
|
|
25
38
|
# Return results with new item for each job
|
@@ -38,27 +51,5 @@ class LinkedinParser
|
|
38
51
|
output[:jobs] = @job_info
|
39
52
|
output.merge!(@crawler_fields)
|
40
53
|
JSON.pretty_generate(output)
|
41
|
-
end
|
42
|
-
|
43
|
-
# TODO: Fields to add to parser-
|
44
|
-
# Organizations
|
45
|
-
# Education
|
46
|
-
# Projects
|
47
|
-
# Related people
|
48
|
-
# Languages
|
49
|
-
# Certifications
|
50
|
-
# Groups
|
54
|
+
end
|
51
55
|
end
|
52
|
-
|
53
|
-
# Test:
|
54
|
-
#profile = Selenium::WebDriver::Firefox::Profile.new
|
55
|
-
#profile['intl.accept_languages'] = 'en'
|
56
|
-
#profile["javascript.enabled"] = false
|
57
|
-
#driver = Selenium::WebDriver.for :firefox, profile: profile
|
58
|
-
#url = "https://www.linkedin.com/pub/christopher-mcclellan/5b/a09/ba9"
|
59
|
-
#url = "https://www.linkedin.com/pub/maryann-holmes/2b/770/3b2"
|
60
|
-
|
61
|
-
#url = "https://www.linkedin.com/pub/kenneth-chamberlin/32/8bb/b22"
|
62
|
-
#driver.navigate.to url
|
63
|
-
#l = LinkedinParser.new(driver.page_source, url, {timestamp: Time.now})
|
64
|
-
#puts l.results_by_job
|
data/lib/personal_info.rb
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
load 'picture.rb'
|
2
2
|
load 'utilities.rb'
|
3
|
+
load 'education.rb'
|
4
|
+
load 'groups.rb'
|
5
|
+
load 'languages.rb'
|
6
|
+
load 'related_people.rb'
|
7
|
+
load 'certifications.rb'
|
8
|
+
load 'causes.rb'
|
3
9
|
|
4
10
|
class PersonalInfo
|
5
11
|
include Utilities
|
@@ -23,6 +29,12 @@ class PersonalInfo
|
|
23
29
|
summary: summary,
|
24
30
|
current_title: title,
|
25
31
|
interests: interests,
|
32
|
+
education: education,
|
33
|
+
groups: groups,
|
34
|
+
causes: causes,
|
35
|
+
certifications: certifications,
|
36
|
+
languages: languages,
|
37
|
+
related_people: related_people,
|
26
38
|
number_of_connections: number_of_connections,
|
27
39
|
picture: p.picture,
|
28
40
|
pic_path: p.pic_path,
|
@@ -36,7 +48,9 @@ class PersonalInfo
|
|
36
48
|
|
37
49
|
# Get the full name of the person
|
38
50
|
def full_name
|
39
|
-
@html.css(".profile-overview").css('h1')
|
51
|
+
name = @html.css(".profile-overview").css('h1')
|
52
|
+
name = @html.css(".profile-overview-content").css('h1') if is_empty?(name)
|
53
|
+
return name.text
|
40
54
|
end
|
41
55
|
|
42
56
|
# Get first part of name
|
@@ -49,6 +63,42 @@ class PersonalInfo
|
|
49
63
|
full_name.split(" ", 2).last.strip
|
50
64
|
end
|
51
65
|
|
66
|
+
# Get education info
|
67
|
+
def education
  # Run an Education parser over the profile HTML and hand back its result.
  Education.new(@html).get_education
end
|
71
|
+
|
72
|
+
# Get a list of groups they are in
|
73
|
+
def groups
  # Run a Groups parser over the profile HTML and hand back its result.
  Groups.new(@html).get_groups
end
|
77
|
+
|
78
|
+
# Get causes they care about
|
79
|
+
def causes
  # Run a Causes parser over the profile HTML and hand back its result.
  Causes.new(@html).get_causes
end
|
83
|
+
|
84
|
+
# Get the person's certifications
|
85
|
+
def certifications
  # Run a Certifications parser over the profile HTML and hand back its result.
  Certifications.new(@html).get_certifications
end
|
89
|
+
|
90
|
+
# Get a list of languages they speak
|
91
|
+
def languages
  # Run a Languages parser over the profile HTML and hand back its result.
  Languages.new(@html).get_languages
end
|
95
|
+
|
96
|
+
# Get the people also viewed list from the side
|
97
|
+
def related_people
  # Run a RelatedPeople parser over the profile HTML and hand back its result.
  RelatedPeople.new(@html).get_related
end
|
101
|
+
|
52
102
|
# Get list of skills
|
53
103
|
def skills
|
54
104
|
skill_list = Array.new
|
@@ -71,12 +121,12 @@ class PersonalInfo
|
|
71
121
|
|
72
122
|
# Get town
|
73
123
|
def location
|
74
|
-
full_location.split(",").first.strip
|
124
|
+
full_location.split(",").first.strip if !full_location.empty?
|
75
125
|
end
|
76
126
|
|
77
127
|
# Get country/state
|
78
128
|
def area
|
79
|
-
full_location.split(",").last.strip
|
129
|
+
full_location.split(",").last.strip if !full_location.empty?
|
80
130
|
end
|
81
131
|
|
82
132
|
# Get the industry the person works in (2 different formats)
|
@@ -90,7 +140,7 @@ class PersonalInfo
|
|
90
140
|
def summary
|
91
141
|
summary = @html.css('#summary').css('.description')
|
92
142
|
summary = @html.css('.summary').first if is_empty?(summary)
|
93
|
-
return summary.text
|
143
|
+
return summary.text if summary
|
94
144
|
end
|
95
145
|
|
96
146
|
# Get the overall/current title
|
data/lib/picture.rb
CHANGED
data/lib/related_people.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
load 'utilities.rb'
|
3
|
+
|
4
|
+
class RelatedPeople
  include Utilities

  # html - parsed profile page; it only has to respond to #css
  #        (presumably a Nokogiri document -- confirm against the caller).
  # The related-people list is built immediately.
  def initialize(html)
    @html = html
    parse_related
  end

  # Get list of related people
  # Returns an array with one hash per ".profile-card" found under
  # ".insights .browse-map".
  def get_related
    @related_people_list
  end

  # Rebuild @related_people_list from the page.
  def parse_related
    related_people = @html.css(".insights").css(".browse-map").css(".profile-card")

    @related_people_list = []
    related_people.each do |person|
      @related_people_list << {
        related_name: related_name(person),
        related_link: related_link(person),
        related_person_company: related_person_company(person),
        related_person_title: related_person_title(person)
      }
    end
  end

  # Get name of related person (text of the card's <h4>)
  def related_name(person)
    person.css("h4").text
  end

  # Get link to related person's profile
  # Returns the href of the first <a> inside the card's <h4>, or nil when
  # the card has no such anchor. Previously a card without an anchor raised
  # NoMethodError on nil, which aborted construction of the whole object
  # over one unlinked card.
  def related_link(person)
    anchor = person.css("h4").css("a").first
    anchor["href"] if anchor
  end

  # Get related person's company
  # The headline is split on " at "; this is the second piece, or nil when
  # the headline contains no " at ".
  def related_person_company(person)
    person.css(".headline").text.split(" at ")[1]
  end

  # Get title of related person
  # The headline is split on " at "; this is the first piece, or nil for an
  # empty headline.
  def related_person_title(person)
    person.css(".headline").text.split(" at ")[0]
  end
end
|
data/lib/utilities.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkedinparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.7
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- M. C. McGrath
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-06 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Parses public LinkedIn profiles
|
14
14
|
email: shidash@shidash.com
|
@@ -16,10 +16,16 @@ executables: []
|
|
16
16
|
extensions: []
|
17
17
|
extra_rdoc_files: []
|
18
18
|
files:
|
19
|
+
- lib/causes.rb
|
20
|
+
- lib/certifications.rb
|
21
|
+
- lib/education.rb
|
22
|
+
- lib/groups.rb
|
19
23
|
- lib/jobs.rb
|
24
|
+
- lib/languages.rb
|
20
25
|
- lib/linkedinparser.rb
|
21
26
|
- lib/personal_info.rb
|
22
27
|
- lib/picture.rb
|
28
|
+
- lib/related_people.rb
|
23
29
|
- lib/utilities.rb
|
24
30
|
homepage: https://github.com/TransparencyToolkit/linkedinparser
|
25
31
|
licenses:
|