linkedinparser 0.0.7 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3bfb352fd7f2c469a2d3858295f0a563e5ba3593
4
- data.tar.gz: da6090b7f7f54e641fcdd484f15bacb4da30d106
3
+ metadata.gz: 1bc27e7c376c9641f15b6dd2454318d2082fed25
4
+ data.tar.gz: 77872a929f8844da3afc4c8bea62a1a8ddec729f
5
5
  SHA512:
6
- metadata.gz: 5441c537eb7e899a7bf3c514336bb027d0b5f3afafe35cefc912dc86710fe628b125dfe776620df17887192663183a7b0c40d3005d35ca39452e1b806f640706
7
- data.tar.gz: 5bdff258b614db3b4d9b4e0a80ccde0accb537666deef3589179e0f3896aa9d4480011db81de87c2dc5f44f8df757855c8d753c1f29c2cd09ca6e45e76cca8e4
6
+ metadata.gz: 7c64448f45ef78f600251bdf43f56e4cda01d35528f7d0877d46b78cf1061a17407742d3d99d0732456d43f50505efcd8d9473873ac290cec04a7c8e5127a978
7
+ data.tar.gz: cb5649581361d23fa8357d04c336f45801eb8259b067c65ffc927d45ad8d6b2386dabb65751093178fff9fc650611690b497fd07de22d527fe3c9b6a24c1f966
data/lib/causes.rb ADDED
@@ -0,0 +1,50 @@
1
+ # coding: utf-8
2
+ load 'utilities.rb'
3
+
4
+ class Causes
5
+ include Utilities
6
+ def initialize(html)
7
+ @html = html
8
+ parse_causes
9
+ end
10
+
11
+ # Get hash of volunteering/cause info (nil if the profile has no volunteering section)
12
+ def get_causes
13
+ return @cause_hash
14
+ end
15
+
16
+ def parse_causes
17
+ volunteering = @html.css("#volunteering")
18
+ if !is_empty?(volunteering)
19
+ @cause_hash = Hash.new
20
+ @cause_hash[:volunteer_opportunities] = volunteer_opportunities(volunteering)
21
+ @cause_hash[:supported_causes] = supported_causes(volunteering)
22
+ @cause_hash[:supported_organizations] = supported_organizations(volunteering)
23
+ end
24
+ end
25
+
26
+ # Get opportunities they are looking for
27
+ def volunteer_opportunities(volunteering)
28
+ section = volunteering.css(".opportunities").css("li")
29
+ return make_list(section) if !is_empty?(section)
30
+ end
31
+
32
+ # Get causes they support
33
+ def supported_causes(volunteering)
34
+ section = get_right_section("Causes", volunteering.css(".extra-section"))
35
+ return make_list(section.css("li")) if !is_empty?(section)
36
+ end
37
+
38
+ # Get organizations they support
39
+ def supported_organizations(volunteering)
40
+ section = get_right_section("Organizations", volunteering.css(".extra-section"))
41
+ return make_list(section.css("li")) if !is_empty?(section)
42
+ end
43
+
44
+ def get_right_section(look_for, sections)
45
+ sections.each do |section|
46
+ return section if section.css("h4").text.include?(look_for)
47
+ end
48
+ return nil
49
+ end
50
+ end
data/lib/certifications.rb ADDED
@@ -0,0 +1,60 @@
1
+ # coding: utf-8
2
+ load 'utilities.rb'
3
+
4
+ class Certifications
5
+ include Utilities
6
+ def initialize(html)
7
+ @html = html
8
+ parse_certifications
9
+ end
10
+
11
+ # Get list of certifications
12
+ def get_certifications
13
+ return @certificate_list
14
+ end
15
+
16
+ def parse_certifications
17
+ certifications = @html.css(".certifications").css("li")
18
+
19
+ @certificate_list = Array.new
20
+ certifications.each do |certificate|
21
+ @certificate_list.push({
22
+ certificate_name: certificate_name(certificate),
23
+ certificate_authority: certificate_authority(certificate),
24
+ license_num: license_num(certificate),
25
+ certificate_start: certificate_start(certificate),
26
+ certificate_end: certificate_end(certificate)
27
+ })
28
+ end
29
+ end
30
+
31
+ # Name of certification
32
+ def certificate_name(certificate)
33
+ cert_name = certificate.css("h4")
34
+ return cert_name.text if !is_empty?(cert_name)
35
+ end
36
+
37
+ # Issuing authority
38
+ def certificate_authority(certificate)
39
+ cert_auth = certificate.css("h5")
40
+ return cert_auth.text.split(", ")[0] if !is_empty?(cert_auth)
41
+ end
42
+
43
+ # License Number
44
+ def license_num(certificate)
45
+ cert_num = certificate.css("h5")
46
+ return cert_num.text.split(", ")[1] if !is_empty?(cert_num)
47
+ end
48
+
49
+ # Start date for certificate
50
+ def certificate_start(certificate)
51
+ cert_start = certificate.css(".date-range").css("time")
52
+ return cert_start[0].text if !is_empty?(cert_start[0])
53
+ end
54
+
55
+ # Expiry date for certificate
56
+ def certificate_end(certificate)
57
+ cert_end = certificate.css(".date-range").css("time")
58
+ return cert_end[1].text if !is_empty?(cert_end[1])
59
+ end
60
+ end
data/lib/education.rb ADDED
@@ -0,0 +1,58 @@
1
+ # coding: utf-8
2
+ load 'utilities.rb'
3
+
4
+ class Education
5
+ include Utilities
6
+ def initialize(html)
7
+ @html = html
8
+ parse_education
9
+ end
10
+
11
+ # Get list of education entries (degrees)
12
+ def get_education
13
+ return @degree_list
14
+ end
15
+
16
+ def parse_education
17
+ schools = @html.css(".schools").css(".school")
18
+
19
+ @degree_list = Array.new
20
+ schools.each do |school|
21
+ @degree_list.push({
22
+ school_name: school_name(school),
23
+ education_desc: education_desc(school),
24
+ education_degree: education_degree(school),
25
+ degree_start_date: degree_start_date(school),
26
+ degree_end_date: degree_end_date(school)
27
+ })
28
+
29
+ end
30
+ end
31
+
32
+ # Get the name of the school
33
+ def school_name(school)
34
+ return school.css("h4").text
35
+ end
36
+
37
+ # Get the description
38
+ def education_desc(school)
39
+ return school.css(".description").text
40
+ end
41
+
42
+ # Get the degree info
43
+ def education_degree(school)
44
+ return school.css("h5").text
45
+ end
46
+
47
+ # Get the start date for the degree
48
+ def degree_start_date(school)
49
+ start_date = school.css(".date-range").css("time")
50
+ return start_date[0].text if !is_empty?(start_date[0])
51
+ end
52
+
53
+ # Get the end date for the degree
54
+ def degree_end_date(school)
55
+ end_date = school.css(".date-range").css("time")
56
+ return end_date[1].text if !is_empty?(end_date[1])
57
+ end
58
+ end
data/lib/groups.rb ADDED
@@ -0,0 +1,37 @@
1
+ # coding: utf-8
2
+ load 'utilities.rb'
3
+
4
+ class Groups
5
+ include Utilities
6
+ def initialize(html)
7
+ @html = html
8
+ parse_groups
9
+ end
10
+
11
+ # Get list of groups
12
+ def get_groups
13
+ return @group_list
14
+ end
15
+
16
+ def parse_groups
17
+ groups = @html.css('#groups').css('.group').css('.item-title')
18
+
19
+ @group_list = Array.new
20
+ groups.each do |group|
21
+ @group_list.push({
22
+ group_name: group_name(group),
23
+ group_link: group_link(group)
24
+ })
25
+ end
26
+ end
27
+
28
+ # Get group name
29
+ def group_name(group)
30
+ return group.text
31
+ end
32
+
33
+ # Get group link
34
+ def group_link(group)
35
+ return group.css("a")[0]["href"]
36
+ end
37
+ end
data/lib/languages.rb ADDED
@@ -0,0 +1,37 @@
1
+ # coding: utf-8
2
+ load 'utilities.rb'
3
+
4
+ class Languages
5
+ include Utilities
6
+ def initialize(html)
7
+ @html = html
8
+ parse_languages
9
+ end
10
+
11
+ # Get list of languages
12
+ def get_languages
13
+ return @language_list
14
+ end
15
+
16
+ def parse_languages
17
+ languages = @html.css("#languages").css("li")
18
+
19
+ @language_list = Array.new
20
+ languages.each do |l|
21
+ @language_list.push({
22
+ language: language(l),
23
+ proficiency: proficiency(l)
24
+ })
25
+ end
26
+ end
27
+
28
+ # Language name
29
+ def language(language_name)
30
+ language_name.css("h4").text
31
+ end
32
+
33
+ # Get proficiency
34
+ def proficiency(language_name)
35
+ language_name.css(".proficiency").text
36
+ end
37
+ end
data/lib/linkedinparser.rb CHANGED
@@ -14,12 +14,25 @@ class LinkedinParser
14
14
 
15
15
  def parse
16
16
  # Get details about the person
17
- p = PersonalInfo.new(@profile, @profile_url)
18
- @personal_info = p.get_personal_info
17
+ begin
18
+ p = PersonalInfo.new(@profile, @profile_url)
19
+ @personal_info = p.get_personal_info
20
+ @personal_info.merge!({parsing_failed: false})
21
+ rescue # Handle failed parsing
22
+ @personal_info = {
23
+ profile_url: @profile_url,
24
+ full_html: @profile,
25
+ parsing_failed: true
26
+ }
27
+ end
19
28
 
20
29
  # Get job info
21
- j = Jobs.new(@profile)
22
- @job_info = j.get_jobs
30
+ begin
31
+ j = Jobs.new(@profile)
32
+ @job_info = j.get_jobs
33
+ rescue # Handle failed job parsing
34
+ @job_info = {job_parsing_failed: true}
35
+ end
23
36
  end
24
37
 
25
38
  # Return results with new item for each job
@@ -38,27 +51,5 @@ class LinkedinParser
38
51
  output[:jobs] = @job_info
39
52
  output.merge!(@crawler_fields)
40
53
  JSON.pretty_generate(output)
41
- end
42
-
43
- # TODO: Fields to add to parser-
44
- # Organizations
45
- # Education
46
- # Projects
47
- # Related people
48
- # Languages
49
- # Certifications
50
- # Groups
54
+ end
51
55
  end
52
-
53
- # Test:
54
- #profile = Selenium::WebDriver::Firefox::Profile.new
55
- #profile['intl.accept_languages'] = 'en'
56
- #profile["javascript.enabled"] = false
57
- #driver = Selenium::WebDriver.for :firefox, profile: profile
58
- #url = "https://www.linkedin.com/pub/christopher-mcclellan/5b/a09/ba9"
59
- #url = "https://www.linkedin.com/pub/maryann-holmes/2b/770/3b2"
60
-
61
- #url = "https://www.linkedin.com/pub/kenneth-chamberlin/32/8bb/b22"
62
- #driver.navigate.to url
63
- #l = LinkedinParser.new(driver.page_source, url, {timestamp: Time.now})
64
- #puts l.results_by_job
data/lib/personal_info.rb CHANGED
@@ -1,5 +1,11 @@
1
1
  load 'picture.rb'
2
2
  load 'utilities.rb'
3
+ load 'education.rb'
4
+ load 'groups.rb'
5
+ load 'languages.rb'
6
+ load 'related_people.rb'
7
+ load 'certifications.rb'
8
+ load 'causes.rb'
3
9
 
4
10
  class PersonalInfo
5
11
  include Utilities
@@ -23,6 +29,12 @@ class PersonalInfo
23
29
  summary: summary,
24
30
  current_title: title,
25
31
  interests: interests,
32
+ education: education,
33
+ groups: groups,
34
+ causes: causes,
35
+ certifications: certifications,
36
+ languages: languages,
37
+ related_people: related_people,
26
38
  number_of_connections: number_of_connections,
27
39
  picture: p.picture,
28
40
  pic_path: p.pic_path,
@@ -36,7 +48,9 @@ class PersonalInfo
36
48
 
37
49
  # Get the full name of the person
38
50
  def full_name
39
- @html.css(".profile-overview").css('h1').text
51
+ name = @html.css(".profile-overview").css('h1')
52
+ name = @html.css(".profile-overview-content").css('h1') if is_empty?(name)
53
+ return name.text
40
54
  end
41
55
 
42
56
  # Get first part of name
@@ -49,6 +63,42 @@ class PersonalInfo
49
63
  full_name.split(" ", 2).last.strip
50
64
  end
51
65
 
66
+ # Get education info
67
+ def education
68
+ e = Education.new(@html)
69
+ return e.get_education
70
+ end
71
+
72
+ # Get a list of groups they are in
73
+ def groups
74
+ g = Groups.new(@html)
75
+ return g.get_groups
76
+ end
77
+
78
+ # Get causes they care about
79
+ def causes
80
+ c = Causes.new(@html)
81
+ return c.get_causes
82
+ end
83
+
84
+ # Get the person's certifications
85
+ def certifications
86
+ c = Certifications.new(@html)
87
+ return c.get_certifications
88
+ end
89
+
90
+ # Get a list of languages they speak
91
+ def languages
92
+ l = Languages.new(@html)
93
+ return l.get_languages
94
+ end
95
+
96
+ # Get the people also viewed list from the side
97
+ def related_people
98
+ r = RelatedPeople.new(@html)
99
+ return r.get_related
100
+ end
101
+
52
102
  # Get list of skills
53
103
  def skills
54
104
  skill_list = Array.new
@@ -71,12 +121,12 @@ class PersonalInfo
71
121
 
72
122
  # Get town
73
123
  def location
74
- full_location.split(",").first.strip
124
+ full_location.split(",").first.strip if !full_location.empty?
75
125
  end
76
126
 
77
127
  # Get country/state
78
128
  def area
79
- full_location.split(",").last.strip
129
+ full_location.split(",").last.strip if !full_location.empty?
80
130
  end
81
131
 
82
132
  # Get the industry the person works in (2 different formats)
@@ -90,7 +140,7 @@ class PersonalInfo
90
140
  def summary
91
141
  summary = @html.css('#summary').css('.description')
92
142
  summary = @html.css('.summary').first if is_empty?(summary)
93
- return summary.text
143
+ return summary.text if summary
94
144
  end
95
145
 
96
146
  # Get the overall/current title
data/lib/picture.rb CHANGED
@@ -5,7 +5,8 @@ class Picture
5
5
 
6
6
  # Get path to the picture url
7
7
  def picture
8
- @html.css('.profile-picture').css('img').first['src']
8
+ pic = @html.css('.profile-picture').css('img').first
9
+ return pic['src'] if pic
9
10
  end
10
11
 
11
12
  # Download picture
data/lib/related_people.rb ADDED
@@ -0,0 +1,49 @@
1
+ # coding: utf-8
2
+ load 'utilities.rb'
3
+
4
+ class RelatedPeople
5
+ include Utilities
6
+ def initialize(html)
7
+ @html = html
8
+ parse_related
9
+ end
10
+
11
+ # Get list of related people
12
+ def get_related
13
+ return @related_people_list
14
+ end
15
+
16
+ def parse_related
17
+ related_people = @html.css(".insights").css(".browse-map").css(".profile-card")
18
+
19
+ @related_people_list = Array.new
20
+ related_people.each do |person|
21
+ @related_people_list.push({
22
+ related_name: related_name(person),
23
+ related_link: related_link(person),
24
+ related_person_company: related_person_company(person),
25
+ related_person_title: related_person_title(person)
26
+ })
27
+ end
28
+ end
29
+
30
+ # Get name of related person
31
+ def related_name(person)
32
+ return person.css("h4").text
33
+ end
34
+
35
+ # Get link to related person's profile
36
+ def related_link(person)
37
+ return person.css("h4").css("a")[0]["href"]
38
+ end
39
+
40
+ # Get related person's company
41
+ def related_person_company(person)
42
+ return person.css(".headline").text.split(" at ")[1]
43
+ end
44
+
45
+ # Get title of related person
46
+ def related_person_title(person)
47
+ return person.css(".headline").text.split(" at ")[0]
48
+ end
49
+ end
data/lib/utilities.rb CHANGED
@@ -3,4 +3,13 @@ module Utilities
3
3
  def is_empty?(item)
4
4
  item == nil || item.text.empty?
5
5
  end
6
+
7
+ # Build an array containing the text of each element
8
+ def make_list(elements)
9
+ listarr = Array.new
10
+ elements.each do |item|
11
+ listarr.push(item.text)
12
+ end
13
+ return listarr
14
+ end
6
15
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkedinparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-03 00:00:00.000000000 Z
11
+ date: 2015-11-06 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Parses public LinkedIn profiles
14
14
  email: shidash@shidash.com
@@ -16,10 +16,16 @@ executables: []
16
16
  extensions: []
17
17
  extra_rdoc_files: []
18
18
  files:
19
+ - lib/causes.rb
20
+ - lib/certifications.rb
21
+ - lib/education.rb
22
+ - lib/groups.rb
19
23
  - lib/jobs.rb
24
+ - lib/languages.rb
20
25
  - lib/linkedinparser.rb
21
26
  - lib/personal_info.rb
22
27
  - lib/picture.rb
28
+ - lib/related_people.rb
23
29
  - lib/utilities.rb
24
30
  homepage: https://github.com/TransparencyToolkit/linkedinparser
25
31
  licenses: