linkedinparser 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3bfb352fd7f2c469a2d3858295f0a563e5ba3593
4
- data.tar.gz: da6090b7f7f54e641fcdd484f15bacb4da30d106
3
+ metadata.gz: 1bc27e7c376c9641f15b6dd2454318d2082fed25
4
+ data.tar.gz: 77872a929f8844da3afc4c8bea62a1a8ddec729f
5
5
  SHA512:
6
- metadata.gz: 5441c537eb7e899a7bf3c514336bb027d0b5f3afafe35cefc912dc86710fe628b125dfe776620df17887192663183a7b0c40d3005d35ca39452e1b806f640706
7
- data.tar.gz: 5bdff258b614db3b4d9b4e0a80ccde0accb537666deef3589179e0f3896aa9d4480011db81de87c2dc5f44f8df757855c8d753c1f29c2cd09ca6e45e76cca8e4
6
+ metadata.gz: 7c64448f45ef78f600251bdf43f56e4cda01d35528f7d0877d46b78cf1061a17407742d3d99d0732456d43f50505efcd8d9473873ac290cec04a7c8e5127a978
7
+ data.tar.gz: cb5649581361d23fa8357d04c336f45801eb8259b067c65ffc927d45ad8d6b2386dabb65751093178fff9fc650611690b497fd07de22d527fe3c9b6a24c1f966
data/lib/causes.rb ADDED
@@ -0,0 +1,50 @@
1
+ # coding: utf-8
2
+ load 'utilities.rb'
3
+
4
+ class Causes
5
+ include Utilities
6
+ def initialize(html)
7
+ @html = html
8
+ parse_causes
9
+ end
10
+
11
+ # Get list of causes
12
+ def get_causes
13
+ return @cause_hash
14
+ end
15
+
16
+ def parse_causes
17
+ volunteering = @html.css("#volunteering")
18
+ if !is_empty?(volunteering)
19
+ @cause_hash = Hash.new
20
+ @cause_hash[:volunteer_opportunities] = volunteer_opportunities(volunteering)
21
+ @cause_hash[:supported_causes] = supported_causes(volunteering)
22
+ @cause_hash[:supported_organizations] = supported_organizations(volunteering)
23
+ end
24
+ end
25
+
26
+ # Get opportunities they are looking for
27
+ def volunteer_opportunities(volunteering)
28
+ section = volunteering.css(".opportunities").css("li")
29
+ return make_list(section) if !is_empty?(section)
30
+ end
31
+
32
+ # Get causes they support
33
+ def supported_causes(volunteering)
34
+ section = get_right_section("Causes", volunteering.css(".extra-section"))
35
+ return make_list(section.css("li")) if !is_empty?(section)
36
+ end
37
+
38
+ # Get organizations they support
39
+ def supported_organizations(volunteering)
40
+ section = get_right_section("Organizations", volunteering.css(".extra-section"))
41
+ return make_list(section.css("li")) if !is_empty?(section)
42
+ end
43
+
44
+ def get_right_section(look_for, sections)
45
+ sections.each do |section|
46
+ return section if section.css("h4").text.include?(look_for)
47
+ end
48
+ return nil
49
+ end
50
+ end
data/lib/certifications.rb ADDED
@@ -0,0 +1,60 @@
1
+ # coding: utf-8
2
+ load 'utilities.rb'
3
+
4
+ class Certifications
5
+ include Utilities
6
+ def initialize(html)
7
+ @html = html
8
+ parse_certifications
9
+ end
10
+
11
+ # Get list of certifications
12
+ def get_certifications
13
+ return @certificate_list
14
+ end
15
+
16
+ def parse_certifications
17
+ certifications = @html.css(".certifications").css("li")
18
+
19
+ @certificate_list = Array.new
20
+ certifications.each do |certificate|
21
+ @certificate_list.push({
22
+ certificate_name: certificate_name(certificate),
23
+ certificate_authority: certificate_authority(certificate),
24
+ license_num: license_num(certificate),
25
+ certificate_start: certificate_start(certificate),
26
+ certificate_end: certificate_end(certificate)
27
+ })
28
+ end
29
+ end
30
+
31
+ # Name of certification
32
+ def certificate_name(certificate)
33
+ cert_name = certificate.css("h4")
34
+ return cert_name.text if !is_empty?(cert_name)
35
+ end
36
+
37
+ # Issuing authority
38
+ def certificate_authority(certificate)
39
+ cert_auth = certificate.css("h5")
40
+ return cert_auth.text.split(", ")[0] if !is_empty?(cert_auth)
41
+ end
42
+
43
+ # License Number
44
+ def license_num(certificate)
45
+ cert_num = certificate.css("h5")
46
+ return cert_num.text.split(", ")[1] if !is_empty?(cert_num)
47
+ end
48
+
49
+ # Start date for certificate
50
+ def certificate_start(certificate)
51
+ cert_start = certificate.css(".date-range").css("time")
52
+ return cert_start[0].text if !is_empty?(cert_start[0])
53
+ end
54
+
55
+ # Expiry date for certificate
56
+ def certificate_end(certificate)
57
+ cert_end = certificate.css(".date-range").css("time")
58
+ return cert_end[1].text if !is_empty?(cert_end[1])
59
+ end
60
+ end
data/lib/education.rb ADDED
@@ -0,0 +1,58 @@
1
+ # coding: utf-8
2
+ load 'utilities.rb'
3
+
4
+ class Education
5
+ include Utilities
6
+ def initialize(html)
7
+ @html = html
8
+ parse_education
9
+ end
10
+
11
+ # Get list of education entries (one per school)
12
+ def get_education
13
+ return @degree_list
14
+ end
15
+
16
+ def parse_education
17
+ schools = @html.css(".schools").css(".school")
18
+
19
+ @degree_list = Array.new
20
+ schools.each do |school|
21
+ @degree_list.push({
22
+ school_name: school_name(school),
23
+ education_desc: education_desc(school),
24
+ education_degree: education_degree(school),
25
+ degree_start_date: degree_start_date(school),
26
+ degree_end_date: degree_end_date(school)
27
+ })
28
+
29
+ end
30
+ end
31
+
32
+ # Get the name of the school
33
+ def school_name(school)
34
+ return school.css("h4").text
35
+ end
36
+
37
+ # Get the description
38
+ def education_desc(school)
39
+ return school.css(".description").text
40
+ end
41
+
42
+ # Get the degree info
43
+ def education_degree(school)
44
+ return school.css("h5").text
45
+ end
46
+
47
+ # Get the start date for the degree
48
+ def degree_start_date(school)
49
+ start_date = school.css(".date-range").css("time")
50
+ return start_date[0].text if !is_empty?(start_date[0])
51
+ end
52
+
53
+ # Get the end date for the degree
54
+ def degree_end_date(school)
55
+ end_date = school.css(".date-range").css("time")
56
+ return end_date[1].text if !is_empty?(end_date[1])
57
+ end
58
+ end
data/lib/groups.rb ADDED
@@ -0,0 +1,37 @@
1
+ # coding: utf-8
2
+ load 'utilities.rb'
3
+
4
+ class Groups
5
+ include Utilities
6
+ def initialize(html)
7
+ @html = html
8
+ parse_groups
9
+ end
10
+
11
+ # Get list of groups
12
+ def get_groups
13
+ return @group_list
14
+ end
15
+
16
+ def parse_groups
17
+ groups = @html.css('#groups').css('.group').css('.item-title')
18
+
19
+ @group_list = Array.new
20
+ groups.each do |group|
21
+ @group_list.push({
22
+ group_name: group_name(group),
23
+ group_link: group_link(group)
24
+ })
25
+ end
26
+ end
27
+
28
+ # Get group name
29
+ def group_name(group)
30
+ return group.text
31
+ end
32
+
33
+ # Get group link
34
+ def group_link(group)
35
+ return group.css("a")[0]["href"]
36
+ end
37
+ end
data/lib/languages.rb ADDED
@@ -0,0 +1,37 @@
1
+ # coding: utf-8
2
+ load 'utilities.rb'
3
+
4
+ class Languages
5
+ include Utilities
6
+ def initialize(html)
7
+ @html = html
8
+ parse_languages
9
+ end
10
+
11
+ # Get list of languages
12
+ def get_languages
13
+ return @language_list
14
+ end
15
+
16
+ def parse_languages
17
+ languages = @html.css("#languages").css("li")
18
+
19
+ @language_list = Array.new
20
+ languages.each do |l|
21
+ @language_list.push({
22
+ language: language(l),
23
+ proficiency: proficiency(l)
24
+ })
25
+ end
26
+ end
27
+
28
+ # Language name
29
+ def language(language_name)
30
+ language_name.css("h4").text
31
+ end
32
+
33
+ # Get proficiency
34
+ def proficiency(language_name)
35
+ language_name.css(".proficiency").text
36
+ end
37
+ end
data/lib/linkedinparser.rb CHANGED
@@ -14,12 +14,25 @@ class LinkedinParser
14
14
 
15
15
  def parse
16
16
  # Get details about the person
17
- p = PersonalInfo.new(@profile, @profile_url)
18
- @personal_info = p.get_personal_info
17
+ begin
18
+ p = PersonalInfo.new(@profile, @profile_url)
19
+ @personal_info = p.get_personal_info
20
+ @personal_info.merge!({parsing_failed: false})
21
+ rescue # Handle failed parsing
22
+ @personal_info = {
23
+ profile_url: @profile_url,
24
+ full_html: @profile,
25
+ parsing_failed: true
26
+ }
27
+ end
19
28
 
20
29
  # Get job info
21
- j = Jobs.new(@profile)
22
- @job_info = j.get_jobs
30
+ begin
31
+ j = Jobs.new(@profile)
32
+ @job_info = j.get_jobs
33
+ rescue # Handle failed job parsing
34
+ @job_info = {job_parsing_failed: true}
35
+ end
23
36
  end
24
37
 
25
38
  # Return results with new item for each job
@@ -38,27 +51,5 @@ class LinkedinParser
38
51
  output[:jobs] = @job_info
39
52
  output.merge!(@crawler_fields)
40
53
  JSON.pretty_generate(output)
41
- end
42
-
43
- # TODO: Fields to add to parser-
44
- # Organizations
45
- # Education
46
- # Projects
47
- # Related people
48
- # Languages
49
- # Certifications
50
- # Groups
54
+ end
51
55
  end
52
-
53
- # Test:
54
- #profile = Selenium::WebDriver::Firefox::Profile.new
55
- #profile['intl.accept_languages'] = 'en'
56
- #profile["javascript.enabled"] = false
57
- #driver = Selenium::WebDriver.for :firefox, profile: profile
58
- #url = "https://www.linkedin.com/pub/christopher-mcclellan/5b/a09/ba9"
59
- #url = "https://www.linkedin.com/pub/maryann-holmes/2b/770/3b2"
60
-
61
- #url = "https://www.linkedin.com/pub/kenneth-chamberlin/32/8bb/b22"
62
- #driver.navigate.to url
63
- #l = LinkedinParser.new(driver.page_source, url, {timestamp: Time.now})
64
- #puts l.results_by_job
data/lib/personal_info.rb CHANGED
@@ -1,5 +1,11 @@
1
1
  load 'picture.rb'
2
2
  load 'utilities.rb'
3
+ load 'education.rb'
4
+ load 'groups.rb'
5
+ load 'languages.rb'
6
+ load 'related_people.rb'
7
+ load 'certifications.rb'
8
+ load 'causes.rb'
3
9
 
4
10
  class PersonalInfo
5
11
  include Utilities
@@ -23,6 +29,12 @@ class PersonalInfo
23
29
  summary: summary,
24
30
  current_title: title,
25
31
  interests: interests,
32
+ education: education,
33
+ groups: groups,
34
+ causes: causes,
35
+ certifications: certifications,
36
+ languages: languages,
37
+ related_people: related_people,
26
38
  number_of_connections: number_of_connections,
27
39
  picture: p.picture,
28
40
  pic_path: p.pic_path,
@@ -36,7 +48,9 @@ class PersonalInfo
36
48
 
37
49
  # Get the full name of the person
38
50
  def full_name
39
- @html.css(".profile-overview").css('h1').text
51
+ name = @html.css(".profile-overview").css('h1')
52
+ name = @html.css(".profile-overview-content").css('h1') if is_empty?(name)
53
+ return name.text
40
54
  end
41
55
 
42
56
  # Get first part of name
@@ -49,6 +63,42 @@ class PersonalInfo
49
63
  full_name.split(" ", 2).last.strip
50
64
  end
51
65
 
66
+ # Get education info
67
+ def education
68
+ e = Education.new(@html)
69
+ return e.get_education
70
+ end
71
+
72
+ # Get a list of groups they are in
73
+ def groups
74
+ g = Groups.new(@html)
75
+ return g.get_groups
76
+ end
77
+
78
+ # Get causes they care about
79
+ def causes
80
+ c = Causes.new(@html)
81
+ return c.get_causes
82
+ end
83
+
84
+ # Get the person's certifications
85
+ def certifications
86
+ c = Certifications.new(@html)
87
+ return c.get_certifications
88
+ end
89
+
90
+ # Get a list of languages they speak
91
+ def languages
92
+ l = Languages.new(@html)
93
+ return l.get_languages
94
+ end
95
+
96
+ # Get the people also viewed list from the side
97
+ def related_people
98
+ r = RelatedPeople.new(@html)
99
+ return r.get_related
100
+ end
101
+
52
102
  # Get list of skills
53
103
  def skills
54
104
  skill_list = Array.new
@@ -71,12 +121,12 @@ class PersonalInfo
71
121
 
72
122
  # Get town
73
123
  def location
74
- full_location.split(",").first.strip
124
+ full_location.split(",").first.strip if !full_location.empty?
75
125
  end
76
126
 
77
127
  # Get country/state
78
128
  def area
79
- full_location.split(",").last.strip
129
+ full_location.split(",").last.strip if !full_location.empty?
80
130
  end
81
131
 
82
132
  # Get the industry the person works in (2 different formats)
@@ -90,7 +140,7 @@ class PersonalInfo
90
140
  def summary
91
141
  summary = @html.css('#summary').css('.description')
92
142
  summary = @html.css('.summary').first if is_empty?(summary)
93
- return summary.text
143
+ return summary.text if summary
94
144
  end
95
145
 
96
146
  # Get the overall/current title
data/lib/picture.rb CHANGED
@@ -5,7 +5,8 @@ class Picture
5
5
 
6
6
  # Get path to the picture url
7
7
  def picture
8
- @html.css('.profile-picture').css('img').first['src']
8
+ pic = @html.css('.profile-picture').css('img').first
9
+ return pic['src'] if pic
9
10
  end
10
11
 
11
12
  # Download picture
data/lib/related_people.rb ADDED
@@ -0,0 +1,49 @@
1
+ # coding: utf-8
2
+ load 'utilities.rb'
3
+
4
+ class RelatedPeople
5
+ include Utilities
6
+ def initialize(html)
7
+ @html = html
8
+ parse_related
9
+ end
10
+
11
+ # Get list of related people
12
+ def get_related
13
+ return @related_people_list
14
+ end
15
+
16
+ def parse_related
17
+ related_people = @html.css(".insights").css(".browse-map").css(".profile-card")
18
+
19
+ @related_people_list = Array.new
20
+ related_people.each do |person|
21
+ @related_people_list.push({
22
+ related_name: related_name(person),
23
+ related_link: related_link(person),
24
+ related_person_company: related_person_company(person),
25
+ related_person_title: related_person_title(person)
26
+ })
27
+ end
28
+ end
29
+
30
+ # Get name of related person
31
+ def related_name(person)
32
+ return person.css("h4").text
33
+ end
34
+
35
+ # Get link to related person's profile
36
+ def related_link(person)
37
+ return person.css("h4").css("a")[0]["href"]
38
+ end
39
+
40
+ # Get related person's company
41
+ def related_person_company(person)
42
+ return person.css(".headline").text.split(" at ")[1]
43
+ end
44
+
45
+ # Get title of related person
46
+ def related_person_title(person)
47
+ return person.css(".headline").text.split(" at ")[0]
48
+ end
49
+ end
data/lib/utilities.rb CHANGED
@@ -3,4 +3,13 @@ module Utilities
3
3
  def is_empty?(item)
4
4
  item == nil || item.text.empty?
5
5
  end
6
+
7
+ # Make an array list of items
8
+ def make_list(elements)
9
+ listarr = Array.new
10
+ elements.each do |item|
11
+ listarr.push(item.text)
12
+ end
13
+ return listarr
14
+ end
6
15
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkedinparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-03 00:00:00.000000000 Z
11
+ date: 2015-11-06 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Parses public LinkedIn profiles
14
14
  email: shidash@shidash.com
@@ -16,10 +16,16 @@ executables: []
16
16
  extensions: []
17
17
  extra_rdoc_files: []
18
18
  files:
19
+ - lib/causes.rb
20
+ - lib/certifications.rb
21
+ - lib/education.rb
22
+ - lib/groups.rb
19
23
  - lib/jobs.rb
24
+ - lib/languages.rb
20
25
  - lib/linkedinparser.rb
21
26
  - lib/personal_info.rb
22
27
  - lib/picture.rb
28
+ - lib/related_people.rb
23
29
  - lib/utilities.rb
24
30
  homepage: https://github.com/TransparencyToolkit/linkedinparser
25
31
  licenses: