linkedinparser 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 33dd5468e0b1c0ed881f443d224a247eda5b37d4
4
- data.tar.gz: 6eb3e10f6954f0ce5d82a60ecd9fc3041d70f1db
3
+ metadata.gz: 55b734b221bd743748b734b022e649c57461fdc5
4
+ data.tar.gz: b22cab96393fd2b48b1e4ff7c623fe93f5a09c88
5
5
  SHA512:
6
- metadata.gz: 609148d3105b8215854a96efd502b0dd1ca6662a54a8f8160b32eeb3037ca319abc80e4fdb8243119f0b15198ffa1eb99c795aed6aef95ebd86a7806a4d734d6
7
- data.tar.gz: 4404b31657ce3b78ddaec10a43c005bf86fc1321e0c2cc68865dcd2b2ea1603e53459bc6f29fb6a0ba2b128a7bb01d751cdf913b6250c6b53e01aecbfe7c95fe
6
+ metadata.gz: 7715be94c914d84bd0226e6bf1a9df77bc0b3275134e648089e7f8e1a692995b7a56f96eb9d424d97e21b43c27af96c243155ba349ce6b99e41f90dca667acdb
7
+ data.tar.gz: 1e2aa43c38a5e83298689bd2d6be9e880c4371493d21a424aa0400f6b60e35a238d118f93d57a41be2f91c4f6f5deba92f2b497230f67ecf69b22ab81d97664c
data/lib/jobs.rb ADDED
@@ -0,0 +1,92 @@
1
+ # coding: utf-8
2
+ load 'utilities.rb'
3
+
4
require 'date'

# Extracts the work-experience entries from a LinkedIn profile page.
#
# The profile HTML is parsed once in the constructor; the resulting list of
# position hashes is returned by #get_jobs.
class Jobs
  include Utilities

  # Text that separates the start and the end of a date range.
  DATE_SEPARATOR = ' – '.freeze

  # A bare four-digit year (19xx / 20xx). Anchored on the whole string.
  YEAR_ONLY = /\A(19|20)\d{2}\z/

  # profile - String containing the raw HTML of the profile page.
  def initialize(profile)
    @html = Nokogiri::HTML(profile)
    parse_jobs
  end

  # Get list of jobs (Array of Hashes built by #parse_jobs).
  def get_jobs
    @positions_list
  end

  # Builds @positions_list, one Hash per position found in the page.
  def parse_jobs
    # Multiple html options: try the '#experience' markup first and fall back
    # to the '#background-experience' markup when nothing was found.
    positions = @html.css('#experience').css('.position')
    if is_empty?(positions)
      background = @html.css('#background-experience')
      positions = background.css('.current-position') +
                  background.css('.past-position')
    end

    # Get lists of positions
    @positions_list = positions.map do |position|
      {
        title: title(position),
        company: company(position),
        description: description(position),
        start_date: start_date(position),
        end_date: end_date(position),
        work_location: work_location(position),
        current: current(position)
      }
    end
  end

  # Check if it is a current position or not.
  # Returns "Yes" when the end date is the literal "Present", "No" otherwise.
  def current(position)
    end_date(position) == 'Present' ? 'Yes' : 'No'
  end

  # Get the job title
  def title(position)
    position.css('h4').text
  end

  # Get the company for the position
  def company(position)
    position.css('h5').text
  end

  # Get job description
  def description(position)
    position.css('.description').text
  end

  # Get the node(s) holding the date range (two markup variants).
  def get_dates(position)
    dates = position.css('.meta').css('.date-range')
    dates = position.css('.experience-date-locale') if is_empty?(dates)
    dates
  end

  # Get start date.
  # Returns a Date, or nil when the position carries no date text.
  def start_date(position)
    date_parse(date_range_parts(position).first)
  end

  # Get end date.
  # Returns the String "Present" for an ongoing position, a Date otherwise,
  # or nil when the position carries no date text.
  def end_date(position)
    raw = date_range_parts(position).last
    return nil if raw.nil?

    # Drop a trailing parenthesised part, e.g. "(2 years)".
    raw = raw.split('(').first.to_s.strip
    return raw if raw == 'Present'

    # Go through date_parse so a bare year gets the same treatment as it
    # does for the start date.
    date_parse(raw)
  end

  # Parse date.
  # A bare year is expanded to January 1st of that year before parsing.
  # Returns nil for nil or blank input; Date.parse may still raise
  # ArgumentError for text it cannot interpret.
  def date_parse(date)
    text = date.to_s.strip
    return nil if text.empty?

    text = "#{text}-01-01" if text =~ YEAR_ONLY
    Date.parse(text)
  end

  # Get location for work
  def work_location(position)
    position.css('.experience-date-locale').css('.locality').text
  end

  private

  # Splits the date text of a position on the range separator.
  # Returns an empty Array when there is no date text.
  def date_range_parts(position)
    get_dates(position).text.split(DATE_SEPARATOR)
  end
end
@@ -0,0 +1,63 @@
1
+ require 'selenium-webdriver'
2
+ require 'pry'
3
+ require 'nokogiri'
4
+ load 'personal_info.rb'
5
+ load 'jobs.rb'
6
+
7
require 'json'

# Combines the personal details and the job history parsed from one LinkedIn
# profile page and serialises them as JSON.
class LinkedinParser
  # profile        - String containing the raw HTML of the profile page.
  # profile_url    - URL the profile was fetched from.
  # crawler_fields - Hash of extra fields merged into every result
  #                  (nil is treated as an empty Hash).
  def initialize(profile, profile_url, crawler_fields)
    @profile = profile
    @profile_url = profile_url
    @crawler_fields = crawler_fields || {}
    parse
  end

  # Runs the sub-parsers and stores their results.
  def parse
    # Get details about the person
    @personal_info = PersonalInfo.new(@profile, @profile_url).get_personal_info

    # Get job info
    @job_info = Jobs.new(@profile).get_jobs
  end

  # Return results with new item for each job.
  #
  # Each item is the job Hash merged with the personal info and then the
  # crawler fields. Non-destructive merges are used so the parsed data is left
  # untouched and the two result methods can be called in any order, any
  # number of times.
  def results_by_job
    rows = @job_info.map do |job|
      job.merge(@personal_info).merge(@crawler_fields)
    end

    JSON.pretty_generate(rows)
  end

  # Return results in nested JSON: the personal info with the job list under
  # the :jobs key, followed by the crawler fields.
  def results_by_person
    person = @personal_info.merge(jobs: @job_info).merge(@crawler_fields)
    JSON.pretty_generate(person)
  end

  # TODO: Fields to add to parser-
  # Organizations
  # Education
  # Projects
  # Related people
  # Languages
  # Certifications
  # Groups
end
52
+
53
+ # Test:
54
+ #profile = Selenium::WebDriver::Firefox::Profile.new
55
+ #profile['intl.accept_languages'] = 'en'
56
+ #profile["javascript.enabled"] = false
57
+ #driver = Selenium::WebDriver.for :firefox, profile: profile
58
+ #url = "https://www.linkedin.com/pub/christopher-mcclellan/5b/a09/ba9"
59
+ #url = "https://www.linkedin.com/pub/maryann-holmes/2b/770/3b2"
60
+ #driver.navigate.to url
61
+
62
+ #l = LinkedinParser.new(driver.page_source, url, {timestamp: Time.now})
63
+ #puts l.results_by_job
@@ -0,0 +1,126 @@
1
+ load 'picture.rb'
2
+ load 'utilities.rb'
3
+
4
# Extracts the personal details (name, location, skills, ...) from a LinkedIn
# profile page. Everything is parsed in the constructor and collected in a
# Hash returned by #get_personal_info.
#
# Text fields whose element is missing from the page come back as an empty
# String rather than raising.
class PersonalInfo
  include Utilities

  # profile     - String containing the raw HTML of the profile page.
  # profile_url - URL the profile was fetched from.
  def initialize(profile, profile_url)
    @profile = profile
    @html = Nokogiri::HTML(profile)
    @profile_url = profile_url

    # Parse attributes
    picture = Picture.new(@html)
    @personal_info = {
      profile_url: @profile_url,
      full_name: full_name,
      first_name: first_name,
      last_name: last_name,
      skills: skills,
      full_location: full_location,
      location: location,
      area: area,
      industry: industry,
      summary: summary,
      current_title: title,
      interests: interests,
      number_of_connections: number_of_connections,
      picture: picture.picture,
      pic_path: picture.pic_path,
      full_html: full_html
    }
  end

  # Return person hash
  def get_personal_info
    @personal_info
  end

  # Get the full name of the person
  def full_name
    @html.css('.profile-overview').css('h1').text
  end

  # Get first part of name (text before the first space).
  # to_s guards against an empty full name, where split yields no parts.
  def first_name
    full_name.split(' ', 2).first.to_s.strip
  end

  # Get last part of name (everything after the first space; the whole name
  # when it contains no space).
  def last_name
    full_name.split(' ', 2).last.to_s.strip
  end

  # Get list of skills as an Array of Strings.
  def skills
    # Two formatting options for skills
    skills = @html.css('#skills').css('.skill')
    skills = @html.css('.skill-pill .endorse-item-name-text') if is_empty?(skills)

    # Make list of skills
    skills.map(&:text)
  end

  # Get full location
  def full_location
    @html.css('.profile-overview').css('.locality').text
  end

  # Get town (text before the first comma of the full location).
  def location
    full_location.split(',').first.to_s.strip
  end

  # Get country/state (text after the last comma of the full location).
  def area
    full_location.split(',').last.to_s.strip
  end

  # Get the industry the person works in (2 different formats)
  def industry
    overview = @html.css('.profile-overview')
    industry = overview.css('.descriptor')[1]
    industry = overview.css('.industry') if is_empty?(industry)
    industry.text
  end

  # Get the summary field (2 different formats)
  def summary
    summary = @html.css('#summary').css('.description')
    # The fallback takes a single node, which is nil when nothing matches.
    summary = @html.css('.summary').first if is_empty?(summary)
    summary ? summary.text : ''
  end

  # Get the overall/current title (3 different formats)
  def title
    title = @html.css('.title').css('.headline')
    title = @html.css('#headline').css('.title') if is_empty?(title)
    title = @html.css('.title') if is_empty?(title)
    title.text
  end

  # Get the number of connections: the text of the first
  # '.member-connections' element with the word "connections" removed.
  def number_of_connections
    counter = @html.css('.member-connections').first
    counter ? counter.text.gsub('connections', '').strip : ''
  end

  # Get list of interests as an Array of Strings.
  def interests
    interests = @html.css('#interests').css('.interest')
    interests = @html.css('#background-interests').css('.interest-item') if is_empty?(interests)

    interests.map(&:text)
  end

  # Save the full html of the page
  def full_html
    @profile
  end
end
data/lib/picture.rb ADDED
@@ -0,0 +1,32 @@
1
# Locates the profile picture in a parsed profile page and downloads it.
class Picture
  # Directory (relative to the working directory) pictures are saved in.
  PICTURE_DIR = 'pictures/'.freeze

  # html - parsed document; only #css is called on it.
  def initialize(html)
    @html = html
  end

  # Get path to the picture url.
  # Returns the src attribute of the first image inside '.profile-picture',
  # or nil when the page has no such image.
  def picture
    image = @html.css('.profile-picture').css('img').first
    image && image['src']
  end

  # Download picture.
  # Returns the local path of the picture, or nil when the profile has no
  # picture URL. The file is only fetched when it is not already on disk.
  def pic_path
    url = picture
    return nil if url.nil? || url.strip.empty?

    # Get path
    full_path = PICTURE_DIR + url.split('/').last.chomp.strip

    # Get file
    download(url) unless File.file?(full_path)
    delete_duplicate_pics
    full_path
  end

  # Deletes duplicate pictures
  # NOTE(review): this cleans 'public/uploads/pictures/' while downloads go to
  # 'pictures/' relative to the working directory - confirm which is intended.
  def delete_duplicate_pics
    Dir['public/uploads/pictures/*.jpg.*'].each do |path|
      File.delete(path)
    end
  end

  private

  # Fetches url into PICTURE_DIR with wget.
  # The URL comes from scraped HTML, so it is passed as a separate argument
  # (no shell is involved) and placed after '--' so it cannot be read as an
  # option. wget's standard output is discarded.
  def download(url)
    system('wget', '-P', PICTURE_DIR, '--', url, out: File::NULL)
  end
end
data/lib/utilities.rb ADDED
@@ -0,0 +1,6 @@
1
# Small helpers shared by the parser classes.
module Utilities
  # Lets the helper also be called directly as Utilities.is_empty?(item),
  # in addition to being mixed into a class.
  extend self

  # Check if item is nil or empty.
  #
  # item - nil, or any object responding to #text.
  #
  # Returns true when item is nil or its text is blank. Whitespace-only text
  # counts as empty, so a container holding nothing but line breaks still
  # lets the caller fall back to its alternative selector.
  def is_empty?(item)
    item.nil? || item.text.to_s.strip.empty?
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkedinparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath
@@ -15,7 +15,12 @@ email: shidash@shidash.com
15
15
  executables: []
16
16
  extensions: []
17
17
  extra_rdoc_files: []
18
- files: []
18
+ files:
19
+ - lib/jobs.rb
20
+ - lib/linkedinparser.rb
21
+ - lib/personal_info.rb
22
+ - lib/picture.rb
23
+ - lib/utilities.rb
19
24
  homepage: https://github.com/TransparencyToolkit/linkedinparser
20
25
  licenses:
21
26
  - GPL