linkedin-scraper 0.0.11 → 0.0.12

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c7cfeee1f051d529594d6d827d6ad373b6aca496
4
- data.tar.gz: 681cfad543c0d7daa2863e6c6c2525560cf640df
3
+ metadata.gz: 98225f23a99fa755b3e29e92cfcff488c11ede5a
4
+ data.tar.gz: aaaae8060e81d59cc8f17848606acd382a158cea
5
5
  SHA512:
6
- metadata.gz: 9c52c63f97a7855b088bb467bab0b72ac4a1616424348318b1072a0768dac512d4279fc04d1c1e8c213d3e57dc3eb368c43d757a8b937f979217d529f2a29510
7
- data.tar.gz: 7d6e965dd00cb7ffc23f244eaa70d342e63d58b4a8ce760726f7907661dfbdae76c6a0bb740b3cdbdc20bd32b2f18230a40c4be25b1da1e289373e5da098397c
6
+ metadata.gz: f6ae1cd6a3eb3b9b66d7b32cc340cfba163627c0c958d2162f5eec43782f00631e5dc8e0802e4df7781350ba9c08afc397c2495ff07624540d002d871bcf62f2
7
+ data.tar.gz: 0aed555859a8ef26ce93a724da89ad34011d43986e6a0e6b80d43d7424472b5a1ba4fefac8f17c0789b5326e3cd4464ab7ce2198d106b7c497c62ed6a2e091bc
data/.rubocop.yml ADDED
@@ -0,0 +1,11 @@
1
+ Documentation:
2
+ Enabled: false
3
+
4
+ DotPosition:
5
+ Enabled: false
6
+
7
+ LineLength:
8
+ Enabled: false
9
+
10
+ MethodLength:
11
+ Enabled: false
data/README.md CHANGED
@@ -1,4 +1,5 @@
1
- [![Build Status](https://secure.travis-ci.org/yatishmehta27/linkedin-scraper.png)](http://travis-ci.org/yatishmehta27/linkedin-scraper)
1
+ [![Build Status](https://secure.travis-ci.org/yatish27/linkedin-scraper.png)](http://travis-ci.org/yatish27/linkedin-scraper)
2
+ [![Gem Version](https://badge.fury.io/rb/linkedin-scraper.png)](http://badge.fury.io/rb/linkedin-scraper)
2
3
 
3
4
  Linkedin Scraper
4
5
  ================
@@ -251,7 +252,7 @@ For current and past comapnies it also provides the details of the companies lik
251
252
  ]
252
253
 
253
254
 
254
- The gem also comes with a binary and can be used from teh command line to get a json response of the scraped data. It takes the url as the first argument.
255
+ The gem also comes with a binary and can be used from the command line to get a json response of the scraped data. It takes the url as the first argument.
255
256
 
256
257
  linkedin-scraper http://www.linkedin.com/in/jeffweiner08
257
258
 
data/bin/linkedin-scraper CHANGED
@@ -1,4 +1,3 @@
1
- #!/usr/bin/env ruby
2
1
 
3
2
  require './lib/linkedin-scraper'
4
3
  profile = Linkedin::Profile.new(ARGV[0])
@@ -1,8 +1,5 @@
1
- require "rubygems"
2
- require "mechanize"
3
- require "cgi"
4
- require "net/http"
5
- Dir["#{File.expand_path(File.dirname(__FILE__))}/linkedin-scraper/*.rb"].each {|file| require file }
6
-
7
-
8
-
1
+ require 'rubygems'
2
+ require 'mechanize'
3
+ require 'cgi'
4
+ require 'net/http'
5
+ Dir["#{File.expand_path(File.dirname(__FILE__))}/linkedin-scraper/*.rb"].each { |file| require file }
@@ -5,9 +5,9 @@ module Linkedin
5
5
  USER_AGENTS = ['Windows IE 6', 'Windows IE 7', 'Windows Mozilla', 'Mac Safari', 'Mac FireFox', 'Mac Mozilla', 'Linux Mozilla', 'Linux Firefox', 'Linux Konqueror']
6
6
 
7
7
  ATTRIBUTES = %w(name first_name last_name title location country industry summary picture linkedin_url education groups websites languages skills certifications organizations past_companies current_companies recommended_visitors)
8
-
8
+
9
9
  attr_reader :page, :linkedin_url
10
-
10
+
11
11
  def self.get_profile(url)
12
12
  begin
13
13
  Linkedin::Profile.new(url)
@@ -20,12 +20,12 @@ module Linkedin
20
20
  @linkedin_url = url
21
21
  @page = http_client.get(url)
22
22
  end
23
-
23
+
24
24
  def name
25
25
  "#{first_name} #{last_name}"
26
26
  end
27
-
28
- def first_name
27
+
28
+ def first_name
29
29
  @first_name ||= (@page.at('.given-name').text.strip if @page.at('.given-name'))
30
30
  end
31
31
 
@@ -33,7 +33,7 @@ module Linkedin
33
33
  @last_name ||= (@page.at('.family-name').text.strip if @page.at('.family-name'))
34
34
  end
35
35
 
36
- def title
36
+ def title
37
37
  @title ||= (@page.at('.headline-title').text.gsub(/\s+/, ' ').strip if @page.at('.headline-title'))
38
38
  end
39
39
 
@@ -77,12 +77,12 @@ module Linkedin
77
77
  name = item.at('h3').text.gsub(/\s+|\n/, ' ').strip if item.at('h3')
78
78
  desc = item.at('h4').text.gsub(/\s+|\n/, ' ').strip if item.at('h4')
79
79
  period = item.at('.period').text.gsub(/\s+|\n/, ' ').strip if item.at('.period')
80
-
80
+
81
81
  {:name => name, :description => desc, :period => period}
82
82
  end
83
83
  end
84
84
  end
85
- @education
85
+ @education
86
86
  end
87
87
 
88
88
  def websites
@@ -118,7 +118,7 @@ module Linkedin
118
118
  @organizations = []
119
119
  if @page.search('ul.organizations/li.organization').first
120
120
  @organizations = @page.search('ul.organizations/li.organization').map do |item|
121
-
121
+
122
122
  name = item.search('h3').text.gsub(/\s+|\n/, ' ').strip rescue nil
123
123
  start_date, end_date = item.search('ul.specifics li').text.gsub(/\s+|\n/, ' ').strip.split(' to ')
124
124
  start_date = Date.parse(start_date) rescue nil
@@ -153,21 +153,21 @@ module Linkedin
153
153
  authority = item.at('.specifics/.org').text.gsub(/\s+|\n/, ' ').strip rescue nil
154
154
  license = item.at('.specifics/.licence-number').text.gsub(/\s+|\n/, ' ').strip rescue nil
155
155
  start_date = item.at('.specifics/.dtstart').text.gsub(/\s+|\n/, ' ').strip rescue nil
156
-
156
+
157
157
  {:name => name, :authority => authority, :license => license, :start_date => start_date}
158
158
  end
159
159
  end
160
160
  end
161
161
  @certifications
162
162
  end
163
-
163
+
164
164
 
165
165
  def recommended_visitors
166
166
  unless @recommended_visitors
167
167
  @recommended_visitors = []
168
168
  if @page.at('.browsemap/.content/ul/li')
169
169
  @recommended_visitors = @page.search('.browsemap/.content/ul/li').map do |visitor|
170
- v = {}
170
+ v = {}
171
171
  v[:link] = visitor.at('a')['href']
172
172
  v[:name] = visitor.at('strong/a').text
173
173
  v[:title] = visitor.at('.headline').text.gsub('...',' ').split(' at ').first
@@ -181,46 +181,46 @@ module Linkedin
181
181
 
182
182
  def to_json
183
183
  require 'json'
184
- hash = {}
185
- ATTRIBUTES.each do |attribute|
186
- hash[attribute.to_sym] = self.send(attribute.to_sym)
187
- end
188
- hash.to_json
184
+ ATTRIBUTES.reduce({}){ |hash,attr| hash[attr.to_sym] = self.send(attr.to_sym);hash }.to_json
189
185
  end
190
186
 
191
187
 
192
188
  private
193
-
189
+
194
190
  def get_companies(type)
195
191
  companies = []
196
192
  if @page.search(".position.experience.vevent.vcard.summary-#{type}").first
197
193
  @page.search(".position.experience.vevent.vcard.summary-#{type}").each do |node|
198
-
194
+
199
195
  company = {}
200
196
  company[:title] = node.at('h3').text.gsub(/\s+|\n/, ' ').strip if node.at('h3')
201
197
  company[:company] = node.at('h4').text.gsub(/\s+|\n/, ' ').strip if node.at('h4')
202
198
  company[:description] = node.at(".description.#{type}-position").text.gsub(/\s+|\n/, ' ').strip if node.at(".description.#{type}-position")
203
199
  start_date = node.at('.dtstart').text.gsub(/\s+|\n/, ' ').strip rescue nil
204
- company[:start_date] = Date.parse(start_date) rescue nil
200
+ company[:start_date] = parse_date(start_date) rescue nil
205
201
 
206
- end_date = node.at('.dtend').text.gsub(/\s+|\n/, ' ').strip rescue nil
207
- company[:end_date] = Date.parse(end_date) rescue nil
208
-
202
+ end_date = node.at('.dtend').text.gsub(/\s+|\n/, ' ').strip rescue nil
203
+ end_date ||= node.at('.dtstamp').text.gsub(/\s+|\n/, ' ').strip rescue nil
204
+ company[:end_date] = parse_date(end_date) rescue nil
209
205
 
210
206
  company_link = node.at('h4/strong/a')['href'] if node.at('h4/strong/a')
211
207
 
212
- result = get_company_details(company_link)
208
+ result = get_company_details(company_link)
213
209
  companies << company.merge!(result)
214
210
  end
215
211
  end
216
212
  companies
217
213
  end
218
214
 
219
-
215
+ def parse_date(date)
216
+ date = "#{date}-01-01" if date =~ /^(19|20)\d{2}$/
217
+ Date.parse(date)
218
+ end
219
+
220
220
  def get_company_details(link)
221
221
  result = {:linkedin_company_url => "http://www.linkedin.com#{link}"}
222
222
  page = http_client.get(result[:linkedin_company_url])
223
-
223
+
224
224
  result[:url] = page.at('.basic-info/div/dl/dd/a').text if page.at('.basic-info/div/dl/dd/a')
225
225
  node_2 = page.at('.basic-info/.content.inner-mod')
226
226
  if node_2
@@ -231,7 +231,7 @@ module Linkedin
231
231
  result[:address] = page.at('.vcard.hq').at('.adr').text.gsub("\n",' ').strip if page.at('.vcard.hq')
232
232
  result
233
233
  end
234
-
234
+
235
235
  def http_client
236
236
  Mechanize.new do |agent|
237
237
  agent.user_agent_alias = USER_AGENTS.sample
@@ -1,5 +1,5 @@
1
1
  module Linkedin
2
2
  module Scraper
3
- VERSION = "0.0.11"
3
+ VERSION = '0.0.12'
4
4
  end
5
5
  end
@@ -2,20 +2,19 @@
2
2
  require File.expand_path('../lib/linkedin-scraper/version', __FILE__)
3
3
 
4
4
  Gem::Specification.new do |gem|
5
- gem.authors = ["Yatish Mehta"]
5
+ gem.authors = ['Yatish Mehta']
6
6
  gem.description = %q{Scrapes the linkedin profile when a url is given }
7
7
  gem.summary = %q{when a url of public linkedin profile page is given it scrapes the entire page and converts into a accessible object}
8
- gem.homepage = "https://github.com/yatishmehta27/linkedin-scraper"
8
+ gem.homepage = 'https://github.com/yatishmehta27/linkedin-scraper'
9
9
  gem.files = `git ls-files`.split($\)
10
- gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
10
+ gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
11
11
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
12
- gem.name = "linkedin-scraper"
13
- gem.require_paths = ["lib"]
12
+ gem.name = 'linkedin-scraper'
13
+ gem.require_paths = ['lib']
14
14
  gem.version = Linkedin::Scraper::VERSION
15
15
 
16
-
17
- gem.add_dependency(%q<mechanize>, [">= 0"])
18
- gem.add_development_dependency 'rspec','>=0'
16
+ gem.add_dependency(%q<mechanize>, ['>= 0'])
17
+ gem.add_development_dependency 'rspec', '>=0'
19
18
  gem.add_development_dependency 'rake'
20
19
 
21
20
  end
@@ -5,25 +5,25 @@ describe Linkedin::Profile do
5
5
 
6
6
 
7
7
  before(:all) do
8
- @page = Nokogiri::HTML(File.open("spec/fixtures/jgrevich.html", 'r') { |f| f.read })
9
- @profile = Linkedin::Profile.new("http://www.linkedin.com/in/jgrevich")
8
+ @page = Nokogiri::HTML(File.open('spec/fixtures/jgrevich.html', 'r') { |f| f.read })
9
+ @profile = Linkedin::Profile.new('http://www.linkedin.com/in/jgrevich')
10
10
  end
11
11
 
12
- describe ".get_profile" do
13
- it "Create an instance of Linkedin::Profile class" do
12
+ describe '.get_profile' do
13
+ it 'Create an instance of Linkedin::Profile class' do
14
14
  expect(@profile).to be_instance_of Linkedin::Profile
15
15
  end
16
16
  end
17
17
 
18
- describe "#first_name" do
18
+ describe '#first_name' do
19
19
  it 'returns the first name of the profile' do
20
- expect(@profile.first_name).to eq "Justin"
20
+ expect(@profile.first_name).to eq 'Justin'
21
21
  end
22
22
  end
23
23
 
24
- describe "#last_name" do
24
+ describe '#last_name' do
25
25
  it 'returns the last name of the profile' do
26
- expect(@profile.last_name).to eq "Grevich"
26
+ expect(@profile.last_name).to eq 'Grevich'
27
27
  end
28
28
  end
29
29
 
@@ -65,7 +65,7 @@ describe Linkedin::Profile do
65
65
 
66
66
  describe '#skills' do
67
67
  it 'returns the array of skills of the profile' do
68
- skills = ["Ruby", "Ruby on Rails", "Web Development", "Web Applications", "CSS3", "HTML 5", "Shell Scripting", "Python", "Chef", "Git", "Subversion", "JavaScript", "Rspec", "jQuery", "Capistrano", "Sinatra", "CoffeeScript", "Haml", "Standards Compliance", "MySQL", "PostgreSQL", "Solr", "Sphinx", "Heroku", "Amazon Web Services (AWS)", "Information Security", "Vulnerability Assessment", "SAN", "ZFS", "Backup Solutions", "SaaS", "System Administration", "Project Management", "Linux", "Troubleshooting", "Network Security", "OS X", "Bash", "Cloud Computing", "Web Design", "MongoDB", "Z-Wave", "Home Automation"]
68
+ skills = ['Ruby', 'Ruby on Rails', 'Web Development', 'Web Applications', 'CSS3', 'HTML 5', 'Shell Scripting', 'Python', 'Chef', 'Git', 'Subversion', 'JavaScript', 'Rspec', 'jQuery', 'Capistrano', 'Sinatra', 'CoffeeScript', 'Haml', 'Standards Compliance', 'MySQL', 'PostgreSQL', 'Solr', 'Sphinx', 'Heroku', 'Amazon Web Services (AWS)', 'Information Security', 'Vulnerability Assessment', 'SAN', 'ZFS', 'Backup Solutions', 'SaaS', 'System Administration', 'Project Management', 'Linux', 'Troubleshooting', 'Network Security', 'OS X', 'Bash', 'Cloud Computing', 'Web Design', 'MongoDB', 'Z-Wave', 'Home Automation']
69
69
  expect(@profile.skills).to include(*skills)
70
70
  end
71
71
  end
@@ -100,20 +100,20 @@ describe Linkedin::Profile do
100
100
  end
101
101
  end
102
102
 
103
- describe "#name" do
103
+ describe '#name' do
104
104
  it 'returns the first and last name of the profile' do
105
- expect(@profile.name).to eq "Justin Grevich"
105
+ expect(@profile.name).to eq 'Justin Grevich'
106
106
  end
107
- end
107
+ end
108
108
 
109
- describe "#organizations" do
109
+ describe '#organizations' do
110
110
  it 'returns an array of organization hashes for the profile' do
111
111
  expect(@profile.organizations.class).to eq Array
112
112
  expect(@profile.organizations.first[:name]).to eq 'San Diego Ruby'
113
113
  end
114
114
  end
115
115
 
116
- describe "#languages" do
116
+ describe '#languages' do
117
117
  it 'returns an array of languages hashes' do
118
118
  expect(@profile.languages.class).to eq Array
119
119
  end
@@ -133,8 +133,8 @@ describe Linkedin::Profile do
133
133
  end
134
134
  end
135
135
  end # context 'with language data' do
136
-
137
- end # describe ".languages" do
136
+
137
+ end # describe '.languages' do
138
138
 
139
139
  describe '#recommended_visitors' do
140
140
  it 'returns the array of hashes of recommended visitors' do
@@ -148,5 +148,10 @@ describe Linkedin::Profile do
148
148
  end
149
149
  end
150
150
 
151
+ describe '#to_json' do
152
+ it 'returns the json format of the profile' do
153
+ @profile.to_json
154
+ end
155
+ end
151
156
 
152
157
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkedin-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.11
4
+ version: 0.0.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yatish Mehta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-09-23 00:00:00.000000000 Z
11
+ date: 2013-11-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize
@@ -60,6 +60,7 @@ extensions: []
60
60
  extra_rdoc_files: []
61
61
  files:
62
62
  - .gitignore
63
+ - .rubocop.yml
63
64
  - .travis.yml
64
65
  - Gemfile
65
66
  - LICENSE