linkedin-scraper 1.0.5 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 51ab67fc48225482a632be818fe7dc158c089cd2
4
- data.tar.gz: 355c19cec2f5613b7dc8afd668e845dc406a3134
3
+ metadata.gz: 2c77bcfa337ca481575a21d58974d9f574c75578
4
+ data.tar.gz: e10ffd0759cb6535202a8977b9b9f3d0bb5530ce
5
5
  SHA512:
6
- metadata.gz: 0fd8ef9fba5d09b4ceb466cffb94d6f8327eacd10cf18ced217715374c7931d9a1b7c8a4ddb94445f8652af71c31e07ae862184acba37f0bfe8209ae1e9b7a52
7
- data.tar.gz: fa66286cb67770d52151d190e2f4b23ab3a3ebcc80d3201d6c87e3bf3b39ca98ca74254a4041e612a59c4b3d50a5ddcae90d5eaba9da53017da4116efe7224af
6
+ metadata.gz: 177977da49150f249c6bc7f5d7dcbaaf4ac2d167c8e4835c8e1d799e5ff126704972d959b4acd86db0b1a93baa0c43233621367721ced19bcee521c39ca317ea
7
+ data.tar.gz: c1e314563caecc20c601abcf1769846a20b807b0966dbfc804a152c721d0f116c9ab5156a9a5aceb0d6c76799281da7745eb24543979e9205a41508f9f68d03e
data/README.md CHANGED
@@ -18,7 +18,9 @@ Install the gem from RubyGems:
18
18
  This gem is tested on 1.9.2, 1.9.3, 2.0.0, 2.2, 2.3, JRuby1.9, and rbx1.9.
19
19
 
20
20
  ## Usage
21
+ Include the gem
21
22
 
23
+ require 'linkedin_scraper'
22
24
 
23
25
  Initialize a scraper instance
24
26
 
@@ -270,7 +272,7 @@ It takes the url as the first argument.
270
272
 
271
273
  Bug reports and pull requests are welcome on GitHub at https://github.com/yatish27/linkedin-scraper.
272
274
  This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the
273
- [Contributor Covenant](contributor-covenant.org) code of conduct.
275
+ [Contributor Covenant](http://contributor-covenant.org) code of conduct.
274
276
 
275
277
 
276
278
  ## License
@@ -2,4 +2,5 @@ require "rubygems"
2
2
  require "mechanize"
3
3
  require "cgi"
4
4
  require "net/http"
5
+ require "random_user_agent"
5
6
  Dir["#{File.expand_path(File.dirname(__FILE__))}/linkedin_scraper/*.rb"].each { |file| require file }
@@ -2,25 +2,6 @@
2
2
  module Linkedin
3
3
  class Profile
4
4
 
5
- #USER_AGENTS = ["Windows IE 6", "Windows IE 7", "Windows Mozilla", "Mac Safari", "Mac Firefox", "Mac Mozilla", "Linux Mozilla", "Linux Firefox", "Linux Konqueror"]
6
- USER_AGENTS = [
7
- "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6",
8
- "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:5.0) Gecko/20100101 Firefox/5.0",
9
- "Mozilla/5.0 (Windows NT 6.1.1; rv:5.0) Gecko/20100101 Firefox/5.0",
10
- "Mozilla/5.0 (X11; U; Linux i586; de; rv:5.0) Gecko/20100101 Firefox/5.0",
11
- "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/11.04 Chromium/14.0.825.0 Chrome/14.0.825.0 Safari/535.1",
12
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.824.0 Safari/535.1",
13
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:5.0) Gecko/20100101 Firefox/5.0",
14
- "Mozilla/5.0 (Macintosh; PPC MacOS X; rv:5.0) Gecko/20110615 Firefox/5.0",
15
- "Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US))",
16
- "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.2; Trident/4.0; Media Center PC 4.0; SLCC1; .NET CLR 3.0.04320)",
17
- "Mozilla/5.0 (Windows; U; MSIE 7.0; Windows NT 6.0; en-US)",
18
- "Mozilla/5.0 (compatible; Konqueror/4.5; FreeBSD) KHTML/4.5.4 (like Gecko)",
19
- "Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00",
20
- "Opera/9.80 (X11; Linux x86_64; U; fr) Presto/2.9.168 Version/11.50",
21
- "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; de-at) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1",
22
- "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_7; da-dk) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1"
23
- ]
24
5
  ATTRIBUTES = %w(
25
6
  name
26
7
  first_name
@@ -101,7 +82,7 @@ module Linkedin
101
82
  end
102
83
 
103
84
  def skills
104
- @skills ||= (@page.search(".pills .skill").map { |skill| skill.text.strip if skill.text } rescue nil)
85
+ @skills ||= (@page.search(".pills .skill:not(.see-less)").map { |skill| skill.text.strip if skill.text } rescue nil)
105
86
  end
106
87
 
107
88
  def past_companies
@@ -185,13 +166,13 @@ module Linkedin
185
166
  def projects
186
167
  @projects ||= @page.search("#projects .project").map do |project|
187
168
  p = {}
188
- start_date, end_date = project.at("date-range").text.gsub(/\s+|\n/, " ").strip.split(" – ") rescue nil
169
+ start_date, end_date = project.at(".date-range").text.gsub(/\s+|\n/, " ").strip.split(" – ") rescue nil
189
170
 
190
171
  p[:title] = project.at(".item-title").text
191
172
  p[:link] = CGI.parse(URI.parse(project.at(".item-title a")['href']).query)["url"][0] rescue nil
192
173
  p[:start_date] = parse_date(start_date) rescue nil
193
174
  p[:end_date] = parse_date(end_date) rescue nil
194
- p[:description] = project.at(".description").text rescue nil
175
+ p[:description] = project.at(".description").children().to_s rescue nil
195
176
  p[:associates] = project.search(".contributors .contributor").map{ |c| c.at("a").text } rescue nil
196
177
  p
197
178
  end
@@ -215,10 +196,12 @@ module Linkedin
215
196
  company = {}
216
197
  company[:title] = node.at(".item-title").text.gsub(/\s+|\n/, " ").strip if node.at(".item-title")
217
198
  company[:company] = node.at(".item-subtitle").text.gsub(/\s+|\n/, " ").strip if node.at(".item-subtitle")
199
+ company[:location] = node.at(".location").text if node.at(".location")
218
200
  company[:description] = node.at(".description").text.gsub(/\s+|\n/, " ").strip if node.at(".description")
201
+ company[:company_logo] = node.at(".logo a img").first[1] if node.at(".logo")
219
202
 
220
- start_date, end_date = node.at(".meta").text.strip.split(" – ") rescue nil
221
- company[:duration] = node.at(".meta").text[/.*\((.*)\)/, 1]
203
+ start_date, end_date = node.at(".date-range").text.strip.split(" – ") rescue nil
204
+ company[:duration] = node.at(".date-range").text[/.*\((.*)\)/, 1]
222
205
  company[:start_date] = parse_date(start_date) rescue nil
223
206
 
224
207
  if end_date && end_date.match(/Present/)
@@ -261,7 +244,7 @@ module Linkedin
261
244
 
262
245
  def http_client
263
246
  Mechanize.new do |agent|
264
- agent.user_agent = USER_AGENTS.sample
247
+ agent.user_agent = RandomUserAgent.randomize
265
248
  unless @options.empty?
266
249
  agent.set_proxy(@options[:proxy_ip], @options[:proxy_port], @options[:username], @options[:password])
267
250
  end
@@ -1,5 +1,5 @@
1
1
  module Linkedin
2
2
  module Scraper
3
- VERSION = '1.0.5'
3
+ VERSION = '1.1.0'
4
4
  end
5
5
  end
@@ -16,7 +16,8 @@ Gem::Specification.new do |gem|
16
16
  gem.license = "MIT"
17
17
 
18
18
  gem.add_dependency 'mechanize', '~> 2'
19
-
19
+ gem.add_dependency 'random_user_agent'
20
+
20
21
  gem.add_development_dependency 'rspec', '~> 3'
21
22
  gem.add_development_dependency 'rake', '~> 10'
22
23
  end
@@ -47,6 +47,10 @@ describe Linkedin::Profile do
47
47
  it "returns list of profile's skills" do
48
48
  expect(profile.skills).to include("Product Development")
49
49
  end
50
+
51
+ it 'does not return "See less"' do
52
+ expect(profile.skills).not_to include("See less")
53
+ end
50
54
  end
51
55
 
52
56
  describe '#websites' do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkedin-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yatish Mehta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-09 00:00:00.000000000 Z
11
+ date: 2016-05-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '2'
27
+ - !ruby/object:Gem::Dependency
28
+ name: random_user_agent
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: rspec
29
43
  requirement: !ruby/object:Gem::Requirement