linkedin-scraper 1.0.5 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 51ab67fc48225482a632be818fe7dc158c089cd2
4
- data.tar.gz: 355c19cec2f5613b7dc8afd668e845dc406a3134
3
+ metadata.gz: 2c77bcfa337ca481575a21d58974d9f574c75578
4
+ data.tar.gz: e10ffd0759cb6535202a8977b9b9f3d0bb5530ce
5
5
  SHA512:
6
- metadata.gz: 0fd8ef9fba5d09b4ceb466cffb94d6f8327eacd10cf18ced217715374c7931d9a1b7c8a4ddb94445f8652af71c31e07ae862184acba37f0bfe8209ae1e9b7a52
7
- data.tar.gz: fa66286cb67770d52151d190e2f4b23ab3a3ebcc80d3201d6c87e3bf3b39ca98ca74254a4041e612a59c4b3d50a5ddcae90d5eaba9da53017da4116efe7224af
6
+ metadata.gz: 177977da49150f249c6bc7f5d7dcbaaf4ac2d167c8e4835c8e1d799e5ff126704972d959b4acd86db0b1a93baa0c43233621367721ced19bcee521c39ca317ea
7
+ data.tar.gz: c1e314563caecc20c601abcf1769846a20b807b0966dbfc804a152c721d0f116c9ab5156a9a5aceb0d6c76799281da7745eb24543979e9205a41508f9f68d03e
data/README.md CHANGED
@@ -18,7 +18,9 @@ Install the gem from RubyGems:
18
18
  This gem is tested on 1.9.2, 1.9.3, 2.0.0, 2.2, 2.3, JRuby1.9, rbx1.9,
19
19
 
20
20
  ## Usage
21
+ Include the gem
21
22
 
23
+ require 'linkedin_scraper'
22
24
 
23
25
  Initialize a scraper instance
24
26
 
@@ -270,7 +272,7 @@ It takes the url as the first argument.
270
272
 
271
273
  Bug reports and pull requests are welcome on GitHub at https://github.com/yatish27/linkedin-scraper.
272
274
  This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the
273
- [Contributor Covenant](contributor-covenant.org) code of conduct.
275
+ [Contributor Covenant](http://contributor-covenant.org) code of conduct.
274
276
 
275
277
 
276
278
  ## License
@@ -2,4 +2,5 @@ require "rubygems"
2
2
  require "mechanize"
3
3
  require "cgi"
4
4
  require "net/http"
5
+ require "random_user_agent"
5
6
  Dir["#{File.expand_path(File.dirname(__FILE__))}/linkedin_scraper/*.rb"].each { |file| require file }
@@ -2,25 +2,6 @@
2
2
  module Linkedin
3
3
  class Profile
4
4
 
5
- #USER_AGENTS = ["Windows IE 6", "Windows IE 7", "Windows Mozilla", "Mac Safari", "Mac Firefox", "Mac Mozilla", "Linux Mozilla", "Linux Firefox", "Linux Konqueror"]
6
- USER_AGENTS = [
7
- "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6",
8
- "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:5.0) Gecko/20100101 Firefox/5.0",
9
- "Mozilla/5.0 (Windows NT 6.1.1; rv:5.0) Gecko/20100101 Firefox/5.0",
10
- "Mozilla/5.0 (X11; U; Linux i586; de; rv:5.0) Gecko/20100101 Firefox/5.0",
11
- "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/11.04 Chromium/14.0.825.0 Chrome/14.0.825.0 Safari/535.1",
12
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.824.0 Safari/535.1",
13
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:5.0) Gecko/20100101 Firefox/5.0",
14
- "Mozilla/5.0 (Macintosh; PPC MacOS X; rv:5.0) Gecko/20110615 Firefox/5.0",
15
- "Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US))",
16
- "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.2; Trident/4.0; Media Center PC 4.0; SLCC1; .NET CLR 3.0.04320)",
17
- "Mozilla/5.0 (Windows; U; MSIE 7.0; Windows NT 6.0; en-US)",
18
- "Mozilla/5.0 (compatible; Konqueror/4.5; FreeBSD) KHTML/4.5.4 (like Gecko)",
19
- "Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00",
20
- "Opera/9.80 (X11; Linux x86_64; U; fr) Presto/2.9.168 Version/11.50",
21
- "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; de-at) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1",
22
- "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_7; da-dk) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1"
23
- ]
24
5
  ATTRIBUTES = %w(
25
6
  name
26
7
  first_name
@@ -101,7 +82,7 @@ module Linkedin
101
82
  end
102
83
 
103
84
  def skills
104
- @skills ||= (@page.search(".pills .skill").map { |skill| skill.text.strip if skill.text } rescue nil)
85
+ @skills ||= (@page.search(".pills .skill:not(.see-less)").map { |skill| skill.text.strip if skill.text } rescue nil)
105
86
  end
106
87
 
107
88
  def past_companies
@@ -185,13 +166,13 @@ module Linkedin
185
166
  def projects
186
167
  @projects ||= @page.search("#projects .project").map do |project|
187
168
  p = {}
188
- start_date, end_date = project.at("date-range").text.gsub(/\s+|\n/, " ").strip.split(" – ") rescue nil
169
+ start_date, end_date = project.at(".date-range").text.gsub(/\s+|\n/, " ").strip.split(" – ") rescue nil
189
170
 
190
171
  p[:title] = project.at(".item-title").text
191
172
  p[:link] = CGI.parse(URI.parse(project.at(".item-title a")['href']).query)["url"][0] rescue nil
192
173
  p[:start_date] = parse_date(start_date) rescue nil
193
174
  p[:end_date] = parse_date(end_date) rescue nil
194
- p[:description] = project.at(".description").text rescue nil
175
+ p[:description] = project.at(".description").children().to_s rescue nil
195
176
  p[:associates] = project.search(".contributors .contributor").map{ |c| c.at("a").text } rescue nil
196
177
  p
197
178
  end
@@ -215,10 +196,12 @@ module Linkedin
215
196
  company = {}
216
197
  company[:title] = node.at(".item-title").text.gsub(/\s+|\n/, " ").strip if node.at(".item-title")
217
198
  company[:company] = node.at(".item-subtitle").text.gsub(/\s+|\n/, " ").strip if node.at(".item-subtitle")
199
+ company[:location] = node.at(".location").text if node.at(".location")
218
200
  company[:description] = node.at(".description").text.gsub(/\s+|\n/, " ").strip if node.at(".description")
201
+ company[:company_logo] = node.at(".logo a img").first[1] if node.at(".logo")
219
202
 
220
- start_date, end_date = node.at(".meta").text.strip.split(" – ") rescue nil
221
- company[:duration] = node.at(".meta").text[/.*\((.*)\)/, 1]
203
+ start_date, end_date = node.at(".date-range").text.strip.split(" – ") rescue nil
204
+ company[:duration] = node.at(".date-range").text[/.*\((.*)\)/, 1]
222
205
  company[:start_date] = parse_date(start_date) rescue nil
223
206
 
224
207
  if end_date && end_date.match(/Present/)
@@ -261,7 +244,7 @@ module Linkedin
261
244
 
262
245
  def http_client
263
246
  Mechanize.new do |agent|
264
- agent.user_agent = USER_AGENTS.sample
247
+ agent.user_agent = RandomUserAgent.randomize
265
248
  unless @options.empty?
266
249
  agent.set_proxy(@options[:proxy_ip], @options[:proxy_port], @options[:username], @options[:password])
267
250
  end
@@ -1,5 +1,5 @@
1
1
  module Linkedin
2
2
  module Scraper
3
- VERSION = '1.0.5'
3
+ VERSION = '1.1.0'
4
4
  end
5
5
  end
@@ -16,7 +16,8 @@ Gem::Specification.new do |gem|
16
16
  gem.license = "MIT"
17
17
 
18
18
  gem.add_dependency 'mechanize', '~> 2'
19
-
19
+ gem.add_dependency 'random_user_agent'
20
+
20
21
  gem.add_development_dependency 'rspec', '~> 3'
21
22
  gem.add_development_dependency 'rake', '~> 10'
22
23
  end
@@ -47,6 +47,10 @@ describe Linkedin::Profile do
47
47
  it "returns list of profile's skills" do
48
48
  expect(profile.skills).to include("Product Development")
49
49
  end
50
+
51
+ it 'does not return "See less"' do
52
+ expect(profile.skills).not_to include("See less")
53
+ end
50
54
  end
51
55
 
52
56
  describe '#websites' do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkedin-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yatish Mehta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-09 00:00:00.000000000 Z
11
+ date: 2016-05-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '2'
27
+ - !ruby/object:Gem::Dependency
28
+ name: random_user_agent
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: rspec
29
43
  requirement: !ruby/object:Gem::Requirement