linkedin-scraper 1.0.5 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -1
- data/lib/linkedin_scraper.rb +1 -0
- data/lib/linkedin_scraper/profile.rb +8 -25
- data/lib/linkedin_scraper/version.rb +1 -1
- data/linkedin-scraper.gemspec +2 -1
- data/spec/linkedin_scraper/profile_spec.rb +4 -0
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2c77bcfa337ca481575a21d58974d9f574c75578
|
4
|
+
data.tar.gz: e10ffd0759cb6535202a8977b9b9f3d0bb5530ce
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 177977da49150f249c6bc7f5d7dcbaaf4ac2d167c8e4835c8e1d799e5ff126704972d959b4acd86db0b1a93baa0c43233621367721ced19bcee521c39ca317ea
|
7
|
+
data.tar.gz: c1e314563caecc20c601abcf1769846a20b807b0966dbfc804a152c721d0f116c9ab5156a9a5aceb0d6c76799281da7745eb24543979e9205a41508f9f68d03e
|
data/README.md
CHANGED
@@ -18,7 +18,9 @@ Install the gem from RubyGems:
|
|
18
18
|
This gem is tested on 1.9.2, 1.9.3, 2.0.0, 2.2, 2.3, JRuby1.9, rbx1.9,
|
19
19
|
|
20
20
|
## Usage
|
21
|
+
Include the gem
|
21
22
|
|
23
|
+
require 'linkedin_scraper'
|
22
24
|
|
23
25
|
Initialize a scraper instance
|
24
26
|
|
@@ -270,7 +272,7 @@ It takes the url as the first argument.
|
|
270
272
|
|
271
273
|
Bug reports and pull requests are welcome on GitHub at https://github.com/yatish27/linkedin-scraper.
|
272
274
|
This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the
|
273
|
-
[Contributor Covenant](contributor-covenant.org) code of conduct.
|
275
|
+
[Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
274
276
|
|
275
277
|
|
276
278
|
## License
|
data/lib/linkedin_scraper/profile.rb
CHANGED
@@ -2,25 +2,6 @@
|
|
2
2
|
module Linkedin
|
3
3
|
class Profile
|
4
4
|
|
5
|
-
#USER_AGENTS = ["Windows IE 6", "Windows IE 7", "Windows Mozilla", "Mac Safari", "Mac Firefox", "Mac Mozilla", "Linux Mozilla", "Linux Firefox", "Linux Konqueror"]
|
6
|
-
USER_AGENTS = [
|
7
|
-
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6",
|
8
|
-
"Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:5.0) Gecko/20100101 Firefox/5.0",
|
9
|
-
"Mozilla/5.0 (Windows NT 6.1.1; rv:5.0) Gecko/20100101 Firefox/5.0",
|
10
|
-
"Mozilla/5.0 (X11; U; Linux i586; de; rv:5.0) Gecko/20100101 Firefox/5.0",
|
11
|
-
"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/11.04 Chromium/14.0.825.0 Chrome/14.0.825.0 Safari/535.1",
|
12
|
-
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.824.0 Safari/535.1",
|
13
|
-
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:5.0) Gecko/20100101 Firefox/5.0",
|
14
|
-
"Mozilla/5.0 (Macintosh; PPC MacOS X; rv:5.0) Gecko/20110615 Firefox/5.0",
|
15
|
-
"Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US))",
|
16
|
-
"Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.2; Trident/4.0; Media Center PC 4.0; SLCC1; .NET CLR 3.0.04320)",
|
17
|
-
"Mozilla/5.0 (Windows; U; MSIE 7.0; Windows NT 6.0; en-US)",
|
18
|
-
"Mozilla/5.0 (compatible; Konqueror/4.5; FreeBSD) KHTML/4.5.4 (like Gecko)",
|
19
|
-
"Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00",
|
20
|
-
"Opera/9.80 (X11; Linux x86_64; U; fr) Presto/2.9.168 Version/11.50",
|
21
|
-
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; de-at) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1",
|
22
|
-
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_7; da-dk) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1"
|
23
|
-
]
|
24
5
|
ATTRIBUTES = %w(
|
25
6
|
name
|
26
7
|
first_name
|
@@ -101,7 +82,7 @@ module Linkedin
|
|
101
82
|
end
|
102
83
|
|
103
84
|
def skills
|
104
|
-
@skills ||= (@page.search(".pills .skill").map { |skill| skill.text.strip if skill.text } rescue nil)
|
85
|
+
@skills ||= (@page.search(".pills .skill:not(.see-less)").map { |skill| skill.text.strip if skill.text } rescue nil)
|
105
86
|
end
|
106
87
|
|
107
88
|
def past_companies
|
@@ -185,13 +166,13 @@ module Linkedin
|
|
185
166
|
def projects
|
186
167
|
@projects ||= @page.search("#projects .project").map do |project|
|
187
168
|
p = {}
|
188
|
-
start_date, end_date = project.at("date-range").text.gsub(/\s+|\n/, " ").strip.split(" – ") rescue nil
|
169
|
+
start_date, end_date = project.at(".date-range").text.gsub(/\s+|\n/, " ").strip.split(" – ") rescue nil
|
189
170
|
|
190
171
|
p[:title] = project.at(".item-title").text
|
191
172
|
p[:link] = CGI.parse(URI.parse(project.at(".item-title a")['href']).query)["url"][0] rescue nil
|
192
173
|
p[:start_date] = parse_date(start_date) rescue nil
|
193
174
|
p[:end_date] = parse_date(end_date) rescue nil
|
194
|
-
p[:description] = project.at(".description").
|
175
|
+
p[:description] = project.at(".description").children().to_s rescue nil
|
195
176
|
p[:associates] = project.search(".contributors .contributor").map{ |c| c.at("a").text } rescue nil
|
196
177
|
p
|
197
178
|
end
|
@@ -215,10 +196,12 @@ module Linkedin
|
|
215
196
|
company = {}
|
216
197
|
company[:title] = node.at(".item-title").text.gsub(/\s+|\n/, " ").strip if node.at(".item-title")
|
217
198
|
company[:company] = node.at(".item-subtitle").text.gsub(/\s+|\n/, " ").strip if node.at(".item-subtitle")
|
199
|
+
company[:location] = node.at(".location").text if node.at(".location")
|
218
200
|
company[:description] = node.at(".description").text.gsub(/\s+|\n/, " ").strip if node.at(".description")
|
201
|
+
company[:company_logo] = node.at(".logo a img").first[1] if node.at(".logo")
|
219
202
|
|
220
|
-
start_date, end_date = node.at(".
|
221
|
-
company[:duration] = node.at(".
|
203
|
+
start_date, end_date = node.at(".date-range").text.strip.split(" – ") rescue nil
|
204
|
+
company[:duration] = node.at(".date-range").text[/.*\((.*)\)/, 1]
|
222
205
|
company[:start_date] = parse_date(start_date) rescue nil
|
223
206
|
|
224
207
|
if end_date && end_date.match(/Present/)
|
@@ -261,7 +244,7 @@ module Linkedin
|
|
261
244
|
|
262
245
|
def http_client
|
263
246
|
Mechanize.new do |agent|
|
264
|
-
agent.user_agent =
|
247
|
+
agent.user_agent = RandomUserAgent.randomize
|
265
248
|
unless @options.empty?
|
266
249
|
agent.set_proxy(@options[:proxy_ip], @options[:proxy_port], @options[:username], @options[:password])
|
267
250
|
end
|
data/spec/linkedin_scraper/profile_spec.rb
CHANGED
@@ -47,6 +47,10 @@ describe Linkedin::Profile do
|
|
47
47
|
it "returns list of profile's skills" do
|
48
48
|
expect(profile.skills).to include("Product Development")
|
49
49
|
end
|
50
|
+
|
51
|
+
it 'does not return "See less"' do
|
52
|
+
expect(profile.skills).not_to include("See less")
|
53
|
+
end
|
50
54
|
end
|
51
55
|
|
52
56
|
describe '#websites' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkedin-scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.5
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yatish Mehta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-05-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '2'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: random_user_agent
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: rspec
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|