linkedin-scraper 1.0.5 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -1
- data/lib/linkedin_scraper.rb +1 -0
- data/lib/linkedin_scraper/profile.rb +8 -25
- data/lib/linkedin_scraper/version.rb +1 -1
- data/linkedin-scraper.gemspec +2 -1
- data/spec/linkedin_scraper/profile_spec.rb +4 -0
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2c77bcfa337ca481575a21d58974d9f574c75578
|
4
|
+
data.tar.gz: e10ffd0759cb6535202a8977b9b9f3d0bb5530ce
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 177977da49150f249c6bc7f5d7dcbaaf4ac2d167c8e4835c8e1d799e5ff126704972d959b4acd86db0b1a93baa0c43233621367721ced19bcee521c39ca317ea
|
7
|
+
data.tar.gz: c1e314563caecc20c601abcf1769846a20b807b0966dbfc804a152c721d0f116c9ab5156a9a5aceb0d6c76799281da7745eb24543979e9205a41508f9f68d03e
|
data/README.md
CHANGED
@@ -18,7 +18,9 @@ Install the gem from RubyGems:
|
|
18
18
|
This gem is tested on 1.9.2, 1.9.3, 2.0.0, 2.2, 2.3, JRuby1.9, rbx1.9,
|
19
19
|
|
20
20
|
## Usage
|
21
|
+
Include the gem
|
21
22
|
|
23
|
+
require 'linkedin_scraper'
|
22
24
|
|
23
25
|
Initialize a scraper instance
|
24
26
|
|
@@ -270,7 +272,7 @@ It takes the url as the first argument.
|
|
270
272
|
|
271
273
|
Bug reports and pull requests are welcome on GitHub at https://github.com/yatish27/linkedin-scraper.
|
272
274
|
This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the
|
273
|
-
[Contributor Covenant](contributor-covenant.org) code of conduct.
|
275
|
+
[Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
274
276
|
|
275
277
|
|
276
278
|
## License
|
data/lib/linkedin_scraper/profile.rb
CHANGED
@@ -2,25 +2,6 @@
|
|
2
2
|
module Linkedin
|
3
3
|
class Profile
|
4
4
|
|
5
|
-
#USER_AGENTS = ["Windows IE 6", "Windows IE 7", "Windows Mozilla", "Mac Safari", "Mac Firefox", "Mac Mozilla", "Linux Mozilla", "Linux Firefox", "Linux Konqueror"]
|
6
|
-
USER_AGENTS = [
|
7
|
-
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6",
|
8
|
-
"Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:5.0) Gecko/20100101 Firefox/5.0",
|
9
|
-
"Mozilla/5.0 (Windows NT 6.1.1; rv:5.0) Gecko/20100101 Firefox/5.0",
|
10
|
-
"Mozilla/5.0 (X11; U; Linux i586; de; rv:5.0) Gecko/20100101 Firefox/5.0",
|
11
|
-
"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/11.04 Chromium/14.0.825.0 Chrome/14.0.825.0 Safari/535.1",
|
12
|
-
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.824.0 Safari/535.1",
|
13
|
-
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:5.0) Gecko/20100101 Firefox/5.0",
|
14
|
-
"Mozilla/5.0 (Macintosh; PPC MacOS X; rv:5.0) Gecko/20110615 Firefox/5.0",
|
15
|
-
"Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US))",
|
16
|
-
"Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.2; Trident/4.0; Media Center PC 4.0; SLCC1; .NET CLR 3.0.04320)",
|
17
|
-
"Mozilla/5.0 (Windows; U; MSIE 7.0; Windows NT 6.0; en-US)",
|
18
|
-
"Mozilla/5.0 (compatible; Konqueror/4.5; FreeBSD) KHTML/4.5.4 (like Gecko)",
|
19
|
-
"Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00",
|
20
|
-
"Opera/9.80 (X11; Linux x86_64; U; fr) Presto/2.9.168 Version/11.50",
|
21
|
-
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; de-at) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1",
|
22
|
-
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_7; da-dk) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1"
|
23
|
-
]
|
24
5
|
ATTRIBUTES = %w(
|
25
6
|
name
|
26
7
|
first_name
|
@@ -101,7 +82,7 @@ module Linkedin
|
|
101
82
|
end
|
102
83
|
|
103
84
|
def skills
|
104
|
-
@skills ||= (@page.search(".pills .skill").map { |skill| skill.text.strip if skill.text } rescue nil)
|
85
|
+
@skills ||= (@page.search(".pills .skill:not(.see-less)").map { |skill| skill.text.strip if skill.text } rescue nil)
|
105
86
|
end
|
106
87
|
|
107
88
|
def past_companies
|
@@ -185,13 +166,13 @@ module Linkedin
|
|
185
166
|
def projects
|
186
167
|
@projects ||= @page.search("#projects .project").map do |project|
|
187
168
|
p = {}
|
188
|
-
start_date, end_date = project.at("date-range").text.gsub(/\s+|\n/, " ").strip.split(" – ") rescue nil
|
169
|
+
start_date, end_date = project.at(".date-range").text.gsub(/\s+|\n/, " ").strip.split(" – ") rescue nil
|
189
170
|
|
190
171
|
p[:title] = project.at(".item-title").text
|
191
172
|
p[:link] = CGI.parse(URI.parse(project.at(".item-title a")['href']).query)["url"][0] rescue nil
|
192
173
|
p[:start_date] = parse_date(start_date) rescue nil
|
193
174
|
p[:end_date] = parse_date(end_date) rescue nil
|
194
|
-
p[:description] = project.at(".description").
|
175
|
+
p[:description] = project.at(".description").children().to_s rescue nil
|
195
176
|
p[:associates] = project.search(".contributors .contributor").map{ |c| c.at("a").text } rescue nil
|
196
177
|
p
|
197
178
|
end
|
@@ -215,10 +196,12 @@ module Linkedin
|
|
215
196
|
company = {}
|
216
197
|
company[:title] = node.at(".item-title").text.gsub(/\s+|\n/, " ").strip if node.at(".item-title")
|
217
198
|
company[:company] = node.at(".item-subtitle").text.gsub(/\s+|\n/, " ").strip if node.at(".item-subtitle")
|
199
|
+
company[:location] = node.at(".location").text if node.at(".location")
|
218
200
|
company[:description] = node.at(".description").text.gsub(/\s+|\n/, " ").strip if node.at(".description")
|
201
|
+
company[:company_logo] = node.at(".logo a img").first[1] if node.at(".logo")
|
219
202
|
|
220
|
-
start_date, end_date = node.at(".
|
221
|
-
company[:duration] = node.at(".
|
203
|
+
start_date, end_date = node.at(".date-range").text.strip.split(" – ") rescue nil
|
204
|
+
company[:duration] = node.at(".date-range").text[/.*\((.*)\)/, 1]
|
222
205
|
company[:start_date] = parse_date(start_date) rescue nil
|
223
206
|
|
224
207
|
if end_date && end_date.match(/Present/)
|
@@ -261,7 +244,7 @@ module Linkedin
|
|
261
244
|
|
262
245
|
def http_client
|
263
246
|
Mechanize.new do |agent|
|
264
|
-
agent.user_agent =
|
247
|
+
agent.user_agent = RandomUserAgent.randomize
|
265
248
|
unless @options.empty?
|
266
249
|
agent.set_proxy(@options[:proxy_ip], @options[:proxy_port], @options[:username], @options[:password])
|
267
250
|
end
|
data/spec/linkedin_scraper/profile_spec.rb
CHANGED
@@ -47,6 +47,10 @@ describe Linkedin::Profile do
|
|
47
47
|
it "returns list of profile's skills" do
|
48
48
|
expect(profile.skills).to include("Product Development")
|
49
49
|
end
|
50
|
+
|
51
|
+
it 'does not return "See less"' do
|
52
|
+
expect(profile.skills).not_to include("See less")
|
53
|
+
end
|
50
54
|
end
|
51
55
|
|
52
56
|
describe '#websites' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkedin-scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.5
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yatish Mehta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-05-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '2'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: random_user_agent
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: rspec
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|