linkedin-scraper 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.travis.yml +0 -1
- data/bin/linkedin-scraper +1 -1
- data/lib/linkedin-scraper/profile.rb +36 -78
- data/lib/linkedin-scraper/version.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d72a6f4f56878e6396f87b2114ee75e73cb71fc4
|
4
|
+
data.tar.gz: 7ec26c6b44a2cec0551d544ecc72152683eb439a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f69f100f22938a93703177a2495ef439cdb7120c89d2d23ba231849a37a65a9b20173456d68421280faf572798b88e5271f042cb6a8fccba742bd5c2e48299c1
|
7
|
+
data.tar.gz: e2509eed12a03b23be0860b057c7031c9e467c26e8d2fe2048d7aefa381228ae3ef515fbc0708945a54c1f7e3912b24c0ebfeb3540cd6837350ad378971ea139
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
data/bin/linkedin-scraper
CHANGED
data/lib/linkedin-scraper/profile.rb
CHANGED
@@ -70,85 +70,51 @@ module Linkedin
|
|
70
70
|
end
|
71
71
|
|
72
72
|
def education
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
period = item.at('.period').text.gsub(/\s+|\n/, ' ').strip if item.at('.period')
|
80
|
-
|
81
|
-
{:name => name, :description => desc, :period => period}
|
82
|
-
end
|
83
|
-
end
|
73
|
+
@education ||= @page.search('.position.education.vevent.vcard').map do |item|
|
74
|
+
name = item.at('h3').text.gsub(/\s+|\n/, ' ').strip if item.at('h3')
|
75
|
+
desc = item.at('h4').text.gsub(/\s+|\n/, ' ').strip if item.at('h4')
|
76
|
+
period = item.at('.period').text.gsub(/\s+|\n/, ' ').strip if item.at('.period')
|
77
|
+
|
78
|
+
{:name => name, :description => desc, :period => period}
|
84
79
|
end
|
85
|
-
@education
|
86
80
|
end
|
87
81
|
|
88
82
|
def websites
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
@websites = @page.search('.website').map do |site|
|
93
|
-
url = site.at('a')['href']
|
94
|
-
url = "http://www.linkedin.com#{url}"
|
95
|
-
CGI.parse(URI.parse(url).query)['url']
|
96
|
-
end.flatten!
|
97
|
-
end
|
83
|
+
@websites ||= @page.search('.website').flat_map do |site|
|
84
|
+
url = "http://www.linkedin.com#{site.at('a')['href']}"
|
85
|
+
CGI.parse(URI.parse(url).query)['url']
|
98
86
|
end
|
99
|
-
|
87
|
+
|
100
88
|
end
|
101
89
|
|
102
90
|
def groups
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
name = item.text.gsub(/\s+|\n/, ' ').strip
|
108
|
-
link = "http://www.linkedin.com#{item.at('a')['href']}"
|
109
|
-
{:name => name, :link => link}
|
110
|
-
end
|
111
|
-
end
|
91
|
+
@groups ||= @page.search('.group-data').map do |item|
|
92
|
+
name = item.text.gsub(/\s+|\n/, ' ').strip
|
93
|
+
link = "http://www.linkedin.com#{item.at('a')['href']}"
|
94
|
+
{:name => name, :link => link}
|
112
95
|
end
|
113
|
-
@groups
|
114
96
|
end
|
115
97
|
|
116
98
|
def organizations
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
start_date, end_date = item.search('ul.specifics li').text.gsub(/\s+|\n/, ' ').strip.split(' to ')
|
124
|
-
start_date = Date.parse(start_date) rescue nil
|
125
|
-
end_date = Date.parse(end_date) rescue nil
|
126
|
-
{:name => name, :start_date => start_date, :end_date => end_date}
|
127
|
-
end
|
128
|
-
end
|
99
|
+
@organizations ||= @page.search('ul.organizations/li.organization').map do |item|
|
100
|
+
name = item.search('h3').text.gsub(/\s+|\n/, ' ').strip rescue nil
|
101
|
+
start_date, end_date = item.search('ul.specifics li').text.gsub(/\s+|\n/, ' ').strip.split(' to ')
|
102
|
+
start_date = Date.parse(start_date) rescue nil
|
103
|
+
end_date = Date.parse(end_date) rescue nil
|
104
|
+
{:name => name, :start_date => start_date, :end_date => end_date}
|
129
105
|
end
|
130
|
-
@organizations
|
131
106
|
end
|
132
107
|
|
133
108
|
def languages
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
language = item.at('h3').text rescue nil
|
139
|
-
proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, ' ').strip rescue nil
|
140
|
-
{:language=> language, :proficiency => proficiency }
|
141
|
-
end
|
142
|
-
end
|
109
|
+
@languages ||= @page.search('ul.languages/li.language').map do |item|
|
110
|
+
language = item.at('h3').text rescue nil
|
111
|
+
proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, ' ').strip rescue nil
|
112
|
+
{:language=> language, :proficiency => proficiency }
|
143
113
|
end
|
144
|
-
@languages
|
145
114
|
end
|
146
115
|
|
147
116
|
def certifications
|
148
|
-
|
149
|
-
@certifications = []
|
150
|
-
if @page.at('ul.certifications/li.certification')
|
151
|
-
@certifications = @page.search('ul.certifications/li.certification').map do |item|
|
117
|
+
@certifications ||= @page.search('ul.certifications/li.certification').map do |item|
|
152
118
|
name = item.at('h3').text.gsub(/\s+|\n/, ' ').strip rescue nil
|
153
119
|
authority = item.at('.specifics/.org').text.gsub(/\s+|\n/, ' ').strip rescue nil
|
154
120
|
license = item.at('.specifics/.licence-number').text.gsub(/\s+|\n/, ' ').strip rescue nil
|
@@ -156,27 +122,19 @@ module Linkedin
|
|
156
122
|
|
157
123
|
{:name => name, :authority => authority, :license => license, :start_date => start_date}
|
158
124
|
end
|
159
|
-
|
160
|
-
end
|
161
|
-
@certifications
|
125
|
+
|
162
126
|
end
|
163
127
|
|
164
128
|
|
165
129
|
def recommended_visitors
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
v[:title] = visitor.at('.headline').text.gsub('...',' ').split(' at ').first
|
174
|
-
v[:company] = visitor.at('.headline').text.gsub('...',' ').split(' at ')[1]
|
175
|
-
v
|
176
|
-
end
|
177
|
-
end
|
130
|
+
@recommended_visitors ||= @page.search('.browsemap/.content/ul/li').map do |visitor|
|
131
|
+
v = {}
|
132
|
+
v[:link] = visitor.at('a')['href']
|
133
|
+
v[:name] = visitor.at('strong/a').text
|
134
|
+
v[:title] = visitor.at('.headline').text.gsub('...',' ').split(' at ').first
|
135
|
+
v[:company] = visitor.at('.headline').text.gsub('...',' ').split(' at ')[1]
|
136
|
+
v
|
178
137
|
end
|
179
|
-
@recommended_visitors
|
180
138
|
end
|
181
139
|
|
182
140
|
def to_json
|
@@ -196,11 +154,11 @@ module Linkedin
|
|
196
154
|
company[:title] = node.at('h3').text.gsub(/\s+|\n/, ' ').strip if node.at('h3')
|
197
155
|
company[:company] = node.at('h4').text.gsub(/\s+|\n/, ' ').strip if node.at('h4')
|
198
156
|
company[:description] = node.at(".description.#{type}-position").text.gsub(/\s+|\n/, ' ').strip if node.at(".description.#{type}-position")
|
199
|
-
|
157
|
+
|
158
|
+
start_date = node.at('.dtstart')['title'] rescue nil
|
200
159
|
company[:start_date] = parse_date(start_date) rescue nil
|
201
160
|
|
202
|
-
end_date = node.at('.dtend')
|
203
|
-
end_date ||= node.at('.dtstamp').text.gsub(/\s+|\n/, ' ').strip rescue nil
|
161
|
+
end_date = node.at('.dtend')['title'] rescue nil
|
204
162
|
company[:end_date] = parse_date(end_date) rescue nil
|
205
163
|
|
206
164
|
company_link = node.at('h4/strong/a')['href'] if node.at('h4/strong/a')
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkedin-scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yatish Mehta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-03-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|
@@ -93,7 +93,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
93
93
|
version: '0'
|
94
94
|
requirements: []
|
95
95
|
rubyforge_project:
|
96
|
-
rubygems_version: 2.1.
|
96
|
+
rubygems_version: 2.1.11
|
97
97
|
signing_key:
|
98
98
|
specification_version: 4
|
99
99
|
summary: when a url of public linkedin profile page is given it scrapes the entire
|
@@ -102,3 +102,4 @@ test_files:
|
|
102
102
|
- spec/fixtures/jgrevich.html
|
103
103
|
- spec/linkedin-scraper/profile_spec.rb
|
104
104
|
- spec/spec_helper.rb
|
105
|
+
has_rdoc:
|