linkedin-scraper 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.travis.yml +0 -1
- data/bin/linkedin-scraper +1 -1
- data/lib/linkedin-scraper/profile.rb +36 -78
- data/lib/linkedin-scraper/version.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d72a6f4f56878e6396f87b2114ee75e73cb71fc4
|
|
4
|
+
data.tar.gz: 7ec26c6b44a2cec0551d544ecc72152683eb439a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f69f100f22938a93703177a2495ef439cdb7120c89d2d23ba231849a37a65a9b20173456d68421280faf572798b88e5271f042cb6a8fccba742bd5c2e48299c1
|
|
7
|
+
data.tar.gz: e2509eed12a03b23be0860b057c7031c9e467c26e8d2fe2048d7aefa381228ae3ef515fbc0708945a54c1f7e3912b24c0ebfeb3540cd6837350ad378971ea139
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
data/bin/linkedin-scraper
CHANGED
data/lib/linkedin-scraper/profile.rb
CHANGED
|
@@ -70,85 +70,51 @@ module Linkedin
|
|
|
70
70
|
end
|
|
71
71
|
|
|
72
72
|
def education
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
period = item.at('.period').text.gsub(/\s+|\n/, ' ').strip if item.at('.period')
|
|
80
|
-
|
|
81
|
-
{:name => name, :description => desc, :period => period}
|
|
82
|
-
end
|
|
83
|
-
end
|
|
73
|
+
@education ||= @page.search('.position.education.vevent.vcard').map do |item|
|
|
74
|
+
name = item.at('h3').text.gsub(/\s+|\n/, ' ').strip if item.at('h3')
|
|
75
|
+
desc = item.at('h4').text.gsub(/\s+|\n/, ' ').strip if item.at('h4')
|
|
76
|
+
period = item.at('.period').text.gsub(/\s+|\n/, ' ').strip if item.at('.period')
|
|
77
|
+
|
|
78
|
+
{:name => name, :description => desc, :period => period}
|
|
84
79
|
end
|
|
85
|
-
@education
|
|
86
80
|
end
|
|
87
81
|
|
|
88
82
|
def websites
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
@websites = @page.search('.website').map do |site|
|
|
93
|
-
url = site.at('a')['href']
|
|
94
|
-
url = "http://www.linkedin.com#{url}"
|
|
95
|
-
CGI.parse(URI.parse(url).query)['url']
|
|
96
|
-
end.flatten!
|
|
97
|
-
end
|
|
83
|
+
@websites ||= @page.search('.website').flat_map do |site|
|
|
84
|
+
url = "http://www.linkedin.com#{site.at('a')['href']}"
|
|
85
|
+
CGI.parse(URI.parse(url).query)['url']
|
|
98
86
|
end
|
|
99
|
-
|
|
87
|
+
|
|
100
88
|
end
|
|
101
89
|
|
|
102
90
|
def groups
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
name = item.text.gsub(/\s+|\n/, ' ').strip
|
|
108
|
-
link = "http://www.linkedin.com#{item.at('a')['href']}"
|
|
109
|
-
{:name => name, :link => link}
|
|
110
|
-
end
|
|
111
|
-
end
|
|
91
|
+
@groups ||= @page.search('.group-data').map do |item|
|
|
92
|
+
name = item.text.gsub(/\s+|\n/, ' ').strip
|
|
93
|
+
link = "http://www.linkedin.com#{item.at('a')['href']}"
|
|
94
|
+
{:name => name, :link => link}
|
|
112
95
|
end
|
|
113
|
-
@groups
|
|
114
96
|
end
|
|
115
97
|
|
|
116
98
|
def organizations
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
start_date, end_date = item.search('ul.specifics li').text.gsub(/\s+|\n/, ' ').strip.split(' to ')
|
|
124
|
-
start_date = Date.parse(start_date) rescue nil
|
|
125
|
-
end_date = Date.parse(end_date) rescue nil
|
|
126
|
-
{:name => name, :start_date => start_date, :end_date => end_date}
|
|
127
|
-
end
|
|
128
|
-
end
|
|
99
|
+
@organizations ||= @page.search('ul.organizations/li.organization').map do |item|
|
|
100
|
+
name = item.search('h3').text.gsub(/\s+|\n/, ' ').strip rescue nil
|
|
101
|
+
start_date, end_date = item.search('ul.specifics li').text.gsub(/\s+|\n/, ' ').strip.split(' to ')
|
|
102
|
+
start_date = Date.parse(start_date) rescue nil
|
|
103
|
+
end_date = Date.parse(end_date) rescue nil
|
|
104
|
+
{:name => name, :start_date => start_date, :end_date => end_date}
|
|
129
105
|
end
|
|
130
|
-
@organizations
|
|
131
106
|
end
|
|
132
107
|
|
|
133
108
|
def languages
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
language = item.at('h3').text rescue nil
|
|
139
|
-
proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, ' ').strip rescue nil
|
|
140
|
-
{:language=> language, :proficiency => proficiency }
|
|
141
|
-
end
|
|
142
|
-
end
|
|
109
|
+
@languages ||= @page.search('ul.languages/li.language').map do |item|
|
|
110
|
+
language = item.at('h3').text rescue nil
|
|
111
|
+
proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, ' ').strip rescue nil
|
|
112
|
+
{:language=> language, :proficiency => proficiency }
|
|
143
113
|
end
|
|
144
|
-
@languages
|
|
145
114
|
end
|
|
146
115
|
|
|
147
116
|
def certifications
|
|
148
|
-
|
|
149
|
-
@certifications = []
|
|
150
|
-
if @page.at('ul.certifications/li.certification')
|
|
151
|
-
@certifications = @page.search('ul.certifications/li.certification').map do |item|
|
|
117
|
+
@certifications ||= @page.search('ul.certifications/li.certification').map do |item|
|
|
152
118
|
name = item.at('h3').text.gsub(/\s+|\n/, ' ').strip rescue nil
|
|
153
119
|
authority = item.at('.specifics/.org').text.gsub(/\s+|\n/, ' ').strip rescue nil
|
|
154
120
|
license = item.at('.specifics/.licence-number').text.gsub(/\s+|\n/, ' ').strip rescue nil
|
|
@@ -156,27 +122,19 @@ module Linkedin
|
|
|
156
122
|
|
|
157
123
|
{:name => name, :authority => authority, :license => license, :start_date => start_date}
|
|
158
124
|
end
|
|
159
|
-
|
|
160
|
-
end
|
|
161
|
-
@certifications
|
|
125
|
+
|
|
162
126
|
end
|
|
163
127
|
|
|
164
128
|
|
|
165
129
|
def recommended_visitors
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
v[:title] = visitor.at('.headline').text.gsub('...',' ').split(' at ').first
|
|
174
|
-
v[:company] = visitor.at('.headline').text.gsub('...',' ').split(' at ')[1]
|
|
175
|
-
v
|
|
176
|
-
end
|
|
177
|
-
end
|
|
130
|
+
@recommended_visitors ||= @page.search('.browsemap/.content/ul/li').map do |visitor|
|
|
131
|
+
v = {}
|
|
132
|
+
v[:link] = visitor.at('a')['href']
|
|
133
|
+
v[:name] = visitor.at('strong/a').text
|
|
134
|
+
v[:title] = visitor.at('.headline').text.gsub('...',' ').split(' at ').first
|
|
135
|
+
v[:company] = visitor.at('.headline').text.gsub('...',' ').split(' at ')[1]
|
|
136
|
+
v
|
|
178
137
|
end
|
|
179
|
-
@recommended_visitors
|
|
180
138
|
end
|
|
181
139
|
|
|
182
140
|
def to_json
|
|
@@ -196,11 +154,11 @@ module Linkedin
|
|
|
196
154
|
company[:title] = node.at('h3').text.gsub(/\s+|\n/, ' ').strip if node.at('h3')
|
|
197
155
|
company[:company] = node.at('h4').text.gsub(/\s+|\n/, ' ').strip if node.at('h4')
|
|
198
156
|
company[:description] = node.at(".description.#{type}-position").text.gsub(/\s+|\n/, ' ').strip if node.at(".description.#{type}-position")
|
|
199
|
-
|
|
157
|
+
|
|
158
|
+
start_date = node.at('.dtstart')['title'] rescue nil
|
|
200
159
|
company[:start_date] = parse_date(start_date) rescue nil
|
|
201
160
|
|
|
202
|
-
end_date = node.at('.dtend')
|
|
203
|
-
end_date ||= node.at('.dtstamp').text.gsub(/\s+|\n/, ' ').strip rescue nil
|
|
161
|
+
end_date = node.at('.dtend')['title'] rescue nil
|
|
204
162
|
company[:end_date] = parse_date(end_date) rescue nil
|
|
205
163
|
|
|
206
164
|
company_link = node.at('h4/strong/a')['href'] if node.at('h4/strong/a')
|
data/lib/linkedin-scraper/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: linkedin-scraper
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.0
|
|
4
|
+
version: 0.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Yatish Mehta
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2014-03-25 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: mechanize
|
|
@@ -93,7 +93,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
93
93
|
version: '0'
|
|
94
94
|
requirements: []
|
|
95
95
|
rubyforge_project:
|
|
96
|
-
rubygems_version: 2.1.
|
|
96
|
+
rubygems_version: 2.1.11
|
|
97
97
|
signing_key:
|
|
98
98
|
specification_version: 4
|
|
99
99
|
summary: when a url of public linkedin profile page is given it scrapes the entire
|
|
@@ -102,3 +102,4 @@ test_files:
|
|
|
102
102
|
- spec/fixtures/jgrevich.html
|
|
103
103
|
- spec/linkedin-scraper/profile_spec.rb
|
|
104
104
|
- spec/spec_helper.rb
|
|
105
|
+
has_rdoc:
|