linkedin-scraper 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 23c5331d7544944c0d719cac70b4b281f53c5de3
4
- data.tar.gz: 84d78b9812b6f5f0b572481bdad5b550e6d39b16
3
+ metadata.gz: d72a6f4f56878e6396f87b2114ee75e73cb71fc4
4
+ data.tar.gz: 7ec26c6b44a2cec0551d544ecc72152683eb439a
5
5
  SHA512:
6
- metadata.gz: 763a28b6d6a482f8534c3339ddedefcc31784000dce77eb129282b0cff4ee402a2c962e778d75b9925113830fa466058f6eee1dcad4378555d2a8db4bbc67e91
7
- data.tar.gz: 0d7f262e9aa12896e33fd9d9abe347ceb95854fbb9b9eef600b0e04d5cd9a3c6de3ccd81c2fdf20d0968dfb64af94be9e3c3c4457f39f3b420d700192d2c224a
6
+ metadata.gz: f69f100f22938a93703177a2495ef439cdb7120c89d2d23ba231849a37a65a9b20173456d68421280faf572798b88e5271f042cb6a8fccba742bd5c2e48299c1
7
+ data.tar.gz: e2509eed12a03b23be0860b057c7031c9e467c26e8d2fe2048d7aefa381228ae3ef515fbc0708945a54c1f7e3912b24c0ebfeb3540cd6837350ad378971ea139
data/.gitignore CHANGED
@@ -18,3 +18,4 @@ test/version_tmp
18
18
  tmp
19
19
  .ruby-version
20
20
  .ruby-gemset
21
+ .projectile
data/.travis.yml CHANGED
@@ -4,4 +4,3 @@ rvm:
4
4
  - 1.9.3
5
5
  - 1.9.2
6
6
  - jruby-19mode
7
- - rbx-19mode
data/bin/linkedin-scraper CHANGED
@@ -2,4 +2,4 @@
2
2
 
3
3
  require_relative '../lib/linkedin-scraper'
4
4
  profile = Linkedin::Profile.new(ARGV[0])
5
- puts profile.to_json
5
+ puts JSON.pretty_generate JSON.parse(profile.to_json)
@@ -70,85 +70,51 @@ module Linkedin
70
70
  end
71
71
 
72
72
  def education
73
- unless @education
74
- @education = []
75
- if @page.search('.position.education.vevent.vcard').first
76
- @education = @page.search('.position.education.vevent.vcard').map do |item|
77
- name = item.at('h3').text.gsub(/\s+|\n/, ' ').strip if item.at('h3')
78
- desc = item.at('h4').text.gsub(/\s+|\n/, ' ').strip if item.at('h4')
79
- period = item.at('.period').text.gsub(/\s+|\n/, ' ').strip if item.at('.period')
80
-
81
- {:name => name, :description => desc, :period => period}
82
- end
83
- end
73
+ @education ||= @page.search('.position.education.vevent.vcard').map do |item|
74
+ name = item.at('h3').text.gsub(/\s+|\n/, ' ').strip if item.at('h3')
75
+ desc = item.at('h4').text.gsub(/\s+|\n/, ' ').strip if item.at('h4')
76
+ period = item.at('.period').text.gsub(/\s+|\n/, ' ').strip if item.at('.period')
77
+
78
+ {:name => name, :description => desc, :period => period}
84
79
  end
85
- @education
86
80
  end
87
81
 
88
82
  def websites
89
- unless @websites
90
- @websites = []
91
- if @page.search('.website').first
92
- @websites = @page.search('.website').map do |site|
93
- url = site.at('a')['href']
94
- url = "http://www.linkedin.com#{url}"
95
- CGI.parse(URI.parse(url).query)['url']
96
- end.flatten!
97
- end
83
+ @websites ||= @page.search('.website').flat_map do |site|
84
+ url = "http://www.linkedin.com#{site.at('a')['href']}"
85
+ CGI.parse(URI.parse(url).query)['url']
98
86
  end
99
- @websites
87
+
100
88
  end
101
89
 
102
90
  def groups
103
- unless @groups
104
- @groups = []
105
- if page.search('.group-data').first
106
- @groups = page.search('.group-data').map do |item|
107
- name = item.text.gsub(/\s+|\n/, ' ').strip
108
- link = "http://www.linkedin.com#{item.at('a')['href']}"
109
- {:name => name, :link => link}
110
- end
111
- end
91
+ @groups ||= @page.search('.group-data').map do |item|
92
+ name = item.text.gsub(/\s+|\n/, ' ').strip
93
+ link = "http://www.linkedin.com#{item.at('a')['href']}"
94
+ {:name => name, :link => link}
112
95
  end
113
- @groups
114
96
  end
115
97
 
116
98
  def organizations
117
- unless @organizations
118
- @organizations = []
119
- if @page.search('ul.organizations/li.organization').first
120
- @organizations = @page.search('ul.organizations/li.organization').map do |item|
121
-
122
- name = item.search('h3').text.gsub(/\s+|\n/, ' ').strip rescue nil
123
- start_date, end_date = item.search('ul.specifics li').text.gsub(/\s+|\n/, ' ').strip.split(' to ')
124
- start_date = Date.parse(start_date) rescue nil
125
- end_date = Date.parse(end_date) rescue nil
126
- {:name => name, :start_date => start_date, :end_date => end_date}
127
- end
128
- end
99
+ @organizations ||= @page.search('ul.organizations/li.organization').map do |item|
100
+ name = item.search('h3').text.gsub(/\s+|\n/, ' ').strip rescue nil
101
+ start_date, end_date = item.search('ul.specifics li').text.gsub(/\s+|\n/, ' ').strip.split(' to ')
102
+ start_date = Date.parse(start_date) rescue nil
103
+ end_date = Date.parse(end_date) rescue nil
104
+ {:name => name, :start_date => start_date, :end_date => end_date}
129
105
  end
130
- @organizations
131
106
  end
132
107
 
133
108
  def languages
134
- unless @languages
135
- @languages = []
136
- if @page.at('ul.languages/li.language')
137
- @languages = @page.search('ul.languages/li.language').map do |item|
138
- language = item.at('h3').text rescue nil
139
- proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, ' ').strip rescue nil
140
- {:language=> language, :proficiency => proficiency }
141
- end
142
- end
109
+ @languages ||= @page.search('ul.languages/li.language').map do |item|
110
+ language = item.at('h3').text rescue nil
111
+ proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, ' ').strip rescue nil
112
+ {:language=> language, :proficiency => proficiency }
143
113
  end
144
- @languages
145
114
  end
146
115
 
147
116
  def certifications
148
- unless @certtifications
149
- @certifications = []
150
- if @page.at('ul.certifications/li.certification')
151
- @certifications = @page.search('ul.certifications/li.certification').map do |item|
117
+ @certifications ||= @page.search('ul.certifications/li.certification').map do |item|
152
118
  name = item.at('h3').text.gsub(/\s+|\n/, ' ').strip rescue nil
153
119
  authority = item.at('.specifics/.org').text.gsub(/\s+|\n/, ' ').strip rescue nil
154
120
  license = item.at('.specifics/.licence-number').text.gsub(/\s+|\n/, ' ').strip rescue nil
@@ -156,27 +122,19 @@ module Linkedin
156
122
 
157
123
  {:name => name, :authority => authority, :license => license, :start_date => start_date}
158
124
  end
159
- end
160
- end
161
- @certifications
125
+
162
126
  end
163
127
 
164
128
 
165
129
  def recommended_visitors
166
- unless @recommended_visitors
167
- @recommended_visitors = []
168
- if @page.at('.browsemap/.content/ul/li')
169
- @recommended_visitors = @page.search('.browsemap/.content/ul/li').map do |visitor|
170
- v = {}
171
- v[:link] = visitor.at('a')['href']
172
- v[:name] = visitor.at('strong/a').text
173
- v[:title] = visitor.at('.headline').text.gsub('...',' ').split(' at ').first
174
- v[:company] = visitor.at('.headline').text.gsub('...',' ').split(' at ')[1]
175
- v
176
- end
177
- end
130
+ @recommended_visitors ||= @page.search('.browsemap/.content/ul/li').map do |visitor|
131
+ v = {}
132
+ v[:link] = visitor.at('a')['href']
133
+ v[:name] = visitor.at('strong/a').text
134
+ v[:title] = visitor.at('.headline').text.gsub('...',' ').split(' at ').first
135
+ v[:company] = visitor.at('.headline').text.gsub('...',' ').split(' at ')[1]
136
+ v
178
137
  end
179
- @recommended_visitors
180
138
  end
181
139
 
182
140
  def to_json
@@ -196,11 +154,11 @@ module Linkedin
196
154
  company[:title] = node.at('h3').text.gsub(/\s+|\n/, ' ').strip if node.at('h3')
197
155
  company[:company] = node.at('h4').text.gsub(/\s+|\n/, ' ').strip if node.at('h4')
198
156
  company[:description] = node.at(".description.#{type}-position").text.gsub(/\s+|\n/, ' ').strip if node.at(".description.#{type}-position")
199
- start_date = node.at('.dtstart').text.gsub(/\s+|\n/, ' ').strip rescue nil
157
+
158
+ start_date = node.at('.dtstart')['title'] rescue nil
200
159
  company[:start_date] = parse_date(start_date) rescue nil
201
160
 
202
- end_date = node.at('.dtend').text.gsub(/\s+|\n/, ' ').strip rescue nil
203
- end_date ||= node.at('.dtstamp').text.gsub(/\s+|\n/, ' ').strip rescue nil
161
+ end_date = node.at('.dtend')['title'] rescue nil
204
162
  company[:end_date] = parse_date(end_date) rescue nil
205
163
 
206
164
  company_link = node.at('h4/strong/a')['href'] if node.at('h4/strong/a')
@@ -1,5 +1,5 @@
1
1
  module Linkedin
2
2
  module Scraper
3
- VERSION = '0.1.0'
3
+ VERSION = '0.1.1'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkedin-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yatish Mehta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-11-13 00:00:00.000000000 Z
11
+ date: 2014-03-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize
@@ -93,7 +93,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
93
93
  version: '0'
94
94
  requirements: []
95
95
  rubyforge_project:
96
- rubygems_version: 2.1.2
96
+ rubygems_version: 2.1.11
97
97
  signing_key:
98
98
  specification_version: 4
99
99
  summary: when a url of public linkedin profile page is given it scrapes the entire
@@ -102,3 +102,4 @@ test_files:
102
102
  - spec/fixtures/jgrevich.html
103
103
  - spec/linkedin-scraper/profile_spec.rb
104
104
  - spec/spec_helper.rb
105
+ has_rdoc: