linkedin-scraper 0.1.0 → 0.1.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 23c5331d7544944c0d719cac70b4b281f53c5de3
4
- data.tar.gz: 84d78b9812b6f5f0b572481bdad5b550e6d39b16
3
+ metadata.gz: d72a6f4f56878e6396f87b2114ee75e73cb71fc4
4
+ data.tar.gz: 7ec26c6b44a2cec0551d544ecc72152683eb439a
5
5
  SHA512:
6
- metadata.gz: 763a28b6d6a482f8534c3339ddedefcc31784000dce77eb129282b0cff4ee402a2c962e778d75b9925113830fa466058f6eee1dcad4378555d2a8db4bbc67e91
7
- data.tar.gz: 0d7f262e9aa12896e33fd9d9abe347ceb95854fbb9b9eef600b0e04d5cd9a3c6de3ccd81c2fdf20d0968dfb64af94be9e3c3c4457f39f3b420d700192d2c224a
6
+ metadata.gz: f69f100f22938a93703177a2495ef439cdb7120c89d2d23ba231849a37a65a9b20173456d68421280faf572798b88e5271f042cb6a8fccba742bd5c2e48299c1
7
+ data.tar.gz: e2509eed12a03b23be0860b057c7031c9e467c26e8d2fe2048d7aefa381228ae3ef515fbc0708945a54c1f7e3912b24c0ebfeb3540cd6837350ad378971ea139
data/.gitignore CHANGED
@@ -18,3 +18,4 @@ test/version_tmp
18
18
  tmp
19
19
  .ruby-version
20
20
  .ruby-gemset
21
+ .projectile
data/.travis.yml CHANGED
@@ -4,4 +4,3 @@ rvm:
4
4
  - 1.9.3
5
5
  - 1.9.2
6
6
  - jruby-19mode
7
- - rbx-19mode
data/bin/linkedin-scraper CHANGED
@@ -2,4 +2,4 @@
2
2
 
3
3
  require_relative '../lib/linkedin-scraper'
4
4
  profile = Linkedin::Profile.new(ARGV[0])
5
- puts profile.to_json
5
+ puts JSON.pretty_generate JSON.parse(profile.to_json)
@@ -70,85 +70,51 @@ module Linkedin
70
70
  end
71
71
 
72
72
  def education
73
- unless @education
74
- @education = []
75
- if @page.search('.position.education.vevent.vcard').first
76
- @education = @page.search('.position.education.vevent.vcard').map do |item|
77
- name = item.at('h3').text.gsub(/\s+|\n/, ' ').strip if item.at('h3')
78
- desc = item.at('h4').text.gsub(/\s+|\n/, ' ').strip if item.at('h4')
79
- period = item.at('.period').text.gsub(/\s+|\n/, ' ').strip if item.at('.period')
80
-
81
- {:name => name, :description => desc, :period => period}
82
- end
83
- end
73
+ @education ||= @page.search('.position.education.vevent.vcard').map do |item|
74
+ name = item.at('h3').text.gsub(/\s+|\n/, ' ').strip if item.at('h3')
75
+ desc = item.at('h4').text.gsub(/\s+|\n/, ' ').strip if item.at('h4')
76
+ period = item.at('.period').text.gsub(/\s+|\n/, ' ').strip if item.at('.period')
77
+
78
+ {:name => name, :description => desc, :period => period}
84
79
  end
85
- @education
86
80
  end
87
81
 
88
82
  def websites
89
- unless @websites
90
- @websites = []
91
- if @page.search('.website').first
92
- @websites = @page.search('.website').map do |site|
93
- url = site.at('a')['href']
94
- url = "http://www.linkedin.com#{url}"
95
- CGI.parse(URI.parse(url).query)['url']
96
- end.flatten!
97
- end
83
+ @websites ||= @page.search('.website').flat_map do |site|
84
+ url = "http://www.linkedin.com#{site.at('a')['href']}"
85
+ CGI.parse(URI.parse(url).query)['url']
98
86
  end
99
- @websites
87
+
100
88
  end
101
89
 
102
90
  def groups
103
- unless @groups
104
- @groups = []
105
- if page.search('.group-data').first
106
- @groups = page.search('.group-data').map do |item|
107
- name = item.text.gsub(/\s+|\n/, ' ').strip
108
- link = "http://www.linkedin.com#{item.at('a')['href']}"
109
- {:name => name, :link => link}
110
- end
111
- end
91
+ @groups ||= @page.search('.group-data').map do |item|
92
+ name = item.text.gsub(/\s+|\n/, ' ').strip
93
+ link = "http://www.linkedin.com#{item.at('a')['href']}"
94
+ {:name => name, :link => link}
112
95
  end
113
- @groups
114
96
  end
115
97
 
116
98
  def organizations
117
- unless @organizations
118
- @organizations = []
119
- if @page.search('ul.organizations/li.organization').first
120
- @organizations = @page.search('ul.organizations/li.organization').map do |item|
121
-
122
- name = item.search('h3').text.gsub(/\s+|\n/, ' ').strip rescue nil
123
- start_date, end_date = item.search('ul.specifics li').text.gsub(/\s+|\n/, ' ').strip.split(' to ')
124
- start_date = Date.parse(start_date) rescue nil
125
- end_date = Date.parse(end_date) rescue nil
126
- {:name => name, :start_date => start_date, :end_date => end_date}
127
- end
128
- end
99
+ @organizations ||= @page.search('ul.organizations/li.organization').map do |item|
100
+ name = item.search('h3').text.gsub(/\s+|\n/, ' ').strip rescue nil
101
+ start_date, end_date = item.search('ul.specifics li').text.gsub(/\s+|\n/, ' ').strip.split(' to ')
102
+ start_date = Date.parse(start_date) rescue nil
103
+ end_date = Date.parse(end_date) rescue nil
104
+ {:name => name, :start_date => start_date, :end_date => end_date}
129
105
  end
130
- @organizations
131
106
  end
132
107
 
133
108
  def languages
134
- unless @languages
135
- @languages = []
136
- if @page.at('ul.languages/li.language')
137
- @languages = @page.search('ul.languages/li.language').map do |item|
138
- language = item.at('h3').text rescue nil
139
- proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, ' ').strip rescue nil
140
- {:language=> language, :proficiency => proficiency }
141
- end
142
- end
109
+ @languages ||= @page.search('ul.languages/li.language').map do |item|
110
+ language = item.at('h3').text rescue nil
111
+ proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, ' ').strip rescue nil
112
+ {:language=> language, :proficiency => proficiency }
143
113
  end
144
- @languages
145
114
  end
146
115
 
147
116
  def certifications
148
- unless @certtifications
149
- @certifications = []
150
- if @page.at('ul.certifications/li.certification')
151
- @certifications = @page.search('ul.certifications/li.certification').map do |item|
117
+ @certifications ||= @page.search('ul.certifications/li.certification').map do |item|
152
118
  name = item.at('h3').text.gsub(/\s+|\n/, ' ').strip rescue nil
153
119
  authority = item.at('.specifics/.org').text.gsub(/\s+|\n/, ' ').strip rescue nil
154
120
  license = item.at('.specifics/.licence-number').text.gsub(/\s+|\n/, ' ').strip rescue nil
@@ -156,27 +122,19 @@ module Linkedin
156
122
 
157
123
  {:name => name, :authority => authority, :license => license, :start_date => start_date}
158
124
  end
159
- end
160
- end
161
- @certifications
125
+
162
126
  end
163
127
 
164
128
 
165
129
  def recommended_visitors
166
- unless @recommended_visitors
167
- @recommended_visitors = []
168
- if @page.at('.browsemap/.content/ul/li')
169
- @recommended_visitors = @page.search('.browsemap/.content/ul/li').map do |visitor|
170
- v = {}
171
- v[:link] = visitor.at('a')['href']
172
- v[:name] = visitor.at('strong/a').text
173
- v[:title] = visitor.at('.headline').text.gsub('...',' ').split(' at ').first
174
- v[:company] = visitor.at('.headline').text.gsub('...',' ').split(' at ')[1]
175
- v
176
- end
177
- end
130
+ @recommended_visitors ||= @page.search('.browsemap/.content/ul/li').map do |visitor|
131
+ v = {}
132
+ v[:link] = visitor.at('a')['href']
133
+ v[:name] = visitor.at('strong/a').text
134
+ v[:title] = visitor.at('.headline').text.gsub('...',' ').split(' at ').first
135
+ v[:company] = visitor.at('.headline').text.gsub('...',' ').split(' at ')[1]
136
+ v
178
137
  end
179
- @recommended_visitors
180
138
  end
181
139
 
182
140
  def to_json
@@ -196,11 +154,11 @@ module Linkedin
196
154
  company[:title] = node.at('h3').text.gsub(/\s+|\n/, ' ').strip if node.at('h3')
197
155
  company[:company] = node.at('h4').text.gsub(/\s+|\n/, ' ').strip if node.at('h4')
198
156
  company[:description] = node.at(".description.#{type}-position").text.gsub(/\s+|\n/, ' ').strip if node.at(".description.#{type}-position")
199
- start_date = node.at('.dtstart').text.gsub(/\s+|\n/, ' ').strip rescue nil
157
+
158
+ start_date = node.at('.dtstart')['title'] rescue nil
200
159
  company[:start_date] = parse_date(start_date) rescue nil
201
160
 
202
- end_date = node.at('.dtend').text.gsub(/\s+|\n/, ' ').strip rescue nil
203
- end_date ||= node.at('.dtstamp').text.gsub(/\s+|\n/, ' ').strip rescue nil
161
+ end_date = node.at('.dtend')['title'] rescue nil
204
162
  company[:end_date] = parse_date(end_date) rescue nil
205
163
 
206
164
  company_link = node.at('h4/strong/a')['href'] if node.at('h4/strong/a')
@@ -1,5 +1,5 @@
1
1
  module Linkedin
2
2
  module Scraper
3
- VERSION = '0.1.0'
3
+ VERSION = '0.1.1'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkedin-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yatish Mehta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-11-13 00:00:00.000000000 Z
11
+ date: 2014-03-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize
@@ -93,7 +93,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
93
93
  version: '0'
94
94
  requirements: []
95
95
  rubyforge_project:
96
- rubygems_version: 2.1.2
96
+ rubygems_version: 2.1.11
97
97
  signing_key:
98
98
  specification_version: 4
99
99
  summary: when a url of public linkedin profile page is given it scrapes the entire
@@ -102,3 +102,4 @@ test_files:
102
102
  - spec/fixtures/jgrevich.html
103
103
  - spec/linkedin-scraper/profile_spec.rb
104
104
  - spec/spec_helper.rb
105
+ has_rdoc: