linkedin-scraper-v2 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ea72cf17a2f1766bdb4eb7b320d3251392d21824
4
+ data.tar.gz: ebccbf3dcd9f3511b452e103986916067a502e6d
5
+ SHA512:
6
+ metadata.gz: cd1a28b3081b4d1ab7f86423cc169af184822cdb0ae96b5077a59314b6012a8e3ed837cc03e032c0878df7bc32c672bcfb1c32e8f3cbf56ea989b075c5468d71
7
+ data.tar.gz: 7d5c7acbde250b8976ddb64d0b0b496131baf3784993e5f6df17756762a9cdd37461771fcc12146e6174b64444ec64c460c2a7e9dd78b0edcd58ddbcd9327d15
data/.gitignore ADDED
@@ -0,0 +1,23 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .rspec
6
+ .yardoc
7
+ Gemfile.lock
8
+ InstalledFiles
9
+ _yardoc
10
+ coverage
11
+ doc/
12
+ lib/bundler/man
13
+ pkg
14
+ rdoc
15
+ spec/reports
16
+ test/tmp
17
+ test/version_tmp
18
+ tmp
19
+ .ruby-version
20
+ .ruby-gemset
21
+ .projectile
22
+ *.DS_Store
23
+ .idea/*
data/.rubocop.yml ADDED
@@ -0,0 +1,11 @@
1
+ Documentation:
2
+ Enabled: false
3
+
4
+ DotPosition:
5
+ Enabled: false
6
+
7
+ LineLength:
8
+ Enabled: false
9
+
10
+ MethodLength:
11
+ Enabled: false
data/.travis.yml ADDED
@@ -0,0 +1,8 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.0
4
+ - 2.0.0
5
+ - 1.9.3
6
+ - jruby-19mode
7
+ - 2.1.1
8
+ - 2.2.3
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in linkedin-scraper.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Yatish Mehta
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,272 @@
1
+ [![Build Status](https://secure.travis-ci.org/yatish27/linkedin-scraper.png)](http://travis-ci.org/yatish27/linkedin-scraper)
2
+ [![Gem Version](https://badge.fury.io/rb/linkedin-scraper.png)](http://badge.fury.io/rb/linkedin-scraper)
3
+
4
+ Linkedin Scraper
5
+ ================
6
+
7
+ Linkedin-scraper is a gem for scraping linkedin public profiles.
8
+ Given the URL of the profile, it gets the name, country, title, area, current companies, past companies,
9
+ organizations, skills, groups, etc.
10
+
11
+
12
+ ## Installation
13
+
14
+ Install the gem from RubyGems:
15
+
16
+ gem install linkedin-scraper
17
+
18
+ This gem is tested on Ruby 1.9.2, 1.9.3, 2.0.0, JRuby (1.9 mode), and Rubinius (1.9 mode).
19
+
20
+ ## Usage
21
+
22
+
23
+ Initialize a scraper instance
24
+
25
+ profile = Linkedin::Profile.get_profile("http://www.linkedin.com/in/jeffweiner08")
26
+
27
+ The returning object responds to the following methods
28
+
29
+
30
+ profile.first_name # The first name of the contact
31
+
32
+ profile.last_name # The last name of the contact
33
+
34
+ profile.name # The full name of the profile
35
+
36
+ profile.title # The job title
37
+
38
+ profile.summary # The summary of the profile
39
+
40
+ profile.location # The location of the contact
41
+
42
+ profile.country # The country of the contact
43
+
44
+ profile.industry # The industry to which the contact belongs
45
+
46
+ profile.picture # The profile picture link of profile
47
+
48
+ profile.skills # Array of skills of the profile
49
+
50
+ profile.organizations # Array of organizations of the profile
51
+
52
+ profile.education # Array of hashes for education
53
+
54
+ profile.websites # Array of websites
55
+
56
+ profile.groups # Array of groups
57
+
58
+ profile.languages # Array of languages
59
+
60
+ profile.certifications # Array of certifications
61
+
62
+ profile.number_of_connections # The number of connections as a string
63
+
64
+
65
+ For current and past companies, it also provides details of each company, such as company size, industry, address, etc.
66
+
67
+ profile.current_companies
68
+
69
+ [
70
+ [0] {
71
+ :current_company => "LinkedIn",
72
+ :current_title => "CEO",
73
+ :current_company_url => "http://www.linkedin.com",
74
+ :description => nil,
75
+ :linkedin_company_url => "http://www.linkedin.com/company/linkedin?trk=ppro_cprof",
76
+ :url => "http://www.linkedin.com",
77
+ :type => "Public Company",
78
+ :company_size => "1001-5000 employees",
79
+ :website => "http://www.linkedin.com",
80
+ :industry => "Internet",
81
+ :founded => "2003",
82
+ :address => "2029 Stierlin Court Mountain View, CA 94043 United States"
83
+ },
84
+ [1] {
85
+ :current_company => "Intuit",
86
+ :current_title => "Member, Board of Directors",
87
+ :current_company_url => "http://network.intuit.com/",
88
+ :description => nil,
89
+ :linkedin_company_url => "http://www.linkedin.com/company/intuit?trk=ppro_cprof",
90
+ :url => "http://network.intuit.com/",
91
+ :type => "Public Company",
92
+ :company_size => "5001-10,000 employees",
93
+ :website => "http://network.intuit.com/",
94
+ :industry => "Computer Software",
95
+ :founded => "1983",
96
+ :address => "2632 Marine Way Mountain View, CA 94043 United States"
97
+ },
98
+ [2] {
99
+ :current_company => "DonorsChoose",
100
+ :current_title => "Member, Board of Directors",
101
+ :current_company_url => "http://www.donorschoose.org",
102
+ :description => nil,
103
+ :linkedin_company_url => "http://www.linkedin.com/company/donorschoose.org?trk=ppro_cprof",
104
+ :url => "http://www.donorschoose.org",
105
+ :type => "Nonprofit",
106
+ :company_size => "51-200 employees",
107
+ :website => "http://www.donorschoose.org",
108
+ :industry => "Nonprofit Organization Management",
109
+ :founded => "2000",
110
+ :address => "213 West 35th Street 2nd Floor East New York, NY 10001 United States"
111
+ },
112
+ [3] {
113
+ :current_company => "Malaria No More",
114
+ :current_title => "Member, Board of Directors",
115
+ :current_company_url => nil,
116
+ :description => nil
117
+ },
118
+ [4] {
119
+ :current_company => "Venture For America",
120
+ :current_title => "Member, Advisory Board",
121
+ :current_company_url => "http://ventureforamerica.org/",
122
+ :description => nil,
123
+ :linkedin_company_url => "http://www.linkedin.com/company/venture-for-america?trk=ppro_cprof",
124
+ :url => "http://ventureforamerica.org/",
125
+ :type => "Nonprofit",
126
+ :company_size => "1-10 employees",
127
+ :website => "http://ventureforamerica.org/",
128
+ :industry => "Nonprofit Organization Management",
129
+ :founded => "2011"
130
+ }
131
+ ]
132
+
133
+
134
+ profile.past_companies
135
+ [
136
+ [0] {
137
+ :past_company => "Accel Partners",
138
+ :past_title => "Executive in Residence",
139
+ :past_company_website => "http://www.facebook.com/accel",
140
+ :description => nil,
141
+ :linkedin_company_url => "http://www.linkedin.com/company/accel-partners?trk=ppro_cprof",
142
+ :url => "http://www.facebook.com/accel",
143
+ :type => "Partnership",
144
+ :company_size => "51-200 employees",
145
+ :website => "http://www.facebook.com/accel",
146
+ :industry => "Venture Capital & Private Equity",
147
+ :address => "428 University Palo Alto, CA 94301 United States"
148
+ },
149
+ [1] {
150
+ :past_company => "Greylock",
151
+ :past_title => "Executive in Residence",
152
+ :past_company_website => "http://www.greylock.com",
153
+ :description => nil,
154
+ :linkedin_company_url => "http://www.linkedin.com/company/greylock-partners?trk=ppro_cprof",
155
+ :url => "http://www.greylock.com",
156
+ :type => "Partnership",
157
+ :company_size => "51-200 employees",
158
+ :website => "http://www.greylock.com",
159
+ :industry => "Venture Capital & Private Equity",
160
+ :address => "2550 Sand Hill Road Menlo Park, CA 94025 United States"
161
+ },
162
+ [2] {
163
+ :past_company => "Yahoo!",
164
+ :past_title => "Executive Vice President Network Division",
165
+ :past_company_website => "http://www.yahoo.com",
166
+ :description => nil,
167
+ :linkedin_company_url => "http://www.linkedin.com/company/yahoo?trk=ppro_cprof",
168
+ :url => "http://www.yahoo.com",
169
+ :type => "Public Company",
170
+ :company_size => "10,001+ employees",
171
+ :website => "http://www.yahoo.com",
172
+ :industry => "Internet",
173
+ :founded => "1994",
174
+ :address => "701 First Avenue Sunnyvale, CA 94089 United States"
175
+ },
176
+ [3] {
177
+ :past_company => "Windsor Media",
178
+ :past_title => "Founding Partner",
179
+ :past_company_website => nil,
180
+ :description => nil
181
+ },
182
+ [4] {
183
+ :past_company => "Warner Bros.",
184
+ :past_title => "Vice President Online",
185
+ :past_company_website => "http://www.warnerbros.com/",
186
+ :description => nil,
187
+ :linkedin_company_url => "http://www.linkedin.com/company/warner-bros.-entertainment-group-of-companies?trk=ppro_cprof",
188
+ :url => "http://www.warnerbros.com/",
189
+ :type => "Public Company",
190
+ :company_size => "10,001+ employees",
191
+ :website => "http://www.warnerbros.com/",
192
+ :industry => "Entertainment",
193
+ :address => "4000 Warner Boulevard Burbank, CA 91522 United States"
194
+ }
195
+ ]
196
+
197
+
198
+ profile.recommended_visitors
199
+ # The list of profiles shown under "Viewers of this profile also viewed..."
200
+ [
201
+ [0] {
202
+ :link => "http://www.linkedin.com/in/barackobama?trk=pub-pbmap",
203
+ :name => "Barack Obama",
204
+ :title => "President of the United States of ",
205
+ :company => nil
206
+ },
207
+ [1] {
208
+ :link => "http://www.linkedin.com/in/marissamayer?trk=pub-pbmap",
209
+ :name => "Marissa Mayer",
210
+ :title => "Yahoo!, President & CEO",
211
+ :company => nil
212
+ },
213
+ [2] {
214
+ :link => "http://www.linkedin.com/pub/sean-parker/0/1/826?trk=pub-pbmap",
215
+ :name => "Sean Parker",
216
+ :title => nil,
217
+ :company => nil
218
+ },
219
+ [3] {
220
+ :link => "http://www.linkedin.com/pub/eduardo-saverin/0/70a/31b?trk=pub-pbmap",
221
+ :name => "Eduardo Saverin",
222
+ :title => nil,
223
+ :company => nil
224
+ },
225
+ [4] {
226
+ :link => "http://www.linkedin.com/in/rbranson?trk=pub-pbmap",
227
+ :name => "Richard Branson",
228
+ :title => "Founder",
229
+ :company => "Virgin Group"
230
+ },
231
+ [5] {
232
+ :link => "http://www.linkedin.com/in/reidhoffman?trk=pub-pbmap",
233
+ :name => "Reid Hoffman",
234
+ :title => "Entrepreneur. Product Strategist. ",
235
+ :company => nil
236
+ },
237
+ [6] {
238
+ :link => "http://www.linkedin.com/in/mdell?trk=pub-pbmap",
239
+ :name => "Michael Dell",
240
+ :title => "Chairman and CEO",
241
+ :company => "Dell"
242
+ },
243
+ [7] {
244
+ :link => "http://www.linkedin.com/in/mittromney?trk=pub-pbmap",
245
+ :name => "Mitt Romney",
246
+ :title => "Believe in America",
247
+ :company => nil
248
+ },
249
+ [8] {
250
+ :link => "http://www.linkedin.com/pub/sheryl-sandberg/2/665/512?trk=pub-pbmap",
251
+ :name => "Sheryl Sandberg",
252
+ :title => nil,
253
+ :company => nil
254
+ }
255
+ ]
256
+
257
+
258
+ The gem also comes with a binary that can be used from the command line to get a JSON response of the scraped data.
259
+ It takes the url as the first argument.
260
+
261
+ linkedin-scraper http://www.linkedin.com/in/jeffweiner08
262
+
263
+ ## Contributing
264
+
265
+ Bug reports and pull requests are welcome on GitHub at https://github.com/yatish27/linkedin-scraper.
266
+ This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the
267
+ [Contributor Covenant](http://contributor-covenant.org) code of conduct.
268
+
269
+
270
+ ## License
271
+
272
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,3 @@
1
+ require 'rspec/core/rake_task'
2
+ task :default => :spec
3
+ RSpec::Core::RakeTask.new
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby

# Command-line entry point: scrapes the public profile whose URL is given as
# the first argument and prints the scraped attributes as pretty-printed JSON.

# JSON is referenced below as a receiver, which Ruby resolves before the
# argument expression runs, so it must be loaded up front.
require 'json'
require_relative '../lib/linkedin_scraper'

url = ARGV[0]
# Fail with a usage message instead of passing nil to the HTTP client.
abort 'Usage: linkedin-scraper <public-profile-url>' if url.nil? || url.strip.empty?

profile = Linkedin::Profile.new(url)
puts JSON.pretty_generate(JSON.parse(profile.to_json))
@@ -0,0 +1,5 @@
1
+ require "rubygems"
2
+ require "mechanize"
3
+ require "cgi"
4
+ require "net/http"
5
+ Dir["#{File.expand_path(File.dirname(__FILE__))}/linkedin_scraper/*.rb"].each { |file| require file }
@@ -0,0 +1,252 @@
1
+ # -*- encoding: utf-8 -*-
2
require "cgi"
require "date"
require "json"
require "uri"

module Linkedin
  # Scrapes one public LinkedIn profile page and exposes its sections
  # (name, title, companies, education, ...) as lazily computed, memoized
  # readers. The page is fetched once in #initialize; company details cause
  # one additional HTTP request per position that links to a company page.
  #
  # Every reader returns nil (or an empty Array for list sections) when the
  # corresponding markup is absent instead of raising.
  class Profile
    # Mechanize user-agent aliases; one is picked at random per HTTP client.
    # NOTE(review): confirm every alias is still known to the installed
    # Mechanize version - an unknown alias makes Mechanize raise.
    USER_AGENTS = ["Windows IE 6", "Windows IE 7", "Windows Mozilla", "Mac Safari", "Mac FireFox", "Mac Mozilla", "Linux Mozilla", "Linux Firefox", "Linux Konqueror"].freeze

    # Names of the public readers serialized by #to_json.
    ATTRIBUTES = %w(
      name
      first_name
      last_name
      title
      location
      number_of_connections
      country
      industry
      summary
      picture
      projects
      linkedin_url
      education
      groups
      websites
      languages
      skills
      certifications
      organizations
      past_companies
      current_companies
      recommended_visitors).freeze

    # Text that separates the start and the end of a date range on the page
    # (an en dash surrounded by single spaces).
    DATE_RANGE_SEPARATOR = " – ".freeze

    # Absolute http(s) URL on linkedin.com or one of its subdomains.
    LINKEDIN_URL = %r{\Ahttps?://(?:[a-z0-9-]+\.)*linkedin\.com/}i

    attr_reader :page, :linkedin_url

    # Best-effort constructor: builds a Profile for +url+, and on any
    # StandardError prints the error and returns nil instead of raising.
    def self.get_profile(url, options = {})
      new(url, options)
    rescue => e
      puts e
      nil
    end

    # url     - public profile URL to fetch.
    # options - Hash; :proxy_ip and :proxy_port configure an HTTP proxy.
    #
    # Performs the HTTP request immediately; errors from the HTTP client
    # propagate to the caller.
    def initialize(url, options = {})
      @linkedin_url = url
      @options = options || {}
      @page = http_client.get(url)
    end

    # Full name built from the available name parts ("" when none is found).
    def name
      [first_name, last_name].compact.join(" ")
    end

    # First whitespace-separated word of the ".full-name" element, or nil.
    def first_name
      @first_name ||= name_parts[0]
    end

    # Everything after the first word of ".full-name"; nil for one-word names.
    def last_name
      @last_name ||= name_parts[1]
    end

    def title
      @title ||= clean_text(@page.at(".title"))
    end

    # Text before the first comma of the ".locality" element.
    def location
      @location ||= locality_parts.first
    end

    # First run of digits (with an optional trailing "+") found in the
    # ".member-connections" element, as a String; nil when not present.
    def number_of_connections
      @connections ||= begin
        node = @page.at(".member-connections")
        node && node.text[/\d+\+?/]
      end
    end

    # Text after the last comma of the ".locality" element.
    def country
      @country ||= locality_parts.last
    end

    def industry
      @industry ||= clean_text(@page.at(".industry"))
    end

    def summary
      @summary ||= clean_text(@page.at(".summary .description"))
    end

    # Value of the profile picture's src attribute, or nil.
    def picture
      @picture ||= begin
        image = @page.at(".profile-picture img")
        source = image && image["src"]
        source && source.strip
      end
    end

    # Array of skill names.
    def skills
      @skills ||= @page.search(".skill-pill .endorse-item-name-text").map do |skill|
        text = skill.text
        text && text.strip
      end
    end

    def past_companies
      @past_companies ||= get_companies("past")
    end

    def current_companies
      @current_companies ||= get_companies("current")
    end

    # Array of Hashes, one per education entry. :start_date and :end_date are
    # the raw Strings found on either side of the date-range separator.
    def education
      @education ||= @page.search(".background-education .education").map do |item|
        details = item.search("h5").last
        degree = clean_text(details && details.at(".degree"))
        period = clean_text(item.at(".education-date"))
        start_date, end_date = period ? period.split(DATE_RANGE_SEPARATOR) : []

        {
          :name => clean_text(item.at("h4")),
          :description => clean_text(details),
          :degree => degree && degree.sub(/,\z/, ""),
          :major => clean_text(details && details.at(".major")),
          :period => period,
          :start_date => start_date,
          :end_date => end_date
        }
      end
    end

    # Array of target URLs taken from the "url" query parameter of the first
    # link inside "#overview-summary-websites".
    def websites
      @websites ||= @page.search("#overview-summary-websites").flat_map do |site|
        anchor = site.at("a")
        href = anchor && anchor["href"]
        next [] unless href

        query = URI.parse("http://www.linkedin.com#{href}").query
        query ? CGI.parse(query)["url"] : []
      end
    end

    # Array of { :name, :link } Hashes; :link is nil when the entry has no link.
    def groups
      @groups ||= @page.search(".groups-name").map do |item|
        anchor = item.at("a")
        href = anchor && anchor["href"]
        { :name => clean_text(item), :link => href && "http://www.linkedin.com#{href}" }
      end
    end

    # Array of { :name, :start_date, :end_date } Hashes; dates are Date
    # objects or nil when missing/unparseable.
    def organizations
      @organizations ||= @page.search("#background-organizations .section-item").map do |item|
        start_date, end_date = date_range(item.at(".organizations-date"))
        {
          :name => clean_text(item.at(".summary")),
          :start_date => parse_date(start_date),
          :end_date => parse_date(end_date)
        }
      end
    end

    def languages
      @languages ||= @page.search(".background-languages #languages ol li").map do |item|
        heading = item.at("h4")
        {
          :language => heading && heading.text,
          :proficiency => clean_text(item.at("div.languages-proficiency"))
        }
      end
    end

    # NOTE(review): unlike the sibling sections, the selector below has no
    # "." or "#" prefix, so it matches elements *named*
    # "background-certifications" and probably never matches anything.
    # Left unchanged because the intended selector cannot be confirmed here.
    def certifications
      @certifications ||= @page.search("background-certifications").map do |item|
        {
          :name => clean_text(item.at("h4")),
          :authority => clean_text(item.at("h5")),
          :license => clean_text(item.at(".specifics/.licence-number")),
          :start_date => clean_text(item.at(".certification-date"))
        }
      end
    end

    # Array of { :link, :name, :title, :company } Hashes built from the
    # ".insights-browse-map" list; :title and :company are the text on
    # either side of " at " in the entry's subtitle.
    def recommended_visitors
      @recommended_visitors ||= @page.search(".insights-browse-map/ul/li").map do |visitor|
        anchor = visitor.at("a")
        name_node = visitor.at("h4/a")
        subtitle = visitor.at(".browse-map-title")
        title, company = subtitle ? subtitle.text.gsub("...", " ").split(" at ") : []

        {
          :link => anchor && anchor["href"],
          :name => name_node && name_node.text,
          :title => title,
          :company => company
        }
      end
    end

    # Array of project Hashes. :associates is nil when the project has no
    # associates list; list children without a link are skipped.
    def projects
      @projects ||= @page.search(".background-projects/div").map do |wrapper|
        project = wrapper.at("div")
        title_node = project && project.at("hgroup/h4 span:first-of-type")
        link_node = project && project.at("hgroup/h4 a:first-of-type")
        description_node = project && project.at(".description")
        associates_node = project && project.at(".associated-list ul")
        start_date, end_date = date_range(project && project.at(".projects-date"))

        {
          :title => title_node && title_node.text,
          :link => link_node && link_node["href"],
          :start_date => parse_date(start_date),
          :end_date => parse_date(end_date),
          :description => description_node && description_node.text,
          :associates => associates_node && associate_names(associates_node)
        }
      end
    end

    # Serializes every reader listed in ATTRIBUTES as a JSON object.
    # Accepts (and forwards) the optional generator-state arguments so the
    # profile can also be nested inside other structures passed to
    # JSON.generate.
    def to_json(*args)
      ATTRIBUTES.each_with_object({}) { |attr, hash| hash[attr.to_sym] = public_send(attr) }.to_json(*args)
    end

    private

    # Builds one Hash per ".<type>-position" entry of the experience section
    # and merges in the details scraped from the linked company page, if any.
    def get_companies(type)
      @page.search(".background-experience .#{type}-position").map do |node|
        heading = node.at("h4")
        employer = heading && heading.next
        description = node.at(".description")
        dates = node.at(".experience-date-locale")
        start_date, end_date = date_range(dates)

        company = {}
        company[:title] = clean_text(heading) if heading
        company[:company] = clean_text(employer) if employer
        company[:description] = clean_text(description) if description
        # Duration is the text inside the last pair of parentheses.
        company[:duration] = dates && dates.text[/.*\((.*)\)/, 1]
        company[:start_date] = parse_date(start_date)
        company[:end_date] = parse_date(end_date)

        employer_link = employer && employer.at("a")
        company.merge!(get_company_details(employer_link && employer_link["href"]))
      end
    end

    # Converts scraped date text to a Date. A bare four-digit year maps to
    # January 1st of that year, a trailing "(...)" duration is ignored, and
    # nil/blank/unparseable input yields nil.
    def parse_date(date)
      text = date.to_s.sub(/\s*\(.*\)\s*\z/, "").strip
      return nil if text.empty?

      text = "#{text}-01-01" if text =~ /\A(19|20)\d{2}\z/
      Date.parse(text)
    rescue ArgumentError
      nil
    end

    # Fetches the company page behind +link+ and returns the scraped details.
    # Returns {} without any request when the position has no company link.
    def get_company_details(link)
      return {} if link.nil? || link.to_s.strip.empty?

      result = { :linkedin_company_url => get_linkedin_company_url(link) }
      page = http_client.get(result[:linkedin_company_url])

      url_node = page.at(".basic-info-about/ul/li/p/a")
      result[:url] = url_node.text if url_node

      info = page.at(".basic-info-about/ul")
      if info
        # Each <h4> label names the value held by the <p> at the same index.
        info.search("p").zip(info.search("h4")).each do |value, label|
          next unless label
          result[label.text.gsub(" ", "_").downcase.to_sym] = value.text.strip
        end
      end

      headquarters = page.at(".vcard.hq")
      address = headquarters && headquarters.at(".adr")
      result[:address] = address.text.gsub("\n", " ").strip if address
      result
    end

    # New Mechanize agent with a random user agent, no page history and,
    # when options[:proxy_ip] is given, the configured proxy.
    def http_client
      Mechanize.new do |agent|
        agent.user_agent_alias = USER_AGENTS.sample
        agent.set_proxy(@options[:proxy_ip], @options[:proxy_port]) if @options[:proxy_ip]
        agent.max_history = 0
      end
    end

    # Returns +link+ unchanged when it already is an absolute linkedin.com
    # URL; otherwise treats it as a path on http://www.linkedin.com/.
    def get_linkedin_company_url(link)
      link = link.to_s.strip
      return link if link =~ LINKEDIN_URL

      # Drop leading slashes so "/company/x" does not become "//company/x".
      "http://www.linkedin.com/#{link.sub(%r{\A/+}, "")}"
    end

    # Text of +node+ with whitespace runs collapsed and ends trimmed;
    # nil when +node+ is nil.
    def clean_text(node)
      node && node.text.gsub(/\s+/, " ").strip
    end

    # [start, end] Strings of the date range held by +node+ ([] when nil).
    def date_range(node)
      text = clean_text(node)
      text ? text.split(DATE_RANGE_SEPARATOR) : []
    end

    # Stripped [first, rest] parts of the ".full-name" text ([] when absent).
    def name_parts
      node = @page.at(".full-name")
      node ? node.text.split(" ", 2).map(&:strip) : []
    end

    # Stripped comma-separated parts of the ".locality" text ([] when absent).
    def locality_parts
      node = @page.at(".locality")
      node ? node.text.split(",").map(&:strip) : []
    end

    # Link texts of the children of +list+ that contain a link.
    def associate_names(list)
      list.children.map do |child|
        anchor = child.at("a")
        anchor && anchor.text
      end.compact
    end
  end
end