linkedin-scraper-v2 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ea72cf17a2f1766bdb4eb7b320d3251392d21824
4
+ data.tar.gz: ebccbf3dcd9f3511b452e103986916067a502e6d
5
+ SHA512:
6
+ metadata.gz: cd1a28b3081b4d1ab7f86423cc169af184822cdb0ae96b5077a59314b6012a8e3ed837cc03e032c0878df7bc32c672bcfb1c32e8f3cbf56ea989b075c5468d71
7
+ data.tar.gz: 7d5c7acbde250b8976ddb64d0b0b496131baf3784993e5f6df17756762a9cdd37461771fcc12146e6174b64444ec64c460c2a7e9dd78b0edcd58ddbcd9327d15
data/.gitignore ADDED
@@ -0,0 +1,23 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .rspec
6
+ .yardoc
7
+ Gemfile.lock
8
+ InstalledFiles
9
+ _yardoc
10
+ coverage
11
+ doc/
12
+ lib/bundler/man
13
+ pkg
14
+ rdoc
15
+ spec/reports
16
+ test/tmp
17
+ test/version_tmp
18
+ tmp
19
+ .ruby-version
20
+ .ruby-gemset
21
+ .projectile
22
+ *.DS_Store
23
+ .idea/*
data/.rubocop.yml ADDED
@@ -0,0 +1,11 @@
1
+ Documentation:
2
+ Enabled: false
3
+
4
+ DotPosition:
5
+ Enabled: false
6
+
7
+ LineLength:
8
+ Enabled: false
9
+
10
+ MethodLength:
11
+ Enabled: false
data/.travis.yml ADDED
@@ -0,0 +1,8 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.0
4
+ - 2.0.0
5
+ - 1.9.3
6
+ - jruby-19mode
7
+ - 2.1.1
8
+ - 2.2.3
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in linkedin-scraper.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Yatish Mehta
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,272 @@
1
+ [![Build Status](https://secure.travis-ci.org/yatish27/linkedin-scraper.png)](http://travis-ci.org/yatish27/linkedin-scraper)
2
+ [![Gem Version](https://badge.fury.io/rb/linkedin-scraper.png)](http://badge.fury.io/rb/linkedin-scraper)
3
+
4
+ Linkedin Scraper
5
+ ================
6
+
7
+ Linkedin-scraper is a gem for scraping public LinkedIn profiles.
8
+ Given the URL of the profile, it gets the name, country, title, area, current companies, past companies,
9
+ organizations, skills, groups, etc.
10
+
11
+
12
+ ## Installation
13
+
14
+ Install the gem from RubyGems:
15
+
16
+ gem install linkedin-scraper
17
+
18
+ This gem is tested on Ruby 1.9.2, 1.9.3, 2.0.0, JRuby 1.9, and rbx 1.9.
19
+
20
+ ## Usage
21
+
22
+
23
+ Initialize a scraper instance
24
+
25
+ profile = Linkedin::Profile.get_profile("http://www.linkedin.com/in/jeffweiner08")
26
+
27
+ The returning object responds to the following methods
28
+
29
+
30
+ profile.first_name # The first name of the contact
31
+
32
+ profile.last_name # The last name of the contact
33
+
34
+ profile.name # The full name of the profile
35
+
36
+ profile.title # The job title
37
+
38
+ profile.summary # The summary of the profile
39
+
40
+ profile.location # The location of the contact
41
+
42
+ profile.country # The country of the contact
43
+
44
+ profile.industry # The industry to which the contact belongs
45
+
46
+ profile.picture # The profile picture link of profile
47
+
48
+ profile.skills # Array of skills of the profile
49
+
50
+ profile.organizations # Array of organizations of the profile
51
+
52
+ profile.education # Array of hashes for education
53
+
54
+ profile.websites # Array of websites
55
+
56
+ profile.groups # Array of groups
57
+
58
+ profile.languages # Array of languages
59
+
60
+ profile.certifications # Array of certifications
61
+
62
+ profile.number_of_connections # The number of connections as a string
63
+
64
+
65
+ For current and past companies it also provides the details of the companies like company size, industry, address, etc
66
+
67
+ profile.current_companies
68
+
69
+ [
70
+ [0] {
71
+ :current_company => "LinkedIn",
72
+ :current_title => "CEO",
73
+ :current_company_url => "http://www.linkedin.com",
74
+ :description => nil,
75
+ :linkedin_company_url => "http://www.linkedin.com/company/linkedin?trk=ppro_cprof",
76
+ :url => "http://www.linkedin.com",
77
+ :type => "Public Company",
78
+ :company_size => "1001-5000 employees",
79
+ :website => "http://www.linkedin.com",
80
+ :industry => "Internet",
81
+ :founded => "2003",
82
+ :address => "2029 Stierlin Court Mountain View, CA 94043 United States"
83
+ },
84
+ [1] {
85
+ :current_company => "Intuit",
86
+ :current_title => "Member, Board of Directors",
87
+ :current_company_url => "http://network.intuit.com/",
88
+ :description => nil,
89
+ :linkedin_company_url => "http://www.linkedin.com/company/intuit?trk=ppro_cprof",
90
+ :url => "http://network.intuit.com/",
91
+ :type => "Public Company",
92
+ :company_size => "5001-10,000 employees",
93
+ :website => "http://network.intuit.com/",
94
+ :industry => "Computer Software",
95
+ :founded => "1983",
96
+ :address => "2632 Marine Way Mountain View, CA 94043 United States"
97
+ },
98
+ [2] {
99
+ :current_company => "DonorsChoose",
100
+ :current_title => "Member, Board of Directors",
101
+ :current_company_url => "http://www.donorschoose.org",
102
+ :description => nil,
103
+ :linkedin_company_url => "http://www.linkedin.com/company/donorschoose.org?trk=ppro_cprof",
104
+ :url => "http://www.donorschoose.org",
105
+ :type => "Nonprofit",
106
+ :company_size => "51-200 employees",
107
+ :website => "http://www.donorschoose.org",
108
+ :industry => "Nonprofit Organization Management",
109
+ :founded => "2000",
110
+ :address => "213 West 35th Street 2nd Floor East New York, NY 10001 United States"
111
+ },
112
+ [3] {
113
+ :current_company => "Malaria No More",
114
+ :current_title => "Member, Board of Directors",
115
+ :current_company_url => nil,
116
+ :description => nil
117
+ },
118
+ [4] {
119
+ :current_company => "Venture For America",
120
+ :current_title => "Member, Advisory Board",
121
+ :current_company_url => "http://ventureforamerica.org/",
122
+ :description => nil,
123
+ :linkedin_company_url => "http://www.linkedin.com/company/venture-for-america?trk=ppro_cprof",
124
+ :url => "http://ventureforamerica.org/",
125
+ :type => "Nonprofit",
126
+ :company_size => "1-10 employees",
127
+ :website => "http://ventureforamerica.org/",
128
+ :industry => "Nonprofit Organization Management",
129
+ :founded => "2011"
130
+ }
131
+ ]
132
+
133
+
134
+ profile.past_companies
135
+ [
136
+ [0] {
137
+ :past_company => "Accel Partners",
138
+ :past_title => "Executive in Residence",
139
+ :past_company_website => "http://www.facebook.com/accel",
140
+ :description => nil,
141
+ :linkedin_company_url => "http://www.linkedin.com/company/accel-partners?trk=ppro_cprof",
142
+ :url => "http://www.facebook.com/accel",
143
+ :type => "Partnership",
144
+ :company_size => "51-200 employees",
145
+ :website => "http://www.facebook.com/accel",
146
+ :industry => "Venture Capital & Private Equity",
147
+ :address => "428 University Palo Alto, CA 94301 United States"
148
+ },
149
+ [1] {
150
+ :past_company => "Greylock",
151
+ :past_title => "Executive in Residence",
152
+ :past_company_website => "http://www.greylock.com",
153
+ :description => nil,
154
+ :linkedin_company_url => "http://www.linkedin.com/company/greylock-partners?trk=ppro_cprof",
155
+ :url => "http://www.greylock.com",
156
+ :type => "Partnership",
157
+ :company_size => "51-200 employees",
158
+ :website => "http://www.greylock.com",
159
+ :industry => "Venture Capital & Private Equity",
160
+ :address => "2550 Sand Hill Road Menlo Park, CA 94025 United States"
161
+ },
162
+ [2] {
163
+ :past_company => "Yahoo!",
164
+ :past_title => "Executive Vice President Network Division",
165
+ :past_company_website => "http://www.yahoo.com",
166
+ :description => nil,
167
+ :linkedin_company_url => "http://www.linkedin.com/company/yahoo?trk=ppro_cprof",
168
+ :url => "http://www.yahoo.com",
169
+ :type => "Public Company",
170
+ :company_size => "10,001+ employees",
171
+ :website => "http://www.yahoo.com",
172
+ :industry => "Internet",
173
+ :founded => "1994",
174
+ :address => "701 First Avenue Sunnyvale, CA 94089 United States"
175
+ },
176
+ [3] {
177
+ :past_company => "Windsor Media",
178
+ :past_title => "Founding Partner",
179
+ :past_company_website => nil,
180
+ :description => nil
181
+ },
182
+ [4] {
183
+ :past_company => "Warner Bros.",
184
+ :past_title => "Vice President Online",
185
+ :past_company_website => "http://www.warnerbros.com/",
186
+ :description => nil,
187
+ :linkedin_company_url => "http://www.linkedin.com/company/warner-bros.-entertainment-group-of-companies?trk=ppro_cprof",
188
+ :url => "http://www.warnerbros.com/",
189
+ :type => "Public Company",
190
+ :company_size => "10,001+ employees",
191
+ :website => "http://www.warnerbros.com/",
192
+ :industry => "Entertainment",
193
+ :address => "4000 Warner Boulevard Burbank, CA 91522 United States"
194
+ }
195
+ ]
196
+
197
+
198
+ profile.recommended_visitors
199
+ # The list of profiles shown under "Viewers of this profile also viewed..."
200
+ [
201
+ [0] {
202
+ :link => "http://www.linkedin.com/in/barackobama?trk=pub-pbmap",
203
+ :name => "Barack Obama",
204
+ :title => "President of the United States of ",
205
+ :company => nil
206
+ },
207
+ [1] {
208
+ :link => "http://www.linkedin.com/in/marissamayer?trk=pub-pbmap",
209
+ :name => "Marissa Mayer",
210
+ :title => "Yahoo!, President & CEO",
211
+ :company => nil
212
+ },
213
+ [2] {
214
+ :link => "http://www.linkedin.com/pub/sean-parker/0/1/826?trk=pub-pbmap",
215
+ :name => "Sean Parker",
216
+ :title => nil,
217
+ :company => nil
218
+ },
219
+ [3] {
220
+ :link => "http://www.linkedin.com/pub/eduardo-saverin/0/70a/31b?trk=pub-pbmap",
221
+ :name => "Eduardo Saverin",
222
+ :title => nil,
223
+ :company => nil
224
+ },
225
+ [4] {
226
+ :link => "http://www.linkedin.com/in/rbranson?trk=pub-pbmap",
227
+ :name => "Richard Branson",
228
+ :title => "Founder",
229
+ :company => "Virgin Group"
230
+ },
231
+ [5] {
232
+ :link => "http://www.linkedin.com/in/reidhoffman?trk=pub-pbmap",
233
+ :name => "Reid Hoffman",
234
+ :title => "Entrepreneur. Product Strategist. ",
235
+ :company => nil
236
+ },
237
+ [6] {
238
+ :link => "http://www.linkedin.com/in/mdell?trk=pub-pbmap",
239
+ :name => "Michael Dell",
240
+ :title => "Chairman and CEO",
241
+ :company => "Dell"
242
+ },
243
+ [7] {
244
+ :link => "http://www.linkedin.com/in/mittromney?trk=pub-pbmap",
245
+ :name => "Mitt Romney",
246
+ :title => "Believe in America",
247
+ :company => nil
248
+ },
249
+ [8] {
250
+ :link => "http://www.linkedin.com/pub/sheryl-sandberg/2/665/512?trk=pub-pbmap",
251
+ :name => "Sheryl Sandberg",
252
+ :title => nil,
253
+ :company => nil
254
+ }
255
+ ]
256
+
257
+
258
+ The gem also comes with a binary and can be used from the command line to get a JSON response of the scraped data.
259
+ It takes the url as the first argument.
260
+
261
+ linkedin-scraper http://www.linkedin.com/in/jeffweiner08
262
+
263
+ ## Contributing
264
+
265
+ Bug reports and pull requests are welcome on GitHub at https://github.com/yatish27/linkedin-scraper.
266
+ This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the
267
+ [Contributor Covenant](http://contributor-covenant.org) code of conduct.
268
+
269
+
270
+ ## License
271
+
272
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,3 @@
1
# Rake tasks for the gem: running `rake` with no arguments runs the specs.
require 'rspec/core/rake_task'

# Define the RSpec task under its default name, :spec.
RSpec::Core::RakeTask.new(:spec)

# Make the spec run the default task.
task default: :spec
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby
# Command-line entry point: scrapes the LinkedIn profile whose URL is given as
# the first argument and prints the scraped attributes as pretty-printed JSON.

# Required explicitly: in `JSON.pretty_generate(...)` the JSON constant is
# looked up before Profile#to_json (which lazily requires json) is called, so
# it must already be defined unless a dependency happened to load it first.
require 'json'
require_relative '../lib/linkedin_scraper'

url = ARGV[0]
# Fail with a usage message instead of an obscure error from the HTTP client.
abort "Usage: #{File.basename($PROGRAM_NAME)} PROFILE_URL" if url.nil? || url.strip.empty?

profile = Linkedin::Profile.new(url)
puts JSON.pretty_generate(JSON.parse(profile.to_json))
@@ -0,0 +1,5 @@
1
# Entry point of the gem: loads the libraries the scraper relies on and then
# every Ruby file under the sibling linkedin_scraper/ directory.
require "rubygems"
require "mechanize"
require "cgi"
# The profile scraper calls Date.parse, so load date explicitly rather than
# relying on another library having required it.
require "date"
require "net/http"

# Dir[] gives no ordering guarantee on older Rubies; sort so the files are
# always required in the same order regardless of platform.
Dir[File.expand_path("../linkedin_scraper/*.rb", __FILE__)].sort.each { |file| require file }
@@ -0,0 +1,252 @@
1
+ # -*- encoding: utf-8 -*-
2
module Linkedin
  # Scrapes a public LinkedIn profile page and exposes the extracted fields.
  #
  # The page is fetched once in #initialize; every reader then queries the
  # parsed page with CSS selectors and memoizes its result. Readers return nil
  # (or an empty array) when the corresponding markup is absent instead of
  # raising.
  class Profile
    # Mechanize user-agent aliases; one is picked at random for each client.
    USER_AGENTS = ["Windows IE 6", "Windows IE 7", "Windows Mozilla", "Mac Safari", "Mac FireFox", "Mac Mozilla", "Linux Mozilla", "Linux Firefox", "Linux Konqueror"].freeze

    # Names of the public readers serialized by #to_json.
    ATTRIBUTES = %w(
      name
      first_name
      last_name
      title
      location
      number_of_connections
      country
      industry
      summary
      picture
      projects
      linkedin_url
      education
      groups
      websites
      languages
      skills
      certifications
      organizations
      past_companies
      current_companies
      recommended_visitors
    ).freeze

    # Text the scraped date ranges are split on (space, en dash, space).
    DATE_RANGE_SEPARATOR = " – ".freeze

    # Prefix used to turn site-relative links into absolute ones.
    LINKEDIN_BASE_URL = "http://www.linkedin.com".freeze

    # Matches links that already point at www.linkedin.com (http or https).
    ABSOLUTE_LINKEDIN_URL = %r{\Ahttps?://www\.linkedin\.com/}

    attr_reader :page, :linkedin_url

    # Best-effort constructor.
    #
    # @param url [String] URL of the public profile
    # @param options [Hash] see #initialize
    # @return [Linkedin::Profile, nil] the profile, or nil when fetching
    #   failed (the error message is printed to standard output)
    def self.get_profile(url, options = {})
      new(url, options)
    rescue => e
      puts e
      nil
    end

    # Fetches the profile page.
    #
    # @param url [String] URL of the public profile
    # @param options [Hash] when non-empty, :proxy_ip and :proxy_port are
    #   used to configure an HTTP proxy
    # @raise [StandardError] whatever the HTTP client raises on failure
    def initialize(url, options = {})
      @linkedin_url = url
      @options = options
      @page = http_client.get(url)
    end

    # @return [String] first and last name joined by a space; parts that are
    #   missing are left out
    def name
      [first_name, last_name].compact.join(" ")
    end

    # @return [String, nil] first word of the full name
    def first_name
      @first_name ||= name_parts[0]
    end

    # @return [String, nil] everything after the first word of the full name;
    #   nil for single-word names
    def last_name
      @last_name ||= name_parts[1]
    end

    # @return [String, nil] headline text with whitespace collapsed
    def title
      @title ||= text_at(@page, ".title")
    end

    # @return [String, nil] part of the locality text before the first comma
    def location
      @location ||= locality_parts.first
    end

    # @return [String, nil] leading run of digits (plus an optional "+") from
    #   the connections text, e.g. "500+"
    def number_of_connections
      @connections ||= begin
        node = @page.at(".member-connections")
        node && node.text[/[0-9]+\+?/]
      end
    end

    # @return [String, nil] part of the locality text after the last comma
    def country
      @country ||= locality_parts.last
    end

    # @return [String, nil] industry text with whitespace collapsed
    def industry
      @industry ||= text_at(@page, ".industry")
    end

    # @return [String, nil] summary text with whitespace collapsed
    def summary
      @summary ||= text_at(@page, ".summary .description")
    end

    # @return [String, nil] value of the profile picture's src attribute
    def picture
      @picture ||= begin
        image = @page.at(".profile-picture img")
        source = image && image["src"]
        source && source.strip
      end
    end

    # @return [Array<String>] skill names
    def skills
      @skills ||= @page.search(".skill-pill .endorse-item-name-text").map { |skill| skill.text.strip }
    end

    # @return [Array<Hash>] positions marked as past, see #get_companies
    def past_companies
      @past_companies ||= get_companies("past")
    end

    # @return [Array<Hash>] positions marked as current, see #get_companies
    def current_companies
      @current_companies ||= get_companies("current")
    end

    # @return [Array<Hash>] one hash per education entry with the keys :name,
    #   :description, :degree, :major, :period, :start_date and :end_date
    #   (dates are left as the scraped strings)
    def education
      @education ||= @page.search(".background-education .education").map do |item|
        # The last <h5> carries degree and major; it may be absent entirely.
        details = item.search("h5").last
        degree = text_at(details, ".degree")
        period = text_at(item, ".education-date")
        start_date, end_date = period.split(DATE_RANGE_SEPARATOR) if period

        {
          :name => text_at(item, "h4"),
          :description => details && squish(details.text),
          :degree => degree && degree.sub(/,\z/, ""),
          :major => text_at(details, ".major"),
          :period => period,
          :start_date => start_date,
          :end_date => end_date
        }
      end
    end

    # @return [Array<String>] values of the "url" query parameter of the first
    #   link in each websites section; sections without a usable link
    #   contribute nothing
    def websites
      @websites ||= @page.search("#overview-summary-websites").flat_map do |site|
        anchor = site.at("a")
        href = anchor && anchor["href"]
        next [] unless href

        query = URI.parse("#{LINKEDIN_BASE_URL}#{href}").query
        query ? CGI.parse(query)["url"] : []
      end
    end

    # @return [Array<Hash>] one hash per group with :name and :link (nil when
    #   the entry has no anchor)
    def groups
      @groups ||= @page.search(".groups-name").map do |item|
        anchor = item.at("a")
        { :name => squish(item.text), :link => anchor && "#{LINKEDIN_BASE_URL}#{anchor["href"]}" }
      end
    end

    # @return [Array<Hash>] one hash per organization with :name, :start_date
    #   and :end_date (Date objects, nil when missing or unparseable)
    def organizations
      @organizations ||= @page.search("#background-organizations .section-item").map do |item|
        period = text_at(item, ".organizations-date")
        start_date, end_date = period.split(DATE_RANGE_SEPARATOR) if period

        # parse_date (rather than Date.parse) so year-only dates are handled
        # the same way as for companies and projects.
        { :name => text_at(item, ".summary"), :start_date => parse_date(start_date), :end_date => parse_date(end_date) }
      end
    end

    # @return [Array<Hash>] one hash per language with :language and
    #   :proficiency
    def languages
      @languages ||= @page.search(".background-languages #languages ol li").map do |item|
        heading = item.at("h4")
        { :language => heading && heading.text, :proficiency => text_at(item, "div.languages-proficiency") }
      end
    end

    # @return [Array<Hash>] one hash per matched element with :name,
    #   :authority, :license and :start_date
    def certifications
      # NOTE(review): unlike the other section selectors in this class, this
      # one has no "." or "#" prefix, so it matches <background-certifications>
      # elements by tag name. It is probably meant to be a class or id
      # selector; confirm against the page markup before changing it.
      @certifications ||= @page.search("background-certifications").map do |item|
        {
          :name => text_at(item, "h4"),
          :authority => text_at(item, "h5"),
          :license => text_at(item, ".specifics/.licence-number"),
          :start_date => text_at(item, ".certification-date")
        }
      end
    end

    # @return [Array<Hash>] one hash per suggested profile with :link, :name,
    #   :title and :company; title and company are the parts of the headline
    #   before and after " at "
    def recommended_visitors
      @recommended_visitors ||= @page.search(".insights-browse-map/ul/li").map do |visitor|
        anchor = visitor.at("a")
        name_node = visitor.at("h4/a")
        headline = visitor.at(".browse-map-title")
        title, company = headline.text.gsub("...", " ").split(" at ") if headline

        { :link => anchor && anchor["href"], :name => name_node && name_node.text, :title => title, :company => company }
      end
    end

    # @return [Array<Hash>] one hash per project with :title, :link,
    #   :start_date, :end_date (Date or nil), :description and :associates
    def projects
      @projects ||= @page.search(".background-projects/div").map do |wrapper|
        project = wrapper.at("div")
        period = text_at(project, ".projects-date")
        start_date, end_date = period.split(DATE_RANGE_SEPARATOR) if period

        title_node = project && project.at("hgroup/h4 span:first-of-type")
        link_node = project && project.at("hgroup/h4 a:first-of-type")
        description = project && project.at(".description")
        associates = project && project.at(".associated-list ul")

        {
          :title => title_node && title_node.text,
          :link => link_node && link_node["href"],
          :start_date => parse_date(start_date),
          :end_date => parse_date(end_date),
          :description => description && description.text,
          # Children without an anchor (e.g. whitespace text nodes) are skipped.
          :associates => associates && associates.children.map { |child| child.at("a") }.compact.map(&:text)
        }
      end
    end

    # Serializes every attribute listed in ATTRIBUTES.
    #
    # Accepts (and forwards) the generator arguments the json library passes
    # when a profile is nested inside another structure being serialized.
    #
    # @return [String] JSON object keyed by attribute name
    def to_json(*args)
      require "json"
      ATTRIBUTES.each_with_object({}) { |attr, hash| hash[attr.to_sym] = public_send(attr) }.to_json(*args)
    end

    private

    # Collapses every run of whitespace into one space and trims the ends.
    def squish(text)
      text.gsub(/\s+/, " ").strip
    end

    # Squished text of the first match of +selector+ under +node+, or nil when
    # +node+ is nil or nothing matches.
    def text_at(node, selector)
      found = node && node.at(selector)
      squish(found.text) if found
    end

    # Full name split into [first word, remainder]; empty when unavailable.
    def name_parts
      node = @page.at(".full-name")
      node ? node.text.split(" ", 2).map(&:strip) : []
    end

    # Comma-separated pieces of the locality text; empty when unavailable.
    def locality_parts
      node = @page.at(".locality")
      node ? node.text.split(",").map(&:strip) : []
    end

    # Builds one hash per position of the given kind.
    #
    # @param type [String] "past" or "current" (interpolated into the
    #   position CSS class)
    # @return [Array<Hash>] hashes with :title, :company, :description (each
    #   only when present on the page), :duration, :start_date and :end_date,
    #   merged with the company details when the position links to a company
    def get_companies(type)
      @page.search(".background-experience .#{type}-position").map do |node|
        heading = node.at("h4")
        employer = heading && heading.next
        summary_node = node.at(".description")
        dates = node.at(".experience-date-locale")
        start_date, end_date = dates.text.strip.split(DATE_RANGE_SEPARATOR) if dates

        company = {}
        company[:title] = squish(heading.text) if heading
        company[:company] = squish(employer.text) if employer
        company[:description] = squish(summary_node.text) if summary_node
        # The duration is the text inside the last pair of parentheses.
        company[:duration] = dates && dates.text[/.*\((.*)\)/, 1]
        company[:start_date] = parse_date(start_date)
        company[:end_date] = parse_date(end_date)

        link_node = employer && employer.at("a")
        company_link = link_node && link_node["href"]
        # Without a company link there is no company page to look up.
        company.merge!(get_company_details(company_link)) if company_link
        company
      end
    end

    # Parses a scraped date; a bare four-digit year becomes January 1st of
    # that year.
    #
    # @param date [String, nil]
    # @return [Date, nil] nil when +date+ is nil or cannot be parsed
    def parse_date(date)
      return nil if date.nil?

      date = "#{date}-01-01" if date =~ /\A(19|20)\d{2}\z/
      Date.parse(date)
    rescue ArgumentError, TypeError
      nil
    end

    # Fetches a company page and extracts its details.
    #
    # @param link [String] absolute or site-relative company link
    # @return [Hash] :linkedin_company_url plus, when present on the page,
    #   :url, :address and one entry per heading/value pair of the basic-info
    #   list (key derived from the heading text)
    def get_company_details(link)
      result = { :linkedin_company_url => get_linkedin_company_url(link) }
      page = http_client.get(result[:linkedin_company_url])

      website = page.at(".basic-info-about/ul/li/p/a")
      result[:url] = website.text if website

      info = page.at(".basic-info-about/ul")
      if info
        info.search("p").zip(info.search("h4")).each do |value, heading|
          next unless heading # more values than headings: nothing to key on

          result[heading.text.gsub(" ", "_").downcase.to_sym] = value.text.strip
        end
      end

      headquarters = page.at(".vcard.hq")
      address = headquarters && headquarters.at(".adr")
      result[:address] = address.text.gsub("\n", " ").strip if address
      result
    end

    # Builds a fresh Mechanize agent with a random user agent, the optional
    # proxy from the constructor options, and page history disabled.
    def http_client
      Mechanize.new do |agent|
        agent.user_agent_alias = USER_AGENTS.sample
        agent.set_proxy(@options[:proxy_ip], @options[:proxy_port]) unless @options.empty?
        agent.max_history = 0
      end
    end

    # @param link [String, nil] absolute or site-relative link
    # @return [String] +link+ unchanged when it already points at
    #   www.linkedin.com, otherwise +link+ appended to the site root (leading
    #   slashes are dropped so the result has no "//" in its path)
    def get_linkedin_company_url(link)
      return link if link.to_s =~ ABSOLUTE_LINKEDIN_URL

      "#{LINKEDIN_BASE_URL}/#{link.to_s.sub(%r{\A/+}, "")}"
    end
  end
end