linkedin-scraper 0.0.7 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4821d7e30aa48eac2fe54f5feaebe664f8c7e65f
4
+ data.tar.gz: 55f77b9d3dc2cee5a4e0f27b50116b058b585a72
5
+ SHA512:
6
+ metadata.gz: 83aa94cbaa5de9e2711d2cbb8672c9a81986ac48eb379fdcf99d9a64e3791bd5a945d76ea5266daa9781886de5feb5b1c3a0905107e3ea71894937e455e2aa7f
7
+ data.tar.gz: 6dbcdab792fb1551d9e2fc79909e94ed5f4fa2da5ece3ef8e0b37307f1bbcf69b1d1c9e8d773fb9ece146f5ff64eba7b04716b71a735ecf998740ecd34865a41
data/README.md ADDED
@@ -0,0 +1,256 @@
1
+ Linkedin Scraper
2
+ ================
3
+
4
+ Linkedin-scraper is a gem for scraping linkedin public profiles.
5
+ You give it a URL, and it lets you easily get its title, name, country, area, current_companies and much more.
6
+
7
+ Installation
8
+ ------------
9
+
10
+ Install the gem from RubyGems:
11
+
12
+ gem install linkedin-scraper
13
+
14
+ This gem is tested on Ruby versions 1.8.7, 1.9.2, 1.9.3 and 2.0.0
15
+
16
+ Usage
17
+ -----
18
+
19
+ Initialize a scraper instance for a URL, like this:
20
+
21
+ profile = Linkedin::Profile.get_profile("http://www.linkedin.com/in/jeffweiner08")
22
+
23
+ Then you can see the scraped data like this:
24
+
25
+
26
+ profile.first_name #the First name of the contact
27
+
28
+ profile.last_name #the last name of the contact
29
+
30
+ profile.title #the linkedin job title
31
+
32
+ profile.location #the location of the contact
33
+
34
+ profile.country #the country of the contact
35
+
36
+ profile.industry #the industry to which the contact belongs
37
+
38
+ profile.picture #the profile pic url of contact
39
+
40
+ profile.current_companies
41
+
42
+ [
43
+ [0] {
44
+ :current_company => "LinkedIn",
45
+ :current_title => "CEO",
46
+ :current_company_url => "http://www.linkedin.com",
47
+ :description => nil,
48
+ :linkedin_company_url => "http://www.linkedin.com/company/linkedin?trk=ppro_cprof",
49
+ :url => "http://www.linkedin.com",
50
+ :type => "Public Company",
51
+ :company_size => "1001-5000 employees",
52
+ :website => "http://www.linkedin.com",
53
+ :industry => "Internet",
54
+ :founded => "2003",
55
+ :address => "2029 Stierlin Court Mountain View, CA 94043 United States"
56
+ },
57
+ [1] {
58
+ :current_company => "Intuit",
59
+ :current_title => "Member, Board of Directors",
60
+ :current_company_url => "http://network.intuit.com/",
61
+ :description => nil,
62
+ :linkedin_company_url => "http://www.linkedin.com/company/intuit?trk=ppro_cprof",
63
+ :url => "http://network.intuit.com/",
64
+ :type => "Public Company",
65
+ :company_size => "5001-10,000 employees",
66
+ :website => "http://network.intuit.com/",
67
+ :industry => "Computer Software",
68
+ :founded => "1983",
69
+ :address => "2632 Marine Way Mountain View, CA 94043 United States"
70
+ },
71
+ [2] {
72
+ :current_company => "DonorsChoose",
73
+ :current_title => "Member, Board of Directors",
74
+ :current_company_url => "http://www.donorschoose.org",
75
+ :description => nil,
76
+ :linkedin_company_url => "http://www.linkedin.com/company/donorschoose.org?trk=ppro_cprof",
77
+ :url => "http://www.donorschoose.org",
78
+ :type => "Nonprofit",
79
+ :company_size => "51-200 employees",
80
+ :website => "http://www.donorschoose.org",
81
+ :industry => "Nonprofit Organization Management",
82
+ :founded => "2000",
83
+ :address => "213 West 35th Street 2nd Floor East New York, NY 10001 United States"
84
+ },
85
+ [3] {
86
+ :current_company => "Malaria No More",
87
+ :current_title => "Member, Board of Directors",
88
+ :current_company_url => nil,
89
+ :description => nil
90
+ },
91
+ [4] {
92
+ :current_company => "Venture For America",
93
+ :current_title => "Member, Advisory Board",
94
+ :current_company_url => "http://ventureforamerica.org/",
95
+ :description => nil,
96
+ :linkedin_company_url => "http://www.linkedin.com/company/venture-for-america?trk=ppro_cprof",
97
+ :url => "http://ventureforamerica.org/",
98
+ :type => "Nonprofit",
99
+ :company_size => "1-10 employees",
100
+ :website => "http://ventureforamerica.org/",
101
+ :industry => "Nonprofit Organization Management",
102
+ :founded => "2011"
103
+ }
104
+ ]
105
+
106
+
107
+ profile.past_companies
108
+ #Array of hash containing its past job companies and job profile
109
+ #Example
110
+ [
111
+ [0] {
112
+ :past_company => "Accel Partners",
113
+ :past_title => "Executive in Residence",
114
+ :past_company_website => "http://www.facebook.com/accel",
115
+ :description => nil,
116
+ :linkedin_company_url => "http://www.linkedin.com/company/accel-partners?trk=ppro_cprof",
117
+ :url => "http://www.facebook.com/accel",
118
+ :type => "Partnership",
119
+ :company_size => "51-200 employees",
120
+ :website => "http://www.facebook.com/accel",
121
+ :industry => "Venture Capital & Private Equity",
122
+ :address => "428 University Palo Alto, CA 94301 United States"
123
+ },
124
+ [1] {
125
+ :past_company => "Greylock",
126
+ :past_title => "Executive in Residence",
127
+ :past_company_website => "http://www.greylock.com",
128
+ :description => nil,
129
+ :linkedin_company_url => "http://www.linkedin.com/company/greylock-partners?trk=ppro_cprof",
130
+ :url => "http://www.greylock.com",
131
+ :type => "Partnership",
132
+ :company_size => "51-200 employees",
133
+ :website => "http://www.greylock.com",
134
+ :industry => "Venture Capital & Private Equity",
135
+ :address => "2550 Sand Hill Road Menlo Park, CA 94025 United States"
136
+ },
137
+ [2] {
138
+ :past_company => "Yahoo!",
139
+ :past_title => "Executive Vice President Network Division",
140
+ :past_company_website => "http://www.yahoo.com",
141
+ :description => nil,
142
+ :linkedin_company_url => "http://www.linkedin.com/company/yahoo?trk=ppro_cprof",
143
+ :url => "http://www.yahoo.com",
144
+ :type => "Public Company",
145
+ :company_size => "10,001+ employees",
146
+ :website => "http://www.yahoo.com",
147
+ :industry => "Internet",
148
+ :founded => "1994",
149
+ :address => "701 First Avenue Sunnyvale, CA 94089 United States"
150
+ },
151
+ [3] {
152
+ :past_company => "Windsor Media",
153
+ :past_title => "Founding Partner",
154
+ :past_company_website => nil,
155
+ :description => nil
156
+ },
157
+ [4] {
158
+ :past_company => "Warner Bros.",
159
+ :past_title => "Vice President Online",
160
+ :past_company_website => "http://www.warnerbros.com/",
161
+ :description => nil,
162
+ :linkedin_company_url => "http://www.linkedin.com/company/warner-bros.-entertainment-group-of-companies?trk=ppro_cprof",
163
+ :url => "http://www.warnerbros.com/",
164
+ :type => "Public Company",
165
+ :company_size => "10,001+ employees",
166
+ :website => "http://www.warnerbros.com/",
167
+ :industry => "Entertainment",
168
+ :address => "4000 Warner Boulevard Burbank, CA 91522 United States"
169
+ }
170
+ ]
171
+
172
+
173
+ profile.linkedin_url #url of the profile
174
+
175
+ profile.websites
176
+ #Array of websites
177
+ [
178
+ [0] "http://www.linkedin.com/"
179
+ ]
180
+
181
+ profile.groups
182
+ #Array of hashes containing group name and link
183
+
184
+
185
+ profile.education
186
+ #Array of hashes for education
187
+
188
+ profile.skills
189
+ #Array of skills
190
+
191
+ profile.picture
192
+ #url of the profile picture
193
+
194
+
195
+ profile.recommended_visitors
196
+ #It's the list of visitors "Viewers of this profile also viewed..."
197
+ [
198
+ [0] {
199
+ :link => "http://www.linkedin.com/in/barackobama?trk=pub-pbmap",
200
+ :name => "Barack Obama",
201
+ :title => "President of the United States of ",
202
+ :company => nil
203
+ },
204
+ [1] {
205
+ :link => "http://www.linkedin.com/in/marissamayer?trk=pub-pbmap",
206
+ :name => "Marissa Mayer",
207
+ :title => "Yahoo!, President & CEO",
208
+ :company => nil
209
+ },
210
+ [2] {
211
+ :link => "http://www.linkedin.com/pub/sean-parker/0/1/826?trk=pub-pbmap",
212
+ :name => "Sean Parker",
213
+ :title => nil,
214
+ :company => nil
215
+ },
216
+ [3] {
217
+ :link => "http://www.linkedin.com/pub/eduardo-saverin/0/70a/31b?trk=pub-pbmap",
218
+ :name => "Eduardo Saverin",
219
+ :title => nil,
220
+ :company => nil
221
+ },
222
+ [4] {
223
+ :link => "http://www.linkedin.com/in/rbranson?trk=pub-pbmap",
224
+ :name => "Richard Branson",
225
+ :title => "Founder",
226
+ :company => "Virgin Group"
227
+ },
228
+ [5] {
229
+ :link => "http://www.linkedin.com/in/reidhoffman?trk=pub-pbmap",
230
+ :name => "Reid Hoffman",
231
+ :title => "Entrepreneur. Product Strategist. ",
232
+ :company => nil
233
+ },
234
+ [6] {
235
+ :link => "http://www.linkedin.com/in/mdell?trk=pub-pbmap",
236
+ :name => "Michael Dell",
237
+ :title => "Chairman and CEO",
238
+ :company => "Dell"
239
+ },
240
+ [7] {
241
+ :link => "http://www.linkedin.com/in/mittromney?trk=pub-pbmap",
242
+ :name => "Mitt Romney",
243
+ :title => "Believe in America",
244
+ :company => nil
245
+ },
246
+ [8] {
247
+ :link => "http://www.linkedin.com/pub/sheryl-sandberg/2/665/512?trk=pub-pbmap",
248
+ :name => "Sheryl Sandberg",
249
+ :title => nil,
250
+ :company => nil
251
+ }
252
+ ]
253
+
254
+
255
+
256
+ You're welcome to fork this project and send pull requests. I want to thank specially:
@@ -1,132 +1,76 @@
1
- USER_AGENTS = ["Windows IE 6", "Windows IE 7", "Windows Mozilla", "Mac Safari", "Mac FireFox", "Mac Mozilla", "Linux Mozilla", "Linux Firefox", "Linux Konqueror"]
1
+ # -*- coding: utf-8 -*-
2
2
  module Linkedin
3
- class Profile
4
- #the First name of the contact
5
- attr_accessor :first_name,:last_name,:title,:location,:country,
6
- :industry, :linkedin_url,:recommended_visitors,:page
7
- #Array of hashes for eduction
8
- # [
9
- # [0] {
10
- # :name => "Vishwakarma Institute of Technology",
11
- # :description => "B.Tech, Computer Engineering",
12
- # :period => "2007 – 2011"
13
- # },
14
- # [1] {
15
- # :name => "St Ursula's High School",
16
- # :description => "Secondary School Education",
17
- # :period => nil
18
- # }
19
- # ]
20
- attr_accessor :education
3
+ class Profile
21
4
 
22
- #Array of websites
23
- #[
24
- #[0] "http://www.yatishmehta.in"
25
- #]
26
- attr_accessor :websites
27
- #array of hashes containing group name and link
28
- # [
29
- # [ 0] {
30
- # :name => "Business on Rails",
31
- # :link => "http://www.linkedin.com/groups/Business-on-Rails-27822"
32
- # },
33
- # [ 1] {
34
- # :name => "HTML5 Technologies",
35
- # :link => "http://www.linkedin.com/groups/HTML5-Technologies-2868882"
36
- # },
37
- # [ 2] {
38
- # :name => "India on Rails",
39
- # :link => "http://www.linkedin.com/groups/India-on-Rails-149940"
40
- # :name => "Open Source",
41
- # :link => "http://www.linkedin.com/groups?gid=43875"
42
- # },
43
- # [ 4] {
44
- # :name => "Rails Developers",
45
- # :link => "http://www.linkedin.com/groups?gid=77764"
46
- # },
47
- # ]
48
- attr_accessor:groups
5
+ USER_AGENTS = ["Windows IE 6", "Windows IE 7", "Windows Mozilla", "Mac Safari", "Mac FireFox", "Mac Mozilla", "Linux Mozilla", "Linux Firefox", "Linux Konqueror"]
49
6
 
7
+ attr_accessor :first_name,:last_name,:title,:location,:country, :industry,:picture,:linkedin_url,:recommended_visitors,:page
8
+
9
+ attr_accessor :education
50
10
 
51
- #Array of hash containing its past job companies and job profile
52
- #Example
53
- # [
54
- # [0] {
55
- # :past_company => "Consumyze Software",
56
- # :past_title => "Trainee",
57
- # :past_company_website => "http://www.consumyze.com",
58
- # :description => "Responsible for design and development"
59
- # },
60
- # [1] {
61
- # :past_company => "SunGard Global Services",
62
- # :past_title => "Project Intern",
63
- # :past_company_website => "http://www.sungard.com/globalservices/learnmore",
64
- # :description => "Fame PassPoint. Developed an entirely Ajax based online control panel for user management and Data access for Fame"
65
- # }
66
- # ]
11
+ attr_accessor :websites
67
12
 
13
+ attr_accessor:groups
68
14
 
69
15
  attr_accessor :past_companies
70
- #Array of hash containing its current job companies and job profile
71
- #Example
72
- # [
73
- # [0] {
74
- # :current_title => "Intern",
75
- # :current_company => "Sungard"
76
- # :current_company_url=>"http://www.betterlabs.net",
77
- # :description=>"Responsible for design and development of projects on Ruby on Rails."
78
- # },
79
- # [1] {
80
- # :current_title => "Software Developer",
81
- # :current_company => "Microsoft"
82
- # :current_company_url =>"http://www.microsoft.net",
83
- # :description =>"Development and design"
84
-
85
- # }
86
- # ]
16
+
87
17
  attr_accessor :current_companies
88
- #url of the profile
89
-
90
-
91
- def initialize(page,url)
92
- @first_name=get_first_name(page)
93
- @last_name=get_last_name(page)
94
- @title=get_title(page)
95
- @location=get_location(page)
96
- @country=get_country(page)
97
- @industry=get_industry(page)
98
- @current_companies=get_current_companies page
99
- @past_companies=get_past_companies page
100
- @recommended_visitors=get_recommended_visitors page
101
- @education=get_education page
102
- @linkedin_url=url
103
- @websites=get_websites page
104
- @groups=get_groups page
105
- @page=page
18
+
19
+ attr_accessor :skills
20
+
21
+ def initialize(page,url)
22
+ @first_name = get_first_name(page)
23
+ @last_name = get_last_name(page)
24
+ @title = get_title(page)
25
+ @location = get_location(page)
26
+ @country = get_country(page)
27
+ @industry = get_industry(page)
28
+ @picture = get_picture(page)
29
+ @current_companies = get_current_companies(page)
30
+ @past_companies = get_past_companies(page)
31
+ @recommended_visitors = get_recommended_visitors(page)
32
+ @education = get_education(page)
33
+ @linkedin_url = url
34
+ @websites = get_websites(page)
35
+ @groups = get_groups(page)
36
+ @skills = get_skills(page)
37
+ @page = page
106
38
  end
107
39
  #returns:nil if it gives a 404 request
108
40
 
109
- def self.get_profile url
41
+ def self.get_profile(url)
110
42
  begin
111
- @agent=Mechanize.new
43
+ @agent = Mechanize.new
112
44
  @agent.user_agent_alias = USER_AGENTS.sample
113
45
  @agent.max_history = 0
114
- page=@agent.get url
46
+ page = @agent.get(url)
115
47
  return Linkedin::Profile.new(page, url)
116
48
  rescue=>e
117
49
  puts e
118
50
  end
119
51
  end
120
52
 
121
- def get_company_url node
53
+ def get_skills(page)
54
+ page.search('.competency.show-bean').map{|skill|skill.text.strip if skill.text}
55
+ end
56
+
57
+ def get_company_url(node)
58
+ result={}
122
59
  if node.at("h4/strong/a")
123
- link=node.at("h4/strong/a")["href"]
124
- @agent=Mechanize.new
60
+ link = node.at("h4/strong/a")["href"]
61
+ @agent = Mechanize.new
125
62
  @agent.user_agent_alias = USER_AGENTS.sample
126
63
  @agent.max_history = 0
127
- page=@agent.get("http://www.linkedin.com"+link)
128
- url=page.at(".basic-info/div/dl/dd/a").text if page.at(".basic-info/div/dl/dd/a")
64
+ page = @agent.get("http://www.linkedin.com"+link)
65
+ result[:linkedin_company_url] = "http://www.linkedin.com"+link
66
+ result[:url] = page.at(".basic-info/div/dl/dd/a").text if page.at(".basic-info/div/dl/dd/a")
67
+ node_2 = page.at(".basic-info").at(".content.inner-mod")
68
+ node_2.search("dd").zip(node_2.search("dt")).each do |value,title|
69
+ result[title.text.gsub(" ","_").downcase.to_sym] = value.text.strip
70
+ end
71
+ result[:address] = page.at(".vcard.hq").at(".adr").text.gsub("\n"," ").strip if page.at(".vcard.hq")
129
72
  end
73
+ result
130
74
  end
131
75
 
132
76
  private
@@ -155,89 +99,92 @@ module Linkedin
155
99
  return page.at(".industry").text.gsub(/\s+/, " ").strip if page.search(".industry").first
156
100
  end
157
101
 
102
+ def get_picture page
103
+ return page.at("#profile-picture/img.photo").attributes['src'].value.strip if page.search("#profile-picture/img.photo").first
104
+ end
105
+
158
106
  def get_past_companies page
159
107
  past_cs=[]
160
108
  if page.search(".position.experience.vevent.vcard.summary-past").first
161
109
  page.search(".position.experience.vevent.vcard.summary-past").each do |past_company|
162
- url=get_company_url past_company
163
- title=past_company.at("h3").text.gsub(/\s+|\n/, " ").strip if past_company.at("h3")
164
- company=past_company.at("h4").text.gsub(/\s+|\n/, " ").strip if past_company.at("h4")
165
- description=past_company.at(".description.past-position").text.gsub(/\s+|\n/, " ").strip if past_company.at(".description.past-position")
166
- past_company={:past_company=>company,:past_title=> title,:past_company_website=>url,:description=>description}
167
- past_cs<<past_company
110
+ result = get_company_url past_company
111
+ url = result[:url]
112
+ title = past_company.at("h3").text.gsub(/\s+|\n/, " ").strip if past_company.at("h3")
113
+ company = past_company.at("h4").text.gsub(/\s+|\n/, " ").strip if past_company.at("h4")
114
+ description = past_company.at(".description.past-position").text.gsub(/\s+|\n/, " ").strip if past_company.at(".description.past-position")
115
+ p_company = {:past_company=>company,:past_title=> title,:past_company_website=>url,:description=>description}
116
+ p_company = p_company.merge(result)
117
+ past_cs << p_company
168
118
  end
169
119
  return past_cs
170
120
  end
171
121
  end
172
122
 
173
123
  def get_current_companies page
174
- current_cs=[]
124
+ current_cs = []
175
125
  if page.search(".position.experience.vevent.vcard.summary-current").first
176
126
  page.search(".position.experience.vevent.vcard.summary-current").each do |current_company|
177
- url=get_company_url current_company
178
- title=current_company.at("h3").text.gsub(/\s+|\n/, " ").strip if current_company.at("h3")
179
- company=current_company.at("h4").text.gsub(/\s+|\n/, " ").strip if current_company.at("h4")
180
- description=current_company.at(".description.current-position").text.gsub(/\s+|\n/, " ").strip if current_company.at(".description.current-position")
181
- current_company={:current_company=>company,:current_title=> title,:current_company_url=>url,:description=>description}
182
- current_cs<<current_company
127
+ result = get_company_url current_company
128
+ url = result[:url]
129
+ title = current_company.at("h3").text.gsub(/\s+|\n/, " ").strip if current_company.at("h3")
130
+ company = current_company.at("h4").text.gsub(/\s+|\n/, " ").strip if current_company.at("h4")
131
+ description = current_company.at(".description.current-position").text.gsub(/\s+|\n/, " ").strip if current_company.at(".description.current-position")
132
+ current_company = {:current_company=>company,:current_title=> title,:current_company_url=>url,:description=>description}
133
+ current_cs << current_company.merge(result)
183
134
  end
184
135
  return current_cs
185
136
  end
186
137
  end
187
138
 
188
- def get_education page
139
+ def get_education(page)
189
140
  education=[]
190
141
  if page.search(".position.education.vevent.vcard").first
191
142
  page.search(".position.education.vevent.vcard").each do |item|
192
- name=item.at("h3").text.gsub(/\s+|\n/, " ").strip if item.at("h3")
193
- desc=item.at("h4").text.gsub(/\s+|\n/, " ").strip if item.at("h4")
194
- period=item.at(".period").text.gsub(/\s+|\n/, " ").strip if item.at(".period")
195
- edu={:name=>name,:description=>desc,:period=>period}
196
- education<<edu
143
+ name = item.at("h3").text.gsub(/\s+|\n/, " ").strip if item.at("h3")
144
+ desc = item.at("h4").text.gsub(/\s+|\n/, " ").strip if item.at("h4")
145
+ period = item.at(".period").text.gsub(/\s+|\n/, " ").strip if item.at(".period")
146
+ edu = {:name => name,:description => desc,:period => period}
147
+ education << edu
197
148
  end
198
149
  return education
199
150
  end
200
151
  end
201
152
 
202
- def get_websites page
153
+ def get_websites(page)
203
154
  websites=[]
204
155
  if page.search(".website").first
205
156
  page.search(".website").each do |site|
206
- url=site.at("a")["href"]
207
- url="http://www.linkedin.com"+url
208
- url=CGI.parse(URI.parse(url).query)["url"]
209
- websites<<url
157
+ url = site.at("a")["href"]
158
+ url = "http://www.linkedin.com"+url
159
+ url = CGI.parse(URI.parse(url).query)["url"]
160
+ websites << url
210
161
  end
211
162
  return websites.flatten!
212
- end
163
+ end
213
164
  end
214
165
 
215
- def get_groups page
216
- groups=[]
166
+ def get_groups(page)
167
+ groups = []
217
168
  if page.search(".group-data").first
218
169
  page.search(".group-data").each do |item|
219
- name=item.text.gsub(/\s+|\n/, " ").strip
220
- link="http://www.linkedin.com"+item.at("a")["href"]
221
- groups<<{:name=>name,:link=>link}
170
+ name = item.text.gsub(/\s+|\n/, " ").strip
171
+ link = "http://www.linkedin.com"+item.at("a")["href"]
172
+ groups << {:name=>name,:link=>link}
222
173
  end
223
174
  return groups
224
175
  end
225
-
226
176
  end
227
177
 
228
-
229
-
230
-
231
- def get_recommended_visitors page
178
+ def get_recommended_visitors(page)
232
179
  recommended_vs=[]
233
180
  if page.search(".browsemap").first
234
181
  page.at(".browsemap").at("ul").search("li").each do |visitor|
235
- v={}
236
- v[:link]=visitor.at('a').attributes["href"]
237
- v[:name]=visitor.at('a').text
238
- v[:title]=visitor.at('.headline').text.split(" at ").first
239
- v[:company]=visitor.at('.headline').text.split(" at ").last
240
- recommended_vs<<v
182
+ v = {}
183
+ v[:link] = visitor.at('a')["href"]
184
+ v[:name] = visitor.at('strong/a').text
185
+ v[:title] = visitor.at('.headline').text.gsub("..."," ").split(" at ").first
186
+ v[:company] = visitor.at('.headline').text.gsub("..."," ").split(" at ")[1]
187
+ recommended_vs << v
241
188
  end
242
189
  return recommended_vs
243
190
  end
@@ -1,5 +1,5 @@
1
1
  module Linkedin
2
2
  module Scraper
3
- VERSION = "0.0.7"
3
+ VERSION = "0.0.8"
4
4
  end
5
5
  end
@@ -3,13 +3,10 @@ require File.expand_path('../lib/linkedin-scraper/version', __FILE__)
3
3
 
4
4
  Gem::Specification.new do |gem|
5
5
  gem.authors = ["Yatish Mehta"]
6
- gem.email = ["yatishmehta27@gmail.com"]
7
6
  gem.description = %q{Scrapes the linkedin profile when a url is given }
8
7
  gem.summary = %q{when a url of public linkedin profile page is given it scrapes the entire page and converts into a accessible object}
9
8
  gem.homepage = "https://github.com/yatishmehta27/linkedin-scraper"
10
-
11
- gem.add_dependency(%q<mechanize>, [">= 0"])
12
-
9
+ gem.add_dependency(%q<mechanize>, [">= 0"])
13
10
  gem.files = `git ls-files`.split($\)
14
11
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
15
12
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
@@ -0,0 +1,13 @@
1
+ require 'spec_helper'
2
+ require 'linkedin-scraper'
3
+
4
+
5
+ describe Linkedin::Profile do
6
+ describe "::get_profile" do
7
+ it "Create an instance of profile class and populate it will all details" do
8
+ @profile = Linkedin::Profile.get_profile("http://www.linkedin.com/in/jeffweiner08")
9
+ @profile.first_name.should == "Jeff"
10
+ #other parameters may change with time
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,18 @@
1
+ $: << File.join(File.dirname(__FILE__), '../lib')
2
+ # This file was generated by the `rspec --init` command. Conventionally, all
3
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
4
+ # Require this file using `require "spec_helper"` to ensure that it is only
5
+ # loaded once.
6
+ #
7
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
8
+ RSpec.configure do |config|
9
+ config.treat_symbols_as_metadata_keys_with_true_values = true
10
+ config.run_all_when_everything_filtered = true
11
+ config.filter_run :focus
12
+
13
+ # Run specs in random order to surface order dependencies. If you find an
14
+ # order dependency and want to debug it, you can fix the order by providing
15
+ # the seed, which is printed after each run.
16
+ # --seed 1234
17
+ config.order = 'random'
18
+ end
metadata CHANGED
@@ -1,35 +1,31 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkedin-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
5
- prerelease:
4
+ version: 0.0.8
6
5
  platform: ruby
7
6
  authors:
8
7
  - Yatish Mehta
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2012-08-02 00:00:00.000000000 Z
11
+ date: 2013-03-12 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: mechanize
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ! '>='
17
+ - - '>='
20
18
  - !ruby/object:Gem::Version
21
19
  version: '0'
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ! '>='
24
+ - - '>='
28
25
  - !ruby/object:Gem::Version
29
26
  version: '0'
30
- description: ! 'Scrapes the linkedin profile when a url is given '
31
- email:
32
- - yatishmehta27@gmail.com
27
+ description: 'Scrapes the linkedin profile when a url is given '
28
+ email:
33
29
  executables: []
34
30
  extensions: []
35
31
  extra_rdoc_files: []
@@ -37,35 +33,38 @@ files:
37
33
  - .gitignore
38
34
  - Gemfile
39
35
  - LICENSE
40
- - README.rdoc
36
+ - README.md
41
37
  - Rakefile
42
38
  - lib/linkedin-scraper.rb
43
39
  - lib/linkedin-scraper/profile.rb
44
40
  - lib/linkedin-scraper/version.rb
45
41
  - linkedin-scraper.gemspec
42
+ - spec/linkedin-scraper/profile_spec.rb
43
+ - spec/spec_helper.rb
46
44
  homepage: https://github.com/yatishmehta27/linkedin-scraper
47
45
  licenses: []
46
+ metadata: {}
48
47
  post_install_message:
49
48
  rdoc_options: []
50
49
  require_paths:
51
50
  - lib
52
51
  required_ruby_version: !ruby/object:Gem::Requirement
53
- none: false
54
52
  requirements:
55
- - - ! '>='
53
+ - - '>='
56
54
  - !ruby/object:Gem::Version
57
55
  version: '0'
58
56
  required_rubygems_version: !ruby/object:Gem::Requirement
59
- none: false
60
57
  requirements:
61
- - - ! '>='
58
+ - - '>='
62
59
  - !ruby/object:Gem::Version
63
60
  version: '0'
64
61
  requirements: []
65
62
  rubyforge_project:
66
- rubygems_version: 1.8.24
63
+ rubygems_version: 2.0.0
67
64
  signing_key:
68
- specification_version: 3
65
+ specification_version: 4
69
66
  summary: when a url of public linkedin profile page is given it scrapes the entire
70
67
  page and converts into a accessible object
71
- test_files: []
68
+ test_files:
69
+ - spec/linkedin-scraper/profile_spec.rb
70
+ - spec/spec_helper.rb
data/README.rdoc DELETED
@@ -1,134 +0,0 @@
1
- = Linkedin-Scraper {<img src="http://travis-ci.org/jaimeiniesta/metainspector.png" />}[http://travis-ci.org/jaimeiniesta/metainspector]
2
-
3
- Linkedin-scraper is a gem for scraping linkedin public profiles. You give it an URL, and it lets you easily get its title,name,country,area,current_companies .
4
-
5
- = Installation
6
-
7
- Install the gem from RubyGems:
8
-
9
- gem install linkedin-scraper
10
-
11
- This gem is tested on Ruby versions 1.8.7, 1.9.2 and 1.9.3.
12
-
13
- = Usage
14
-
15
- Initialize a scraper instance for an URL, like this:
16
-
17
- profile = Linkedin::Profile.get_profile('http://in.linkedin.com/pub/yatish-mehta/22/460/a86')
18
-
19
- Then you can see the scraped data like this:
20
-
21
-
22
- profile.first_name #the First name of the contact
23
-
24
- profile.last_name #the last name of the contact
25
-
26
- profile.title #the linkedin job title
27
-
28
- profile.location #the location of the contact
29
-
30
- profile.country #the country of the contact
31
-
32
- profile.industry #the domain for which the contact belongs
33
-
34
- profile.past_companies
35
- #Array of hash containing its past job companies and job profile
36
- #Example
37
- # [
38
- # [0] {
39
- # :past_company => "Consumyze Software",
40
- # :past_title => "Trainee",
41
- # :past_company_website => "http://www.consumyze.com",
42
- # :description => "Responsible for design and development"
43
- # },
44
- # [1] {
45
- # :past_company => "SunGard Global Services",
46
- # :past_title => "Project Intern",
47
- # :past_company_website => "http://www.sungard.com/globalservices/learnmore",
48
- # :description => "Fame PassPoint. Developed an entirely Ajax based online control panel for user management and Data access for Fame"
49
- # }
50
- # ]
51
- profile.current_companies
52
- #Array of hash containing its current job companies and job profile
53
- #Example
54
- # [
55
- # [0] {
56
- # :current_title => "Intern",
57
- # :current_company => "Sungard"
58
- # :current_company_url=>"http://www.betterlabs.net",
59
- # :description=>"Responsible for design and development of projects on Ruby on Rails."
60
- # },
61
- # [1] {
62
- # :current_title => "Software Developer",
63
- # :current_company => "Microsoft"
64
- # :current_company_url =>"http://www.microsoft.net",
65
- # :description =>"Development and design"
66
-
67
- # }
68
- # ]
69
-
70
-
71
- profile.linkedin_url #url of the profile
72
-
73
- profile.websites
74
- #Array of websites
75
- #[
76
- # [0] "http://www.yatishmehta.in"
77
- #]
78
-
79
- profile.groups
80
- #array of hashes containing group name and link
81
- # [
82
- # [ 0] {
83
- # :name => "Business on Rails",
84
- # :link => "http://www.linkedin.com/groups/Business-on-Rails-27822"
85
- # },
86
- # [ 1] {
87
- # :name => "HTML5 Technologies",
88
- # :link => "http://www.linkedin.com/groups/HTML5-Technologies-2868882"
89
- # },
90
- # [ 2] {
91
- # :name => "India on Rails",
92
- # :link => "http://www.linkedin.com/groups/India-on-Rails-149940"
93
- # :name => "Open Source",
94
- # :link => "http://www.linkedin.com/groups?gid=43875"
95
- # },
96
- # [ 4] {
97
- # :name => "Rails Developers",
98
- # :link => "http://www.linkedin.com/groups?gid=77764"
99
- # },
100
- # ]
101
-
102
- profile.education
103
- #Array of hashes for eduction
104
- # [
105
- # [0] {
106
- # :name => "Vishwakarma Institute of Technology",
107
- # :description => "B.Tech, Computer Engineering",
108
- # :period => "2007 – 2011"
109
- # },
110
- # [1] {
111
- # :name => "St Ursula's High School",
112
- # :description => "Secondary School Education",
113
- # :period => nil
114
- # }
115
- # ]
116
-
117
- profile.recommended_visitors
118
- #Its the list of visitors "Viewers of this profile also viewed..."
119
- #attr_accessor :recommended_visitors = [
120
- # [0] {
121
- # :link => href="http://in.linkedin.com/in/nileshavhad?trk=pub-pbmap",
122
- # :name => "Nilesh Avhad",
123
- # :title => "Engineering Manager",
124
- # :company => "Better Labs"
125
- # },
126
-
127
-
128
- = ZOMG Fork! Thank you!
129
-
130
- You're welcome to fork this project and send pull requests. I want to thank specially:
131
-
132
- = To Do
133
- *
134
- Copyright (c) 2009-2012 Yatish Mehta, released under the MIT license