linkedin-scraper 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4821d7e30aa48eac2fe54f5feaebe664f8c7e65f
4
+ data.tar.gz: 55f77b9d3dc2cee5a4e0f27b50116b058b585a72
5
+ SHA512:
6
+ metadata.gz: 83aa94cbaa5de9e2711d2cbb8672c9a81986ac48eb379fdcf99d9a64e3791bd5a945d76ea5266daa9781886de5feb5b1c3a0905107e3ea71894937e455e2aa7f
7
+ data.tar.gz: 6dbcdab792fb1551d9e2fc79909e94ed5f4fa2da5ece3ef8e0b37307f1bbcf69b1d1c9e8d773fb9ece146f5ff64eba7b04716b71a735ecf998740ecd34865a41
data/README.md ADDED
@@ -0,0 +1,256 @@
1
+ Linkedin Scraper
2
+ ================
3
+
4
+ Linkedin-scraper is a gem for scraping linkedin public profiles.
5
+ You give it a URL, and it lets you easily get its title, name, country, area, current_companies and much more.
6
+
7
+ Installation
8
+ ------------
9
+
10
+ Install the gem from RubyGems:
11
+
12
+ gem install linkedin-scraper
13
+
14
+ This gem is tested on Ruby versions 1.8.7, 1.9.2, 1.9.3 and 2.0.0
15
+
16
+ Usage
17
+ -----
18
+
19
+ Initialize a scraper instance for a URL, like this:
20
+
21
+ profile = Linkedin::Profile.get_profile("http://www.linkedin.com/in/jeffweiner08")
22
+
23
+ Then you can see the scraped data like this:
24
+
25
+
26
+ profile.first_name #the First name of the contact
27
+
28
+ profile.last_name #the last name of the contact
29
+
30
+ profile.title #the linkedin job title
31
+
32
+ profile.location #the location of the contact
33
+
34
+ profile.country #the country of the contact
35
+
36
+ profile.industry #the domain to which the contact belongs
37
+
38
+ profile.picture #the profile pic url of contact
39
+
40
+ profile.current_companies
41
+
42
+ [
43
+ [0] {
44
+ :current_company => "LinkedIn",
45
+ :current_title => "CEO",
46
+ :current_company_url => "http://www.linkedin.com",
47
+ :description => nil,
48
+ :linkedin_company_url => "http://www.linkedin.com/company/linkedin?trk=ppro_cprof",
49
+ :url => "http://www.linkedin.com",
50
+ :type => "Public Company",
51
+ :company_size => "1001-5000 employees",
52
+ :website => "http://www.linkedin.com",
53
+ :industry => "Internet",
54
+ :founded => "2003",
55
+ :address => "2029 Stierlin Court Mountain View, CA 94043 United States"
56
+ },
57
+ [1] {
58
+ :current_company => "Intuit",
59
+ :current_title => "Member, Board of Directors",
60
+ :current_company_url => "http://network.intuit.com/",
61
+ :description => nil,
62
+ :linkedin_company_url => "http://www.linkedin.com/company/intuit?trk=ppro_cprof",
63
+ :url => "http://network.intuit.com/",
64
+ :type => "Public Company",
65
+ :company_size => "5001-10,000 employees",
66
+ :website => "http://network.intuit.com/",
67
+ :industry => "Computer Software",
68
+ :founded => "1983",
69
+ :address => "2632 Marine Way Mountain View, CA 94043 United States"
70
+ },
71
+ [2] {
72
+ :current_company => "DonorsChoose",
73
+ :current_title => "Member, Board of Directors",
74
+ :current_company_url => "http://www.donorschoose.org",
75
+ :description => nil,
76
+ :linkedin_company_url => "http://www.linkedin.com/company/donorschoose.org?trk=ppro_cprof",
77
+ :url => "http://www.donorschoose.org",
78
+ :type => "Nonprofit",
79
+ :company_size => "51-200 employees",
80
+ :website => "http://www.donorschoose.org",
81
+ :industry => "Nonprofit Organization Management",
82
+ :founded => "2000",
83
+ :address => "213 West 35th Street 2nd Floor East New York, NY 10001 United States"
84
+ },
85
+ [3] {
86
+ :current_company => "Malaria No More",
87
+ :current_title => "Member, Board of Directors",
88
+ :current_company_url => nil,
89
+ :description => nil
90
+ },
91
+ [4] {
92
+ :current_company => "Venture For America",
93
+ :current_title => "Member, Advisory Board",
94
+ :current_company_url => "http://ventureforamerica.org/",
95
+ :description => nil,
96
+ :linkedin_company_url => "http://www.linkedin.com/company/venture-for-america?trk=ppro_cprof",
97
+ :url => "http://ventureforamerica.org/",
98
+ :type => "Nonprofit",
99
+ :company_size => "1-10 employees",
100
+ :website => "http://ventureforamerica.org/",
101
+ :industry => "Nonprofit Organization Management",
102
+ :founded => "2011"
103
+ }
104
+ ]
105
+
106
+
107
+ profile.past_companies
108
+ #Array of hash containing its past job companies and job profile
109
+ #Example
110
+ [
111
+ [0] {
112
+ :past_company => "Accel Partners",
113
+ :past_title => "Executive in Residence",
114
+ :past_company_website => "http://www.facebook.com/accel",
115
+ :description => nil,
116
+ :linkedin_company_url => "http://www.linkedin.com/company/accel-partners?trk=ppro_cprof",
117
+ :url => "http://www.facebook.com/accel",
118
+ :type => "Partnership",
119
+ :company_size => "51-200 employees",
120
+ :website => "http://www.facebook.com/accel",
121
+ :industry => "Venture Capital & Private Equity",
122
+ :address => "428 University Palo Alto, CA 94301 United States"
123
+ },
124
+ [1] {
125
+ :past_company => "Greylock",
126
+ :past_title => "Executive in Residence",
127
+ :past_company_website => "http://www.greylock.com",
128
+ :description => nil,
129
+ :linkedin_company_url => "http://www.linkedin.com/company/greylock-partners?trk=ppro_cprof",
130
+ :url => "http://www.greylock.com",
131
+ :type => "Partnership",
132
+ :company_size => "51-200 employees",
133
+ :website => "http://www.greylock.com",
134
+ :industry => "Venture Capital & Private Equity",
135
+ :address => "2550 Sand Hill Road Menlo Park, CA 94025 United States"
136
+ },
137
+ [2] {
138
+ :past_company => "Yahoo!",
139
+ :past_title => "Executive Vice President Network Division",
140
+ :past_company_website => "http://www.yahoo.com",
141
+ :description => nil,
142
+ :linkedin_company_url => "http://www.linkedin.com/company/yahoo?trk=ppro_cprof",
143
+ :url => "http://www.yahoo.com",
144
+ :type => "Public Company",
145
+ :company_size => "10,001+ employees",
146
+ :website => "http://www.yahoo.com",
147
+ :industry => "Internet",
148
+ :founded => "1994",
149
+ :address => "701 First Avenue Sunnyvale, CA 94089 United States"
150
+ },
151
+ [3] {
152
+ :past_company => "Windsor Media",
153
+ :past_title => "Founding Partner",
154
+ :past_company_website => nil,
155
+ :description => nil
156
+ },
157
+ [4] {
158
+ :past_company => "Warner Bros.",
159
+ :past_title => "Vice President Online",
160
+ :past_company_website => "http://www.warnerbros.com/",
161
+ :description => nil,
162
+ :linkedin_company_url => "http://www.linkedin.com/company/warner-bros.-entertainment-group-of-companies?trk=ppro_cprof",
163
+ :url => "http://www.warnerbros.com/",
164
+ :type => "Public Company",
165
+ :company_size => "10,001+ employees",
166
+ :website => "http://www.warnerbros.com/",
167
+ :industry => "Entertainment",
168
+ :address => "4000 Warner Boulevard Burbank, CA 91522 United States"
169
+ }
170
+ ]
171
+
172
+
173
+ profile.linkedin_url #url of the profile
174
+
175
+ profile.websites
176
+ #Array of websites
177
+ [
178
+ [0] "http://www.linkedin.com/"
179
+ ]
180
+
181
+ profile.groups
182
+ #Array of hashes containing group name and link
183
+
184
+
185
+ profile.education
186
+ #Array of hashes for education
187
+
188
+ profile.skills
189
+ #Array of skills
190
+
191
+ profile.picture
192
+ #url of the profile picture
193
+
194
+
195
+ profile.recommended_visitors
196
+ #It's the list of visitors "Viewers of this profile also viewed..."
197
+ [
198
+ [0] {
199
+ :link => "http://www.linkedin.com/in/barackobama?trk=pub-pbmap",
200
+ :name => "Barack Obama",
201
+ :title => "President of the United States of ",
202
+ :company => nil
203
+ },
204
+ [1] {
205
+ :link => "http://www.linkedin.com/in/marissamayer?trk=pub-pbmap",
206
+ :name => "Marissa Mayer",
207
+ :title => "Yahoo!, President & CEO",
208
+ :company => nil
209
+ },
210
+ [2] {
211
+ :link => "http://www.linkedin.com/pub/sean-parker/0/1/826?trk=pub-pbmap",
212
+ :name => "Sean Parker",
213
+ :title => nil,
214
+ :company => nil
215
+ },
216
+ [3] {
217
+ :link => "http://www.linkedin.com/pub/eduardo-saverin/0/70a/31b?trk=pub-pbmap",
218
+ :name => "Eduardo Saverin",
219
+ :title => nil,
220
+ :company => nil
221
+ },
222
+ [4] {
223
+ :link => "http://www.linkedin.com/in/rbranson?trk=pub-pbmap",
224
+ :name => "Richard Branson",
225
+ :title => "Founder",
226
+ :company => "Virgin Group"
227
+ },
228
+ [5] {
229
+ :link => "http://www.linkedin.com/in/reidhoffman?trk=pub-pbmap",
230
+ :name => "Reid Hoffman",
231
+ :title => "Entrepreneur. Product Strategist. ",
232
+ :company => nil
233
+ },
234
+ [6] {
235
+ :link => "http://www.linkedin.com/in/mdell?trk=pub-pbmap",
236
+ :name => "Michael Dell",
237
+ :title => "Chairman and CEO",
238
+ :company => "Dell"
239
+ },
240
+ [7] {
241
+ :link => "http://www.linkedin.com/in/mittromney?trk=pub-pbmap",
242
+ :name => "Mitt Romney",
243
+ :title => "Believe in America",
244
+ :company => nil
245
+ },
246
+ [8] {
247
+ :link => "http://www.linkedin.com/pub/sheryl-sandberg/2/665/512?trk=pub-pbmap",
248
+ :name => "Sheryl Sandberg",
249
+ :title => nil,
250
+ :company => nil
251
+ }
252
+ ]
253
+
254
+
255
+
256
+ You're welcome to fork this project and send pull requests. I would especially like to thank:
@@ -1,132 +1,76 @@
1
- USER_AGENTS = ["Windows IE 6", "Windows IE 7", "Windows Mozilla", "Mac Safari", "Mac FireFox", "Mac Mozilla", "Linux Mozilla", "Linux Firefox", "Linux Konqueror"]
1
+ # -*- coding: utf-8 -*-
2
2
  module Linkedin
3
- class Profile
4
- #the First name of the contact
5
- attr_accessor :first_name,:last_name,:title,:location,:country,
6
- :industry, :linkedin_url,:recommended_visitors,:page
7
- #Array of hashes for eduction
8
- # [
9
- # [0] {
10
- # :name => "Vishwakarma Institute of Technology",
11
- # :description => "B.Tech, Computer Engineering",
12
- # :period => "2007 – 2011"
13
- # },
14
- # [1] {
15
- # :name => "St Ursula's High School",
16
- # :description => "Secondary School Education",
17
- # :period => nil
18
- # }
19
- # ]
20
- attr_accessor :education
3
+ class Profile
21
4
 
22
- #Array of websites
23
- #[
24
- #[0] "http://www.yatishmehta.in"
25
- #]
26
- attr_accessor :websites
27
- #array of hashes containing group name and link
28
- # [
29
- # [ 0] {
30
- # :name => "Business on Rails",
31
- # :link => "http://www.linkedin.com/groups/Business-on-Rails-27822"
32
- # },
33
- # [ 1] {
34
- # :name => "HTML5 Technologies",
35
- # :link => "http://www.linkedin.com/groups/HTML5-Technologies-2868882"
36
- # },
37
- # [ 2] {
38
- # :name => "India on Rails",
39
- # :link => "http://www.linkedin.com/groups/India-on-Rails-149940"
40
- # :name => "Open Source",
41
- # :link => "http://www.linkedin.com/groups?gid=43875"
42
- # },
43
- # [ 4] {
44
- # :name => "Rails Developers",
45
- # :link => "http://www.linkedin.com/groups?gid=77764"
46
- # },
47
- # ]
48
- attr_accessor:groups
5
+ USER_AGENTS = ["Windows IE 6", "Windows IE 7", "Windows Mozilla", "Mac Safari", "Mac FireFox", "Mac Mozilla", "Linux Mozilla", "Linux Firefox", "Linux Konqueror"]
49
6
 
7
+ attr_accessor :first_name,:last_name,:title,:location,:country, :industry,:picture,:linkedin_url,:recommended_visitors,:page
8
+
9
+ attr_accessor :education
50
10
 
51
- #Array of hash containing its past job companies and job profile
52
- #Example
53
- # [
54
- # [0] {
55
- # :past_company => "Consumyze Software",
56
- # :past_title => "Trainee",
57
- # :past_company_website => "http://www.consumyze.com",
58
- # :description => "Responsible for design and development"
59
- # },
60
- # [1] {
61
- # :past_company => "SunGard Global Services",
62
- # :past_title => "Project Intern",
63
- # :past_company_website => "http://www.sungard.com/globalservices/learnmore",
64
- # :description => "Fame PassPoint. Developed an entirely Ajax based online control panel for user management and Data access for Fame"
65
- # }
66
- # ]
11
+ attr_accessor :websites
67
12
 
13
+ attr_accessor:groups
68
14
 
69
15
  attr_accessor :past_companies
70
- #Array of hash containing its current job companies and job profile
71
- #Example
72
- # [
73
- # [0] {
74
- # :current_title => "Intern",
75
- # :current_company => "Sungard"
76
- # :current_company_url=>"http://www.betterlabs.net",
77
- # :description=>"Responsible for design and development of projects on Ruby on Rails."
78
- # },
79
- # [1] {
80
- # :current_title => "Software Developer",
81
- # :current_company => "Microsoft"
82
- # :current_company_url =>"http://www.microsoft.net",
83
- # :description =>"Development and design"
84
-
85
- # }
86
- # ]
16
+
87
17
  attr_accessor :current_companies
88
- #url of the profile
89
-
90
-
91
- def initialize(page,url)
92
- @first_name=get_first_name(page)
93
- @last_name=get_last_name(page)
94
- @title=get_title(page)
95
- @location=get_location(page)
96
- @country=get_country(page)
97
- @industry=get_industry(page)
98
- @current_companies=get_current_companies page
99
- @past_companies=get_past_companies page
100
- @recommended_visitors=get_recommended_visitors page
101
- @education=get_education page
102
- @linkedin_url=url
103
- @websites=get_websites page
104
- @groups=get_groups page
105
- @page=page
18
+
19
+ attr_accessor :skills
20
+
21
+ def initialize(page,url)
22
+ @first_name = get_first_name(page)
23
+ @last_name = get_last_name(page)
24
+ @title = get_title(page)
25
+ @location = get_location(page)
26
+ @country = get_country(page)
27
+ @industry = get_industry(page)
28
+ @picture = get_picture(page)
29
+ @current_companies = get_current_companies(page)
30
+ @past_companies = get_past_companies(page)
31
+ @recommended_visitors = get_recommended_visitors(page)
32
+ @education = get_education(page)
33
+ @linkedin_url = url
34
+ @websites = get_websites(page)
35
+ @groups = get_groups(page)
36
+ @skills = get_skills(page)
37
+ @page = page
106
38
  end
107
39
  #returns:nil if it gives a 404 request
108
40
 
109
- def self.get_profile url
41
+ def self.get_profile(url)
110
42
  begin
111
- @agent=Mechanize.new
43
+ @agent = Mechanize.new
112
44
  @agent.user_agent_alias = USER_AGENTS.sample
113
45
  @agent.max_history = 0
114
- page=@agent.get url
46
+ page = @agent.get(url)
115
47
  return Linkedin::Profile.new(page, url)
116
48
  rescue=>e
117
49
  puts e
118
50
  end
119
51
  end
120
52
 
121
- def get_company_url node
53
+ def get_skills(page)
54
+ page.search('.competency.show-bean').map{|skill|skill.text.strip if skill.text}
55
+ end
56
+
57
+ def get_company_url(node)
58
+ result={}
122
59
  if node.at("h4/strong/a")
123
- link=node.at("h4/strong/a")["href"]
124
- @agent=Mechanize.new
60
+ link = node.at("h4/strong/a")["href"]
61
+ @agent = Mechanize.new
125
62
  @agent.user_agent_alias = USER_AGENTS.sample
126
63
  @agent.max_history = 0
127
- page=@agent.get("http://www.linkedin.com"+link)
128
- url=page.at(".basic-info/div/dl/dd/a").text if page.at(".basic-info/div/dl/dd/a")
64
+ page = @agent.get("http://www.linkedin.com"+link)
65
+ result[:linkedin_company_url] = "http://www.linkedin.com"+link
66
+ result[:url] = page.at(".basic-info/div/dl/dd/a").text if page.at(".basic-info/div/dl/dd/a")
67
+ node_2 = page.at(".basic-info").at(".content.inner-mod")
68
+ node_2.search("dd").zip(node_2.search("dt")).each do |value,title|
69
+ result[title.text.gsub(" ","_").downcase.to_sym] = value.text.strip
70
+ end
71
+ result[:address] = page.at(".vcard.hq").at(".adr").text.gsub("\n"," ").strip if page.at(".vcard.hq")
129
72
  end
73
+ result
130
74
  end
131
75
 
132
76
  private
@@ -155,89 +99,92 @@ module Linkedin
155
99
  return page.at(".industry").text.gsub(/\s+/, " ").strip if page.search(".industry").first
156
100
  end
157
101
 
102
+ def get_picture page
103
+ return page.at("#profile-picture/img.photo").attributes['src'].value.strip if page.search("#profile-picture/img.photo").first
104
+ end
105
+
158
106
  def get_past_companies page
159
107
  past_cs=[]
160
108
  if page.search(".position.experience.vevent.vcard.summary-past").first
161
109
  page.search(".position.experience.vevent.vcard.summary-past").each do |past_company|
162
- url=get_company_url past_company
163
- title=past_company.at("h3").text.gsub(/\s+|\n/, " ").strip if past_company.at("h3")
164
- company=past_company.at("h4").text.gsub(/\s+|\n/, " ").strip if past_company.at("h4")
165
- description=past_company.at(".description.past-position").text.gsub(/\s+|\n/, " ").strip if past_company.at(".description.past-position")
166
- past_company={:past_company=>company,:past_title=> title,:past_company_website=>url,:description=>description}
167
- past_cs<<past_company
110
+ result = get_company_url past_company
111
+ url = result[:url]
112
+ title = past_company.at("h3").text.gsub(/\s+|\n/, " ").strip if past_company.at("h3")
113
+ company = past_company.at("h4").text.gsub(/\s+|\n/, " ").strip if past_company.at("h4")
114
+ description = past_company.at(".description.past-position").text.gsub(/\s+|\n/, " ").strip if past_company.at(".description.past-position")
115
+ p_company = {:past_company=>company,:past_title=> title,:past_company_website=>url,:description=>description}
116
+ p_company = p_company.merge(result)
117
+ past_cs << p_company
168
118
  end
169
119
  return past_cs
170
120
  end
171
121
  end
172
122
 
173
123
  def get_current_companies page
174
- current_cs=[]
124
+ current_cs = []
175
125
  if page.search(".position.experience.vevent.vcard.summary-current").first
176
126
  page.search(".position.experience.vevent.vcard.summary-current").each do |current_company|
177
- url=get_company_url current_company
178
- title=current_company.at("h3").text.gsub(/\s+|\n/, " ").strip if current_company.at("h3")
179
- company=current_company.at("h4").text.gsub(/\s+|\n/, " ").strip if current_company.at("h4")
180
- description=current_company.at(".description.current-position").text.gsub(/\s+|\n/, " ").strip if current_company.at(".description.current-position")
181
- current_company={:current_company=>company,:current_title=> title,:current_company_url=>url,:description=>description}
182
- current_cs<<current_company
127
+ result = get_company_url current_company
128
+ url = result[:url]
129
+ title = current_company.at("h3").text.gsub(/\s+|\n/, " ").strip if current_company.at("h3")
130
+ company = current_company.at("h4").text.gsub(/\s+|\n/, " ").strip if current_company.at("h4")
131
+ description = current_company.at(".description.current-position").text.gsub(/\s+|\n/, " ").strip if current_company.at(".description.current-position")
132
+ current_company = {:current_company=>company,:current_title=> title,:current_company_url=>url,:description=>description}
133
+ current_cs << current_company.merge(result)
183
134
  end
184
135
  return current_cs
185
136
  end
186
137
  end
187
138
 
188
- def get_education page
139
+ def get_education(page)
189
140
  education=[]
190
141
  if page.search(".position.education.vevent.vcard").first
191
142
  page.search(".position.education.vevent.vcard").each do |item|
192
- name=item.at("h3").text.gsub(/\s+|\n/, " ").strip if item.at("h3")
193
- desc=item.at("h4").text.gsub(/\s+|\n/, " ").strip if item.at("h4")
194
- period=item.at(".period").text.gsub(/\s+|\n/, " ").strip if item.at(".period")
195
- edu={:name=>name,:description=>desc,:period=>period}
196
- education<<edu
143
+ name = item.at("h3").text.gsub(/\s+|\n/, " ").strip if item.at("h3")
144
+ desc = item.at("h4").text.gsub(/\s+|\n/, " ").strip if item.at("h4")
145
+ period = item.at(".period").text.gsub(/\s+|\n/, " ").strip if item.at(".period")
146
+ edu = {:name => name,:description => desc,:period => period}
147
+ education << edu
197
148
  end
198
149
  return education
199
150
  end
200
151
  end
201
152
 
202
- def get_websites page
153
+ def get_websites(page)
203
154
  websites=[]
204
155
  if page.search(".website").first
205
156
  page.search(".website").each do |site|
206
- url=site.at("a")["href"]
207
- url="http://www.linkedin.com"+url
208
- url=CGI.parse(URI.parse(url).query)["url"]
209
- websites<<url
157
+ url = site.at("a")["href"]
158
+ url = "http://www.linkedin.com"+url
159
+ url = CGI.parse(URI.parse(url).query)["url"]
160
+ websites << url
210
161
  end
211
162
  return websites.flatten!
212
- end
163
+ end
213
164
  end
214
165
 
215
- def get_groups page
216
- groups=[]
166
+ def get_groups(page)
167
+ groups = []
217
168
  if page.search(".group-data").first
218
169
  page.search(".group-data").each do |item|
219
- name=item.text.gsub(/\s+|\n/, " ").strip
220
- link="http://www.linkedin.com"+item.at("a")["href"]
221
- groups<<{:name=>name,:link=>link}
170
+ name = item.text.gsub(/\s+|\n/, " ").strip
171
+ link = "http://www.linkedin.com"+item.at("a")["href"]
172
+ groups << {:name=>name,:link=>link}
222
173
  end
223
174
  return groups
224
175
  end
225
-
226
176
  end
227
177
 
228
-
229
-
230
-
231
- def get_recommended_visitors page
178
+ def get_recommended_visitors(page)
232
179
  recommended_vs=[]
233
180
  if page.search(".browsemap").first
234
181
  page.at(".browsemap").at("ul").search("li").each do |visitor|
235
- v={}
236
- v[:link]=visitor.at('a').attributes["href"]
237
- v[:name]=visitor.at('a').text
238
- v[:title]=visitor.at('.headline').text.split(" at ").first
239
- v[:company]=visitor.at('.headline').text.split(" at ").last
240
- recommended_vs<<v
182
+ v = {}
183
+ v[:link] = visitor.at('a')["href"]
184
+ v[:name] = visitor.at('strong/a').text
185
+ v[:title] = visitor.at('.headline').text.gsub("..."," ").split(" at ").first
186
+ v[:company] = visitor.at('.headline').text.gsub("..."," ").split(" at ")[1]
187
+ recommended_vs << v
241
188
  end
242
189
  return recommended_vs
243
190
  end
@@ -1,5 +1,5 @@
1
1
  module Linkedin
2
2
  module Scraper
3
- VERSION = "0.0.7"
3
+ VERSION = "0.0.8"
4
4
  end
5
5
  end
@@ -3,13 +3,10 @@ require File.expand_path('../lib/linkedin-scraper/version', __FILE__)
3
3
 
4
4
  Gem::Specification.new do |gem|
5
5
  gem.authors = ["Yatish Mehta"]
6
- gem.email = ["yatishmehta27@gmail.com"]
7
6
  gem.description = %q{Scrapes the linkedin profile when a url is given }
8
7
  gem.summary = %q{when a url of public linkedin profile page is given it scrapes the entire page and converts into a accessible object}
9
8
  gem.homepage = "https://github.com/yatishmehta27/linkedin-scraper"
10
-
11
- gem.add_dependency(%q<mechanize>, [">= 0"])
12
-
9
+ gem.add_dependency(%q<mechanize>, [">= 0"])
13
10
  gem.files = `git ls-files`.split($\)
14
11
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
15
12
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
@@ -0,0 +1,13 @@
1
+ require 'spec_helper'
2
+ require 'linkedin-scraper'
3
+
4
+
5
+ describe Linkedin::Profile do
6
+ describe "::get_profile" do
7
+ it "Create an instance of profile class and populate it will all details" do
8
+ @profile = Linkedin::Profile.get_profile("http://www.linkedin.com/in/jeffweiner08")
9
+ @profile.first_name.should == "Jeff"
10
+ #other parameters may change with time
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,18 @@
1
+ $: << File.join(File.dirname(__FILE__), '../lib')
2
+ # This file was generated by the `rspec --init` command. Conventionally, all
3
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
4
+ # Require this file using `require "spec_helper"` to ensure that it is only
5
+ # loaded once.
6
+ #
7
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
8
+ RSpec.configure do |config|
9
+ config.treat_symbols_as_metadata_keys_with_true_values = true
10
+ config.run_all_when_everything_filtered = true
11
+ config.filter_run :focus
12
+
13
+ # Run specs in random order to surface order dependencies. If you find an
14
+ # order dependency and want to debug it, you can fix the order by providing
15
+ # the seed, which is printed after each run.
16
+ # --seed 1234
17
+ config.order = 'random'
18
+ end
metadata CHANGED
@@ -1,35 +1,31 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkedin-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
5
- prerelease:
4
+ version: 0.0.8
6
5
  platform: ruby
7
6
  authors:
8
7
  - Yatish Mehta
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2012-08-02 00:00:00.000000000 Z
11
+ date: 2013-03-12 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: mechanize
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ! '>='
17
+ - - '>='
20
18
  - !ruby/object:Gem::Version
21
19
  version: '0'
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ! '>='
24
+ - - '>='
28
25
  - !ruby/object:Gem::Version
29
26
  version: '0'
30
- description: ! 'Scrapes the linkedin profile when a url is given '
31
- email:
32
- - yatishmehta27@gmail.com
27
+ description: 'Scrapes the linkedin profile when a url is given '
28
+ email:
33
29
  executables: []
34
30
  extensions: []
35
31
  extra_rdoc_files: []
@@ -37,35 +33,38 @@ files:
37
33
  - .gitignore
38
34
  - Gemfile
39
35
  - LICENSE
40
- - README.rdoc
36
+ - README.md
41
37
  - Rakefile
42
38
  - lib/linkedin-scraper.rb
43
39
  - lib/linkedin-scraper/profile.rb
44
40
  - lib/linkedin-scraper/version.rb
45
41
  - linkedin-scraper.gemspec
42
+ - spec/linkedin-scraper/profile_spec.rb
43
+ - spec/spec_helper.rb
46
44
  homepage: https://github.com/yatishmehta27/linkedin-scraper
47
45
  licenses: []
46
+ metadata: {}
48
47
  post_install_message:
49
48
  rdoc_options: []
50
49
  require_paths:
51
50
  - lib
52
51
  required_ruby_version: !ruby/object:Gem::Requirement
53
- none: false
54
52
  requirements:
55
- - - ! '>='
53
+ - - '>='
56
54
  - !ruby/object:Gem::Version
57
55
  version: '0'
58
56
  required_rubygems_version: !ruby/object:Gem::Requirement
59
- none: false
60
57
  requirements:
61
- - - ! '>='
58
+ - - '>='
62
59
  - !ruby/object:Gem::Version
63
60
  version: '0'
64
61
  requirements: []
65
62
  rubyforge_project:
66
- rubygems_version: 1.8.24
63
+ rubygems_version: 2.0.0
67
64
  signing_key:
68
- specification_version: 3
65
+ specification_version: 4
69
66
  summary: when a url of public linkedin profile page is given it scrapes the entire
70
67
  page and converts into a accessible object
71
- test_files: []
68
+ test_files:
69
+ - spec/linkedin-scraper/profile_spec.rb
70
+ - spec/spec_helper.rb
data/README.rdoc DELETED
@@ -1,134 +0,0 @@
1
- = Linkedin-Scraper {<img src="http://travis-ci.org/jaimeiniesta/metainspector.png" />}[http://travis-ci.org/jaimeiniesta/metainspector]
2
-
3
- Linkedin-scraper is a gem for scraping linkedin public profiles. You give it an URL, and it lets you easily get its title,name,country,area,current_companies .
4
-
5
- = Installation
6
-
7
- Install the gem from RubyGems:
8
-
9
- gem install linkedin-scraper
10
-
11
- This gem is tested on Ruby versions 1.8.7, 1.9.2 and 1.9.3.
12
-
13
- = Usage
14
-
15
- Initialize a scraper instance for an URL, like this:
16
-
17
- profile = Linkedin::Profile.get_profile('http://in.linkedin.com/pub/yatish-mehta/22/460/a86')
18
-
19
- Then you can see the scraped data like this:
20
-
21
-
22
- profile.first_name #the First name of the contact
23
-
24
- profile.last_name #the last name of the contact
25
-
26
- profile.title #the linkedin job title
27
-
28
- profile.location #the location of the contact
29
-
30
- profile.country #the country of the contact
31
-
32
- profile.industry #the domain for which the contact belongs
33
-
34
- profile.past_companies
35
- #Array of hash containing its past job companies and job profile
36
- #Example
37
- # [
38
- # [0] {
39
- # :past_company => "Consumyze Software",
40
- # :past_title => "Trainee",
41
- # :past_company_website => "http://www.consumyze.com",
42
- # :description => "Responsible for design and development"
43
- # },
44
- # [1] {
45
- # :past_company => "SunGard Global Services",
46
- # :past_title => "Project Intern",
47
- # :past_company_website => "http://www.sungard.com/globalservices/learnmore",
48
- # :description => "Fame PassPoint. Developed an entirely Ajax based online control panel for user management and Data access for Fame"
49
- # }
50
- # ]
51
- profile.current_companies
52
- #Array of hash containing its current job companies and job profile
53
- #Example
54
- # [
55
- # [0] {
56
- # :current_title => "Intern",
57
- # :current_company => "Sungard"
58
- # :current_company_url=>"http://www.betterlabs.net",
59
- # :description=>"Responsible for design and development of projects on Ruby on Rails."
60
- # },
61
- # [1] {
62
- # :current_title => "Software Developer",
63
- # :current_company => "Microsoft"
64
- # :current_company_url =>"http://www.microsoft.net",
65
- # :description =>"Development and design"
66
-
67
- # }
68
- # ]
69
-
70
-
71
- profile.linkedin_url #url of the profile
72
-
73
- profile.websites
74
- #Array of websites
75
- #[
76
- # [0] "http://www.yatishmehta.in"
77
- #]
78
-
79
- profile.groups
80
- #array of hashes containing group name and link
81
- # [
82
- # [ 0] {
83
- # :name => "Business on Rails",
84
- # :link => "http://www.linkedin.com/groups/Business-on-Rails-27822"
85
- # },
86
- # [ 1] {
87
- # :name => "HTML5 Technologies",
88
- # :link => "http://www.linkedin.com/groups/HTML5-Technologies-2868882"
89
- # },
90
- # [ 2] {
91
- # :name => "India on Rails",
92
- # :link => "http://www.linkedin.com/groups/India-on-Rails-149940"
93
- # :name => "Open Source",
94
- # :link => "http://www.linkedin.com/groups?gid=43875"
95
- # },
96
- # [ 4] {
97
- # :name => "Rails Developers",
98
- # :link => "http://www.linkedin.com/groups?gid=77764"
99
- # },
100
- # ]
101
-
102
- profile.education
103
- #Array of hashes for eduction
104
- # [
105
- # [0] {
106
- # :name => "Vishwakarma Institute of Technology",
107
- # :description => "B.Tech, Computer Engineering",
108
- # :period => "2007 – 2011"
109
- # },
110
- # [1] {
111
- # :name => "St Ursula's High School",
112
- # :description => "Secondary School Education",
113
- # :period => nil
114
- # }
115
- # ]
116
-
117
- profile.recommended_visitors
118
- #Its the list of visitors "Viewers of this profile also viewed..."
119
- #attr_accessor :recommended_visitors = [
120
- # [0] {
121
- # :link => href="http://in.linkedin.com/in/nileshavhad?trk=pub-pbmap",
122
- # :name => "Nilesh Avhad",
123
- # :title => "Engineering Manager",
124
- # :company => "Better Labs"
125
- # },
126
-
127
-
128
- = ZOMG Fork! Thank you!
129
-
130
- You're welcome to fork this project and send pull requests. I want to thank specially:
131
-
132
- = To Do
133
- *
134
- Copyright (c) 2009-2012 Yatish Mehta, released under the MIT license