linkedin-scraper 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +76 -23
- data/lib/linkedin-scraper.rb +2 -1
- data/lib/linkedin-scraper/profile.rb +136 -26
- data/lib/linkedin-scraper/version.rb +1 -1
- metadata +2 -2
data/README.rdoc
CHANGED
@@ -32,34 +32,87 @@ Then you can see the scraped data like this:
|
|
32
32
|
profile.industry #the domain for which the contact belongs
|
33
33
|
|
34
34
|
profile.past_companies
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
35
|
+
#Array of hash containing its past job companies and job profile
|
36
|
+
#Example
|
37
|
+
# [
|
38
|
+
# [0] {
|
39
|
+
# :past_company => "Consumyze Software",
|
40
|
+
# :past_title => "Trainee",
|
41
|
+
# :past_company_website => "http://www.consumyze.com",
|
42
|
+
# :description => "Responsible for design and development"
|
43
|
+
# },
|
44
|
+
# [1] {
|
45
|
+
# :past_company => "SunGard Global Services",
|
46
|
+
# :past_title => "Project Intern",
|
47
|
+
# :past_company_website => "http://www.sungard.com/globalservices/learnmore",
|
48
|
+
# :description => "Fame PassPoint. Developed an entirely Ajax based online control panel for user management and Data access for Fame"
|
49
|
+
# }
|
50
|
+
# ]
|
47
51
|
profile.current_companies
|
48
52
|
#Array of hash containing its current job companies and job profile
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
53
|
+
#Example
|
54
|
+
# [
|
55
|
+
# [0] {
|
56
|
+
# :current_title => "Intern",
|
57
|
+
# :current_company => "Sungard"
|
58
|
+
# :current_company_url=>"http://www.betterlabs.net",
|
59
|
+
# :description=>"Responsible for design and development of projects on Ruby on Rails."
|
60
|
+
# },
|
61
|
+
# [1] {
|
62
|
+
# :current_title => "Software Developer",
|
63
|
+
# :current_company => "Microsoft"
|
64
|
+
# :current_company_url =>"http://www.microsoft.net",
|
65
|
+
# :description =>"Development and design"
|
66
|
+
|
67
|
+
# }
|
68
|
+
# ]
|
60
69
|
|
61
70
|
|
62
71
|
profile.linkedin_url #url of the profile
|
72
|
+
|
73
|
+
profile.websites
|
74
|
+
#Array of websites
|
75
|
+
#[
|
76
|
+
# [0] "http://www.yatishmehta.in"
|
77
|
+
#]
|
78
|
+
|
79
|
+
profile.groups
|
80
|
+
#array of hashes containing group name and link
|
81
|
+
# [
|
82
|
+
# [ 0] {
|
83
|
+
# :name => "Business on Rails",
|
84
|
+
# :link => "http://www.linkedin.com/groups/Business-on-Rails-27822"
|
85
|
+
# },
|
86
|
+
# [ 1] {
|
87
|
+
# :name => "HTML5 Technologies",
|
88
|
+
# :link => "http://www.linkedin.com/groups/HTML5-Technologies-2868882"
|
89
|
+
# },
|
90
|
+
# [ 2] {
|
91
|
+
# :name => "India on Rails",
|
92
|
+
# :link => "http://www.linkedin.com/groups/India-on-Rails-149940"
|
93
|
+
# :name => "Open Source",
|
94
|
+
# :link => "http://www.linkedin.com/groups?gid=43875"
|
95
|
+
# },
|
96
|
+
# [ 4] {
|
97
|
+
# :name => "Rails Developers",
|
98
|
+
# :link => "http://www.linkedin.com/groups?gid=77764"
|
99
|
+
# },
|
100
|
+
# ]
|
101
|
+
|
102
|
+
profile.education
|
103
|
+
#Array of hashes for education
|
104
|
+
# [
|
105
|
+
# [0] {
|
106
|
+
# :name => "Vishwakarma Institute of Technology",
|
107
|
+
# :description => "B.Tech, Computer Engineering",
|
108
|
+
# :period => "2007 – 2011"
|
109
|
+
# },
|
110
|
+
# [1] {
|
111
|
+
# :name => "St Ursula's High School",
|
112
|
+
# :description => "Secondary School Education",
|
113
|
+
# :period => nil
|
114
|
+
# }
|
115
|
+
# ]
|
63
116
|
|
64
117
|
profile.recommended_visitors
|
65
118
|
#It's the list of visitors "Viewers of this profile also viewed..."
|
data/lib/linkedin-scraper/profile.rb
CHANGED
@@ -3,22 +3,68 @@ module Linkedin
|
|
3
3
|
class Profile
|
4
4
|
#the First name of the contact
|
5
5
|
attr_accessor :first_name,:last_name,:title,:location,:country,
|
6
|
-
|
7
|
-
|
6
|
+
:industry, :linkedin_url,:recommended_visitors,:page
|
7
|
+
#Array of hashes for education
|
8
|
+
# [
|
9
|
+
# [0] {
|
10
|
+
# :name => "Vishwakarma Institute of Technology",
|
11
|
+
# :description => "B.Tech, Computer Engineering",
|
12
|
+
# :period => "2007 – 2011"
|
13
|
+
# },
|
14
|
+
# [1] {
|
15
|
+
# :name => "St Ursula's High School",
|
16
|
+
# :description => "Secondary School Education",
|
17
|
+
# :period => nil
|
18
|
+
# }
|
19
|
+
# ]
|
20
|
+
attr_accessor :education
|
21
|
+
|
22
|
+
#Array of websites
|
23
|
+
#[
|
24
|
+
#[0] "http://www.yatishmehta.in"
|
25
|
+
#]
|
26
|
+
attr_accessor :websites
|
27
|
+
#array of hashes containing group name and link
|
28
|
+
# [
|
29
|
+
# [ 0] {
|
30
|
+
# :name => "Business on Rails",
|
31
|
+
# :link => "http://www.linkedin.com/groups/Business-on-Rails-27822"
|
32
|
+
# },
|
33
|
+
# [ 1] {
|
34
|
+
# :name => "HTML5 Technologies",
|
35
|
+
# :link => "http://www.linkedin.com/groups/HTML5-Technologies-2868882"
|
36
|
+
# },
|
37
|
+
# [ 2] {
|
38
|
+
# :name => "India on Rails",
|
39
|
+
# :link => "http://www.linkedin.com/groups/India-on-Rails-149940"
|
40
|
+
# :name => "Open Source",
|
41
|
+
# :link => "http://www.linkedin.com/groups?gid=43875"
|
42
|
+
# },
|
43
|
+
# [ 4] {
|
44
|
+
# :name => "Rails Developers",
|
45
|
+
# :link => "http://www.linkedin.com/groups?gid=77764"
|
46
|
+
# },
|
47
|
+
# ]
|
48
|
+
attr_accessor:groups
|
8
49
|
|
9
50
|
|
10
51
|
#Array of hash containing its past job companies and job profile
|
11
52
|
#Example
|
12
53
|
# [
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
54
|
+
# [0] {
|
55
|
+
# :past_company => "Consumyze Software",
|
56
|
+
# :past_title => "Trainee",
|
57
|
+
# :past_company_website => "http://www.consumyze.com",
|
58
|
+
# :description => "Responsible for design and development"
|
59
|
+
# },
|
60
|
+
# [1] {
|
61
|
+
# :past_company => "SunGard Global Services",
|
62
|
+
# :past_title => "Project Intern",
|
63
|
+
# :past_company_website => "http://www.sungard.com/globalservices/learnmore",
|
64
|
+
# :description => "Fame PassPoint. Developed an entirely Ajax based online control panel for user management and Data access for Fame"
|
65
|
+
# }
|
66
|
+
# ]
|
67
|
+
|
22
68
|
|
23
69
|
attr_accessor :past_companies
|
24
70
|
#Array of hash containing its current job companies and job profile
|
@@ -27,10 +73,15 @@ module Linkedin
|
|
27
73
|
# [0] {
|
28
74
|
# :current_title => "Intern",
|
29
75
|
# :current_company => "Sungard"
|
76
|
+
# :current_company_url=>"http://www.betterlabs.net",
|
77
|
+
# :description=>"Responsible for design and development of projects on Ruby on Rails."
|
30
78
|
# },
|
31
79
|
# [1] {
|
32
|
-
# :current_title
|
33
|
-
# :current_company
|
80
|
+
# :current_title => "Software Developer",
|
81
|
+
# :current_company => "Microsoft"
|
82
|
+
# :current_company_url =>"http://www.microsoft.net",
|
83
|
+
# :description =>"Development and design"
|
84
|
+
|
34
85
|
# }
|
35
86
|
# ]
|
36
87
|
attr_accessor :current_companies
|
@@ -47,10 +98,14 @@ module Linkedin
|
|
47
98
|
@current_companies=get_current_companies page
|
48
99
|
@past_companies=get_past_companies page
|
49
100
|
@recommended_visitors=get_recommended_visitors page
|
101
|
+
@education=get_education page
|
50
102
|
@linkedin_url=url
|
103
|
+
@websites=get_websites page
|
104
|
+
@groups=get_groups page
|
51
105
|
@page=page
|
52
106
|
end
|
53
107
|
#returns:nil if it gives a 404 request
|
108
|
+
|
54
109
|
def self.get_profile url
|
55
110
|
begin
|
56
111
|
@agent=Mechanize.new
|
@@ -63,6 +118,17 @@ module Linkedin
|
|
63
118
|
end
|
64
119
|
end
|
65
120
|
|
121
|
+
def get_company_url node
|
122
|
+
if node.at("h4/strong/a")
|
123
|
+
link=node.at("h4/strong/a")["href"]
|
124
|
+
@agent=Mechanize.new
|
125
|
+
@agent.user_agent_alias = USER_AGENTS.sample
|
126
|
+
@agent.max_history = 0
|
127
|
+
page=@agent.get("http://www.linkedin.com"+link)
|
128
|
+
url=page.at(".basic-info/div/dl/dd/a").text if page.at(".basic-info/div/dl/dd/a")
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
66
132
|
private
|
67
133
|
|
68
134
|
def get_first_name page
|
@@ -91,12 +157,13 @@ module Linkedin
|
|
91
157
|
|
92
158
|
def get_past_companies page
|
93
159
|
past_cs=[]
|
94
|
-
if page.search(".past").first
|
95
|
-
page.search(".past").
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
160
|
+
if page.search(".position.experience.vevent.vcard.summary-past").first
|
161
|
+
page.search(".position.experience.vevent.vcard.summary-past").each do |past_company|
|
162
|
+
url=get_company_url past_company
|
163
|
+
title=past_company.at("h3").text.gsub(/\s+|\n/, " ").strip if past_company.at("h3")
|
164
|
+
company=past_company.at("h4").text.gsub(/\s+|\n/, " ").strip if past_company.at("h4")
|
165
|
+
description=past_company.at(".description.past-position").text.gsub(/\s+|\n/, " ").strip if past_company.at(".description.past-position")
|
166
|
+
past_company={:past_company=>company,:past_title=> title,:past_company_website=>url,:description=>description}
|
100
167
|
past_cs<<past_company
|
101
168
|
end
|
102
169
|
return past_cs
|
@@ -105,18 +172,62 @@ module Linkedin
|
|
105
172
|
|
106
173
|
def get_current_companies page
|
107
174
|
current_cs=[]
|
108
|
-
if page.search(".current").first
|
109
|
-
page.search(".current").
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
175
|
+
if page.search(".position.experience.vevent.vcard.summary-current").first
|
176
|
+
page.search(".position.experience.vevent.vcard.summary-current").each do |current_company|
|
177
|
+
url=get_company_url current_company
|
178
|
+
title=current_company.at("h3").text.gsub(/\s+|\n/, " ").strip if current_company.at("h3")
|
179
|
+
company=current_company.at("h4").text.gsub(/\s+|\n/, " ").strip if current_company.at("h4")
|
180
|
+
description=current_company.at(".description.current-position").text.gsub(/\s+|\n/, " ").strip if current_company.at(".description.current-position")
|
181
|
+
current_company={:current_company=>company,:current_title=> title,:current_company_url=>url,:description=>description}
|
114
182
|
current_cs<<current_company
|
115
183
|
end
|
116
184
|
return current_cs
|
117
185
|
end
|
118
186
|
end
|
119
187
|
|
188
|
+
def get_education page
|
189
|
+
education=[]
|
190
|
+
if page.search(".position.education.vevent.vcard").first
|
191
|
+
page.search(".position.education.vevent.vcard").each do |item|
|
192
|
+
name=item.at("h3").text.gsub(/\s+|\n/, " ").strip if item.at("h3")
|
193
|
+
desc=item.at("h4").text.gsub(/\s+|\n/, " ").strip if item.at("h4")
|
194
|
+
period=item.at(".period").text.gsub(/\s+|\n/, " ").strip if item.at(".period")
|
195
|
+
edu={:name=>name,:description=>desc,:period=>period}
|
196
|
+
education<<edu
|
197
|
+
end
|
198
|
+
return education
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
202
|
+
def get_websites page
|
203
|
+
websites=[]
|
204
|
+
if page.search(".website").first
|
205
|
+
page.search(".website").each do |site|
|
206
|
+
url=site.at("a")["href"]
|
207
|
+
url="http://www.linkedin.com"+url
|
208
|
+
url=CGI.parse(URI.parse(url).query)["url"]
|
209
|
+
websites<<url
|
210
|
+
end
|
211
|
+
return websites.flatten!
|
212
|
+
end
|
213
|
+
end
|
214
|
+
|
215
|
+
def get_groups page
|
216
|
+
groups=[]
|
217
|
+
if page.search(".group-data").first
|
218
|
+
page.search(".group-data").each do |item|
|
219
|
+
name=item.text.gsub(/\s+|\n/, " ").strip
|
220
|
+
link="http://www.linkedin.com"+item.at("a")["href"]
|
221
|
+
groups<<{:name=>name,:link=>link}
|
222
|
+
end
|
223
|
+
return groups
|
224
|
+
end
|
225
|
+
|
226
|
+
end
|
227
|
+
|
228
|
+
|
229
|
+
|
230
|
+
|
120
231
|
def get_recommended_visitors page
|
121
232
|
recommended_vs=[]
|
122
233
|
if page.search(".browsemap").first
|
@@ -130,7 +241,6 @@ module Linkedin
|
|
130
241
|
end
|
131
242
|
return recommended_vs
|
132
243
|
end
|
133
|
-
|
134
244
|
end
|
135
245
|
end
|
136
246
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkedin-scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.7
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-08-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mechanize
|