linkedin-scraper 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -32,34 +32,87 @@ Then you can see the scraped data like this:
32
32
  profile.industry #the domain for which the contact belongs
33
33
 
34
34
  profile.past_companies
35
- #Array of hash containing its past job companies and job profile
36
- #Example
37
- # [
38
- # [0] {
39
- # :past_title => "Intern",
40
- # :past_company => "Sungard"
41
- # },
42
- # [1] {
43
- # :past_title => "Software Developer",
44
- # :past_company => "Microsoft"
45
- # }
46
- # ]
35
+ #Array of hash containing its past job companies and job profile
36
+ #Example
37
+ # [
38
+ # [0] {
39
+ # :past_company => "Consumyze Software",
40
+ # :past_title => "Trainee",
41
+ # :past_company_website => "http://www.consumyze.com",
42
+ # :description => "Responsible for design and development"
43
+ # },
44
+ # [1] {
45
+ # :past_company => "SunGard Global Services",
46
+ # :past_title => "Project Intern",
47
+ # :past_company_website => "http://www.sungard.com/globalservices/learnmore",
48
+ # :description => "Fame PassPoint. Developed an entirely Ajax based online control panel for user management and Data access for Fame"
49
+ # }
50
+ # ]
47
51
  profile.current_companies
48
52
  #Array of hash containing its current job companies and job profile
49
- #Example
50
- # [
51
- # [0] {
52
- # :current_title => "Intern",
53
- # :current_company => "Sungard"
54
- # },
55
- # [1] {
56
- # :current_title => "Software Developer",
57
- # :current_company => "Microsoft"
58
- # }
59
- # ]
53
+ #Example
54
+ # [
55
+ # [0] {
56
+ # :current_title => "Intern",
57
+ # :current_company => "Sungard",
58
+ # :current_company_url=>"http://www.betterlabs.net",
59
+ # :description=>"Responsible for design and development of projects on Ruby on Rails."
60
+ # },
61
+ # [1] {
62
+ # :current_title => "Software Developer",
63
+ # :current_company => "Microsoft",
64
+ # :current_company_url =>"http://www.microsoft.net",
65
+ # :description =>"Development and design"
66
+
67
+ # }
68
+ # ]
60
69
 
61
70
 
62
71
  profile.linkedin_url #url of the profile
72
+
73
+ profile.websites
74
+ #Array of websites
75
+ #[
76
+ # [0] "http://www.yatishmehta.in"
77
+ #]
78
+
79
+ profile.groups
80
+ #array of hashes containing group name and link
81
+ # [
82
+ # [ 0] {
83
+ # :name => "Business on Rails",
84
+ # :link => "http://www.linkedin.com/groups/Business-on-Rails-27822"
85
+ # },
86
+ # [ 1] {
87
+ # :name => "HTML5 Technologies",
88
+ # :link => "http://www.linkedin.com/groups/HTML5-Technologies-2868882"
89
+ # },
90
+ # [ 2] {
91
+ # :name => "India on Rails",
92
+ # :link => "http://www.linkedin.com/groups/India-on-Rails-149940"
93
+ # :name => "Open Source",
94
+ # :link => "http://www.linkedin.com/groups?gid=43875"
95
+ # },
96
+ # [ 4] {
97
+ # :name => "Rails Developers",
98
+ # :link => "http://www.linkedin.com/groups?gid=77764"
99
+ # },
100
+ # ]
101
+
102
+ profile.education
103
+ #Array of hashes for education
104
+ # [
105
+ # [0] {
106
+ # :name => "Vishwakarma Institute of Technology",
107
+ # :description => "B.Tech, Computer Engineering",
108
+ # :period => "2007 – 2011"
109
+ # },
110
+ # [1] {
111
+ # :name => "St Ursula's High School",
112
+ # :description => "Secondary School Education",
113
+ # :period => nil
114
+ # }
115
+ # ]
63
116
 
64
117
  profile.recommended_visitors
65
118
  #It's the list of visitors "Viewers of this profile also viewed..."
@@ -1,6 +1,7 @@
1
- require "linkedin-scraper/version"
2
1
  require "rubygems"
3
2
  require "mechanize"
3
+ require "cgi"
4
+ require "net/http"
4
5
  Dir["#{File.expand_path(File.dirname(__FILE__))}/linkedin-scraper/*.rb"].each {|file| require file }
5
6
 
6
7
 
@@ -3,22 +3,68 @@ module Linkedin
3
3
  class Profile
4
4
  #the First name of the contact
5
5
  attr_accessor :first_name,:last_name,:title,:location,:country,
6
- :industry, :linkedin_url,:recommended_visitors,:profile,
7
- :page
6
+ :industry, :linkedin_url,:recommended_visitors,:page
7
+ #Array of hashes for education
8
+ # [
9
+ # [0] {
10
+ # :name => "Vishwakarma Institute of Technology",
11
+ # :description => "B.Tech, Computer Engineering",
12
+ # :period => "2007 – 2011"
13
+ # },
14
+ # [1] {
15
+ # :name => "St Ursula's High School",
16
+ # :description => "Secondary School Education",
17
+ # :period => nil
18
+ # }
19
+ # ]
20
+ attr_accessor :education
21
+
22
+ #Array of websites
23
+ #[
24
+ #[0] "http://www.yatishmehta.in"
25
+ #]
26
+ attr_accessor :websites
27
+ #array of hashes containing group name and link
28
+ # [
29
+ # [ 0] {
30
+ # :name => "Business on Rails",
31
+ # :link => "http://www.linkedin.com/groups/Business-on-Rails-27822"
32
+ # },
33
+ # [ 1] {
34
+ # :name => "HTML5 Technologies",
35
+ # :link => "http://www.linkedin.com/groups/HTML5-Technologies-2868882"
36
+ # },
37
+ # [ 2] {
38
+ # :name => "India on Rails",
39
+ # :link => "http://www.linkedin.com/groups/India-on-Rails-149940"
40
+ # :name => "Open Source",
41
+ # :link => "http://www.linkedin.com/groups?gid=43875"
42
+ # },
43
+ # [ 4] {
44
+ # :name => "Rails Developers",
45
+ # :link => "http://www.linkedin.com/groups?gid=77764"
46
+ # },
47
+ # ]
48
+ attr_accessor:groups
8
49
 
9
50
 
10
51
  #Array of hash containing its past job companies and job profile
11
52
  #Example
12
53
  # [
13
- # [0] {
14
- # :past_title => "Intern",
15
- # :past_company => "Sungard"
16
- # },
17
- # [1] {
18
- # :past_title => "Software Developer",
19
- # :past_company => "Microsoft"
20
- # }
21
- # ]
54
+ # [0] {
55
+ # :past_company => "Consumyze Software",
56
+ # :past_title => "Trainee",
57
+ # :past_company_website => "http://www.consumyze.com",
58
+ # :description => "Responsible for design and development"
59
+ # },
60
+ # [1] {
61
+ # :past_company => "SunGard Global Services",
62
+ # :past_title => "Project Intern",
63
+ # :past_company_website => "http://www.sungard.com/globalservices/learnmore",
64
+ # :description => "Fame PassPoint. Developed an entirely Ajax based online control panel for user management and Data access for Fame"
65
+ # }
66
+ # ]
67
+
22
68
 
23
69
  attr_accessor :past_companies
24
70
  #Array of hash containing its current job companies and job profile
@@ -27,10 +73,15 @@ module Linkedin
27
73
  # [0] {
28
74
  # :current_title => "Intern",
29
75
  # :current_company => "Sungard",
76
+ # :current_company_url=>"http://www.betterlabs.net",
77
+ # :description=>"Responsible for design and development of projects on Ruby on Rails."
30
78
  # },
31
79
  # [1] {
32
- # :current_title => "Software Developer",
33
- # :current_company => "Microsoft"
80
+ # :current_title => "Software Developer",
81
+ # :current_company => "Microsoft",
82
+ # :current_company_url =>"http://www.microsoft.net",
83
+ # :description =>"Development and design"
84
+
34
85
  # }
35
86
  # ]
36
87
  attr_accessor :current_companies
@@ -47,10 +98,14 @@ module Linkedin
47
98
  @current_companies=get_current_companies page
48
99
  @past_companies=get_past_companies page
49
100
  @recommended_visitors=get_recommended_visitors page
101
+ @education=get_education page
50
102
  @linkedin_url=url
103
+ @websites=get_websites page
104
+ @groups=get_groups page
51
105
  @page=page
52
106
  end
53
107
  #returns nil if the request results in a 404
108
+
54
109
  def self.get_profile url
55
110
  begin
56
111
  @agent=Mechanize.new
@@ -63,6 +118,17 @@ module Linkedin
63
118
  end
64
119
  end
65
120
 
121
+ def get_company_url node
122
+ if node.at("h4/strong/a")
123
+ link=node.at("h4/strong/a")["href"]
124
+ @agent=Mechanize.new
125
+ @agent.user_agent_alias = USER_AGENTS.sample
126
+ @agent.max_history = 0
127
+ page=@agent.get("http://www.linkedin.com"+link)
128
+ url=page.at(".basic-info/div/dl/dd/a").text if page.at(".basic-info/div/dl/dd/a")
129
+ end
130
+ end
131
+
66
132
  private
67
133
 
68
134
  def get_first_name page
@@ -91,12 +157,13 @@ module Linkedin
91
157
 
92
158
  def get_past_companies page
93
159
  past_cs=[]
94
- if page.search(".past").first
95
- page.search(".past").search("li").each do |past_company|
96
- title,company=past_company.text.strip.split(" at ")
97
- company=company.gsub(/\s+/, " ").strip if company
98
- title=title.gsub(/\s+/, " ").strip if title
99
- past_company={:past_company=>company,:past_title=> title}
160
+ if page.search(".position.experience.vevent.vcard.summary-past").first
161
+ page.search(".position.experience.vevent.vcard.summary-past").each do |past_company|
162
+ url=get_company_url past_company
163
+ title=past_company.at("h3").text.gsub(/\s+|\n/, " ").strip if past_company.at("h3")
164
+ company=past_company.at("h4").text.gsub(/\s+|\n/, " ").strip if past_company.at("h4")
165
+ description=past_company.at(".description.past-position").text.gsub(/\s+|\n/, " ").strip if past_company.at(".description.past-position")
166
+ past_company={:past_company=>company,:past_title=> title,:past_company_website=>url,:description=>description}
100
167
  past_cs<<past_company
101
168
  end
102
169
  return past_cs
@@ -105,18 +172,62 @@ module Linkedin
105
172
 
106
173
  def get_current_companies page
107
174
  current_cs=[]
108
- if page.search(".current").first
109
- page.search(".current").search("li").each do |past_company|
110
- title,company=past_company.text.strip.split(" at ")
111
- company=company.gsub(/\s+/, " ").strip if company
112
- title=title.gsub(/\s+/, " ").strip if title
113
- current_company={:current_company=>company,:current_title=> title}
175
+ if page.search(".position.experience.vevent.vcard.summary-current").first
176
+ page.search(".position.experience.vevent.vcard.summary-current").each do |current_company|
177
+ url=get_company_url current_company
178
+ title=current_company.at("h3").text.gsub(/\s+|\n/, " ").strip if current_company.at("h3")
179
+ company=current_company.at("h4").text.gsub(/\s+|\n/, " ").strip if current_company.at("h4")
180
+ description=current_company.at(".description.current-position").text.gsub(/\s+|\n/, " ").strip if current_company.at(".description.current-position")
181
+ current_company={:current_company=>company,:current_title=> title,:current_company_url=>url,:description=>description}
114
182
  current_cs<<current_company
115
183
  end
116
184
  return current_cs
117
185
  end
118
186
  end
119
187
 
188
+ def get_education page
189
+ education=[]
190
+ if page.search(".position.education.vevent.vcard").first
191
+ page.search(".position.education.vevent.vcard").each do |item|
192
+ name=item.at("h3").text.gsub(/\s+|\n/, " ").strip if item.at("h3")
193
+ desc=item.at("h4").text.gsub(/\s+|\n/, " ").strip if item.at("h4")
194
+ period=item.at(".period").text.gsub(/\s+|\n/, " ").strip if item.at(".period")
195
+ edu={:name=>name,:description=>desc,:period=>period}
196
+ education<<edu
197
+ end
198
+ return education
199
+ end
200
+ end
201
+
202
+ def get_websites page
203
+ websites=[]
204
+ if page.search(".website").first
205
+ page.search(".website").each do |site|
206
+ url=site.at("a")["href"]
207
+ url="http://www.linkedin.com"+url
208
+ url=CGI.parse(URI.parse(url).query)["url"]
209
+ websites<<url
210
+ end
211
+ return websites.flatten!
212
+ end
213
+ end
214
+
215
+ def get_groups page
216
+ groups=[]
217
+ if page.search(".group-data").first
218
+ page.search(".group-data").each do |item|
219
+ name=item.text.gsub(/\s+|\n/, " ").strip
220
+ link="http://www.linkedin.com"+item.at("a")["href"]
221
+ groups<<{:name=>name,:link=>link}
222
+ end
223
+ return groups
224
+ end
225
+
226
+ end
227
+
228
+
229
+
230
+
120
231
  def get_recommended_visitors page
121
232
  recommended_vs=[]
122
233
  if page.search(".browsemap").first
@@ -130,7 +241,6 @@ module Linkedin
130
241
  end
131
242
  return recommended_vs
132
243
  end
133
-
134
244
  end
135
245
  end
136
246
  end
@@ -1,5 +1,5 @@
1
1
  module Linkedin
2
2
  module Scraper
3
- VERSION = "0.0.6"
3
+ VERSION = "0.0.7"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkedin-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-07-23 00:00:00.000000000 Z
12
+ date: 2012-08-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mechanize