esearchy 0.2.0.5 → 0.2.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/esearchy +32 -1
- data/lib/esearchy/SocialEngines/jigsaw.rb +36 -0
- data/lib/esearchy/SocialEngines/plaxo.rb +36 -0
- data/lib/esearchy/SocialEngines/ziggs.rb +36 -0
- data/lib/esearchy/esearchy.rb +5 -3
- data/lib/esearchy/socialengines.rb +4 -1
- metadata +6 -4
- data/lib/esearchy/SocialEngines/linkedinfull.rb +0 -100
data/bin/esearchy
CHANGED
@@ -58,6 +58,9 @@ opts = GetoptLong.new(
|
|
58
58
|
[ '--enable-naymz', GetoptLong::NO_ARGUMENT ],
|
59
59
|
[ '--enable-ggroups', GetoptLong::NO_ARGUMENT ],
|
60
60
|
[ '--enable-spoke', GetoptLong::NO_ARGUMENT ],
|
61
|
+
[ '--enable-jigsaw', GetoptLong::NO_ARGUMENT ],
|
62
|
+
[ '--enable-ziggs', GetoptLong::NO_ARGUMENT ],
|
63
|
+
[ '--enable-plaxo', GetoptLong::NO_ARGUMENT ],
|
61
64
|
[ '--enable-pgp', GetoptLong::NO_ARGUMENT ],
|
62
65
|
[ '--enable-usenet', GetoptLong::NO_ARGUMENT ],
|
63
66
|
[ '--enable-spider', GetoptLong::NO_ARGUMENT ],
|
@@ -70,6 +73,9 @@ opts = GetoptLong.new(
|
|
70
73
|
[ '--disable-naymz', GetoptLong::NO_ARGUMENT ],
|
71
74
|
[ '--disable-ggroups', GetoptLong::NO_ARGUMENT ],
|
72
75
|
[ '--disable-spoke', GetoptLong::NO_ARGUMENT ],
|
76
|
+
[ '--disable-jigsaw', GetoptLong::NO_ARGUMENT ],
|
77
|
+
[ '--disable-ziggs', GetoptLong::NO_ARGUMENT ],
|
78
|
+
[ '--disable-plaxo', GetoptLong::NO_ARGUMENT ],
|
73
79
|
[ '--disable-pgp', GetoptLong::NO_ARGUMENT ],
|
74
80
|
[ '--disable-usenet', GetoptLong::NO_ARGUMENT ],
|
75
81
|
[ '--disable-spider', GetoptLong::NO_ARGUMENT ],
|
@@ -309,6 +315,12 @@ opts.each do |opt, arg|
|
|
309
315
|
puts "\t Enables Spoke searches.\n"
|
310
316
|
puts "--enable-ggroups"
|
311
317
|
puts "\t Enables Google Groups searches.\n"
|
318
|
+
puts "--enable-jigsaw"
|
319
|
+
puts "\t Enables Jigsaw searches.\n"
|
320
|
+
puts "--enable-ziggs"
|
321
|
+
puts "\t Enables Ziggs searches.\n"
|
322
|
+
puts "--enable-plaxo"
|
323
|
+
puts "\t Enables Plaxo searches.\n"
|
312
324
|
puts "--enable-pgp"
|
313
325
|
puts "\t Enables PGP searches.\n"
|
314
326
|
puts "--enable-usenet"
|
@@ -329,6 +341,12 @@ opts.each do |opt, arg|
|
|
329
341
|
puts "\t Disables Naymz searches.\n"
|
330
342
|
puts "--disable-spoke"
|
331
343
|
puts "\t Disables Spoke searches.\n"
|
344
|
+
puts "--disable-jigsaw"
|
345
|
+
puts "\t Disables Jigsaw searches.\n"
|
346
|
+
puts "--disable-ziggs"
|
347
|
+
puts "\t Disables Ziggs searches.\n"
|
348
|
+
puts "--disable-plaxo"
|
349
|
+
puts "\t Disables Plaxo searches.\n"
|
332
350
|
puts "--disable-ggroups"
|
333
351
|
puts "\t Disables Google Groups searches.\n"
|
334
352
|
puts "--disable-pgp"
|
@@ -339,7 +357,8 @@ opts.each do |opt, arg|
|
|
339
357
|
#END OF HELP
|
340
358
|
exit(0)
|
341
359
|
when '--enable-all' then
|
342
|
-
@people_engines = [:LinkedIn, :Naymz, :Classmates, :GoogleProfiles,
|
360
|
+
@people_engines = [:LinkedIn, :Naymz, :Classmates, :GoogleProfiles,
|
361
|
+
:Spoke, :JigSaw, :Ziggs, :Plaxo]
|
343
362
|
@email_engines = [:Google, :Bing, :Yahoo, :Altavista, :PGP, :Spider ,:Usenet, :GoogleGroups ]
|
344
363
|
when '--enable-people' then
|
345
364
|
@people_engines = [:LinkedIn, :Naymz, :Classmates, :GoogleProfiles, :Spoke]
|
@@ -367,6 +386,12 @@ opts.each do |opt, arg|
|
|
367
386
|
@email_engines << :GoogleGroups
|
368
387
|
when '--enable-spoke' then
|
369
388
|
@people_engines << :Spoke
|
389
|
+
when '--enable-jigsaw' then
|
390
|
+
@people_engines << :JigSaw
|
391
|
+
when '--enable-ziggs' then
|
392
|
+
@people_engines << :Ziggs
|
393
|
+
when '--enable-plaxo' then
|
394
|
+
@people_engines << :Plaxo
|
370
395
|
when '--enable-pgp' then
|
371
396
|
@email_engines << :PGP
|
372
397
|
when '--enable-usenet' then
|
@@ -397,6 +422,12 @@ opts.each do |opt, arg|
|
|
397
422
|
@email_engines.delete(:GoogleGroups)
|
398
423
|
when '--disable-spoke' then
|
399
424
|
@people_engines.delete(:Spoke)
|
425
|
+
when '--disable-jigsaw' then
|
426
|
+
@people_engines.delete(:JigSaw)
|
427
|
+
when '--disable-ziggs' then
|
428
|
+
@people_engines.delete(:Ziggs)
|
429
|
+
when '--disable-plaxo' then
|
430
|
+
@people_engines.delete(:Plaxo)
|
400
431
|
when '--disable-pgp' then
|
401
432
|
@email_engines.delete(:PGP)
|
402
433
|
when '--disable-usenet' then
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module ESearchy
  module SocialEngines
    # People-search engine that looks for Jigsaw profile pages through a
    # Google query restricted to www.jigsaw.com.
    #
    # NOTE(review): ENGINE, PORT, NUM and TYPE are presumably consumed by
    # ESearchy::GenericEngine (not visible here) -- confirm their meaning there.
    class JigSaw < ESearchy::GenericEngine
      ENGINE = "www.google.com"
      PORT = 80
      NUM = 100
      TYPE = 2

      # Builds the Google query path for the configured company, stores it in
      # @querypath and then delegates to the parent class via +super+.
      #
      # Raises ESearchyMissingCompany when @company is nil or empty. The
      # previous `@querypath = "..." or raise ...` form could never raise: a
      # String is always truthy, and a nil company failed inside CGI.escape
      # instead of reporting the intended error.
      def search
        if @company.to_s.empty?
          raise ESearchyMissingCompany, "Missing company Object.company=(value)"
        end

        company = CGI.escape(@company)
        @querypath = "/cse?q=site:www.jigsaw.com/++%2B+#{company}+%2B" +
                     "+intitle:\"#{company}\"" +
                     "&hl=en&cof=&num=100&filter=0&safe=off&start="
        super
      end

      # Extracts the total number of hits from a result page and stores it in
      # @totalhits (0 when the summary line is not found).
      #
      # html - String with the body of the result page.
      #
      # The summary line being matched looks like:
      #   Results <b>1</b> - <b>8</b> of <b>8</b> from <b>www.google.com</b>
      def parse(html)
        # String#scan always returns an Array, so only emptiness is checked.
        hits = html.scan(/<\/b> of [\w\s]*<b>(.*)<\/b> from /)
        @totalhits = hits.empty? ? 0 : totalhits(hits[0][0].gsub(",", "").to_i)
      end

      # Scans a result page for links to Jigsaw profiles and records the
      # person name found in each link title.
      #
      # text - String with the body of the result page.
      #
      # Every match appends the name (split on spaces into an Array of words)
      # to @people and a [name, "P", ENGINE_CLASS_NAME, "N"] row to @results.
      def crawl_people(text)
        # NOTE(review): `[\sonmousedown=...]*` is a character class, so it
        # matches any run of those characters rather than the literal
        # onmousedown attribute; kept as-is to preserve matching behavior.
        text.scan(/<a href="(http\:\/\/www.jigsaw.com\/scid[0-9A-Za-z\/?&=@+%.;'_-]+\.xhtml)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*),/).each do |profile|
          # profile[0] is the profile URL, profile[1] the name in the title.
          name = profile[1].split(" ")
          @people << name
          @results << [name, "P", self.class.to_s.upcase, "N"]
        end
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module ESearchy
  module SocialEngines
    # People-search engine that looks for Plaxo profile pages through a
    # Google query restricted to plaxo.com/profile/show/.
    #
    # NOTE(review): ENGINE, PORT, NUM and TYPE are presumably consumed by
    # ESearchy::GenericEngine (not visible here) -- confirm their meaning there.
    class Plaxo < ESearchy::GenericEngine
      ENGINE = "www.google.com"
      PORT = 80
      NUM = 100
      TYPE = 2

      # Builds the Google query path for the configured company, stores it in
      # @querypath and then delegates to the parent class via +super+.
      #
      # Raises ESearchyMissingCompany when @company is nil or empty. The
      # previous `@querypath = "..." or raise ...` form could never raise: a
      # String is always truthy, and a nil company failed inside CGI.escape
      # instead of reporting the intended error.
      def search
        if @company.to_s.empty?
          raise ESearchyMissingCompany, "Missing company Object.company=(value)"
        end

        # NOTE(review): the query ends the company with a double quote that
        # has no matching opening quote -- confirm whether a quoted phrase
        # ("Work * <company>") was intended. Left unchanged here.
        @querypath = "/cse?q=site:plaxo.com/profile/show/+Work+*+" +
                     "#{CGI.escape(@company)}\"" +
                     "&hl=en&cof=&num=100&filter=0&safe=off&start="
        super
      end

      # Extracts the total number of hits from a result page and stores it in
      # @totalhits (0 when the summary line is not found).
      #
      # html - String with the body of the result page.
      #
      # The summary line being matched looks like:
      #   Results <b>1</b> - <b>8</b> of <b>8</b> from <b>www.google.com</b>
      def parse(html)
        # String#scan always returns an Array, so only emptiness is checked.
        hits = html.scan(/<\/b> of [\w\s]*<b>(.*)<\/b> from /)
        @totalhits = hits.empty? ? 0 : totalhits(hits[0][0].gsub(",", "").to_i)
      end

      # Scans a result page for result links and records the person name
      # found in each link title (the text before the first apostrophe).
      #
      # text - String with the body of the result page.
      #
      # Every match appends the name (split on spaces into an Array of words)
      # to @people and a [name, "P", ENGINE_CLASS_NAME, "N"] row to @results.
      def crawl_people(text)
        # NOTE(review): `[\sonmousedown=...]*` is a character class, so it
        # matches any run of those characters rather than the literal
        # onmousedown attribute; kept as-is to preserve matching behavior.
        text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*)'/).each do |profile|
          # profile[0] is the link URL, profile[1] the name in the title.
          name = profile[1].split(" ")
          @people << name
          @results << [name, "P", self.class.to_s.upcase, "N"]
        end
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module ESearchy
  module SocialEngines
    # People-search engine that looks for Ziggs profile pages through a
    # Google query restricted to www.ziggs.com/apps/profile/.
    #
    # NOTE(review): ENGINE, PORT, NUM and TYPE are presumably consumed by
    # ESearchy::GenericEngine (not visible here) -- confirm their meaning there.
    class Ziggs < ESearchy::GenericEngine
      ENGINE = "www.google.com"
      PORT = 80
      NUM = 100
      TYPE = 2

      # Builds the Google query path for the configured company, stores it in
      # @querypath and then delegates to the parent class via +super+.
      #
      # Raises ESearchyMissingCompany when @company is nil or empty. The
      # previous `@querypath = "..." or raise ...` form could never raise: a
      # String is always truthy, and a nil company failed inside CGI.escape
      # instead of reporting the intended error.
      def search
        if @company.to_s.empty?
          raise ESearchyMissingCompany, "Missing company Object.company=(value)"
        end

        # The quoted phrase sent to the engine is: "Company: * <company>"
        @querypath = "/cse?q=site:www.ziggs.com/apps/profile/+\"Company:+*+" +
                     "#{CGI.escape(@company)}\"" +
                     "&hl=en&cof=&num=100&filter=0&safe=off&start="
        super
      end

      # Extracts the total number of hits from a result page and stores it in
      # @totalhits (0 when the summary line is not found).
      #
      # html - String with the body of the result page.
      #
      # The summary line being matched looks like:
      #   Results <b>1</b> - <b>8</b> of <b>8</b> from <b>www.google.com</b>
      def parse(html)
        # String#scan always returns an Array, so only emptiness is checked.
        hits = html.scan(/<\/b> of [\w\s]*<b>(.*)<\/b> from /)
        @totalhits = hits.empty? ? 0 : totalhits(hits[0][0].gsub(",", "").to_i)
      end

      # Scans a result page for result links and records the person name
      # found in each link title (the text before the " -" separator).
      #
      # text - String with the body of the result page.
      #
      # Every match appends the name (split on spaces into an Array of words)
      # to @people and a [name, "P", ENGINE_CLASS_NAME, "N"] row to @results.
      def crawl_people(text)
        # NOTE(review): `[\sonmousedown=...]*` is a character class, so it
        # matches any run of those characters rather than the literal
        # onmousedown attribute; kept as-is to preserve matching behavior.
        text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*) -/).each do |profile|
          # profile[0] is the link URL, profile[1] the name in the title.
          name = profile[1].split(" ")
          @people << name
          @results << [name, "P", self.class.to_s.upcase, "N"]
        end
      end
    end
  end
end
|
data/lib/esearchy/esearchy.rb
CHANGED
@@ -3,9 +3,8 @@ def D m
|
|
3
3
|
end
|
4
4
|
|
5
5
|
module ESearchy
|
6
|
-
VERSION = "0.2.0.5"
|
6
|
+
VERSION = "0.2.0.6"
|
7
7
|
@@log = false
|
8
|
-
#BUGMENOT = ESearchy::Bugmenot::fetch("linkedin.com")
|
9
8
|
|
10
9
|
def self.log
|
11
10
|
@@log
|
@@ -39,7 +38,10 @@ module ESearchy
|
|
39
38
|
:GoogleProfiles => ESearchy::SocialEngines::GoogleProfiles,
|
40
39
|
:Naymz => ESearchy::SocialEngines::Naymz,
|
41
40
|
:Classmates => ESearchy::SocialEngines::Classmates,
|
42
|
-
:Spoke => ESearchy::SocialEngines::Spoke
|
41
|
+
:Spoke => ESearchy::SocialEngines::Spoke,
|
42
|
+
:JigSaw => ESearchy::SocialEngines::JigSaw,
|
43
|
+
:Ziggs => ESearchy::SocialEngines::Ziggs,
|
44
|
+
:Plaxo => ESearchy::SocialEngines::Plaxo
|
43
45
|
}
|
44
46
|
|
45
47
|
def initialize(args, &block)
|
@@ -2,4 +2,7 @@ require 'esearchy/SocialEngines/classmates'
|
|
2
2
|
require 'esearchy/SocialEngines/googleprofiles'
|
3
3
|
require 'esearchy/SocialEngines/linkedin'
|
4
4
|
require 'esearchy/SocialEngines/naymz'
|
5
|
-
require 'esearchy/SocialEngines/spoke'
|
5
|
+
require 'esearchy/SocialEngines/spoke'
|
6
|
+
require 'esearchy/SocialEngines/jigsaw'
|
7
|
+
require 'esearchy/SocialEngines/ziggs'
|
8
|
+
require 'esearchy/SocialEngines/plaxo'
|
metadata
CHANGED
@@ -6,8 +6,8 @@ version: !ruby/object:Gem::Version
|
|
6
6
|
- 0
|
7
7
|
- 2
|
8
8
|
- 0
|
9
|
-
- 5
|
10
|
-
version: 0.2.0.5
|
9
|
+
- 6
|
10
|
+
version: 0.2.0.6
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Matias P. Brutti
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-03-
|
18
|
+
date: 2010-03-09 00:00:00 -03:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -102,10 +102,12 @@ files:
|
|
102
102
|
- lib/esearchy/searchengines.rb
|
103
103
|
- lib/esearchy/SocialEngines/classmates.rb
|
104
104
|
- lib/esearchy/SocialEngines/googleprofiles.rb
|
105
|
+
- lib/esearchy/SocialEngines/jigsaw.rb
|
105
106
|
- lib/esearchy/SocialEngines/linkedin.rb
|
106
|
-
- lib/esearchy/SocialEngines/linkedinfull.rb
|
107
107
|
- lib/esearchy/SocialEngines/naymz.rb
|
108
|
+
- lib/esearchy/SocialEngines/plaxo.rb
|
108
109
|
- lib/esearchy/SocialEngines/spoke.rb
|
110
|
+
- lib/esearchy/SocialEngines/ziggs.rb
|
109
111
|
- lib/esearchy/socialengines.rb
|
110
112
|
- lib/esearchy/useragent.rb
|
111
113
|
- lib/esearchy.rb
|
@@ -1,100 +0,0 @@
|
|
1
|
-
require 'net/https'
|
2
|
-
module ESearchy
|
3
|
-
module SocialEngines
|
4
|
-
class LinkedIn < ESearchy::GenericEngine
|
5
|
-
ENGINE = "www.linkedin.com"
|
6
|
-
PORT = 80
|
7
|
-
NUM = 1
|
8
|
-
TYPE = 2
|
9
|
-
|
10
|
-
def search
|
11
|
-
@querypath = "/search?search=&currentCompany=co&company=" + CGI.escape(@company) +
|
12
|
-
"&proposalType=Y&newnessType=Y&pplSearchOrigin=MDYS&searchLocationType=Y&page_num=" or
|
13
|
-
raise ESearchyMissingCompany, "Mssing website url Object.company=(value)"
|
14
|
-
super
|
15
|
-
end
|
16
|
-
|
17
|
-
def parse( html )
|
18
|
-
p html
|
19
|
-
p html.scan(/<p class="summary">[\n\s]+<strong>(.*)<\/strong> results/)#.gsub(/,|./,"")
|
20
|
-
#unless @was_here
|
21
|
-
# @totalhits= totalhits html.scan(/<p class="summary">[\n\s]+<strong>(.*)<\/strong> results/)[0][0].to_i
|
22
|
-
#end
|
23
|
-
end
|
24
|
-
|
25
|
-
def credentials=(c)
|
26
|
-
@user = c[0].to_s
|
27
|
-
@pwd = c[1].to_s
|
28
|
-
LinkedIn.const_set :HEADER, login
|
29
|
-
self.start=(1)
|
30
|
-
end
|
31
|
-
|
32
|
-
def maxhits=(v)
|
33
|
-
super v/10
|
34
|
-
end
|
35
|
-
|
36
|
-
private
|
37
|
-
def crawl_people(html)
|
38
|
-
list = html.scan(/title="View profile">[\n\s]+<span class="given-name">(.*)<\/span>\
|
39
|
-
[\n\s]+<span class="family-name">(.*)<\/span>/)
|
40
|
-
@people.concat(list).uniq!
|
41
|
-
list.each { |p| @results << [p, "P", self.class.to_s.upcase, "N"] }
|
42
|
-
end
|
43
|
-
|
44
|
-
def login
|
45
|
-
begin
|
46
|
-
get ENGINE, PORT, "/secure/login?trk=hb_signin", {'User-Agent' => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.1.5) Gecko/20091102"} do |r|
|
47
|
-
@l_headers = r.to_hash
|
48
|
-
@l_headers.each {|k,v| @l_headers[k] = v.to_s}
|
49
|
-
@csrfToken = r.body.scan(/<input type="hidden" name="csrfToken" value="ajax:(.*)">/)[0][0]
|
50
|
-
puts "------------------------------------------------------------------------------------"
|
51
|
-
puts "------------------------------------------------------------------------------------"
|
52
|
-
p @l_headers
|
53
|
-
p @csrfToken
|
54
|
-
puts "------------------------------------------------------------------------------------"
|
55
|
-
puts "------------------------------------------------------------------------------------"
|
56
|
-
end
|
57
|
-
http = Net::HTTP.new(ENGINE,443)
|
58
|
-
http.use_ssl = true
|
59
|
-
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
60
|
-
http.start do |http|
|
61
|
-
body = "csrfToken=ajax:#{@csrfToken}" +
|
62
|
-
"session_key=#{@user}" +
|
63
|
-
"&session_password=#{@pwd}" +
|
64
|
-
"&session_login=Sign+In&session_login=&session_rikey="
|
65
|
-
|
66
|
-
@l_headers['Host'] = "www.linkedin.com"
|
67
|
-
@l_headers['User-Agent'] = "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5"
|
68
|
-
@l_headers['Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
|
69
|
-
@l_headers['Accept-Language'] = "en-us,en;q=0.5"
|
70
|
-
@l_headers['Accept-Charset'] = "ISO-8859-1,utf-8;q=0.7,*;q=0.7"
|
71
|
-
@l_headers['Keep-Alive'] = "300"
|
72
|
-
@l_headers['Connection'] = "keep-alive"
|
73
|
-
@l_headers['Referer'] = "https://www.linkedin.com/secure/login?trk=hb_signin"
|
74
|
-
@l_headers['Cookie'] = "JSESSIONID=\"ajax:5367441617418183976\"; visit=G; bcookie=\"v=1&8231965c-b4b7-48f2-8349-76514ba89b69\"; lang=\"v=2&lang=en&c=\"; NSC_MC_QH_MFP=e242089229a3; __utma=226841088.2037160969.1259078198.1259078198.1259078198.1; __utmb=226841088.2.10.1259078198; __utmc=226841088; __utmz=226841088.1259078198.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmv=226841088.user; leo_auth_token=\"GST:9_t6crYtB4AWStfoqhWQ6LYPKakWfHk_dotQyAHagiRX1HlEvqVt5-:1259081816:56d4aecb2e985d7f8a30d74e758f261ea8b92065\"; NSC_MC_WT_YUSL_IUUQ=e2420f8429a0"
|
75
|
-
@l_headers['Content-Type'] = "application/x-www-form-urlencoded"
|
76
|
-
@l_headers['Content-Length'] = body.size.to_s
|
77
|
-
|
78
|
-
request = Net::HTTP::Post.new("/secure/login", @l_headers)
|
79
|
-
request.body = CGI.escape(body)
|
80
|
-
response = http.request(request)
|
81
|
-
case response
|
82
|
-
when Net::HTTPSuccess, Net::HTTPRedirection
|
83
|
-
puts "------------------------------------------------------------------------------"
|
84
|
-
puts "------------------------------------------------------------------------------"
|
85
|
-
p response.to_hash
|
86
|
-
p response.body
|
87
|
-
puts "-----------------------------------------------------------------------------"
|
88
|
-
puts "-----------------------------------------------------------------------------"
|
89
|
-
return {'Cookie' => response['Set-Cookie'], 'User-Agent' => UserAgent::fetch}
|
90
|
-
else
|
91
|
-
return response.error!
|
92
|
-
end
|
93
|
-
end
|
94
|
-
rescue Net::HTTPFatalError
|
95
|
-
D "Error: Something went wrong while login to LinkedIn.\n\t${$@}"
|
96
|
-
end
|
97
|
-
end
|
98
|
-
end
|
99
|
-
end
|
100
|
-
end
|