esearchy 0.2.0.5 → 0.2.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/esearchy +32 -1
- data/lib/esearchy/SocialEngines/jigsaw.rb +36 -0
- data/lib/esearchy/SocialEngines/plaxo.rb +36 -0
- data/lib/esearchy/SocialEngines/ziggs.rb +36 -0
- data/lib/esearchy/esearchy.rb +5 -3
- data/lib/esearchy/socialengines.rb +4 -1
- metadata +6 -4
- data/lib/esearchy/SocialEngines/linkedinfull.rb +0 -100
data/bin/esearchy
CHANGED
@@ -58,6 +58,9 @@ opts = GetoptLong.new(
|
|
58
58
|
[ '--enable-naymz', GetoptLong::NO_ARGUMENT ],
|
59
59
|
[ '--enable-ggroups', GetoptLong::NO_ARGUMENT ],
|
60
60
|
[ '--enable-spoke', GetoptLong::NO_ARGUMENT ],
|
61
|
+
[ '--enable-jigsaw', GetoptLong::NO_ARGUMENT ],
|
62
|
+
[ '--enable-ziggs', GetoptLong::NO_ARGUMENT ],
|
63
|
+
[ '--enable-plaxo', GetoptLong::NO_ARGUMENT ],
|
61
64
|
[ '--enable-pgp', GetoptLong::NO_ARGUMENT ],
|
62
65
|
[ '--enable-usenet', GetoptLong::NO_ARGUMENT ],
|
63
66
|
[ '--enable-spider', GetoptLong::NO_ARGUMENT ],
|
@@ -70,6 +73,9 @@ opts = GetoptLong.new(
|
|
70
73
|
[ '--disable-naymz', GetoptLong::NO_ARGUMENT ],
|
71
74
|
[ '--disable-ggroups', GetoptLong::NO_ARGUMENT ],
|
72
75
|
[ '--disable-spoke', GetoptLong::NO_ARGUMENT ],
|
76
|
+
[ '--disable-jigsaw', GetoptLong::NO_ARGUMENT ],
|
77
|
+
[ '--disable-ziggs', GetoptLong::NO_ARGUMENT ],
|
78
|
+
[ '--disable-plaxo', GetoptLong::NO_ARGUMENT ],
|
73
79
|
[ '--disable-pgp', GetoptLong::NO_ARGUMENT ],
|
74
80
|
[ '--disable-usenet', GetoptLong::NO_ARGUMENT ],
|
75
81
|
[ '--disable-spider', GetoptLong::NO_ARGUMENT ],
|
@@ -309,6 +315,12 @@ opts.each do |opt, arg|
|
|
309
315
|
puts "\t Enables Spoke searches.\n"
|
310
316
|
puts "--enable-ggroups"
|
311
317
|
puts "\t Enables Google Groups searches.\n"
|
318
|
+
puts "--enable-jigsaw"
|
319
|
+
puts "\t Enables Jigsaw searches.\n"
|
320
|
+
puts "--enable-ziggs"
|
321
|
+
puts "\t Enables Ziggs searches.\n"
|
322
|
+
puts "--enable-plaxo"
|
323
|
+
puts "\t Enables Plaxo searches.\n"
|
312
324
|
puts "--enable-pgp"
|
313
325
|
puts "\t Enables PGP searches.\n"
|
314
326
|
puts "--enable-usenet"
|
@@ -329,6 +341,12 @@ opts.each do |opt, arg|
|
|
329
341
|
puts "\t Disables Naymz searches.\n"
|
330
342
|
puts "--disable-spoke"
|
331
343
|
puts "\t Disables Spoke searches.\n"
|
344
|
+
puts "--disable-jigsaw"
|
345
|
+
puts "\t Disables Jigsaw searches.\n"
|
346
|
+
puts "--disable-ziggs"
|
347
|
+
puts "\t Disables Ziggs searches.\n"
|
348
|
+
puts "--disable-plaxo"
|
349
|
+
puts "\t Disables Plaxo searches.\n"
|
332
350
|
puts "--disable-ggroups"
|
333
351
|
puts "\t Disables Google Groups searches.\n"
|
334
352
|
puts "--disable-pgp"
|
@@ -339,7 +357,8 @@ opts.each do |opt, arg|
|
|
339
357
|
#END OF HELP
|
340
358
|
exit(0)
|
341
359
|
when '--enable-all' then
|
342
|
-
@people_engines = [:LinkedIn, :Naymz, :Classmates, :GoogleProfiles,
|
360
|
+
@people_engines = [:LinkedIn, :Naymz, :Classmates, :GoogleProfiles,
|
361
|
+
:Spoke, :JigSaw, :Ziggs, :Plaxo]
|
343
362
|
@email_engines = [:Google, :Bing, :Yahoo, :Altavista, :PGP, :Spider ,:Usenet, :GoogleGroups ]
|
344
363
|
when '--enable-people' then
|
345
364
|
@people_engines = [:LinkedIn, :Naymz, :Classmates, :GoogleProfiles, :Spoke]
|
@@ -367,6 +386,12 @@ opts.each do |opt, arg|
|
|
367
386
|
@email_engines << :GoogleGroups
|
368
387
|
when '--enable-spoke' then
|
369
388
|
@people_engines << :Spoke
|
389
|
+
when '--enable-jigsaw' then
|
390
|
+
@people_engines << :JigSaw
|
391
|
+
when '--enable-ziggs' then
|
392
|
+
@people_engines << :Ziggs
|
393
|
+
when '--enable-plaxo' then
|
394
|
+
@people_engines << :Plaxo
|
370
395
|
when '--enable-pgp' then
|
371
396
|
@email_engines << :PGP
|
372
397
|
when '--enable-usenet' then
|
@@ -397,6 +422,12 @@ opts.each do |opt, arg|
|
|
397
422
|
@email_engines.delete(:GoogleGroups)
|
398
423
|
when '--disable-spoke' then
|
399
424
|
@people_engines.delete(:Spoke)
|
425
|
+
when '--disable-jigsaw' then
|
426
|
+
@people_engines.delete(:JigSaw)
|
427
|
+
when '--disable-ziggs' then
|
428
|
+
@people_engines.delete(:Ziggs)
|
429
|
+
when '--disable-plaxo' then
|
430
|
+
@people_engines.delete(:Plaxo)
|
400
431
|
when '--disable-pgp' then
|
401
432
|
@email_engines.delete(:PGP)
|
402
433
|
when '--disable-usenet' then
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module ESearchy
  module SocialEngines
    # People engine that looks for Jigsaw profiles by sending a
    # site-restricted query (site:www.jigsaw.com) to Google and pulling the
    # names out of the result links.
    #
    # ENGINE, PORT, NUM and TYPE are not used in this class itself; presumably
    # they are read by ESearchy::GenericEngine (confirm against that class).
    class JigSaw < ESearchy::GenericEngine
      ENGINE = "www.google.com"
      PORT = 80
      NUM = 100
      TYPE = 2

      # Builds @querypath for the configured company and then calls the
      # superclass implementation of #search.
      #
      # Raises ESearchyMissingCompany when @company is nil. The previous
      # form, `@querypath = "..." or raise ...`, could never raise: `or`
      # binds looser than `=` and a String is always truthy, so a nil
      # company crashed inside CGI.escape instead.
      def search
        if @company.nil?
          raise ESearchyMissingCompany, "Missing company name Object.company=(value)"
        end

        company = CGI.escape(@company)
        @querypath = "/cse?q=site:www.jigsaw.com/++%2B+#{company}+%2B" +
                     "+intitle:\"#{company}\"" +
                     "&hl=en&cof=&num=100&filter=0&safe=off&start="
        super
      end

      # Reads the total-results counter from a result page and stores it in
      # @totalhits. When the counter is not found @totalhits is set to 0;
      # otherwise the number (thousands separators removed) is passed through
      # the inherited #totalhits helper.
      def parse(html)
        # Results <b>1</b> - <b>8</b> of <b>8</b> from <b>www.google.com</b>
        hits = html.scan(/<\/b> of [\w\s]*<b>(.*)<\/b> from /)
        # String#scan always returns an Array, so only emptiness needs checking.
        @totalhits = hits.empty? ? 0 : totalhits(hits[0][0].gsub(",", "").to_i)
      end

      # Scans the page for result links pointing at Jigsaw profile pages and
      # records the link text that precedes the first comma as a person.
      # Each name is split on spaces and appended to @people and, wrapped in
      # a result row, to @results.
      #
      # NOTE(review): the bracketed `onmousedown=...` part of the pattern is a
      # character class, not a literal attribute match; it is kept unchanged
      # here - confirm whether an optional group was intended.
      def crawl_people(text)
        text.scan(/<a href="(http\:\/\/www.jigsaw.com\/scid[0-9A-Za-z\/?&=@+%.;'_-]+\.xhtml)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*),/).each do |profile|
          name_parts = profile[1].split(" ")
          @people << name_parts
          @results << [name_parts, "P", self.class.to_s.upcase, "N"]
        end
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module ESearchy
  module SocialEngines
    # People engine that looks for Plaxo profiles by sending a
    # site-restricted query (site:plaxo.com/profile/show/) to Google and
    # pulling the names out of the result links.
    #
    # ENGINE, PORT, NUM and TYPE are not used in this class itself; presumably
    # they are read by ESearchy::GenericEngine (confirm against that class).
    class Plaxo < ESearchy::GenericEngine
      ENGINE = "www.google.com"
      PORT = 80
      NUM = 100
      TYPE = 2

      # Builds @querypath for the configured company and then calls the
      # superclass implementation of #search.
      #
      # Raises ESearchyMissingCompany when @company is nil. The previous
      # form, `@querypath = "..." or raise ...`, could never raise: `or`
      # binds looser than `=` and a String is always truthy, so a nil
      # company crashed inside CGI.escape instead.
      def search
        if @company.nil?
          raise ESearchyMissingCompany, "Missing company name Object.company=(value)"
        end

        # NOTE(review): the query ends the company with a closing quote but
        # never opens one (the Ziggs engine uses +"Company:+*+...") - kept
        # byte-for-byte; confirm whether an opening quote is missing.
        @querypath = "/cse?q=site:plaxo.com/profile/show/+Work+*+" +
                     "#{CGI.escape(@company)}\"" +
                     "&hl=en&cof=&num=100&filter=0&safe=off&start="
        super
      end

      # Reads the total-results counter from a result page and stores it in
      # @totalhits. When the counter is not found @totalhits is set to 0;
      # otherwise the number (thousands separators removed) is passed through
      # the inherited #totalhits helper.
      def parse(html)
        # Results <b>1</b> - <b>8</b> of <b>8</b> from <b>www.google.com</b>
        hits = html.scan(/<\/b> of [\w\s]*<b>(.*)<\/b> from /)
        # String#scan always returns an Array, so only emptiness needs checking.
        @totalhits = hits.empty? ? 0 : totalhits(hits[0][0].gsub(",", "").to_i)
      end

      # Scans the page for result links and records the link text that
      # precedes the first apostrophe as a person. Each name is split on
      # spaces and appended to @people and, wrapped in a result row, to
      # @results.
      #
      # NOTE(review): the bracketed `onmousedown=...` part of the pattern is a
      # character class, not a literal attribute match; it is kept unchanged
      # here - confirm whether an optional group was intended.
      def crawl_people(text)
        text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*)'/).each do |profile|
          name_parts = profile[1].split(" ")
          @people << name_parts
          @results << [name_parts, "P", self.class.to_s.upcase, "N"]
        end
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module ESearchy
  module SocialEngines
    # People engine that looks for Ziggs profiles by sending a
    # site-restricted query (site:www.ziggs.com/apps/profile/) to Google and
    # pulling the names out of the result links.
    #
    # ENGINE, PORT, NUM and TYPE are not used in this class itself; presumably
    # they are read by ESearchy::GenericEngine (confirm against that class).
    class Ziggs < ESearchy::GenericEngine
      ENGINE = "www.google.com"
      PORT = 80
      NUM = 100
      TYPE = 2

      # Builds @querypath for the configured company and then calls the
      # superclass implementation of #search.
      #
      # Raises ESearchyMissingCompany when @company is nil. The previous
      # form, `@querypath = "..." or raise ...`, could never raise: `or`
      # binds looser than `=` and a String is always truthy, so a nil
      # company crashed inside CGI.escape instead.
      def search
        if @company.nil?
          raise ESearchyMissingCompany, "Missing company name Object.company=(value)"
        end

        @querypath = "/cse?q=site:www.ziggs.com/apps/profile/+\"Company:+*+" +
                     "#{CGI.escape(@company)}\"" +
                     "&hl=en&cof=&num=100&filter=0&safe=off&start="
        super
      end

      # Reads the total-results counter from a result page and stores it in
      # @totalhits. When the counter is not found @totalhits is set to 0;
      # otherwise the number (thousands separators removed) is passed through
      # the inherited #totalhits helper.
      def parse(html)
        # Results <b>1</b> - <b>8</b> of <b>8</b> from <b>www.google.com</b>
        hits = html.scan(/<\/b> of [\w\s]*<b>(.*)<\/b> from /)
        # String#scan always returns an Array, so only emptiness needs checking.
        @totalhits = hits.empty? ? 0 : totalhits(hits[0][0].gsub(",", "").to_i)
      end

      # Scans the page for result links and records the link text that
      # precedes " -" as a person. Each name is split on spaces and appended
      # to @people and, wrapped in a result row, to @results.
      #
      # NOTE(review): the bracketed `onmousedown=...` part of the pattern is a
      # character class, not a literal attribute match; it is kept unchanged
      # here - confirm whether an optional group was intended.
      def crawl_people(text)
        text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*) -/).each do |profile|
          name_parts = profile[1].split(" ")
          @people << name_parts
          @results << [name_parts, "P", self.class.to_s.upcase, "N"]
        end
      end
    end
  end
end
|
data/lib/esearchy/esearchy.rb
CHANGED
@@ -3,9 +3,8 @@ def D m
|
|
3
3
|
end
|
4
4
|
|
5
5
|
module ESearchy
|
6
|
-
VERSION = "0.2.0.
|
6
|
+
VERSION = "0.2.0.6"
|
7
7
|
@@log = false
|
8
|
-
#BUGMENOT = ESearchy::Bugmenot::fetch("linkedin.com")
|
9
8
|
|
10
9
|
def self.log
|
11
10
|
@@log
|
@@ -39,7 +38,10 @@ module ESearchy
|
|
39
38
|
:GoogleProfiles => ESearchy::SocialEngines::GoogleProfiles,
|
40
39
|
:Naymz => ESearchy::SocialEngines::Naymz,
|
41
40
|
:Classmates => ESearchy::SocialEngines::Classmates,
|
42
|
-
:Spoke => ESearchy::SocialEngines::Spoke
|
41
|
+
:Spoke => ESearchy::SocialEngines::Spoke,
|
42
|
+
:JigSaw => ESearchy::SocialEngines::JigSaw,
|
43
|
+
:Ziggs => ESearchy::SocialEngines::Ziggs,
|
44
|
+
:Plaxo => ESearchy::SocialEngines::Plaxo
|
43
45
|
}
|
44
46
|
|
45
47
|
def initialize(args, &block)
|
@@ -2,4 +2,7 @@ require 'esearchy/SocialEngines/classmates'
|
|
2
2
|
require 'esearchy/SocialEngines/googleprofiles'
|
3
3
|
require 'esearchy/SocialEngines/linkedin'
|
4
4
|
require 'esearchy/SocialEngines/naymz'
|
5
|
-
require 'esearchy/SocialEngines/spoke'
|
5
|
+
require 'esearchy/SocialEngines/spoke'
|
6
|
+
require 'esearchy/SocialEngines/jigsaw'
|
7
|
+
require 'esearchy/SocialEngines/ziggs'
|
8
|
+
require 'esearchy/SocialEngines/plaxo'
|
metadata
CHANGED
@@ -6,8 +6,8 @@ version: !ruby/object:Gem::Version
|
|
6
6
|
- 0
|
7
7
|
- 2
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.2.0.
|
9
|
+
- 6
|
10
|
+
version: 0.2.0.6
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Matias P. Brutti
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-03-
|
18
|
+
date: 2010-03-09 00:00:00 -03:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -102,10 +102,12 @@ files:
|
|
102
102
|
- lib/esearchy/searchengines.rb
|
103
103
|
- lib/esearchy/SocialEngines/classmates.rb
|
104
104
|
- lib/esearchy/SocialEngines/googleprofiles.rb
|
105
|
+
- lib/esearchy/SocialEngines/jigsaw.rb
|
105
106
|
- lib/esearchy/SocialEngines/linkedin.rb
|
106
|
-
- lib/esearchy/SocialEngines/linkedinfull.rb
|
107
107
|
- lib/esearchy/SocialEngines/naymz.rb
|
108
|
+
- lib/esearchy/SocialEngines/plaxo.rb
|
108
109
|
- lib/esearchy/SocialEngines/spoke.rb
|
110
|
+
- lib/esearchy/SocialEngines/ziggs.rb
|
109
111
|
- lib/esearchy/socialengines.rb
|
110
112
|
- lib/esearchy/useragent.rb
|
111
113
|
- lib/esearchy.rb
|
@@ -1,100 +0,0 @@
|
|
1
|
-
require 'net/https'
|
2
|
-
module ESearchy
|
3
|
-
module SocialEngines
|
4
|
-
class LinkedIn < ESearchy::GenericEngine
|
5
|
-
ENGINE = "www.linkedin.com"
|
6
|
-
PORT = 80
|
7
|
-
NUM = 1
|
8
|
-
TYPE = 2
|
9
|
-
|
10
|
-
def search
|
11
|
-
@querypath = "/search?search=¤tCompany=co&company=" + CGI.escape(@company) +
|
12
|
-
"&proposalType=Y&newnessType=Y&pplSearchOrigin=MDYS&searchLocationType=Y&page_num=" or
|
13
|
-
raise ESearchyMissingCompany, "Mssing website url Object.company=(value)"
|
14
|
-
super
|
15
|
-
end
|
16
|
-
|
17
|
-
def parse( html )
|
18
|
-
p html
|
19
|
-
p html.scan(/<p class="summary">[\n\s]+<strong>(.*)<\/strong> results/)#.gsub(/,|./,"")
|
20
|
-
#unless @was_here
|
21
|
-
# @totalhits= totalhits html.scan(/<p class="summary">[\n\s]+<strong>(.*)<\/strong> results/)[0][0].to_i
|
22
|
-
#end
|
23
|
-
end
|
24
|
-
|
25
|
-
def credentials=(c)
|
26
|
-
@user = c[0].to_s
|
27
|
-
@pwd = c[1].to_s
|
28
|
-
LinkedIn.const_set :HEADER, login
|
29
|
-
self.start=(1)
|
30
|
-
end
|
31
|
-
|
32
|
-
def maxhits=(v)
|
33
|
-
super v/10
|
34
|
-
end
|
35
|
-
|
36
|
-
private
|
37
|
-
def crawl_people(html)
|
38
|
-
list = html.scan(/title="View profile">[\n\s]+<span class="given-name">(.*)<\/span>\
|
39
|
-
[\n\s]+<span class="family-name">(.*)<\/span>/)
|
40
|
-
@people.concat(list).uniq!
|
41
|
-
list.each { |p| @results << [p, "P", self.class.to_s.upcase, "N"] }
|
42
|
-
end
|
43
|
-
|
44
|
-
def login
|
45
|
-
begin
|
46
|
-
get ENGINE, PORT, "/secure/login?trk=hb_signin", {'User-Agent' => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.1.5) Gecko/20091102"} do |r|
|
47
|
-
@l_headers = r.to_hash
|
48
|
-
@l_headers.each {|k,v| @l_headers[k] = v.to_s}
|
49
|
-
@csrfToken = r.body.scan(/<input type="hidden" name="csrfToken" value="ajax:(.*)">/)[0][0]
|
50
|
-
puts "------------------------------------------------------------------------------------"
|
51
|
-
puts "------------------------------------------------------------------------------------"
|
52
|
-
p @l_headers
|
53
|
-
p @csrfToken
|
54
|
-
puts "------------------------------------------------------------------------------------"
|
55
|
-
puts "------------------------------------------------------------------------------------"
|
56
|
-
end
|
57
|
-
http = Net::HTTP.new(ENGINE,443)
|
58
|
-
http.use_ssl = true
|
59
|
-
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
60
|
-
http.start do |http|
|
61
|
-
body = "csrfToken=ajax:#{@csrfToken}" +
|
62
|
-
"session_key=#{@user}" +
|
63
|
-
"&session_password=#{@pwd}" +
|
64
|
-
"&session_login=Sign+In&session_login=&session_rikey="
|
65
|
-
|
66
|
-
@l_headers['Host'] = "www.linkedin.com"
|
67
|
-
@l_headers['User-Agent'] = "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5"
|
68
|
-
@l_headers['Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
|
69
|
-
@l_headers['Accept-Language'] = "en-us,en;q=0.5"
|
70
|
-
@l_headers['Accept-Charset'] = "ISO-8859-1,utf-8;q=0.7,*;q=0.7"
|
71
|
-
@l_headers['Keep-Alive'] = "300"
|
72
|
-
@l_headers['Connection'] = "keep-alive"
|
73
|
-
@l_headers['Referer'] = "https://www.linkedin.com/secure/login?trk=hb_signin"
|
74
|
-
@l_headers['Cookie'] = "JSESSIONID=\"ajax:5367441617418183976\"; visit=G; bcookie=\"v=1&8231965c-b4b7-48f2-8349-76514ba89b69\"; lang=\"v=2&lang=en&c=\"; NSC_MC_QH_MFP=e242089229a3; __utma=226841088.2037160969.1259078198.1259078198.1259078198.1; __utmb=226841088.2.10.1259078198; __utmc=226841088; __utmz=226841088.1259078198.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmv=226841088.user; leo_auth_token=\"GST:9_t6crYtB4AWStfoqhWQ6LYPKakWfHk_dotQyAHagiRX1HlEvqVt5-:1259081816:56d4aecb2e985d7f8a30d74e758f261ea8b92065\"; NSC_MC_WT_YUSL_IUUQ=e2420f8429a0"
|
75
|
-
@l_headers['Content-Type'] = "application/x-www-form-urlencoded"
|
76
|
-
@l_headers['Content-Length'] = body.size.to_s
|
77
|
-
|
78
|
-
request = Net::HTTP::Post.new("/secure/login", @l_headers)
|
79
|
-
request.body = CGI.escape(body)
|
80
|
-
response = http.request(request)
|
81
|
-
case response
|
82
|
-
when Net::HTTPSuccess, Net::HTTPRedirection
|
83
|
-
puts "------------------------------------------------------------------------------"
|
84
|
-
puts "------------------------------------------------------------------------------"
|
85
|
-
p response.to_hash
|
86
|
-
p response.body
|
87
|
-
puts "-----------------------------------------------------------------------------"
|
88
|
-
puts "-----------------------------------------------------------------------------"
|
89
|
-
return {'Cookie' => response['Set-Cookie'], 'User-Agent' => UserAgent::fetch}
|
90
|
-
else
|
91
|
-
return response.error!
|
92
|
-
end
|
93
|
-
end
|
94
|
-
rescue Net::HTTPFatalError
|
95
|
-
D "Error: Something went wrong while login to LinkedIn.\n\t${$@}"
|
96
|
-
end
|
97
|
-
end
|
98
|
-
end
|
99
|
-
end
|
100
|
-
end
|