FreedomCoder-esearchy 0.0.6 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +25 -8
- data/bin/esearchy +3 -3
- data/lib/esearchy/bing.rb +4 -4
- data/lib/esearchy/google.rb +8 -7
- data/lib/esearchy/googlegroups.rb +6 -5
- data/lib/esearchy/linkedin.rb +10 -9
- data/lib/esearchy/pgp.rb +2 -2
- data/lib/esearchy/searchy.rb +76 -58
- data/lib/esearchy/yahoo.rb +3 -3
- data/lib/esearchy.rb +23 -11
- metadata +1 -1
data/README.rdoc
CHANGED
@@ -4,6 +4,9 @@
|
|
4
4
|
Esearchy is a small library capable of searching the internet for email addresses. Currently, the supported search methods are engines such as Google, Bing, Yahoo, PGP servers, GoogleGroups, LinkedIn, etc., but I intend to add many more.
|
5
5
|
|
6
6
|
Also, the library searches inside .pdf, .docx, .xlsx, .pptx, asn and .txt files for email addresses and adds them to the list of found accounts. Finally, we have support for .doc files, but for now only on Windows platforms.
|
7
|
+
In order to parse Microsoft Word (.doc):
|
8
|
+
* Either run on a Windows platform with Word installed, or
|
9
|
+
* Install AntiWord. ( http://www.winfield.demon.nl/ )
|
7
10
|
|
8
11
|
NOTE: THIS IS STILL BEING DEVELOPED. CODE IS SUBMITTED DAILY, SO BE AWARE THAT IT MIGHT NOT WORK PROPERLY ALL THE TIME. IF SOMETHING GOES WRONG, PLEASE RAISE AN ISSUE.
|
9
12
|
|
@@ -48,14 +51,28 @@ We now also have a LinkedIn search which looks for Names in the site. With those
|
|
48
51
|
* creates emails based on those names.
|
49
52
|
* searches Google and Yahoo for emails related to those people.
|
50
53
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
54
|
+
ESearchy.create "domain.com" do |d|
|
55
|
+
d.engines = { "LinkedIn" => Linkedin.new}
|
56
|
+
d.company_name "Domain Corp"
|
57
|
+
d.linkedin_credentials "myuser@linkedin.com", "12345"
|
58
|
+
d.maxhits = 100
|
59
|
+
d.search
|
60
|
+
d.save_to_file "linkedin_emails.txt"
|
61
|
+
end
|
62
|
+
|
63
|
+
In addition, you can now choose between a LIBRARY output mode (no output to IO) and an APP mode (the old output mode). Note that the default output mode is LIBRARY.
|
64
|
+
|
65
|
+
require 'esearchy'
|
66
|
+
ESearchy::LOG.level = ESearchy::APP
|
67
|
+
|
68
|
+
ESearchy.create "domain.com" do |d|
|
69
|
+
d.yahoo_key = "dsdsdsdsdsdsdsd"
|
70
|
+
d.bing_key = "dsdsdsdsdsdsdsdsd"
|
71
|
+
d.company_name "Domain Corp"
|
72
|
+
d.linkedin_credentials "myuser@linkedin.com", "12345"
|
73
|
+
d.maxhits = 500
|
74
|
+
d.search
|
75
|
+
end
|
59
76
|
|
60
77
|
== REQUIREMENTS:
|
61
78
|
|
data/bin/esearchy
CHANGED
@@ -25,7 +25,7 @@ opts.each do |opt, arg|
|
|
25
25
|
case opt
|
26
26
|
when '--help':
|
27
27
|
# BEGIN OF HELP
|
28
|
-
puts "\nHELP for Esearchy\n---------------------\n
|
28
|
+
ESearchy::LOG.puts "\nHELP for Esearchy\n---------------------\n
|
29
29
|
--help, -h
|
30
30
|
\tWell I guess you know what this is for (To obtain this Help).\n
|
31
31
|
--domain, -d [domain.com]
|
@@ -47,7 +47,7 @@ opts.each do |opt, arg|
|
|
47
47
|
@domains << line
|
48
48
|
end
|
49
49
|
else
|
50
|
-
puts "File not found"
|
50
|
+
ESearchy::LOG.puts "File not found"
|
51
51
|
end
|
52
52
|
when '--yahoo_key':
|
53
53
|
@yahoo_key = arg
|
@@ -60,7 +60,7 @@ opts.each do |opt, arg|
|
|
60
60
|
when '--maxhits':
|
61
61
|
@maxhits = arg
|
62
62
|
else
|
63
|
-
puts "Unknown command. Please try again"
|
63
|
+
ESearchy::LOG.puts "Unknown command. Please try again"
|
64
64
|
exit(0)
|
65
65
|
end
|
66
66
|
end
|
data/lib/esearchy/bing.rb
CHANGED
@@ -35,12 +35,12 @@ class Bing
|
|
35
35
|
parse(response.body)
|
36
36
|
@start = @start + 50
|
37
37
|
if @totalhits > @start
|
38
|
-
puts "Searching in URL: #{self.class} up to point #{@start}"
|
38
|
+
ESearchy::LOG.puts "Searching in URL: #{self.class} up to point #{@start}"
|
39
39
|
search_emails(response.body)
|
40
40
|
sleep(4)
|
41
41
|
search(query)
|
42
42
|
else
|
43
|
-
puts "Searching in URL: #{self.class} up to point #{@start}"
|
43
|
+
ESearchy::LOG.puts "Searching in URL: #{self.class} up to point #{@start}"
|
44
44
|
search_emails(response.body)
|
45
45
|
end
|
46
46
|
else
|
@@ -48,9 +48,9 @@ class Bing
|
|
48
48
|
end
|
49
49
|
end
|
50
50
|
rescue Net::HTTPFatalError
|
51
|
-
puts "Error: Something went wrong with the HTTP request"
|
51
|
+
ESearchy::LOG.puts "Error: Something went wrong with the HTTP request"
|
52
52
|
rescue Errno::ECONNREFUSED
|
53
|
-
puts "Error: < Connection Refused > Hopefuly they have not banned us. :)"
|
53
|
+
ESearchy::LOG.puts "Error: < Connection Refused > Hopefuly they have not banned us. :)"
|
54
54
|
end
|
55
55
|
|
56
56
|
end
|
data/lib/esearchy/google.rb
CHANGED
@@ -17,7 +17,6 @@ class Google
|
|
17
17
|
@lock = Mutex.new
|
18
18
|
@threads = []
|
19
19
|
end
|
20
|
-
|
21
20
|
attr_accessor :emails
|
22
21
|
|
23
22
|
def search(query)
|
@@ -35,12 +34,12 @@ class Google
|
|
35
34
|
parse(response.body)
|
36
35
|
@start = @start + 100
|
37
36
|
if @totalhits > @start
|
38
|
-
puts "Searching in URL: #{self.class} up to point #{@start}"
|
37
|
+
ESearchy::LOG.puts "Searching in URL: #{self.class} up to point #{@start}"
|
39
38
|
search_emails(response.body)
|
40
39
|
sleep(4)
|
41
40
|
search(query)
|
42
41
|
else
|
43
|
-
puts "Searching in URL: #{self.class} up to point #{@start}"
|
42
|
+
ESearchy::LOG.puts "Searching in URL: #{self.class} up to point #{@start}"
|
44
43
|
search_emails(response.body)
|
45
44
|
end
|
46
45
|
else
|
@@ -48,13 +47,15 @@ class Google
|
|
48
47
|
end
|
49
48
|
end
|
50
49
|
rescue Net::HTTPFatalError
|
51
|
-
puts "Error: Something went wrong with the HTTP request"
|
50
|
+
ESearchy::LOG.puts "Error: Something went wrong with the HTTP request"
|
52
51
|
end
|
53
52
|
end
|
54
53
|
|
55
54
|
def parse(html)
|
56
|
-
@totalhits= html.scan(/<\/b> of about <b>(.*)<\/b> for /)[0][0].gsub(",","").to_i
|
57
|
-
html.scan(/<div class=g><span class="b w xsm">\[([A-Z]+)\]<\/span> <h2 class=r><a href="
|
55
|
+
@totalhits= html.scan(/<\/b> of about <b>(.*)<\/b> for /)[0][0].gsub(",","").to_i if @totalhits == 0
|
56
|
+
html.scan(/<div class=g><span class="b w xsm">\[([A-Z]+)\]<\/span> <h2 class=r><a href="\
|
57
|
+
([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)"|<h2 class=r><a href="\
|
58
|
+
([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)"/).each do |result|
|
58
59
|
case result[0]
|
59
60
|
when /PDF/
|
60
61
|
@r_pdfs << result[1]
|
@@ -79,7 +80,7 @@ class Google
|
|
79
80
|
@r_urls << result[2]
|
80
81
|
end
|
81
82
|
else
|
82
|
-
puts "I do not parse the #{result[0]} filetype yet:)"
|
83
|
+
ESearchy::LOG.puts "I do not parse the #{result[0]} filetype yet:)"
|
83
84
|
end
|
84
85
|
end
|
85
86
|
end
|
@@ -33,12 +33,12 @@ class GoogleGroups
|
|
33
33
|
parse(response.body)
|
34
34
|
@start = @start + 100
|
35
35
|
if @totalhits > @start
|
36
|
-
puts "Searching in URL: #{self.class} up to point #{@start}"
|
36
|
+
ESearchy::LOG.puts "Searching in URL: #{self.class} up to point #{@start}"
|
37
37
|
search_emails(response.body)
|
38
38
|
sleep(4)
|
39
39
|
search(query)
|
40
40
|
else
|
41
|
-
puts "Searching in URL: #{self.class} up to point #{@start}"
|
41
|
+
ESearchy::LOG.puts "Searching in URL: #{self.class} up to point #{@start}"
|
42
42
|
search_emails(response.body)
|
43
43
|
end
|
44
44
|
else
|
@@ -46,13 +46,14 @@ class GoogleGroups
|
|
46
46
|
end
|
47
47
|
end
|
48
48
|
rescue Net::HTTPFatalError
|
49
|
-
puts "Error: Something went wrong with the HTTP request"
|
49
|
+
ESearchy::LOG.puts "Error: Something went wrong with the HTTP request"
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
53
53
|
def parse(html)
|
54
|
-
@totalhits=
|
55
|
-
html.scan(/<div class=g align="left"
|
54
|
+
@totalhits=html.scan(/<\/b> of about <b>(.*)<\/b> for /)[0][0].gsub(",","").to_i if @totalhits == 0
|
55
|
+
html.scan(/<div class=g align="left">\
|
56
|
+
<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" target=""/).each do |result|
|
56
57
|
case result[0]
|
57
58
|
when /.pdf$/i
|
58
59
|
@r_pdfs << result[0]
|
data/lib/esearchy/linkedin.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
local_path = "#{File.dirname(__FILE__)}/"
|
3
3
|
%w{yahoo google useragent}.each {|lib| require local_path + lib}
|
4
4
|
|
5
|
-
# http:///
|
6
5
|
class Linkedin
|
7
6
|
include Searchy
|
8
7
|
|
@@ -41,7 +40,7 @@ class Linkedin
|
|
41
40
|
end
|
42
41
|
end
|
43
42
|
rescue Net::HTTPFatalError
|
44
|
-
puts "Error: Something went wrong with the HTTP request"
|
43
|
+
ESearchy::LOG.puts "Error: Something went wrong with the HTTP request"
|
45
44
|
end
|
46
45
|
end
|
47
46
|
|
@@ -50,7 +49,7 @@ class Linkedin
|
|
50
49
|
begin
|
51
50
|
@cookie = login
|
52
51
|
rescue
|
53
|
-
puts "Unable to parse Linkedin. Something went Wrong with the Credentials"
|
52
|
+
ESearchy::LOG.puts "Unable to parse Linkedin. Something went Wrong with the Credentials"
|
54
53
|
return nil
|
55
54
|
end
|
56
55
|
begin
|
@@ -60,7 +59,8 @@ class Linkedin
|
|
60
59
|
# "&searchLocationType=Y&newnessType=Y" +
|
61
60
|
# "&proposalType=Y&pplSearchOrigin=ADVS&company=#{CGI.escape(@company_name)}" +
|
62
61
|
# "&sortCriteria=Relevance&page_num=#{@pages}", {'Cookie' => @cookie} )
|
63
|
-
|
62
|
+
|
63
|
+
headers = {'Cookie' => @cookie, 'User-Agent' => UserAgent::fetch}
|
64
64
|
request = Net::HTTP::Get.new("/search?search=&company=" +
|
65
65
|
CGI.escape(@company_name) +
|
66
66
|
"¤tCompany=currentCompany" +
|
@@ -73,13 +73,13 @@ class Linkedin
|
|
73
73
|
@start = @start + 10
|
74
74
|
if @totalhits > @start
|
75
75
|
@pages = @pages + 1
|
76
|
-
puts "Searching in: #{self.class} up to point #{@start}"
|
76
|
+
ESearchy::LOG.puts "Searching in: #{self.class} up to point #{@start}"
|
77
77
|
search_people(response.body)
|
78
78
|
create_emails
|
79
79
|
sleep(4)
|
80
80
|
search(@query)
|
81
81
|
else
|
82
|
-
puts "Searching in: #{self.class} up to point #{@start}"
|
82
|
+
ESearchy::LOG.puts "Searching in: #{self.class} up to point #{@start}"
|
83
83
|
search_people(response.body)
|
84
84
|
create_emails
|
85
85
|
end
|
@@ -88,7 +88,7 @@ class Linkedin
|
|
88
88
|
end
|
89
89
|
end
|
90
90
|
rescue Net::HTTPFatalError
|
91
|
-
puts "Error: Something went wrong with the HTTP request"
|
91
|
+
ESearchy::LOG.puts "Error: Something went wrong with the HTTP request"
|
92
92
|
end
|
93
93
|
end
|
94
94
|
|
@@ -97,7 +97,8 @@ class Linkedin
|
|
97
97
|
end
|
98
98
|
|
99
99
|
def search_people(string)
|
100
|
-
@people = string.scan(/
|
100
|
+
@people = string.scan(/title="View profile">[\n\s]+<span class="given-name">(.*)<\/span>\
|
101
|
+
[\n\s]+<span class="family-name">(.*)<\/span>/)
|
101
102
|
end
|
102
103
|
def search_person(name,last)
|
103
104
|
email = []
|
@@ -117,7 +118,7 @@ class Linkedin
|
|
117
118
|
@people.each do |person|
|
118
119
|
name,last = person
|
119
120
|
@emails << "#{name.split(' ')[0]}.#{last.split(' ')[0]}#{@domain}"
|
120
|
-
@emails << "#{name
|
121
|
+
@emails << "#{name[0,1]}#{last.split(' ')[0]}#{@domain}"
|
121
122
|
#@emails.concat(fix(search_person(name,last)))
|
122
123
|
@emails.uniq!
|
123
124
|
end
|
data/lib/esearchy/pgp.rb
CHANGED
@@ -16,14 +16,14 @@ class PGP
|
|
16
16
|
response = http.request(request)
|
17
17
|
case response
|
18
18
|
when Net::HTTPSuccess, Net::HTTPRedirection
|
19
|
-
puts "Searching #{self.class}"
|
19
|
+
ESearchy::LOG.puts "Searching #{self.class}"
|
20
20
|
search_emails(response.body)
|
21
21
|
else
|
22
22
|
return response.error!
|
23
23
|
end
|
24
24
|
end
|
25
25
|
rescue Net::HTTPFatalError
|
26
|
-
puts "Error: Something went wrong with the HTTP request"
|
26
|
+
ESearchy::LOG.puts "Error: Something went wrong with the HTTP request"
|
27
27
|
end
|
28
28
|
end
|
29
29
|
end
|
data/lib/esearchy/searchy.rb
CHANGED
@@ -9,8 +9,6 @@ if RUBY_PLATFORM =~ /mingw|mswin/
|
|
9
9
|
require local_path + 'wcol'
|
10
10
|
end
|
11
11
|
|
12
|
-
|
13
|
-
|
14
12
|
module Searchy
|
15
13
|
case RUBY_PLATFORM
|
16
14
|
when /mingw|mswin/
|
@@ -20,13 +18,17 @@ module Searchy
|
|
20
18
|
end
|
21
19
|
|
22
20
|
def search_emails(string)
|
23
|
-
string = string.gsub("<em>","") if self.class == Google
|
24
|
-
# OLD regex list = string.scan(/[a-z0-9!#$&'*+=?^_`{|}~-]+(?:\.[a-z0-9!#$&'*+=?\^_`{|}~-]+)
|
25
|
-
|
26
|
-
[a-z0-9!#$&'*+=?^_`{|}~-]+(?:\.[a-z0-9!#$&'*+=?\^_`{|}~-]+)
|
27
|
-
|
28
|
-
[a-z0-9!#$&'*+=?^_`{|}~-]+(?:\.[a-z0-9!#$&'*+=?\^_`{|}~-]+)*\s
|
29
|
-
[a-z0-9
|
21
|
+
string = string.gsub("<em>","") if self.class == Google
|
22
|
+
# OLD regex list = string.scan(/[a-z0-9!#$&'*+=?^_`{|}~-]+(?:\.[a-z0-9!#$&'*+=?\^_`{|}~-]+)*@\
|
23
|
+
# (?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?/)
|
24
|
+
list = string.scan(/[a-z0-9!#$&'*+=?^_`{|}~-]+(?:\.[a-z0-9!#$&'*+=?\^_`{|}~-]+)*_at_\
|
25
|
+
(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\
|
26
|
+
[a-z0-9!#$&'*+=?^_`{|}~-]+(?:\.[a-z0-9!#$&'*+=?\^_`{|}~-]+)*\sat\s(?:[a-z0-9](?:[a-z0-9-]\
|
27
|
+
*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|[a-z0-9!#$&'*+=?^_`{|}~-]+\
|
28
|
+
(?:\.[a-z0-9!#$&'*+=?\^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\
|
29
|
+
[a-z0-9!#$&'*+=?^_`{|}~-]+(?:\.[a-z0-9!#$&'*+=?\^_`{|}~-]+)*\s@\s(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+\
|
30
|
+
[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|[a-z0-9!#$&'*+=?^_`{|}~-]+(?:\sdot\s[a-z0-9!#$&'*+=?\^_`\
|
31
|
+
{|}~-]+)*\sat\s(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\sdot\s)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?/)
|
30
32
|
@lock.synchronize do
|
31
33
|
print_emails(list)
|
32
34
|
@emails.concat(fix(list)).uniq!
|
@@ -37,7 +39,7 @@ module Searchy
|
|
37
39
|
while urls.size >= 1
|
38
40
|
@threads << Thread.new do
|
39
41
|
web = URI.parse(urls.pop)
|
40
|
-
puts "Searching in PDF: #{web.to_s}\n"
|
42
|
+
ESearchy::LOG.puts "Searching in PDF: #{web.to_s}\n"
|
41
43
|
begin
|
42
44
|
http = Net::HTTP.new(web.host,80)
|
43
45
|
http.start do |http|
|
@@ -54,9 +56,9 @@ module Searchy
|
|
54
56
|
pdf = PDF::Reader.file(name, receiver)
|
55
57
|
search_emails(receiver.content.inspect)
|
56
58
|
rescue PDF::Reader::UnsupportedFeatureError
|
57
|
-
puts "Encrypted PDF: Unable to parse it.\n"
|
59
|
+
ESearchy::LOG.puts "Encrypted PDF: Unable to parse it.\n"
|
58
60
|
rescue PDF::Reader::MalformedPDFError
|
59
|
-
puts "Malformed PDF: Unable to parse it.\n"
|
61
|
+
ESearchy::LOG.puts "Malformed PDF: Unable to parse it.\n"
|
60
62
|
end
|
61
63
|
`rm "#{name}"`
|
62
64
|
else
|
@@ -64,34 +66,34 @@ module Searchy
|
|
64
66
|
end
|
65
67
|
end
|
66
68
|
rescue Net::HTTPFatalError
|
67
|
-
puts "Error: Something went wrong with the HTTP request.\n"
|
69
|
+
ESearchy::LOG.puts "Error: Something went wrong with the HTTP request.\n"
|
68
70
|
rescue Net::HTTPServerException
|
69
|
-
puts "Error: Not longer there. 404 Not Found.\n"
|
71
|
+
ESearchy::LOG.puts "Error: Not longer there. 404 Not Found.\n"
|
70
72
|
rescue
|
71
|
-
puts "Error: < .. SocketError .. >\n"
|
73
|
+
ESearchy::LOG.puts "Error: < .. SocketError .. >\n"
|
72
74
|
end
|
73
75
|
end
|
74
76
|
end
|
75
77
|
@threads.each {|t| t.join } if @threads != nil
|
76
78
|
end
|
77
79
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
80
|
+
def search_docs(urls)
|
81
|
+
while urls.size >= 1
|
82
|
+
@threads << Thread.new do
|
83
|
+
web = URI.parse(urls.pop)
|
84
|
+
ESearchy::LOG.puts "Searching in DOC: #{web.to_s}\n"
|
85
|
+
begin
|
86
|
+
http = Net::HTTP.new(web.host,80)
|
87
|
+
http.start do |http|
|
88
|
+
request = Net::HTTP::Get.new("#{web.path}#{web.query}")
|
89
|
+
response = http.request(request)
|
90
|
+
case response
|
91
|
+
when Net::HTTPSuccess, Net::HTTPRedirection
|
92
|
+
name = Searchy::TEMP + "#{hash_url(web.to_s)}.doc"
|
93
|
+
open(name, "wb") do |file|
|
94
|
+
file.write(response.body)
|
95
|
+
end
|
96
|
+
if RUBY_PLATFORM =~ /mingw|mswin/
|
95
97
|
begin
|
96
98
|
word = WIN32OLE.new('word.application')
|
97
99
|
word.documents.open(name)
|
@@ -100,24 +102,40 @@ module Searchy
|
|
100
102
|
word.activedocument.close( false )
|
101
103
|
word.quit
|
102
104
|
rescue
|
103
|
-
|
105
|
+
if File.exists?("C:\\Program Files\\...\antiword.exe")
|
106
|
+
search_emails(`antiword "#{name}" -f -s`)
|
107
|
+
else
|
108
|
+
ESearchy::LOG.puts "Something went wrong parsing the .doc}\n"
|
109
|
+
end
|
104
110
|
end
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
111
|
+
elsif RUBY_PLATFORM =~ /linux|darwin/
|
112
|
+
begin
|
113
|
+
if File.exists?("/usr/bin/antiword") or
|
114
|
+
File.exists?("/usr/local/bin/antiword") or
|
115
|
+
File.exists?("/opt/local/bin/antiword")
|
116
|
+
search_emails(`antiword "#{name}" -f -s`)
|
117
|
+
end
|
118
|
+
rescue
|
119
|
+
ESearchy::LOG.puts "Something went wrong parsing the .doc\n"
|
120
|
+
end
|
121
|
+
else
|
122
|
+
ESearchy::LOG.puts "This platform is not currently supported."
|
123
|
+
end
|
124
|
+
`rm "#{name}"`
|
125
|
+
else
|
126
|
+
return response.error!
|
109
127
|
end
|
110
|
-
rescue Net::HTTPFatalError
|
111
|
-
puts "Error: Something went wrong with the HTTP request.\n"
|
112
|
-
rescue Net::HTTPServerException
|
113
|
-
puts "Error: Not longer there. 404 Not Found.\n"
|
114
|
-
rescue
|
115
|
-
puts "Error: < .. SocketError .. >\n"
|
116
128
|
end
|
129
|
+
rescue Net::HTTPFatalError
|
130
|
+
ESearchy::LOG.puts "Error: Something went wrong with the HTTP request.\n"
|
131
|
+
rescue Net::HTTPServerException
|
132
|
+
ESearchy::LOG.puts "Error: Not longer there. 404 Not Found.\n"
|
133
|
+
rescue
|
134
|
+
ESearchy::LOG.puts "Error: < .. SocketError .. >\n"
|
117
135
|
end
|
118
136
|
end
|
119
|
-
|
120
|
-
|
137
|
+
end
|
138
|
+
@threads.each {|t| t.join } if @threads != nil
|
121
139
|
end
|
122
140
|
|
123
141
|
def search_office_xml(urls)
|
@@ -125,7 +143,7 @@ module Searchy
|
|
125
143
|
@threads << Thread.new do
|
126
144
|
web = URI.parse(urls.pop)
|
127
145
|
format = web.scan(/docx|xlsx|pptx/i)[0]
|
128
|
-
puts "Searching in #{format.upcase}: #{web.to_s}\n"
|
146
|
+
ESearchy::LOG.puts "Searching in #{format.upcase}: #{web.to_s}\n"
|
129
147
|
begin
|
130
148
|
http = Net::HTTP.new(web.host,80)
|
131
149
|
http.start do |http|
|
@@ -143,7 +161,7 @@ module Searchy
|
|
143
161
|
search_emails(text)
|
144
162
|
end
|
145
163
|
rescue
|
146
|
-
puts "Something went wrong parsing the .#{format.downcase}\n"
|
164
|
+
ESearchy::LOG.puts "Something went wrong parsing the .#{format.downcase}\n"
|
147
165
|
end
|
148
166
|
`rm "#{name}"`
|
149
167
|
else
|
@@ -151,11 +169,11 @@ module Searchy
|
|
151
169
|
end
|
152
170
|
end
|
153
171
|
rescue Net::HTTPFatalError
|
154
|
-
puts "Error: Something went wrong with the HTTP request.\n"
|
172
|
+
ESearchy::LOG.puts "Error: Something went wrong with the HTTP request.\n"
|
155
173
|
rescue Net::HTTPServerException
|
156
|
-
puts "Error: Not longer there. 404 Not Found.\n"
|
174
|
+
ESearchy::LOG.puts "Error: Not longer there. 404 Not Found.\n"
|
157
175
|
rescue
|
158
|
-
puts "Error: < .. SocketError .. >\n"
|
176
|
+
ESearchy::LOG.puts "Error: < .. SocketError .. >\n"
|
159
177
|
end
|
160
178
|
end
|
161
179
|
end
|
@@ -166,7 +184,7 @@ module Searchy
|
|
166
184
|
while urls.size >= 1
|
167
185
|
@threads << Thread.new do
|
168
186
|
web = URI.parse(urls.pop)
|
169
|
-
puts "Searching in #{web.to_s.scan(/txt|rtf|ans/i)[0].upcase}: #{web.to_s}\n"
|
187
|
+
ESearchy::LOG.puts "Searching in #{web.to_s.scan(/txt|rtf|ans/i)[0].upcase}: #{web.to_s}\n"
|
170
188
|
begin
|
171
189
|
http = Net::HTTP.new(web.host,80)
|
172
190
|
http.start do |http|
|
@@ -180,11 +198,11 @@ module Searchy
|
|
180
198
|
end
|
181
199
|
end
|
182
200
|
rescue Net::HTTPFatalError
|
183
|
-
puts "Error: Something went wrong with the HTTP request\n"
|
201
|
+
ESearchy::LOG.puts "Error: Something went wrong with the HTTP request\n"
|
184
202
|
rescue Net::HTTPServerException
|
185
|
-
puts "Error: Not longer there. 404 Not Found.\n"
|
203
|
+
ESearchy::LOG.puts "Error: Not longer there. 404 Not Found.\n"
|
186
204
|
rescue
|
187
|
-
puts "Error: < .... >"
|
205
|
+
ESearchy::LOG.puts "Error: < .... >"
|
188
206
|
end
|
189
207
|
end
|
190
208
|
end
|
@@ -198,18 +216,18 @@ module Searchy
|
|
198
216
|
unless @emails.include?(email)
|
199
217
|
unless RUBY_PLATFORM =~ /mingw|mswin/
|
200
218
|
if email.match(/#{@query.gsub("@","").split('.')[0]}/)
|
201
|
-
puts "\033[31m" + email + "\033\[0m"
|
219
|
+
ESearchy::LOG.puts "\033[31m" + email + "\033\[0m"
|
202
220
|
else
|
203
|
-
puts "\033[32m" + email + "\033\[0m"
|
221
|
+
ESearchy::LOG.puts "\033[32m" + email + "\033\[0m"
|
204
222
|
end
|
205
223
|
else
|
206
224
|
if email.match(/#{@query.gsub("@","").split('.')[0]}/)
|
207
225
|
Wcol::color(12)
|
208
|
-
puts email
|
226
|
+
ESearchy::LOG.puts email
|
209
227
|
Wcol::color(7)
|
210
228
|
else
|
211
229
|
Wcol::color(2)
|
212
|
-
puts email
|
230
|
+
ESearchy::LOG.puts email
|
213
231
|
Wcol::color(7)
|
214
232
|
end
|
215
233
|
end
|
data/lib/esearchy/yahoo.rb
CHANGED
@@ -35,12 +35,12 @@ class Yahoo
|
|
35
35
|
parse(response.body)
|
36
36
|
@start = @start + 50
|
37
37
|
if @totalhits > @start
|
38
|
-
puts "Searching in URL: #{self.class} up to point #{@start}"
|
38
|
+
ESearchy::LOG.puts "Searching in URL: #{self.class} up to point #{@start}"
|
39
39
|
search_emails(response.body)
|
40
40
|
sleep(4)
|
41
41
|
search(@query)
|
42
42
|
else
|
43
|
-
puts "Searching in URL: #{self.class} up to point #{@start}"
|
43
|
+
ESearchy::LOG.puts "Searching in URL: #{self.class} up to point #{@start}"
|
44
44
|
search_emails(response.body)
|
45
45
|
end
|
46
46
|
else
|
@@ -48,7 +48,7 @@ class Yahoo
|
|
48
48
|
end
|
49
49
|
end
|
50
50
|
rescue Net::HTTPFatalError
|
51
|
-
puts "Error: Something went wrong with the HTTP request"
|
51
|
+
ESearchy::LOG.puts "Error: Something went wrong with the HTTP request"
|
52
52
|
end
|
53
53
|
end
|
54
54
|
|
data/lib/esearchy.rb
CHANGED
@@ -1,18 +1,29 @@
|
|
1
1
|
local_path = "#{File.dirname(__FILE__) + '/esearchy/'}"
|
2
|
-
%w{google bing yahoo PGP keys linkedin}.each { |lib| require local_path + lib }
|
2
|
+
%w{google bing yahoo PGP keys linkedin logger}.each { |lib| require local_path + lib }
|
3
3
|
|
4
4
|
class ESearchy
|
5
|
+
LIBRARY = 1
|
6
|
+
APP = 2
|
7
|
+
|
8
|
+
LOG = Logger.new(1, $stdout)
|
9
|
+
|
10
|
+
def log_type=(value)
|
11
|
+
ESearchy::LOG.level = value
|
12
|
+
end
|
13
|
+
|
14
|
+
def log_file=(value)
|
15
|
+
ESearchy::LOG.file = value
|
16
|
+
end
|
17
|
+
|
18
|
+
DEFAULT_ENGINES = {"Google" => Google, "Bing" => Bing, "Yahoo" => Yahoo,
|
19
|
+
"PGP" => PGP, "LinkedIn" => Linkedin }
|
20
|
+
|
5
21
|
def initialize(options={}, &block)
|
6
22
|
@query = options[:query]
|
7
23
|
@depth_search = options[:depth] || true
|
8
24
|
@maxhits = options[:maxhits]
|
9
|
-
@engines = options[:engines] ||
|
10
|
-
"Bing" => Bing,
|
11
|
-
"Yahoo" => Yahoo,
|
12
|
-
"PGP" => PGP,
|
13
|
-
"LinkedIn" => Linkedin }
|
25
|
+
@engines = options[:engines] || DEFAULT_ENGINES
|
14
26
|
@engines.each {|n,e| @engines[n] = e.new(@maxhits)}
|
15
|
-
@emails = Array.new
|
16
27
|
@threads = Array.new
|
17
28
|
block.call(self) if block_given?
|
18
29
|
end
|
@@ -21,18 +32,19 @@ class ESearchy
|
|
21
32
|
|
22
33
|
def search(query=nil)
|
23
34
|
@engines.each do |n,e|
|
24
|
-
puts "+--- Launching Search for #{n} ---+\n"
|
35
|
+
LOG.puts "+--- Launching Search for #{n} ---+\n"
|
25
36
|
e.search(query || @query)
|
26
37
|
e.search_depth if depth_search?
|
27
|
-
puts "+--- Finishing Search for #{n} ---+\n"
|
38
|
+
LOG.puts "+--- Finishing Search for #{n} ---+\n"
|
28
39
|
end
|
29
40
|
end
|
30
41
|
|
31
42
|
def emails
|
43
|
+
emails = []
|
32
44
|
@engines.each do |n,e|
|
33
|
-
|
45
|
+
emails.concat(@engines[n].emails).uniq!
|
34
46
|
end
|
35
|
-
|
47
|
+
emails
|
36
48
|
end
|
37
49
|
|
38
50
|
def clean(&block)
|