esearchy 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +112 -0
- data/bin/esearchy +334 -0
- data/lib/esearchy/LocalEngines/directory.rb +16 -0
- data/lib/esearchy/OtherEngines/googlegroups.rb +27 -0
- data/lib/esearchy/OtherEngines/ldap.rb +44 -0
- data/lib/esearchy/OtherEngines/pgp.rb +22 -0
- data/lib/esearchy/OtherEngines/spider.rb +43 -0
- data/lib/esearchy/OtherEngines/usenet.rb +22 -0
- data/lib/esearchy/SearchEngines/altavista.rb +25 -0
- data/lib/esearchy/SearchEngines/bing.rb +32 -0
- data/lib/esearchy/SearchEngines/google.rb +30 -0
- data/lib/esearchy/SearchEngines/yahoo.rb +32 -0
- data/lib/esearchy/SocialEngines/classmates.rb +33 -0
- data/lib/esearchy/SocialEngines/googleprofiles.rb +36 -0
- data/lib/esearchy/SocialEngines/linkedin.rb +35 -0
- data/lib/esearchy/SocialEngines/linkedinfull.rb +100 -0
- data/lib/esearchy/SocialEngines/naymz.rb +36 -0
- data/lib/esearchy/bugmenot.rb +26 -0
- data/lib/esearchy/docs.rb +267 -0
- data/lib/esearchy/esearchy.rb +195 -0
- data/lib/esearchy/genericengine.rb +153 -0
- data/lib/esearchy/localengines.rb +1 -0
- data/lib/esearchy/otherengines.rb +5 -0
- data/lib/esearchy/searchengines.rb +4 -0
- data/lib/esearchy/socialengines.rb +4 -0
- data/lib/esearchy/useragent.rb +188 -0
- data/lib/esearchy.rb +24 -0
- metadata +129 -0
data/README.rdoc
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
= Esearchy
|
2
|
+
|
3
|
+
== DESCRIPTION
|
4
|
+
Esearchy is a small library capable of searching the internet for email addresses. The currently supported search methods include, but are not limited to:
|
5
|
+
|
6
|
+
* Search engines:
|
7
|
+
* Google
|
8
|
+
* Bing
|
9
|
+
* Yahoo
|
10
|
+
* AltaVista
|
11
|
+
* Social Engines:
|
12
|
+
* LinkedIn
|
13
|
+
* Google Profiles ( Based on DigiNinja's idea http://www.digininja.org/projects/gpscan.php)
|
14
|
+
* Naymz
|
15
|
+
* Classmates
|
16
|
+
* Other Engines
|
17
|
+
* PGP servers
|
18
|
+
* Usenets
|
19
|
+
* GoogleGroups Search
|
20
|
+
* Spider
|
21
|
+
* LDAP
|
22
|
+
|
23
|
+
|
24
|
+
But searches do not stop there; ESearchy also looks for emails inside:
|
25
|
+
|
26
|
+
* PDF
|
27
|
+
* DOC
|
28
|
+
* DOCX
|
29
|
+
* XLSX
|
30
|
+
* PPTX
|
31
|
+
* ODT
|
32
|
+
* ODP
|
33
|
+
* ODS
|
34
|
+
* ODB
|
35
|
+
* ASN
|
36
|
+
* TXT
|
37
|
+
|
38
|
+
Once all the text is parsed within the file the emails are added to the list of found accounts.
|
39
|
+
|
40
|
+
In order to parse Microsoft Word (.doc):
|
41
|
+
* You either need a Windows platform with Word installed.
|
42
|
+
* Install AntiWord. ( http://www.winfield.demon.nl/ )
|
43
|
+
* Or, if none of the above is available on the OS, we perform a raw search inside the file.
|
44
|
+
|
45
|
+
NOTE: THIS IS STILL BEING DEVELOPED. CODE IS SUBMITTED DAILY, SO BE AWARE THAT IT MIGHT NOT WORK PROPERLY ALL THE TIME. IF SOMETHING GOES WRONG, PLEASE RAISE AN ISSUE.
|
46
|
+
|
47
|
+
|
48
|
+
== SUPPORT:
|
49
|
+
|
50
|
+
* http://github.com/FreedomCoder/esearchy/issues
|
51
|
+
* Emails from github.
|
52
|
+
|
53
|
+
== SYNOPSIS:
|
54
|
+
|
55
|
+
For now, there are two main ways of performing a search:
|
56
|
+
|
57
|
+
* Executable CLI command
|
58
|
+
|
59
|
+
|
60
|
+
* Library
|
61
|
+
|
62
|
+
For those who want to integrate this into their application, you can use it in "the ruby way"
|
63
|
+
|
64
|
+
== REQUIREMENTS:
|
65
|
+
|
66
|
+
* ruby 1.8 or 1.9
|
67
|
+
* cgi
|
68
|
+
* pdf/reader
|
69
|
+
* json
|
70
|
+
* spidr
|
71
|
+
* ldap
|
72
|
+
* rubyzip ( Migrating to FreedomCoder-rubyzip 0.9.2 so it's 1.9 compatible)
|
73
|
+
|
74
|
+
== INSTALL:
|
75
|
+
* > sudo gem sources -a http://gems.github.com (If you do not have the repository)
|
76
|
+
* > sudo gem install FreedomCoder-esearchy
|
77
|
+
|
78
|
+
== THANKS:
|
79
|
+
|
80
|
+
* http://www.penetrationtests.com/ [Penetration Testing Directory]
|
81
|
+
* http://www.mundoruby.com.ar/ [Ruby's News Aggregator]
|
82
|
+
* http://www.digininja.org/ [DigiNinja's Website]
|
83
|
+
* http://hexale.blogspot.com/ [8A's website]
|
84
|
+
* http://www.kalmbach.com.ar/ [/jk's website]
|
85
|
+
|
86
|
+
== LICENSE:
|
87
|
+
|
88
|
+
(The MIT License)
|
89
|
+
|
90
|
+
Copyright (c) 2008 - 2009:
|
91
|
+
|
92
|
+
* {Matias P. Brutti}[http://www.freedomcoder.com.ar]
|
93
|
+
|
94
|
+
|
95
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
96
|
+
a copy of this software and associated documentation files (the
|
97
|
+
'Software'), to deal in the Software without restriction, including
|
98
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
99
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
100
|
+
permit persons to whom the Software is furnished to do so, subject to
|
101
|
+
the following conditions:
|
102
|
+
|
103
|
+
The above copyright notice and this permission notice shall be
|
104
|
+
included in all copies or substantial portions of the Software.
|
105
|
+
|
106
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
107
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
108
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
109
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
110
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
111
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
112
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/bin/esearchy
ADDED
@@ -0,0 +1,334 @@
|
|
1
|
+
#!/usr/bin/env ruby -wKU
|
2
|
+
# esearchy
|
3
|
+
#
|
4
|
+
# Created by FreedomCoder on 2009-10-24.
|
5
|
+
# Copyright 2009 FreedomCoder's Labs. All rights reserved.
|
6
|
+
#
|
7
|
+
if RUBY_PLATFORM =~ /mingw|mswin/
  require 'Win32API'

  # Coloured console output for Windows, built on two kernel32 calls:
  # GetStdHandle (to obtain the console handle) and SetConsoleTextAttribute
  # (to change the text colour).
  class Wcol
    gsh = Win32API.new("kernel32", "GetStdHandle", ['L'], 'L')
    @textAttr = Win32API.new("kernel32","SetConsoleTextAttribute", ['L','N'], 'I')
    # -11 is the Win32 STD_OUTPUT_HANDLE selector.
    @h = gsh.call(-11)

    # Sets the console text attribute to +col+ on the stored handle.
    def self.color(col)
      @textAttr.call(@h, col)
    end

    # Prints +text+ using console attribute +color+, then switches the
    # attribute back to 7.
    def self.puts(color, text)
      # color takes only the attribute; the handle is read from @h inside it.
      self.color(color)
      # A bare `puts` here would resolve to Wcol.puts (this very method) and
      # fail on arity, so write to $stdout explicitly.
      $stdout.puts text
      self.color(7)
    end
  end
end
|
25
|
+
|
26
|
+
|
27
|
+
require 'rubygems'
|
28
|
+
require 'getoptlong'
|
29
|
+
require 'sqlite3'
|
30
|
+
require 'prawn'
|
31
|
+
require 'prawn/layout'
|
32
|
+
|
33
|
+
require '../lib/esearchy.rb'
|
34
|
+
|
35
|
+
ESearchy::log = true
|
36
|
+
|
37
|
+
@yahoo_key = nil
|
38
|
+
@bing_key = nil
|
39
|
+
@maxhits = nil
|
40
|
+
@params = {}
|
41
|
+
@list = []
|
42
|
+
@output = nil
|
43
|
+
@email_engines = [:Google,:Yahoo,:Bing,:Altavista,:Usenet,:PGP,:Spider,:GoogleGroups]
|
44
|
+
@people_engines = [:LinkedIn,:Naymz,:Classmates,:GoogleProfiles]
|
45
|
+
|
46
|
+
opts = GetoptLong.new(
|
47
|
+
[ '--help', '-h', GetoptLong::NO_ARGUMENT ],
|
48
|
+
[ '--disable-google', GetoptLong::NO_ARGUMENT ],
|
49
|
+
[ '--disable-yahoo', GetoptLong::NO_ARGUMENT ],
|
50
|
+
[ '--disable-bing', GetoptLong::NO_ARGUMENT ],
|
51
|
+
[ '--disable-altavisa', GetoptLong::NO_ARGUMENT ],
|
52
|
+
[ '--disable-linkedin', GetoptLong::NO_ARGUMENT ],
|
53
|
+
[ '--disable-gprofiles', GetoptLong::NO_ARGUMENT ],
|
54
|
+
[ '--disable-naymz', GetoptLong::NO_ARGUMENT ],
|
55
|
+
[ '--disable-ggroups', GetoptLong::NO_ARGUMENT ],
|
56
|
+
[ '--disable-pgp', GetoptLong::NO_ARGUMENT ],
|
57
|
+
[ '--disable-usenet', GetoptLong::NO_ARGUMENT ],
|
58
|
+
[ '--disable-spider', GetoptLong::NO_ARGUMENT ],
|
59
|
+
[ '--query','-q', GetoptLong::REQUIRED_ARGUMENT ],
|
60
|
+
[ '--company','-c', GetoptLong::REQUIRED_ARGUMENT ],
|
61
|
+
[ '--website','-w', GetoptLong::REQUIRED_ARGUMENT ],
|
62
|
+
[ '--file','-f', GetoptLong::REQUIRED_ARGUMENT ],
|
63
|
+
[ '--filter','-p', GetoptLong::REQUIRED_ARGUMENT ],
|
64
|
+
[ '--output','-o', GetoptLong::REQUIRED_ARGUMENT ],
|
65
|
+
[ '--yahookey','-y', GetoptLong::REQUIRED_ARGUMENT ],
|
66
|
+
[ '--bingkey','-b', GetoptLong::REQUIRED_ARGUMENT ],
|
67
|
+
[ '--maxhits','-m', GetoptLong::REQUIRED_ARGUMENT ]
|
68
|
+
)
|
69
|
+
|
70
|
+
opts.each do |opt, arg|
|
71
|
+
case opt
|
72
|
+
when '--help':
|
73
|
+
# BEGIN OF HELP
|
74
|
+
ESearchy::LOG.puts "\nHELP for Esearchy\n---------------------\n
|
75
|
+
--help, -h
|
76
|
+
\tWell I guess you know what this is for (To obtain this Help).\n
|
77
|
+
INPUT PARAMS:
|
78
|
+
--query, -q [@domain.com]
|
79
|
+
\t The domain name to search.\n
|
80
|
+
--company, -c [Company Inc]
|
81
|
+
\t The company name to search.\n
|
82
|
+
--website, -w [www.domain.com]
|
83
|
+
\t The website name to spider.\n
|
84
|
+
--yahookey, -y [key]
|
85
|
+
\t The Yahoo API Key .\n
|
86
|
+
--bingkey, -b [key]
|
87
|
+
\t The Bing API Key .\n
|
88
|
+
--filter, -p
|
89
|
+
\t The pattern to use to filter emails.(not fully implemented)\n
|
90
|
+
--file, -f [file_name]
|
91
|
+
\tIf we need to search more than one domain we can provide a list.\n
|
92
|
+
--output, -o
|
93
|
+
\tThe output file name.\n\n
|
94
|
+
PLUGIN OPTIONS:
|
95
|
+
--disable-google
|
96
|
+
\t Disables Google searches.\n
|
97
|
+
--disable-yahoo
|
98
|
+
\t Disables Yahoo searches.\n
|
99
|
+
--disable-bing
|
100
|
+
\t Disables Bing searches.\n
|
101
|
+
--disable-linkedin
|
102
|
+
\t Disables LinkedIn searches.\n
|
103
|
+
--disable-gprogiles
|
104
|
+
\t Disables Google Profiles searches.\n
|
105
|
+
--disable-naymz
|
106
|
+
\t Disables Naymz searches.\n
|
107
|
+
--disable-ggroups
|
108
|
+
\t Disables Google Groups searches.\n
|
109
|
+
--disable-pgp
|
110
|
+
\t Disables PGP searches.\n
|
111
|
+
--disable-usenet
|
112
|
+
\t Disables Usenet searches.\n\n
|
113
|
+
Copyright 2009 - FreedomCoder\n"
|
114
|
+
#END OF HELP
|
115
|
+
exit(0)
|
116
|
+
when '--disable-google':
|
117
|
+
@email_engines.delete(:Google)
|
118
|
+
when '--disable-yahoo':
|
119
|
+
@email_engines.delete(:Yahoo)
|
120
|
+
when '--disable-bing':
|
121
|
+
@email_engines.delete(:Bing)
|
122
|
+
when '--disable-altavisa':
|
123
|
+
@email_engines.delete(:Altavista)
|
124
|
+
when '--disable-spider':
|
125
|
+
@email_engines.delete(:Spider)
|
126
|
+
when '--disable-linkedin':
|
127
|
+
@people_engines.delete(:LinkedIn)
|
128
|
+
when '--disable-gprofiles':
|
129
|
+
@people_engines.delete(:GoogleProfiles)
|
130
|
+
when '--disable-naymz':
|
131
|
+
@people_engines.delete(:Naymz)
|
132
|
+
when '--disable-classmates':
|
133
|
+
@email_engines.delete(:Classmates)
|
134
|
+
when '--disable-ggroups':
|
135
|
+
@email_engines.delete(:GoogleGroups)
|
136
|
+
when '--disable-pgp':
|
137
|
+
@email_engines.delete(:PGP)
|
138
|
+
when '--disable-usenet':
|
139
|
+
@email_engines.delete(:Usenet)
|
140
|
+
when '--query':
|
141
|
+
@params[:query] = arg
|
142
|
+
when '--company':
|
143
|
+
@params[:company] = arg
|
144
|
+
when '--file':
|
145
|
+
if File.exists?(arg)
|
146
|
+
open(arg,'r').each_line do |line|
|
147
|
+
temp[:query],temp[:company],temp[:website] = line.split(',')
|
148
|
+
@list << temp
|
149
|
+
end
|
150
|
+
else
|
151
|
+
raise ArgumentError, "File not found"
|
152
|
+
exit(0)
|
153
|
+
end
|
154
|
+
when '--yahookey':
|
155
|
+
@yahoo_key = arg
|
156
|
+
when '--bingkey':
|
157
|
+
@bing_key = arg
|
158
|
+
when '--filter':
|
159
|
+
@pattern = arg
|
160
|
+
when '--output':
|
161
|
+
@output = Output.new arg
|
162
|
+
when '--maxhits':
|
163
|
+
@params[:maxhits] = arg.to_i
|
164
|
+
else
|
165
|
+
puts "Unknown command. Please try again"
|
166
|
+
exit(0)
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
|
171
|
+
# Writes the per-user configuration file $HOME/.esearchyrc as KEY=value lines,
# one setting per line, so that read_conf can parse it back.
#
# maxhits   - maximum number of hits; nil falls back to 500.
# yahoo_key - Yahoo API key, written only when given.
# bing_key  - Bing API key, written only when given.
def configure(maxhits = 500, yahoo_key = nil, bing_key = nil)
  # File.open (unlike File.new) yields the handle and closes it afterwards.
  File.open(ENV['HOME'] + "/.esearchyrc", "w") do |conf|
    conf.puts "MAXHITS=#{maxhits || 500}"
    conf.puts "YAHOOKEY=#{yahoo_key}" if yahoo_key
    conf.puts "BINGKEY=#{bing_key}" if bing_key
  end
end
|
178
|
+
|
179
|
+
# Loads $HOME/.esearchyrc (KEY=value lines) and fills in any setting that was
# not already provided: @params[:maxhits], @yahoo_key and @bing_key. Values
# that are already set are left untouched (||=).
def read_conf
  File.foreach(ENV['HOME'] + "/.esearchyrc") do |line|
    # Limit the split so values that contain '=' stay intact.
    key, value = line.chomp.split("=", 2)
    next if value.nil? || value.empty?
    case key
    when "MAXHITS"
      @params[:maxhits] ||= value.to_i
    when "YAHOOKEY"
      @yahoo_key ||= value
    when "BINGKEY"
      @bing_key ||= value
    end
  end
end
|
192
|
+
|
193
|
+
# Placeholder hook that receives intermediate results; reserved for session
# handling and currently a no-op that returns nil.
def dump(results)
  # this is for session handling.
  nil
end
|
196
|
+
|
197
|
+
### PRINTING FINDING METHODS ###
|
198
|
+
# Prints every entry of +list+ that is not in @emails, choosing the colouring
# mechanism by platform: the Windows console API on mingw/mswin, ANSI escape
# sequences everywhere else.
def print_(list)
  # @emails is never initialised by this script; treat that as "nothing
  # recorded" instead of failing on nil.
  @emails ||= []
  list.each do |email|
    next if @emails.include?(email)
    if RUBY_PLATFORM =~ /mingw|mswin/
      print_windows(email)
    else
      print_linux(email)
    end
  end
end

# Prints +email+ using ANSI colours: red (31) when it matches the searched
# domain keyword, green (32) otherwise.
def print_linux(email)
  if matches_query?(email)
    puts "\033[31m" + email.to_s + "\033[0m"
  else
    puts "\033[32m" + email.to_s + "\033[0m"
  end
end

# Prints +email+ through Wcol: attribute 12 when it matches the searched
# domain keyword, attribute 2 otherwise.
def print_windows(email)
  if matches_query?(email)
    Wcol::puts(12, email)
  else
    Wcol::puts(2, email)
  end
end

# True when +email+ contains the first label of the searched domain (for the
# query "@example.com" the keyword is "example"). The query is read from
# @query, falling back to @params[:query]; with no query nothing matches.
def matches_query?(email)
  query = @query || (@params && @params[:query])
  keyword = query.to_s.delete("@").split(".").first
  return false if keyword.nil? || keyword.empty?
  # Escape so the keyword is matched literally rather than as a pattern.
  !email.to_s.match(/#{Regexp.escape(keyword)}/).nil?
end
|
226
|
+
|
227
|
+
### SAVING TO DISK ###
|
228
|
+
# Persists search results to disk. The format is chosen from the output file
# name: names containing "pdf", "csv" or "sqlite" select the matching writer
# (checked in that order); any other name writes nothing.
class Output

  # name - path of the file to write.
  def initialize(name)
    @output = name
  end

  # Writes +data+ in the format implied by the file name. Each row is expected
  # to hold [email_or_person, type, class, match].
  def save(data)
    case @output
    when /pdf/
      save_pdf(data)
    when /csv/
      save_csv(data)
    when /sqlite/
      save_sqlite(data)
    end
  end

  private

  # Writes a header line followed by one comma-joined line per row.
  # Fields are written verbatim; no CSV quoting is applied.
  def save_csv(data)
    File.open(@output, "w") do |out|
      out << "EMAILS/PERSON, TYPE, CLASS, MATCH\n"
      data.each { |row| out << Array(row).join(",") << "\n" }
    end
  end

  # Renders the rows as a single table in a PDF document using Prawn.
  def save_pdf(data)
    Prawn::Document.generate(@output) do
      table data,
        :position => :center,
        :headers => ["Email/Person", "Type", "Class", "Match"],
        :header_color => "0046f9",
        :row_colors => :pdf_writer, #["ffffff","ffff00"],
        :font_size => 10,
        :vertical_padding => 2,
        :horizontal_padding => 5
    end
  end

  # Appends the rows to a "results" table in an SQLite database, creating the
  # table when needed. Values are bound as parameters, so quotes in scraped
  # data cannot break or alter the statement.
  def save_sqlite(data)
    db = SQLite3::Database.new(@output)
    begin
      db.execute("CREATE TABLE IF NOT EXISTS results (
        id integer primary key asc,
        object text,
        type char,
        class text,
        match char);")

      # One transaction for all rows: atomic and much faster than autocommit.
      db.transaction do
        data.each do |r|
          db.execute("INSERT INTO results (object,type,class,match)
                      VALUES (?,?,?,?);", [r[0].to_s, r[1], r[2], r[3]])
        end
      end
    ensure
      db.close
    end
  end
end
|
283
|
+
|
284
|
+
# Runs one complete search described by the hash +p+ (keys used here and in
# option parsing: :query, :company, :website, :maxhits). Does nothing when
# +p+ has no :query.
#
# Email engines are searched first (including documents found along the way),
# then people engines; intermediate findings are printed as they arrive, the
# results are saved when an output file was requested, and a final summary is
# printed.
def execute(p)
  return unless p[:query]

  # The print helpers colour matches based on @query.
  @query = p[:query]
  search = ESearchy::Search.new(p)

  search.start do |s|
    s.Emails(@email_engines) do |emails|
      emails.Yahoo.appid = @yahoo_key if @yahoo_key
      emails.Bing.appid = @bing_key if @bing_key
      emails.search do |engine|
        dump(engine.results)
        print_(engine.emails.uniq)
      end
      emails.docs do |engine|
        dump(engine.results)
        print_(engine.emails.uniq)
      end
    end

    s.People(@people_engines) do |people|
      people.search { |engine| dump(engine.results) }
    end
  end

  # @output is only set when --output was given.
  @output.save(search.results) if @output
  puts "-------RESULTS--------"
  print_ search.emails
  print_ search.people
end
|
312
|
+
|
313
|
+
# Startup banner.
puts "DISCLOSURE: This is just an example tool ESearchy is more and more a piece
of code intended to work as a Library and you should create your own little.rb file :)"
puts "------------------------------------------------------------------------"
puts "REMINDER:"
puts "- if you want to use GoogleProfiles, LinkedIn, Classmates or Naymz, you will need to use the --company (-c) <company_name> option"
puts "- If you want to spider a website you need to use the --website (-w) <URL> option"

# Load the per-user configuration when it exists, otherwise create it from
# the values given on the command line.
if File.exist?(ENV['HOME'] + "/.esearchyrc")
  read_conf
else
  configure(@params[:maxhits],@yahoo_key,@bing_key)
end

# Searches loaded from --file run first, then the one described by the
# individual command line options.
@list.each {|p| execute(p)} unless @list.empty?
execute(@params) unless @params.empty?

puts "Happy Hacking :)\nGood Bye.\n"
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module ESearchy
  module LocalEngines
    # Collects candidate documents from a local directory tree so they can be
    # inspected for email addresses.
    class Directory
      # Extensions (case-insensitive) of the files worth queueing.
      DOCUMENT_PATTERN = /\.(pdf|doc|docx|xlsx|pptx|odt|odp|ods|odb|txt|rtf|ans|csv|xml|json|html)$/i

      # dir - root directory to scan.
      def initialize(dir)
        @dir = dir
        @documents = Queue.new
        @emails = []
      end

      # Recursively lists the files under the directory and pushes those with
      # a supported extension onto the @documents queue. Returns the array of
      # queued paths.
      def search
        files = Dir["#{@dir}/**/*.*"]
        files.select { |x| x =~ DOCUMENT_PATTERN }.each { |f| @documents.push(f) }
      end

    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module ESearchy
  module OtherEngines
    # Google Groups engine. Builds the query path and the result parsing for
    # groups.google.com; fetching is delegated to ESearchy::GenericEngine
    # through +super+.
    class GoogleGroups < ESearchy::GenericEngine
      ENGINE = "groups.google.com"
      PORT = 80
      NUM = 100
      TYPE = 1

      # Sets the request path for @query (the offset is appended after
      # "start=") and hands over to the generic search.
      def search
        @querypath = "/groups/search?&safe=off&num=100&q=" + @query + "&btnG=Search&start="
        super
      end

      # Reads the total hit count from the "of about <b>N</b> for" banner
      # (0 when the banner is absent) and passes the captured result links to
      # the generic parser.
      def parse( html )
        banner = html.scan(/<\/b> of about <b>(.*)<\/b> for /)
        @totalhits = banner.empty? ? 0 : totalhits(banner[0][0].gsub(",","").to_i)
        links = html.scan(/<div class=g align="left"><a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" target=""/)
        super links
      end
    end
  end
end
|
26
|
+
|
27
|
+
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module ESearchy
  module OtherEngines
    # Thin wrapper around an LDAP connection (LDAP::Conn) used to query a
    # directory server.
    class Ldap
      attr_accessor :HOST, :PORT, :SSLPORT, :base, :scope, :filter, :attrs

      # host   - LDAP server host name.
      # port   - server port; defaults to LDAP::LDAP_PORT.
      # base   - search base DN; defaults to "dc=localhost,dc=<host>".
      # scope  - search scope; defaults to 2 (presumably the subtree scope,
      #          confirm against the LDAP library constants).
      # filter - search filter; defaults to '(objectclass=person)'.
      # attr   - attributes to fetch; defaults to ['sn', 'cn'].
      def initialize(host = nil, port = nil, base = nil,
                     scope = nil, filter = nil, attr = nil)
        @HOST = host
        @PORT = port || LDAP::LDAP_PORT
        @base = base || "dc=localhost,dc=#{host}"
        @scope = scope || 2
        @filter = filter || '(objectclass=person)'
        @attrs = attr || ['sn', 'cn']
      end

      # Connects and binds with +bind+, then runs the search.
      #
      # When a block is given it is first called with this object (so the
      # caller can adjust base/scope/filter/attrs) and is then handed to the
      # connection's search as its block, where it receives the entries.
      # The connection is always closed afterwards. Does nothing when the
      # connection could not be created.
      def search(bind, &block)
        connect(bind)
        return if @conn.nil?
        begin
          block.call(self) if block_given?
          # Forward the block as a block; passed positionally it would land in
          # the parameter that follows attrs in the search signature.
          @conn.search(base, scope, filter, attrs, &block)
        rescue LDAP::ResultError
          @conn.perror("search")
        ensure
          close
        end
      end

      private

      # Opens the connection and binds. Failures are reported through perror
      # when a connection object exists; @conn stays nil when it could not be
      # created at all.
      def connect(bind)
        @conn = nil
        @conn = LDAP::Conn.new(@HOST, @PORT)
        @conn.bind(bind) # i.e. 'cn=root, dc=localhost, dc=localdomain','secret'
      rescue LDAP::Error
        @conn.perror("bind") if @conn
      end

      # Unbinds the current connection.
      def close
        @conn.unbind
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module ESearchy
  module OtherEngines
    # PGP key server engine: looks the query up on pgp.mit.edu and feeds the
    # response body to the crawler.
    class PGP < ESearchy::GenericEngine
      ENGINE = "pgp.mit.edu"
      PORT = 11371
      NUM = 0 # Do not really need it :)
      TYPE = 1

      # Issues a single lookup request for @query and crawls the response
      # body.
      def search
        @querypath = "/pks/lookup?search=" + @query
        headers = {'User-Agent' => UserAgent::fetch }
        get(ENGINE, PORT, @querypath, headers) do |response|
          D "Searching #{self.class}"
          crawler(response.body)
        end
      end

      # Passes every href target found in +html+ to the generic parser.
      def parse( html )
        links = html.scan(/href=["|']([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)["|']/)
        super links
      end
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'spidr'
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
module ESearchy
  module OtherEngines
    # Website spider engine: walks a site with Spidr and feeds every page to
    # the crawler and to the link parser.
    class Spider < ESearchy::GenericEngine
      ENGINE = "" #Do not really need any of them.
      PORT = 0
      NUM = 0
      TYPE = 1

      # Sets the site to spider for this instance; used when
      # ESearchy::Search.website has no value.
      attr_writer :website

      # Spiders the configured website, crawling and parsing each page body.
      def search
        Spidr.site(website()) do |spider|
          spider.every_page do |page|
            D page.url
            crawler(page.body)
            parse(page.body)
          end
        end
      end

      # Returns the site to spider: ESearchy::Search.website when set,
      # otherwise the value assigned through website=.
      #
      # Raises ESearchyMissingWebsite when neither is available, or when the
      # lookup itself fails.
      def website
        site = begin
          ESearchy::Search.website || @website
        rescue StandardError
          nil
        end
        # Without this check a missing website would silently flow into
        # Spidr.site as nil.
        raise ESearchyMissingWebsite, "Mssing website url Object.website=(value)" unless site
        site
      end

      # Collects the href targets in +html+, prefixes the ones without an
      # http/https/ftp scheme with the website, and passes them to the generic
      # parser.
      def parse( html )
        array = html.scan(/href=["|']([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)["|']/).map! do |r|
          r[0].match(/http:\/\/|https:\/\/|ftp:\/\//) ? r : [website() + r[0]]
        end
        super array
      end

    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module ESearchy
  module OtherEngines
    # Usenet address database engine: looks the query up on
    # usenet-addresses.mit.edu and feeds the response body to the crawler.
    #
    # Inherits from ESearchy::GenericEngine like the other engines: this class
    # calls get, D and crawler and uses +super+ in parse, none of which exist
    # without that superclass.
    class Usenet < ESearchy::GenericEngine
      ENGINE = "usenet-addresses.mit.edu"
      PORT = 80
      NUM = 0 # Do not really need it :)
      TYPE = 1

      # Issues a single lookup request for @query and crawls the response
      # body.
      def search
        @querypath = "/cgi-bin/udb?T=" + @query + "&G=&S=&N=&O=&M=500"
        get ENGINE, PORT, @querypath, {'User-Agent' => UserAgent::fetch } do |r|
          D "Searching #{self.class}"
          crawler(r.body)
        end
      end

      # Passes every href target found in +html+ to the generic parser.
      def parse( html )
        super html.scan(/href=["|']([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)["|']/)
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module ESearchy
  module SearchEngines
    # AltaVista engine. Builds the query path and the result parsing for
    # www.altavista.com; fetching is delegated to ESearchy::GenericEngine
    # through +super+.
    class Altavista < ESearchy::GenericEngine
      ENGINE = "www.altavista.com"
      PORT = 80
      NUM = 100
      TYPE = 1

      # Sets the request path for @query (the offset is appended after
      # "stq=") and hands over to the generic search.
      # NOTE(review): the path asks for nbq=50 while NUM is 100 - confirm the
      # paging step against GenericEngine.
      def search
        @querypath = "/web/results?itag=ody&kgs=0&kls=0&nbq=50&q=" + @query + "&stq="
        super
      end

      # Reads the total hit count from the "AltaVista found N results" banner
      # (0 when the banner is absent) and passes the captured result links to
      # the generic parser.
      def parse( html )
        banner = html.scan(/AltaVista found (.*) results<\/A>/)
        @totalhits = banner.empty? ? 0 : totalhits(banner[0][0].gsub(',','').to_i)
        links = html.scan(/<a class='res' href='([a-zA-Z0-9:\/\/.&?%=\-_+]*)'>/)
        super links
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module ESearchy
  module SearchEngines
    # Bing engine, using the JSON API on api.search.live.net. Requires an
    # application id, set through appid=. Fetching is delegated to
    # ESearchy::GenericEngine through +super+.
    class Bing < ESearchy::GenericEngine
      ENGINE = "api.search.live.net"
      PORT = 80
      NUM = 50
      TYPE = 1

      # Sets the request path for @query (the offset is appended after
      # "Web.Offset=") and hands over to the generic search.
      #
      # Raises ESearchyMissingAppID when no application id has been set.
      def search
        # Check first: concatenating a nil @appid raises TypeError before any
        # trailing `or raise` could run.
        raise ESearchyMissingAppID, "Missing AppID <Class.appid=>" unless @appid
        @querypath = "/json.aspx?AppId=" + @appid + "&query=" + @query +
                     "&Sources=Web&Web.Count=50&Web.Offset="
        super
      end

      # Sets the Bing application id used in the request path.
      def appid=(value)
        @appid = value
      end

      # Parses the JSON response: records the total hit count (0 when the API
      # reports none) and passes the web results to the generic parser.
      def parse(json)
        doc = JSON.parse(json)
        web = doc["SearchResponse"]["Web"]
        hits = web["Total"].to_i # to_i never returns nil; a missing total becomes 0
        @totalhits = hits == 0 ? 0 : totalhits(hits)
        super web["Results"]
      end
    end
  end
end
|