FreedomCoder-esearchy 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc ADDED
@@ -0,0 +1,81 @@
1
+ = Esearch
2
+
3
+ == DESCRIPTION
4
+ Esearchy is a small library capable of searching the internet for email addresses. Currently, the supported search methods are engines such as Google, Bing, Yahoo, PGP servers, GoogleGroups, etc , but I intend to add many more.
5
+
6
+ Also, the library searches inside .pdf, .docx, .xlsx, .pptx, .asn and .txt files for email addresses and adds them to the list of found accounts. Finally, we have support for .doc files but for now only in Windows Platforms.
7
+
8
+ NOTE: In order to work, Bing and Yahoo need an appid, for which you will have to create one for each and place them in files so the library will be able to work properly.
9
+ * data/yahoo.key
10
+ * data/bing.key
11
+
12
+ Soon, users should be able to also pass them as parameters.
13
+
14
+ == SUPPORT:
15
+
16
+ * http://github.com/FreedomCoder/esearchy/issues
17
+ * Emails from github.
18
+
19
+ == SYNOPSIS:
20
+
21
+ For now, there are two main ways of performing a search:
22
+
23
+ * Executable CLI command
24
+
25
+ esearchy --domain domain.com --maxhits 500 --yahoo_key dkajsdkajskdad --output "~/emails.txt"
26
+
27
+ * Library
28
+
29
+ For those who want to integrate this to their application you can use it in "the ruby way"
30
+
31
+ Esearchy.create "domain.com" do |domain|
32
+ domain.maxhits = 500
33
+ domain.search
34
+ domain.clean {|e| e =~ /<|>/ }
35
+ domain.save_to_file "~/emails.txt"
36
+ end
37
+
38
+ or in the more classic way in which you can create an Esearchy object and work on it
39
+
40
+ domain = Esearchy.new :query => "domain.com", :maxhits => 500
41
+ domain.search
42
+ domain.save_to_file "~/emails.txt"
43
+
44
+ == REQUIREMENTS:
45
+
46
+ * ruby 1.8 or 1.9
47
+ * cgi
48
+ * pdf/reader
49
+ * json
50
+
51
+ == INSTALL:
52
+
53
+ * sudo gem install freedomcoder-esearchy
54
+
55
+ == LICENSE:
56
+
57
+ (The MIT License)
58
+
59
+ Copyright (c) 2008 - 2009:
60
+
61
+ * {Matias P. Brutti}[http://www.freedomcoder.com.ar]
62
+
63
+
64
+ Permission is hereby granted, free of charge, to any person obtaining
65
+ a copy of this software and associated documentation files (the
66
+ 'Software'), to deal in the Software without restriction, including
67
+ without limitation the rights to use, copy, modify, merge, publish,
68
+ distribute, sublicense, and/or sell copies of the Software, and to
69
+ permit persons to whom the Software is furnished to do so, subject to
70
+ the following conditions:
71
+
72
+ The above copyright notice and this permission notice shall be
73
+ included in all copies or substantial portions of the Software.
74
+
75
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
76
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
77
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
78
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
79
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
80
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
81
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/bin/esearchy ADDED
@@ -0,0 +1,78 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'rubygems'
4
+ require 'getoptlong'
5
+ require 'esearchy'
6
+
7
+ @yahoo_key = nil
8
+ @bing_key = nil
9
+ @maxhits = nil
10
+ @domains = []
11
+ @output = nil
12
+
13
+ opts = GetoptLong.new(
14
+ [ '--help', '-h', GetoptLong::NO_ARGUMENT ],
15
+ ['--domain','-d', GetoptLong::REQUIRED_ARGUMENT ],
16
+ ['--file','-f', GetoptLong::REQUIRED_ARGUMENT ],
17
+ ['--filter','-p', GetoptLong::REQUIRED_ARGUMENT ],
18
+ ['--output','-o', GetoptLong::REQUIRED_ARGUMENT ],
19
+ ['--yahoo_key','-y', GetoptLong::REQUIRED_ARGUMENT ],
20
+ ['--bing_key','-b', GetoptLong::REQUIRED_ARGUMENT ],
21
+ ['--maxhits','-m', GetoptLong::REQUIRED_ARGUMENT ]
22
+ )
23
+
24
+ opts.each do |opt, arg|
25
+ case opt
26
+ when '--help':
27
+ # BEGIN OF HELP
28
+ puts "\nHELP for Esearchy\n---------------------\n
29
+ --help, -h
30
+ \tWell I guess you know what this is for (To obtain this Help).\n
31
+ --domain, -d [domain.com]
32
+ \t The domain name to search.\n
33
+ --filter, -p
34
+ \t The pattern to use to filter emails.\n
35
+ --file, -f [file_name]
36
+ \tIf we need to search more than one domain we can provide a list.\n
37
+ --output, -o
38
+ \tThe output file name.
39
+ Copyright 2009 - FreedomCoder\n"
40
+ #END OF HELP
41
+ exit(0)
42
+ when '--domain':
43
+ @domains << arg
44
+ when '--file':
45
+ if File.exists?(arg)
46
+ open(arg,'r').each_line do |line|
47
+ @domains << line
48
+ end
49
+ else
50
+ puts "File not found"
51
+ end
52
+ when '--yahoo_key':
53
+ @yahoo_key = arg
54
+ when '--bing_key':
55
+ @bing_key = arg
56
+ when '--filter':
57
+ @pattern = arg
58
+ when '--output':
59
+ @output = arg
60
+ when '--maxhits':
61
+ @maxhits = arg
62
+ else
63
+ puts "Unknown command. Please try again"
64
+ exit(0)
65
+ end
66
+ end
67
+
68
+ require 'esearchy'
69
+
70
+ @domains.each do |domain|
71
+ ESearchy.create domain do |d|
72
+ d.yahoo_key = @yahoo_key if @yahoo_key
73
+ d.bing_key = @bing_key if @bing_key
74
+ d.maxhits = @maxhits if @maxhits
75
+ d.search
76
+ d.save_to_file @output if @output
77
+ end
78
+ end
data/data/bing.key ADDED
@@ -0,0 +1 @@
1
+ skdajsdksaldksadjlaksdlaskd
data/data/yahoo.key ADDED
@@ -0,0 +1 @@
1
+ dsjdkajsdasjdkasjdlkasjdsakdjskldjsdkaj
data/esearchy.rb ADDED
@@ -0,0 +1 @@
1
+ require 'lib/esearchy.rb'
@@ -0,0 +1,76 @@
1
+ %w{rubygems json cgi net/http}.each { |lib| require lib }
2
+ local_path = "#{File.dirname(__FILE__)}/"
3
+ %w{searchy keys}.each {|lib| require local_path + lib}
4
+
5
+ class Bing
6
+ include Searchy
7
+
8
+ def initialize(maxhits = nil, appid=nil, start=nil)
9
+ @appid = appid || Keys::BING_APP_KEY
10
+ @start = start || 0
11
+ @emails = []
12
+ @threads = []
13
+ @totalhits = maxhits || 0
14
+ @r_urls = Queue.new
15
+ @r_docs = Queue.new
16
+ @r_pdfs = Queue.new
17
+ @r_officexs = Queue.new
18
+ @r_txts = Queue.new
19
+ @lock = Mutex.new
20
+ end
21
+ attr_accessor :emails, :appid
22
+
23
+ def search(query)
24
+ @query = query
25
+ begin
26
+ http = Net::HTTP.new("api.search.live.net",80)
27
+ http.start do |http|
28
+ request = Net::HTTP::Get.new("/json.aspx" + "?Appid="+ @appid +
29
+ "&query=" + CGI.escape(query) +
30
+ "&sources=web&web.count=50&start=#{@start}")
31
+ response = http.request(request)
32
+ case response
33
+ when Net::HTTPSuccess, Net::HTTPRedirection
34
+ parse(response.body)
35
+ @start = @start + 50
36
+ if @totalhits > @start
37
+ puts "Searching in URL: #{self.class} up to point #{@start}"
38
+ search_emails(response.body)
39
+ sleep(4)
40
+ search(query)
41
+ else
42
+ puts "Searching in URL: #{self.class} up to point #{@start}"
43
+ search_emails(response.body)
44
+ end
45
+ else
46
+ return response.error!
47
+ end
48
+ end
49
+ rescue Net::HTTPFatalError
50
+ puts "Error: Something went wrong with the HTTP request"
51
+ rescue Errno::ECONNREFUSED
52
+ puts "Error: < Connection Refused > Hopefuly they have not banned us. :)"
53
+ end
54
+
55
+ end
56
+
57
+ def parse(json)
58
+ doc = JSON.parse(json)
59
+ @totalhits = doc["SearchResponse"]["Web"]["Total"].to_i if @totalhits == 0
60
+ doc["SearchResponse"]["Web"]["Results"].each do |result|
61
+ case result["Url"]
62
+ when /.pdf$/i
63
+ @r_pdfs << result["Url"]
64
+ when /.docx$|.xlsx$|.pptx$/i
65
+ @r_officexs << result["Url"]
66
+ when /.doc$/i
67
+ @r_docs << result["Url"]
68
+ when /.txt$|.rtf$|ans$/i
69
+ @r_txts << result["Url"]
70
+ else
71
+ @r_urls << result["Url"]
72
+ end
73
+ end
74
+ end
75
+
76
+ end
@@ -0,0 +1,85 @@
1
+ %w{rubygems cgi net/http}.each { |lib| require lib }
2
+ local_path = "#{File.dirname(__FILE__)}/"
3
+ %w{searchy keys}.each {|lib| require local_path + lib}
4
+
5
+ class Google
6
+ include Searchy
7
+
8
+ def initialize(maxhits = nil, start = nil)
9
+ @start = start || 0
10
+ @totalhits = maxhits || 0
11
+ @emails = []
12
+ @r_urls = Queue.new
13
+ @r_docs = Queue.new
14
+ @r_pdfs = Queue.new
15
+ @r_txts = Queue.new
16
+ @r_officexs = Queue.new
17
+ @lock = Mutex.new
18
+ @threads = []
19
+ end
20
+
21
+ attr_accessor :emails
22
+
23
+ def search(query)
24
+ @query = query
25
+ http = Net::HTTP.new("www.google.com",80)
26
+ begin
27
+ http.start do |http|
28
+ request = Net::HTTP::Get.new( "/cse?&safe=off&num=100&site=" +
29
+ "&q=" + CGI.escape(query) +
30
+ "&btnG=Search&start=#{@start}")
31
+ response = http.request(request)
32
+ case response
33
+ when Net::HTTPSuccess, Net::HTTPRedirection
34
+ parse(response.body)
35
+ @start = @start + 100
36
+ if @totalhits > @start
37
+ puts "Searching in URL: #{self.class} up to point #{@start}"
38
+ search_emails(response.body)
39
+ sleep(4)
40
+ search(query)
41
+ else
42
+ puts "Searching in URL: #{self.class} up to point #{@start}"
43
+ search_emails(response.body)
44
+ end
45
+ else
46
+ return response.error!
47
+ end
48
+ end
49
+ rescue Net::HTTPFatalError
50
+ puts "Error: Something went wrong with the HTTP request"
51
+ end
52
+ end
53
+
54
+ def parse(html)
55
+ @totalhits= html.scan(/<\/b> of about <b>(.*)<\/b> for /)[0][0].gsub(",","").to_i if @totalhits == 0
56
+ html.scan(/<div class=g><span class="b w xsm">\[([A-Z]+)\]<\/span> <h2 class=r><a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)"|<h2 class=r><a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)"/).each do |result|
57
+ case result[0]
58
+ when /PDF/
59
+ @r_pdfs << result[1]
60
+ when /DOC|XLS|PPT/
61
+ case result[1]
62
+ when /.doc$/i
63
+ @r_docs << result[1]
64
+ when /.docx$|.xlsx$|.pptx$/i
65
+ @r_officexs << result[1]
66
+ end
67
+ when nil
68
+ case result[2]
69
+ when /.pdf$/i
70
+ @r_pdfs << result[2]
71
+ when /.doc$/i
72
+ @r_docs << result[2]
73
+ when /.docx$|.xlsx$|.pptx$/i
74
+ @r_officexs << result[2]
75
+ when /.txt$|.rtf$|ans$/i
76
+ @r_txts << result[2]
77
+ else
78
+ @r_urls << result[2]
79
+ end
80
+ else
81
+ puts "I do not parse the #{result[0]} filetype yet:)"
82
+ end
83
+ end
84
+ end
85
+ end
@@ -0,0 +1,69 @@
1
+ %w{rubygems cgi net/http}.each { |lib| require lib }
2
+ local_path = "#{File.dirname(__FILE__)}/"
3
+ %w{searchy keys}.each {|lib| require local_path + lib}
4
+
5
+ class GoogleGroups
6
+ include Searchy
7
+
8
+ def initialize(maxhits = nil, start = nil)
9
+ @start = start || 0
10
+ @totalhits = maxhits || 0
11
+ @r_urls = Queue.new
12
+ @r_docs = Queue.new
13
+ @r_pdfs = Queue.new
14
+ @r_txts = Queue.new
15
+ @r_officexs = Queue.new
16
+ @lock = Mutex.new
17
+ @threads = []
18
+ end
19
+ attr_accessor :emails
20
+
21
+ def search(query)
22
+ @query = query
23
+ http = Net::HTTP.new("groups.google.com",80)
24
+ begin
25
+ http.start do |http|
26
+ request = Net::HTTP::Get.new( "/groups/search?&safe=off&num=100" +
27
+ "&q=" + CGI.escape(query) +
28
+ "&btnG=Search&start=#{@start}")
29
+ response = http.request(request)
30
+ case response
31
+ when Net::HTTPSuccess, Net::HTTPRedirection
32
+ parse(response.body)
33
+ @start = @start + 100
34
+ if @totalhits > @start
35
+ puts "Searching in URL: #{self.class} up to point #{@start}"
36
+ search_emails(response.body)
37
+ sleep(4)
38
+ search(query)
39
+ else
40
+ puts "Searching in URL: #{self.class} up to point #{@start}"
41
+ search_emails(response.body)
42
+ end
43
+ else
44
+ return response.error!
45
+ end
46
+ end
47
+ rescue Net::HTTPFatalError
48
+ puts "Error: Something went wrong with the HTTP request"
49
+ end
50
+ end
51
+
52
+ def parse(html)
53
+ @totalhits= html.scan(/<\/b> of about <b>(.*)<\/b> for /)[0][0].gsub(",","").to_i if @totalhits == 0
54
+ html.scan(/<div class=g align="left"><a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" target=""/).each do |result|
55
+ case result[0]
56
+ when /.pdf$/i
57
+ @r_pdfs << result[0]
58
+ when /.doc$/i
59
+ @r_docs << result[0]
60
+ when /.docx$|.xlsx$|.pptx$/i
61
+ @r_officexs << result[0]
62
+ when /.txt$|.asn$/i
63
+ @r_txts << result[0]
64
+ else
65
+ @r_urls << result[0]
66
+ end
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,5 @@
1
+ class Keys
2
+ BING_APP_KEY= open(File.dirname(__FILE__) + "/../../data/bing.key").readline.strip
3
+ YAHOO_APP_KEY= open(File.dirname(__FILE__) + "/../../data/yahoo.key").readline.strip
4
+ end
5
+
@@ -0,0 +1,76 @@
1
+ %w{rubygems cgi net/http}.each { |lib| require lib }
2
+ local_path = "#{File.dirname(__FILE__)}/"
3
+ %w{yahoo google}.each {|lib| require local_path + lib}
4
+
5
+ # http:///
6
+ class Linkedin
7
+ include Searchy
8
+
9
+ def initialize(maxhits=nil, start=nil)
10
+ @totalhits = maxhits || 0
11
+ @pages = 1
12
+ @emails = []
13
+ @lock = Mutex.new
14
+ @start = start || 0
15
+ @threads = []
16
+ @lock = Mutex.new
17
+ end
18
+ attr_accessor :emails, :appid
19
+
20
+ def search(query)
21
+ @query = query
22
+ begin
23
+ http = Net::HTTP.new("www.linkedin.com",80)
24
+ http.start do |http|
25
+ request = Net::HTTP::Get.new("search?search=&company=" + @query +
26
+ "&currentCompany=currentCompany" +
27
+ "&trk=coprofile_in_network_see_more" +
28
+ "&page_num=" + @pages)
29
+ response = http.request(request)
30
+ case response
31
+ when Net::HTTPSuccess, Net::HTTPRedirection
32
+ parse(response.body)
33
+ @start = @start + 10
34
+ if @totalhits > @start
35
+ @pages = @pages + 1
36
+ puts "Searching in: #{self.class} up to point #{@start}"
37
+ create_emails(response.body)
38
+ sleep(4)
39
+ search(@query)
40
+ else
41
+ puts "Searching in: #{self.class} up to point #{@start}"
42
+ search_emails(response.body)
43
+ end
44
+ else
45
+ return response.error!
46
+ end
47
+ end
48
+ rescue Net::HTTPFatalError
49
+ puts "Error: Something went wrong with the HTTP request"
50
+ end
51
+ end
52
+
53
+ def parse(string)
54
+ @totalhits = string.scan(/<p class="summary>"<strong>(\w)<\/strong>/) if @totalhits == 0
55
+ end
56
+
57
+ def search_people(string)
58
+ @people = string.scan(/<spam class="given-name">(*.)<\/spam><spam class="family-name">(*.)<\/spam>)/)
59
+ end
60
+ def search_person(name,last)
61
+ emails = Yahoo.new(50).search("first:\"#{name}\" last:\"#{last}\"").emails
62
+ emails.concat(Google.new(50).search("#{name} #{last}").emails).uniq!
63
+ end
64
+
65
+ def create_emails
66
+ @domain = + @query.match(/@/) ? @query : ("@" + @query)
67
+ @people.each do |person|
68
+ name = person[0]
69
+ last = person[1]
70
+ @emails << name + last + @domain
71
+ @emails << name[0] + last + @domain
72
+ @emails.concat(search_person(name,last))
73
+ end
74
+ print_emails(@emails)
75
+ end
76
+ end
@@ -0,0 +1,30 @@
1
+ %w{pdf/reader}.each { |lib| require lib }
2
+
3
+ class PageTextReceiver
4
+ attr_accessor :content
5
+
6
+ def initialize
7
+ @content = []
8
+ end
9
+
10
+ # Called when page parsing starts
11
+ def begin_page(arg = nil)
12
+ @content << ""
13
+ end
14
+
15
+ # record text that is drawn on the page
16
+ def show_text(string, *params)
17
+ @content.last << string.strip
18
+ end
19
+
20
+ # there's a few text callbacks, so make sure we process them all
21
+ alias :super_show_text :show_text
22
+ alias :move_to_next_line_and_show_text :show_text
23
+ alias :set_spacing_next_line_show_text :show_text
24
+
25
+ # this final text callback takes slightly different arguments
26
+ def show_text_with_positioning(*params)
27
+ params = params.first
28
+ params.each { |str| show_text(str) if str.kind_of?(String)}
29
+ end
30
+ end
@@ -0,0 +1,29 @@
1
+ class PGP
2
+ include Searchy
3
+
4
+ def initialize(maxhits=nil)
5
+ @emails = []
6
+ @lock = Mutex.new
7
+ @threads = []
8
+ end
9
+ attr_accessor :emails
10
+ def search(query)
11
+ @query = query
12
+ http = Net::HTTP.new("pgp.mit.edu",11371)
13
+ begin
14
+ http.start do |http|
15
+ request = Net::HTTP::Get.new( "/pks/lookup?search=#{@query}")
16
+ response = http.request(request)
17
+ case response
18
+ when Net::HTTPSuccess, Net::HTTPRedirection
19
+ puts "Searching #{self.class}"
20
+ search_emails(response.body)
21
+ else
22
+ return response.error!
23
+ end
24
+ end
25
+ rescue Net::HTTPFatalError
26
+ puts "Error: Something went wrong with the HTTP request"
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,248 @@
1
+ require 'digest/sha2'
2
+ require 'net/http'
3
+ require 'zip/zip'
4
+ require 'zip/zipfilesystem'
5
+ local_path = "#{File.dirname(__FILE__)}/"
6
+ require local_path + 'pdf2txt'
7
+ if RUBY_PLATFORM =~ /mingw|mswin/
8
+ require 'win32ole'
9
+ require local_path + 'wcol'
10
+ end
11
+
12
+
13
+
14
+ module Searchy
15
+ case RUBY_PLATFORM
16
+ when /mingw|mswin/
17
+ TEMP = "C:\\WINDOWS\\Temp\\"
18
+ else
19
+ TEMP = "/tmp/"
20
+ end
21
+
22
+ def search_emails(string)
23
+ string = string.gsub("<em>","") if self.class == Google #still not sure if this is going to work.
24
+ # OLD regex list = string.scan(/[a-z0-9!#$&'*+=?^_`{|}~-]+(?:\.[a-z0-9!#$&'*+=?\^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?/)
25
+ list = string.scan(/[a-z0-9!#$&'*+=?^_`{|}~-]+(?:\.[a-z0-9!#$&'*+=?\^_`{|}~-]+)*_at_(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\
26
+ [a-z0-9!#$&'*+=?^_`{|}~-]+(?:\.[a-z0-9!#$&'*+=?\^_`{|}~-]+)*\sat\s(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\
27
+ [a-z0-9!#$&'*+=?^_`{|}~-]+(?:\.[a-z0-9!#$&'*+=?\^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\
28
+ [a-z0-9!#$&'*+=?^_`{|}~-]+(?:\.[a-z0-9!#$&'*+=?\^_`{|}~-]+)*\s@\s(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\
29
+ [a-z0-9!#$&'*+=?^_`{|}~-]+(?:\sdot\s[a-z0-9!#$&'*+=?\^_`{|}~-]+)*\sat\s(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\sdot\s)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?/)
30
+ @lock.synchronize do
31
+ print_emails(list)
32
+ @emails.concat(fix(list)).uniq!
33
+ end
34
+ end
35
+
36
+ def search_pdfs(urls)
37
+ while urls.size >= 1
38
+ @threads << Thread.new do
39
+ web = URI.parse(urls.pop)
40
+ puts "Searching in PDF: #{web.to_s}\n"
41
+ begin
42
+ http = Net::HTTP.new(web.host,80)
43
+ http.start do |http|
44
+ request = Net::HTTP::Get.new("#{web.path}#{web.query}")
45
+ response = http.request(request)
46
+ case response
47
+ when Net::HTTPSuccess, Net::HTTPRedirection
48
+ name = Searchy::TEMP + "#{hash_url(web.to_s)}.pdf"
49
+ open(name, "wb") do |file|
50
+ file.write(response.body)
51
+ end
52
+ begin
53
+ receiver = PageTextReceiver.new
54
+ pdf = PDF::Reader.file(name, receiver)
55
+ search_emails(receiver.content.inspect)
56
+ rescue PDF::Reader::UnsupportedFeatureError
57
+ puts "Encrypted PDF: Unable to parse it.\n"
58
+ rescue PDF::Reader::MalformedPDFError
59
+ puts "Malformed PDF: Unable to parse it.\n"
60
+ end
61
+ `rm "#{name}"`
62
+ else
63
+ return response.error!
64
+ end
65
+ end
66
+ rescue Net::HTTPFatalError
67
+ puts "Error: Something went wrong with the HTTP request.\n"
68
+ rescue Net::HTTPServerException
69
+ puts "Error: Not longer there. 404 Not Found.\n"
70
+ rescue
71
+ puts "Error: < .. SocketError .. >\n"
72
+ end
73
+ end
74
+ end
75
+ @threads.each {|t| t.join } if @threads != nil
76
+ end
77
+
78
+ if RUBY_PLATFORM =~ /mingw|mswin/
79
+ def search_docs(urls)
80
+ while urls.size >= 1
81
+ @threads << Thread.new do
82
+ web = URI.parse(urls.pop)
83
+ puts "Searching in DOC: #{web.to_s}\n"
84
+ begin
85
+ http = Net::HTTP.new(web.host,80)
86
+ http.start do |http|
87
+ request = Net::HTTP::Get.new("#{web.path}#{web.query}")
88
+ response = http.request(request)
89
+ case response
90
+ when Net::HTTPSuccess, Net::HTTPRedirection
91
+ name = Searchy::TEMP + "#{hash_url(web.to_s)}.doc"
92
+ open(name, "wb") do |file|
93
+ file.write(response.body)
94
+ end
95
+ begin
96
+ word = WIN32OLE.new('word.application')
97
+ word.documents.open(name)
98
+ word.selection.wholestory
99
+ search_emails(word.selection.text.chomp)
100
+ word.activedocument.close( false )
101
+ word.quit
102
+ rescue
103
+ puts "Something went wrong parsing the .doc}\n"
104
+ end
105
+ `rm "#{name}"`
106
+ else
107
+ return response.error!
108
+ end
109
+ end
110
+ rescue Net::HTTPFatalError
111
+ puts "Error: Something went wrong with the HTTP request.\n"
112
+ rescue Net::HTTPServerException
113
+ puts "Error: Not longer there. 404 Not Found.\n"
114
+ rescue
115
+ puts "Error: < .. SocketError .. >\n"
116
+ end
117
+ end
118
+ end
119
+ @threads.each {|t| t.join } if @threads != nil
120
+ end
121
+ end
122
+
123
+ def search_office_xml(urls)
124
+ while urls.size >= 1
125
+ @threads << Thread.new do
126
+ web = URI.parse(urls.pop)
127
+ format = web.scan(/docx|xlsx|pptx/i)[0]
128
+ puts "Searching in #{format.upcase}: #{web.to_s}\n"
129
+ begin
130
+ http = Net::HTTP.new(web.host,80)
131
+ http.start do |http|
132
+ request = Net::HTTP::Get.new("#{web.path}#{web.query}")
133
+ response = http.request(request)
134
+ case response
135
+ when Net::HTTPSuccess, Net::HTTPRedirection
136
+ name = Searchy::TEMP + "#{hash_url(web.to_s)}." + format
137
+ open(name, "wb") do |file|
138
+ file.write(response.body)
139
+ end
140
+ begin
141
+ Zip::ZipFile.open(name) do |zip|
142
+ text = z.entries.each { |e| zip.file.read(e.name) if e.name =~ /.xml$/}
143
+ search_emails(text)
144
+ end
145
+ rescue
146
+ puts "Something went wrong parsing the .#{format.downcase}\n"
147
+ end
148
+ `rm "#{name}"`
149
+ else
150
+ return response.error!
151
+ end
152
+ end
153
+ rescue Net::HTTPFatalError
154
+ puts "Error: Something went wrong with the HTTP request.\n"
155
+ rescue Net::HTTPServerException
156
+ puts "Error: Not longer there. 404 Not Found.\n"
157
+ rescue
158
+ puts "Error: < .. SocketError .. >\n"
159
+ end
160
+ end
161
+ end
162
+ @threads.each {|t| t.join } if @threads != nil
163
+ end
164
+
165
+ def search_txts(urls)
166
+ while urls.size >= 1
167
+ @threads << Thread.new do
168
+ web = URI.parse(urls.pop)
169
+ puts "Searching in #{web.to_s.scan(/txt|rtf|ans/i)[0].upcase}: #{web.to_s}\n"
170
+ begin
171
+ http = Net::HTTP.new(web.host,80)
172
+ http.start do |http|
173
+ request = Net::HTTP::Get.new("#{web.path}#{web.query}")
174
+ response = http.request(request)
175
+ case response
176
+ when Net::HTTPSuccess, Net::HTTPRedirection
177
+ search_emails(response.body)
178
+ else
179
+ return response.error!
180
+ end
181
+ end
182
+ rescue Net::HTTPFatalError
183
+ puts "Error: Something went wrong with the HTTP request\n"
184
+ rescue Net::HTTPServerException
185
+ puts "Error: Not longer there. 404 Not Found.\n"
186
+ rescue
187
+ puts "Error: < .... >"
188
+ end
189
+ end
190
+ end
191
+ @threads.each {|t| t.join } if @threads != nil
192
+ end
193
+
194
+ # HELPER METHODS ---------------------------------------------------------------------------------
195
+
196
+ def print_emails(list)
197
+ list.each do |email|
198
+ unless @emails.include?(email)
199
+ unless RUBY_PLATFORM =~ /mingw|mswin/
200
+ if email.match(/#{@query.gsub("@","").split('.')[0]}/)
201
+ puts "\033[31m" + email + "\033\[0m"
202
+ else
203
+ puts "\033[32m" + email + "\033\[0m"
204
+ end
205
+ else
206
+ if email.match(/#{@query.gsub("@","").split('.')[0]}/)
207
+ Wcol::color(12)
208
+ puts email
209
+ Wcol::color(7)
210
+ else
211
+ Wcol::color(2)
212
+ puts email
213
+ Wcol::color(7)
214
+ end
215
+ end
216
+ end
217
+ end
218
+ end
219
+
220
+ def hash_url(url)
221
+ Digest::SHA2.hexdigest("#{Time.now.to_f}--#{url}")
222
+ end
223
+
224
+ def fix(list)
225
+ list.each do |e|
226
+ e.gsub!(" at ","@")
227
+ e.gsub!("_at_","@")
228
+ e.gsub!(" dot ",".")
229
+ end
230
+ end
231
+
232
+ def clean( &block )
233
+ @emails.delete_if &block.call
234
+ end
235
+
236
+ def maxhits=( value )
237
+ @totalhits = value
238
+ end
239
+
240
+ def search_depth
241
+ search_pdfs @r_pdfs if @r_pdfs
242
+ search_txts @r_txts if @r_txts
243
+ search_office_xml @r_officexs if @r_officexs
244
+ if RUBY_PLATFORM =~ /mingw|mswin/
245
+ search_docs @r_docs if @r_docs
246
+ end
247
+ end
248
+ end
@@ -0,0 +1,10 @@
1
+ require 'Win32API'
2
+ class Wcol
3
+ gsh = Win32API.new("kernel32", "GetStdHandle", ['L'], 'L')
4
+ @textAttr = Win32API.new("kernel32","SetConsoleTextAttribute", ['L','N'], 'I')
5
+ @h = gsh.call(-11)
6
+
7
+ def self.color(col)
8
+ @textAttr.call(@h,col)
9
+ end
10
+ end
@@ -0,0 +1,73 @@
1
+ %w{rubygems json cgi net/http}.each { |lib| require lib }
2
+ local_path = "#{File.dirname(__FILE__)}/"
3
+ %w{searchy keys}.each {|lib| require local_path + lib}
4
+
5
+ class Yahoo
6
+ include Searchy
7
+
8
+ def initialize(maxhits = nil, appid=nil, start=nil )
9
+ @appid = appid || Keys::YAHOO_APP_KEY
10
+ @start = start || 0
11
+ @totalhits = maxhits || 0
12
+ @emails = []
13
+ @r_urls = Queue.new
14
+ @r_docs = Queue.new
15
+ @r_pdfs = Queue.new
16
+ @r_officexs = Queue.new
17
+ @r_txts = Queue.new
18
+ @threads = []
19
+ @lock = Mutex.new
20
+ end
21
+ attr_accessor :emails, :appid
22
+
23
+ def search(query)
24
+ @query = query
25
+ begin
26
+ http = Net::HTTP.new("boss.yahooapis.com",80)
27
+ http.start do |http|
28
+ request = Net::HTTP::Get.new("/ysearch/web/v1/" + CGI.escape(query) +
29
+ "?appid="+ @appid +
30
+ "&format=json&count=50"+
31
+ "&start=#{@start}" )
32
+ response = http.request(request)
33
+ case response
34
+ when Net::HTTPSuccess, Net::HTTPRedirection
35
+ parse(response.body)
36
+ @start = @start + 50
37
+ if @totalhits > @start
38
+ puts "Searching in URL: #{self.class} up to point #{@start}"
39
+ search_emails(response.body)
40
+ sleep(4)
41
+ search(@query)
42
+ else
43
+ puts "Searching in URL: #{self.class} up to point #{@start}"
44
+ search_emails(response.body)
45
+ end
46
+ else
47
+ return response.error!
48
+ end
49
+ end
50
+ rescue Net::HTTPFatalError
51
+ puts "Error: Something went wrong with the HTTP request"
52
+ end
53
+ end
54
+
55
+ def parse(json)
56
+ doc = JSON.parse(json)
57
+ @totalhits = doc["ysearchresponse"]["totalhits"].to_i if @totalhits == 0
58
+ doc["ysearchresponse"]["resultset_web"].each do |result|
59
+ case result["url"]
60
+ when /.pdf$/i
61
+ @r_pdfs << result["url"]
62
+ when /.docx$|.xlsx$|.pptx$/i
63
+ @r_officexs << result["url"]
64
+ when /.doc$/i
65
+ @r_docs << result["url"]
66
+ when /.txt$|.rtf$|ans$/i
67
+ @r_txts << result["url"]
68
+ else
69
+ @r_urls << result["url"]
70
+ end
71
+ end
72
+ end
73
+ end
data/lib/esearchy.rb ADDED
@@ -0,0 +1,79 @@
1
+ local_path = "#{File.dirname(__FILE__) + '/esearchy/'}"
2
+ %w{google bing yahoo PGP keys}.each { |lib| require local_path + lib }
3
+
4
+ class ESearchy
5
+ def initialize(options={}, &block)
6
+ @query = options[:query]
7
+ @depth_search = options[:depth] || true
8
+ @maxhits = options[:maxhits]
9
+ @engines = options[:engines] || {"Google" => Google,
10
+ "Bing" => Bing,
11
+ "Yahoo" => Yahoo,
12
+ "PGP" => PGP }
13
+ @engines.each {|n,e| @engines[n] = e.new(@maxhits)}
14
+ @emails = Array.new
15
+ @threads = Array.new
16
+ block.call(self) if block_given?
17
+ end
18
+ attr_accessor :engines, :query, :threads, :depth_search
19
+ attr_reader :maxhits
20
+
21
+ def search(query=nil)
22
+ @engines.each do |n,e|
23
+ puts "+--- Launching Search for #{n} ---+\n"
24
+ e.search(query || @query)
25
+ e.search_depth if depth_search?
26
+ puts "+--- Finishing Search for #{n} ---+\n"
27
+ end
28
+ end
29
+
30
+ def emails
31
+ @engines.each do |n,e|
32
+ @emails.concat(e.emails).uniq!
33
+ end
34
+ @emails
35
+ end
36
+
37
+ def clean(&block)
38
+ emails.each do |e|
39
+ e.delete_if block.call
40
+ end
41
+ end
42
+
43
+ def maxhits=(value)
44
+ @engines.each do |n,e|
45
+ e.maxhits = value
46
+ end
47
+ end
48
+
49
+ def yahoo_key=(value)
50
+ @engines['Yahoo'].appid = value
51
+ end
52
+
53
+ def bing_key=(value)
54
+ @engines['Bing'].appid = value
55
+ end
56
+
57
+ def save_to_file(file)
58
+ open(file,"a") do |f|
59
+ emails.each { |e| f << e + "\n" }
60
+ end
61
+ end
62
+
63
+ def filter(regex)
64
+ emails.each.select { |email| email =~ regex }
65
+ end
66
+
67
+ def self.create(query=nil, &block)
68
+ self.new :query => query do |search|
69
+ block.call(search) if block_given?
70
+ end
71
+ end
72
+
73
+ private
74
+
75
+ def depth_search?
76
+ @depth_search
77
+ end
78
+
79
+ end
metadata ADDED
@@ -0,0 +1,101 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: FreedomCoder-esearchy
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.5
5
+ platform: ruby
6
+ authors:
7
+ - Matias P. Brutti
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-05-16 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: pdf/reader
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 0.7.5
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: json
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 1.1.6
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: rubyzip
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: 0.9.1
44
+ version:
45
+ description:
46
+ email: matiasbrutti@gmail.com
47
+ executables:
48
+ - esearchy
49
+ extensions: []
50
+
51
+ extra_rdoc_files:
52
+ - README.rdoc
53
+ files:
54
+ - esearchy.rb
55
+ - bin
56
+ - bin/esearchy
57
+ - data
58
+ - data/bing.key
59
+ - data/yahoo.key
60
+ - lib
61
+ - lib/esearchy.rb
62
+ - lib/esearchy
63
+ - lib/esearchy/bing.rb
64
+ - lib/esearchy/google.rb
65
+ - lib/esearchy/googlegroups.rb
66
+ - lib/esearchy/keys.rb
67
+ - lib/esearchy/linkedin.rb
68
+ - lib/esearchy/pdf2txt.rb
69
+ - lib/esearchy/pgp.rb
70
+ - lib/esearchy/searchy.rb
71
+ - lib/esearchy/yahoo.rb
72
+ - lib/esearchy/wcol.rb
73
+ - README.rdoc
74
+ has_rdoc: true
75
+ homepage: http://freedomcoder.com.ar/esearchy
76
+ post_install_message:
77
+ rdoc_options: []
78
+
79
+ require_paths:
80
+ - lib
81
+ required_ruby_version: !ruby/object:Gem::Requirement
82
+ requirements:
83
+ - - ">="
84
+ - !ruby/object:Gem::Version
85
+ version: "0"
86
+ version:
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ version: "0"
92
+ version:
93
+ requirements: []
94
+
95
+ rubyforge_project:
96
+ rubygems_version: 1.2.0
97
+ signing_key:
98
+ specification_version: 2
99
+ summary: A library to search for emails in search engines
100
+ test_files: []
101
+