wmap 2.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +141 -0
  3. data/LICENSE.txt +15 -0
  4. data/README.rdoc +98 -0
  5. data/TODO +13 -0
  6. data/bin/deprime +21 -0
  7. data/bin/distrust +38 -0
  8. data/bin/googleBot +23 -0
  9. data/bin/prime +21 -0
  10. data/bin/refresh +26 -0
  11. data/bin/run_tests +16 -0
  12. data/bin/spiderBot +26 -0
  13. data/bin/trust +38 -0
  14. data/bin/updateAll +57 -0
  15. data/bin/wadd +25 -0
  16. data/bin/wadds +26 -0
  17. data/bin/wcheck +28 -0
  18. data/bin/wdel +25 -0
  19. data/bin/wdump +21 -0
  20. data/bin/wmap +151 -0
  21. data/bin/wscan +32 -0
  22. data/data/cidrs +2 -0
  23. data/data/deactivated_sites +1 -0
  24. data/data/domains +2 -0
  25. data/data/hosts +1 -0
  26. data/data/prime_hosts +1 -0
  27. data/data/sites +2 -0
  28. data/data/sub_domains +2 -0
  29. data/demos/bruter.rb +27 -0
  30. data/demos/dns_brutes.rb +28 -0
  31. data/demos/filter_cidr.rb +18 -0
  32. data/demos/filter_crawls.rb +5 -0
  33. data/demos/filter_domain.rb +25 -0
  34. data/demos/filter_geoip.rb +26 -0
  35. data/demos/filter_known_services.rb +59 -0
  36. data/demos/filter_netinfo.rb +23 -0
  37. data/demos/filter_prime.rb +25 -0
  38. data/demos/filter_profiler.rb +3 -0
  39. data/demos/filter_redirection.rb +19 -0
  40. data/demos/filter_site.rb +40 -0
  41. data/demos/filter_siteip.rb +31 -0
  42. data/demos/filter_status.rb +17 -0
  43. data/demos/filter_timestamp.rb +23 -0
  44. data/demos/filter_url.rb +19 -0
  45. data/demos/new_fnd.rb +66 -0
  46. data/demos/nmap_parser.pl +138 -0
  47. data/demos/site_format.rb +18 -0
  48. data/demos/whois_domain.rb +78 -0
  49. data/dicts/GeoIP.dat +0 -0
  50. data/dicts/GeoIPASNum.dat +0 -0
  51. data/dicts/GeoLiteCity.dat +0 -0
  52. data/dicts/ccsld.txt +2646 -0
  53. data/dicts/cctld.txt +243 -0
  54. data/dicts/gtld.txt +25 -0
  55. data/dicts/hostnames-dict.big +1402 -0
  56. data/dicts/hostnames-dict.txt +101 -0
  57. data/lib/wmap/cidr_tracker.rb +327 -0
  58. data/lib/wmap/dns_bruter.rb +308 -0
  59. data/lib/wmap/domain_tracker/sub_domain.rb +142 -0
  60. data/lib/wmap/domain_tracker.rb +342 -0
  61. data/lib/wmap/geoip_tracker.rb +72 -0
  62. data/lib/wmap/google_search_scraper.rb +177 -0
  63. data/lib/wmap/host_tracker/primary_host.rb +130 -0
  64. data/lib/wmap/host_tracker.rb +550 -0
  65. data/lib/wmap/network_profiler.rb +144 -0
  66. data/lib/wmap/port_scanner.rb +208 -0
  67. data/lib/wmap/site_tracker/deactivated_site.rb +85 -0
  68. data/lib/wmap/site_tracker.rb +937 -0
  69. data/lib/wmap/url_checker.rb +314 -0
  70. data/lib/wmap/url_crawler.rb +381 -0
  71. data/lib/wmap/utils/domain_root.rb +184 -0
  72. data/lib/wmap/utils/logger.rb +53 -0
  73. data/lib/wmap/utils/url_magic.rb +343 -0
  74. data/lib/wmap/utils/utils.rb +333 -0
  75. data/lib/wmap/whois.rb +76 -0
  76. data/lib/wmap.rb +227 -0
  77. data/logs/wmap.log +17 -0
  78. data/ruby_whois_patches/base_cocca2.rb +149 -0
  79. data/ruby_whois_patches/kero.yachay.pe.rb +120 -0
  80. data/ruby_whois_patches/whois.PublicDomainRegistry.com.rb +124 -0
  81. data/ruby_whois_patches/whois.above.com.rb +61 -0
  82. data/ruby_whois_patches/whois.adamsnames.tc.rb +107 -0
  83. data/ruby_whois_patches/whois.aeda.net.ae.rb +105 -0
  84. data/ruby_whois_patches/whois.ai.rb +112 -0
  85. data/ruby_whois_patches/whois.arnes.si.rb +121 -0
  86. data/ruby_whois_patches/whois.ascio.com.rb +91 -0
  87. data/ruby_whois_patches/whois.cnnic.cn.rb +123 -0
  88. data/ruby_whois_patches/whois.corporatedomains.com.rb +67 -0
  89. data/ruby_whois_patches/whois.crsnic.net.rb +108 -0
  90. data/ruby_whois_patches/whois.denic.de.rb +174 -0
  91. data/ruby_whois_patches/whois.dk-hostmaster.dk.rb +120 -0
  92. data/ruby_whois_patches/whois.dns.be.rb +134 -0
  93. data/ruby_whois_patches/whois.dns.lu.rb +129 -0
  94. data/ruby_whois_patches/whois.dns.pl.rb +150 -0
  95. data/ruby_whois_patches/whois.dns.pt.rb +119 -0
  96. data/ruby_whois_patches/whois.domain.kg.rb +126 -0
  97. data/ruby_whois_patches/whois.domainregistry.my.rb +123 -0
  98. data/ruby_whois_patches/whois.domreg.lt.rb +110 -0
  99. data/ruby_whois_patches/whois.dot.tk.rb +140 -0
  100. data/ruby_whois_patches/whois.hkirc.hk.rb +121 -0
  101. data/ruby_whois_patches/whois.isnic.is.rb +130 -0
  102. data/ruby_whois_patches/whois.je.rb +119 -0
  103. data/ruby_whois_patches/whois.jprs.jp.rb +137 -0
  104. data/ruby_whois_patches/whois.kenic.or.ke.rb +140 -0
  105. data/ruby_whois_patches/whois.markmonitor.com.rb +118 -0
  106. data/ruby_whois_patches/whois.melbourneit.com.rb +58 -0
  107. data/ruby_whois_patches/whois.nic.as.rb +96 -0
  108. data/ruby_whois_patches/whois.nic.at.rb +109 -0
  109. data/ruby_whois_patches/whois.nic.ch.rb +141 -0
  110. data/ruby_whois_patches/whois.nic.cl.rb +117 -0
  111. data/ruby_whois_patches/whois.nic.ec.rb +157 -0
  112. data/ruby_whois_patches/whois.nic.im.rb +120 -0
  113. data/ruby_whois_patches/whois.nic.it.rb +170 -0
  114. data/ruby_whois_patches/whois.nic.lv.rb +116 -0
  115. data/ruby_whois_patches/whois.nic.ly.rb +127 -0
  116. data/ruby_whois_patches/whois.nic.mu.rb +27 -0
  117. data/ruby_whois_patches/whois.nic.mx.rb +123 -0
  118. data/ruby_whois_patches/whois.nic.net.sa.rb +111 -0
  119. data/ruby_whois_patches/whois.nic.or.kr.rb +101 -0
  120. data/ruby_whois_patches/whois.nic.tel.rb +129 -0
  121. data/ruby_whois_patches/whois.nic.tr.rb +133 -0
  122. data/ruby_whois_patches/whois.nic.us.rb +129 -0
  123. data/ruby_whois_patches/whois.nic.ve.rb +135 -0
  124. data/ruby_whois_patches/whois.norid.no.rb +127 -0
  125. data/ruby_whois_patches/whois.pandi.or.id.rb +118 -0
  126. data/ruby_whois_patches/whois.psi-usa.info.rb +63 -0
  127. data/ruby_whois_patches/whois.registro.br.rb +109 -0
  128. data/ruby_whois_patches/whois.registrygate.com.rb +55 -0
  129. data/ruby_whois_patches/whois.rrpproxy.net.rb +61 -0
  130. data/ruby_whois_patches/whois.sgnic.sg.rb +130 -0
  131. data/ruby_whois_patches/whois.srs.net.nz.rb +166 -0
  132. data/ruby_whois_patches/whois.tucows.com.rb +70 -0
  133. data/ruby_whois_patches/whois.twnic.net.tw.rb +133 -0
  134. data/settings/discovery_ports +24 -0
  135. data/settings/google_keywords.txt +9 -0
  136. data/settings/google_locator.txt +23 -0
  137. data/test/domain_tracker_test.rb +31 -0
  138. data/test/utils_test.rb +168 -0
  139. data/version.txt +13 -0
  140. data/wmap.gemspec +49 -0
  141. metadata +202 -0
data/lib/wmap/url_checker.rb
@@ -0,0 +1,314 @@
+ #--
+ # Wmap
+ #
+ # A pure Ruby library for Internet web application discovery and tracking.
+ #
+ # Copyright (c) 2012-2015 Yang Li <yang.li@owasp.org>
+ #++
+ require "net/http"
+ require 'httpclient'
+ require "openssl"
+ require "uri"
+ require "digest/md5"
+ require "parallel"
+
+ # A quick checker class to identify / finger-print a URL / site
+ class Wmap::UrlChecker
+ include Wmap::Utils
+ attr_accessor :http_timeout, :max_parallel, :verbose, :data_dir
+
+ def initialize (params = {})
+ # Set default instance variables
+ @verbose=params.fetch(:verbose, false)
+ @data_dir=params.fetch(:data_dir, File.dirname(__FILE__)+'/../../data/')
+ @http_timeout=params.fetch(:http_timeout, 5000)
+ @max_parallel=params.fetch(:max_parallel, 40)
+ @ssl_version=nil
+ @url_code={}
+ @url_redirection={}
+ @url_finger_print={}
+ @url_server={}
+ end
+
+ # Main worker method to perform various checks on the URL / site
+ def url_worker (url)
+ puts "Checking out an unknown URL: #{url}" if @verbose
+ begin
+ url=url.strip.downcase
+ raise "Invalid URL format: #{url}" unless is_url?(url)
+ timestamp=Time.now
+ host=url_2_host(url)
+ ip=host_2_ip(host)
+ port=url_2_port(url)
+ code=10000
+ if @url_code.key?(url)
+ code=@url_code[url]
+ else
+ code=response_code(url)
+ end
+ if @url_redirection.key?(url)
+ loc=@url_redirection[url]
+ else
+ loc=redirect_location(url)
+ end
+ if @url_finger_print.key?(url)
+ fp=@url_finger_print[url]
+ else
+ fp=response_body_md5(url)
+ end
+ if @url_server.key?(url)
+ server=@url_server[url]
+ else
+ server=get_server_header(url)
+ end
+ # save the data
+ checker=Hash.new
+ checker['ip']=ip
+ checker['port']=port
+ checker['url']=url
+ checker['code']=code
+ checker['redirection']=loc
+ checker['md5']=fp
+ checker['server']=server
+ checker['timestamp']=timestamp
+ if Wmap::CidrTracker.new(:data_dir=>@data_dir).ip_trusted?(ip)
+ checker['status']="int_hosted"
+ else
+ checker['status']="ext_hosted"
+ end
+ return checker
+ rescue OpenSSL::SSL::SSLError => es # handler to temporarily keep the openssl bug at bay: SSL_set_session: unable to find ssl method
+ checker=Hash.new
+ checker['ip']=ip
+ checker['port']=port
+ checker['url']=url
+ checker['code']=20000
+ checker['server']="Unknown SSL error: #{es}"
+ checker['md5']=nil
+ checker['redirection']=nil
+ checker['timestamp']=timestamp
+ return checker
+ rescue Exception => ee
+ puts "Exception on method #{__method__} for #{url}: #{ee}" # if @verbose
+ return nil
+ end
+ end
+ alias_method :check, :url_worker
+
+ # Parallel scanner - utilizes the fork manager 'parallel' to spawn a number of child processes that check multiple urls simultaneously
+ def url_workers (targets,num=@max_parallel)
+ begin
+ results=Array.new
+ targets -= ["", nil]
+ if targets.size > 0
+ puts "Start the url checker on the targets:\n #{targets}"
+ Parallel.map(targets, :in_processes => num) { |target|
+ url_worker(target)
+ }.each do |process|
+ if process.nil?
+ next
+ elsif process.empty?
+ #do nothing
+ else
+ results << process
+ end
+ end
+ end
+ return results
+ rescue Exception => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
+ end
+ end
+ alias_method :checks, :url_workers
+
+ # Test the URL and return the response code
+ def response_code (url)
+ puts "Check the http response code on the url: #{url}" if @verbose
+ response_code = 10000 # All unknown url connection exceptions go here
+ begin
+ raise "Invalid url: #{url}" unless is_url?(url)
+ url=url.strip.downcase
+ timeo = @http_timeout/1000.0
+ uri = URI.parse(url)
+ http = Net::HTTP.new(uri.host, uri.port)
+ http.open_timeout = timeo
+ http.read_timeout = timeo
+ if (url =~ /https\:/i)
+ http.use_ssl = true
+ #http.ssl_version = :SSLv3
+ # Bypass the remote web server cert validation test
+ http.verify_mode = OpenSSL::SSL::VERIFY_NONE
+ end
+ request = Net::HTTP::Get.new(uri.request_uri)
+ response = http.request(request)
+ puts "Server response the following: #{response}" if @verbose
+ response_code = response.code.to_i
+ #response.finish if response.started?()
+ @url_code[url]=response_code
+ puts "Response code on #{url}: #{response_code}" if @verbose
+ return response_code
+ rescue Exception => ee
+ puts "Exception on method #{__method__} for #{url}: #{ee}" if @verbose
+ case ee
+ # rescue "Connection reset by peer" error type
+ when Errno::ECONNRESET
+ response_code=104
+ when Errno::ECONNABORTED,Errno::ETIMEDOUT
+ #response_code=10000
+ when Timeout::Error # Quick fix
+ if (url =~ /https\:/i) # try again for ssl timeout session, in case of default :TLSv1 failure
+ http.ssl_version = :SSLv3
+ response = http.request(request)
+ response_code = response.code.to_i
+ unless response_code.nil?
+ @ssl_version = http.ssl_version
+ end
+ end
+ else
+ #response_code=10000
+ end
+ @url_code[url]=response_code
+ return response_code
+ end
+ end
+ alias_method :query, :response_code
+
+ # Test the URL / site and return the redirection location (3xx response code only)
+ def redirect_location (url)
+ puts "Test the redirection location for the url: #{url}" if @verbose
+ location=""
+ begin
+ raise "Invalid url: #{url}" unless is_url?(url)
+ url=url.strip.downcase
+ timeo = @http_timeout/1000.0
+ uri = URI.parse(url)
+ code = response_code (url)
+ if code >= 300 && code < 400
+ http = Net::HTTP.new(uri.host, uri.port)
+ http.open_timeout = timeo
+ http.read_timeout = timeo
+ if (url =~ /https\:/i)
+ http.use_ssl = true
+ # Bypass the remote web server cert validation test
+ http.verify_mode = OpenSSL::SSL::VERIFY_NONE
+ http.ssl_version = @ssl_version
+ end
+ request = Net::HTTP::Get.new(uri.request_uri)
+ response = http.request(request)
+ case response
+ when Net::HTTPRedirection then
+ location = response['location']
+ end
+ end
+ @url_redirection[url]=location
+ return location
+ rescue Exception => ee
+ puts "Exception on method redirect_location for URL #{url}: #{ee}" if @verbose
+ @url_redirection[url]=location
+ return location
+ end
+ end
+ alias_method :location, :redirect_location
+
+ # Test the URL / site and return the web server type from the HTTP header "server" field
+ def get_server_header (url)
+ puts "Retrieve the server header field from the url: #{url}" if @verbose
+ server=String.new
+ begin
+ raise "Invalid url: #{url}" unless is_url?(url)
+ url=url.strip.downcase
+ timeo = @http_timeout/1000.0
+ uri = URI.parse(url)
+ code = response_code (url)
+ http = Net::HTTP.new(uri.host, uri.port)
+ http.open_timeout = timeo
+ http.read_timeout = timeo
+ if (url =~ /https\:/i)
+ http.use_ssl = true
+ # Bypass the remote web server cert validation test
+ http.verify_mode = OpenSSL::SSL::VERIFY_NONE
+ http.ssl_version = @ssl_version
+ end
+ request = Net::HTTP::Get.new(uri.request_uri)
+ response = http.request(request)
+ server=response["server"]
+ server=server.gsub(/\,/,' ')
+ return server
+ rescue Exception => ee
+ puts "Exception on method get_server_header for URL #{url}: #{ee}" if @verbose
+ @url_server[url]=server
+ return server
+ end
+ end
+
+ # Use MD5 algorithm to fingerprint the URL / site response payload (web page content)
+ def response_body_md5(url)
+ puts "MD5 finger print page body content: #{url}" if @verbose
+ begin
+ raise "Invalid url: #{url}" unless is_url?(url)
+ url=url.strip.downcase
+ timeo = @http_timeout/1000.0
+ uri = URI.parse(url)
+ fp=""
+ http = Net::HTTP.new(uri.host, uri.port)
+ http.open_timeout = timeo
+ http.read_timeout = timeo
+ if (url =~ /https\:/i)
+ http.use_ssl = true
+ # Bypass the remote web server cert validation test
+ http.verify_mode = OpenSSL::SSL::VERIFY_NONE
+ http.ssl_version = @ssl_version
+ end
+ request = Net::HTTP::Get.new(uri.request_uri)
+ response = http.request(request)
+ response_body = response.body.to_s
+ fp=Digest::MD5.hexdigest(response_body) unless response_body.nil?
+ @url_finger_print[url] = fp
+ return fp
+ rescue Exception => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ end
+ end
+ alias_method :md5, :response_body_md5
+
+ # Retrieve the remote web server certificate, open it and return the cert content as a string
+ def get_certificate (url)
+ puts "Retrieve the remote web server SSL certificate in clear text: #{url}" if @verbose
+ begin
+ url=url.strip
+ raise "Invalid URL string: #{url}" unless is_ssl?(url)
+ client = HTTPClient.new
+ client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
+ response = client.get(url)
+ cert = response.peer_cert
+ cer = OpenSSL::X509::Certificate.new(cert)
+ return cer.to_text
+ rescue Exception => ee
+ puts "Exception on method #{__method__} from #{url}: #{ee}"
+ end
+ return nil
+ end
+ alias_method :get_cert, :get_certificate
+
+ # Retrieve the X509 cert in clear text from the remote web server, extract and return the common name field within the cert
+ def get_cert_cn (url)
+ puts "Extract the common name field from the X509 cert of: #{url}" if @verbose
+ begin
+ cert=get_certificate(url)
+ subject, cn = ""
+ if cert =~ /\n(.+)Subject\:(.+)\n/i
+ subject=$2
+ end
+ if subject =~/CN\=(.+)/i
+ cn=$1
+ end
+ return cn
+ rescue Exception => ee
+ puts "Error on method #{__method__} from #{cert}: #{ee}" if @verbose
+ end
+ return nil
+ end
+ alias_method :get_cn, :get_cert_cn
+
+ end
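For orientation, below is a minimal usage sketch of the Wmap::UrlChecker class added in this file. It is not part of the package diff: the target URLs are hypothetical examples, and it assumes the gem is installed and loaded with require 'wmap'.

  require 'wmap'

  # Options mirror the initializer above (:verbose, :data_dir, :http_timeout, :max_parallel)
  checker = Wmap::UrlChecker.new(:http_timeout => 5000, :verbose => false)

  # Single check (alias of url_worker) - returns a hash with 'ip', 'port', 'url', 'code',
  # 'redirection', 'md5', 'server', 'status' and 'timestamp' keys, or nil on error
  record = checker.check("https://www.example.com/")
  puts record['code'] unless record.nil?

  # Parallel checks over a list (alias of url_workers) - returns an array of result hashes
  records = checker.checks(["https://www.example.com/", "http://www.example.com/"])

  # Pull the common name out of the site's X509 certificate (HTTPS sites only)
  puts checker.get_cn("https://www.example.com/")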
data/lib/wmap/url_crawler.rb
@@ -0,0 +1,381 @@
+ #--
+ # Wmap
+ #
+ # A pure Ruby library for Internet web application discovery and tracking.
+ #
+ # Copyright (c) 2012-2015 Yang Li
+ #++
+ require "net/http"
+ require "uri"
+ require "open-uri"
+ require "open_uri_redirections"
+ require "nokogiri"
+ require "parallel"
+
+
+ # Web site crawler class
+ class Wmap::UrlCrawler
+ include Wmap::Utils
+
+ attr_accessor :http_timeout, :crawl_page_limit, :crawl_depth, :max_parallel, :verbose, :data_dir
+ attr_reader :discovered_urls_by_crawler, :visited_urls_by_crawler, :crawl_start, :crawl_done
+ # Global variable used to store the combined result of all the forked child processes. Note that a class variable
+ # would not be able to pass the result, due to the limitation of the IO pipe communication mechanism used by the 'parallel' fork manager
+ # $discovered_urls=Hash.new
+
+ # set hard stop limit of http time-out to 8 seconds, in order to avoid severe performance penalty for certain 'weird' site(s)
+ Max_http_timeout=8000
+ # set hard stop limit of crawler time-out to 1200 seconds or 20 minutes
+ Crawl_timeout=1200000
+
+ # Crawler instance default variables
+ def initialize (params = {})
+ @verbose=params.fetch(:verbose, false)
+ @data_dir=params.fetch(:data_dir, File.dirname(__FILE__)+'/../../logs/')
+ @http_timeout=params.fetch(:http_timeout, 5000)
+ @crawl_depth=params.fetch(:crawl_depth, 4)
+ @crawl_page_limit=params.fetch(:crawl_page_limit, 1000)
+ @max_parallel=params.fetch(:max_parallel, 40)
+ # Discovered data store
+ @discovered_urls_by_crawler=Hash.new
+ @visited_urls_by_crawler=Hash.new
+ @crawl_start=Hash.new
+ @crawl_done=Hash.new
+ Dir.mkdir(@data_dir) unless Dir.exist?(@data_dir)
+ @log_file=@data_dir + "crawler.log"
+ end
+
+ # Pre-crawl profiler, used for network profiling to maximize the crawler performance.
+ def pre_crawl(url)
+ puts "Perform network profiling works on the web server before the web crawling: #{url}" if @verbose
+ begin
+ host=url_2_host(url)
+ # Use the following formula to 'guess' the right http time-out threshold for the scanner
+ nwk_to=Wmap::NetworkProfiler.new.profile(host).to_i
+ if (1500 + Wmap::NetworkProfiler.new.profile(host)*2).to_i > Max_http_timeout
+ @http_timeout = Max_http_timeout
+ else
+ @http_timeout = 1500 + nwk_to*2
+ end
+ puts "Done with the pre-scan works: reset @http_timeout to: #{@http_timeout} ms" if @verbose
+ rescue Exception => ee
+ puts "Exception on method #{__method__} for #{host}: #{ee}" if @verbose
+ @http_timeout = Max_http_timeout
+ end
+ end
+
+ # A web crawler to crawl a known website and search for html links within the same root domain. For example,
+ # by crawling 'http://www.yahoo.com/' it could discover 'http://login.yahoo.com/'
+ def crawl(url)
+ puts "Start web crawling on #{url}"
+ #begin
+ result=Array.new
+ url=url.chomp.strip
+ result.push(url_2_site(url))
+ raise "Error! Invalid url format: #{url}" unless is_url?(url)
+ # Add logic to profile the web server before crawling; this is used to optimize the crawling speed
+ pre_crawl(url)
+ status = Timeout::timeout(Crawl_timeout/1000) {
+ result+=crawl_worker(url).keys
+ }
+ puts "Web crawling time-out on #{url}: #{status}" if @verbose
+ return result
+ #rescue => ee
+ #puts "Exception on method #{__method__} for URL #{url}: #{ee}"
+ #return result
+ #end
+ end
+ alias_method :query, :crawl
+
+ # The worker instance of the crawler that performs the actual labour
+ def crawl_worker(url0)
+ puts "Please be aware that it may take a while to crawl #{url0}, depending on the site's responsiveness and the amount of contents."
+ #begin
+ # Input URL sanity check first
+ if is_url?(url0)
+ host=url_2_host(url0)
+ ip=host_2_ip(host).to_s
+ raise "Invalid IP address: #{url0}" if ip.nil?
+ port=url_2_port(url0).to_s
+ raise "Invalid port number: #{url0}" if port.nil?
+ else
+ raise "Invalid URL: #{url0}. Please check it out with your browser again."
+ end
+ log_info=Hash.new
+ log_info[1]="Start working on #{url0}"
+ url_stores=Hash.new
+ url_stores[url0]=true unless url_stores.key?(url0)
+ @discovered_urls_by_crawler[url0]=true unless @discovered_urls_by_crawler.key?(url0)
+ @crawl_start[url0]=true unless @crawl_start.key?(url0)
+ # $discovered_urls[url0]=true unless $discovered_urls.key?(url0)
+ @crawl_depth.times do
+ url_stores.keys.each do |url|
+ # 10/01/2013 add logic to avoid unnecessary crawling within the same child instance
+ next if @visited_urls_by_crawler.key?(url)
+ url_object = open_url(url)
+ next if url_object == nil
+ url = update_url_if_redirected(url, url_object)
+ url_body = read_url(url)
+ # Protection code - to avoid parsing failure on the empty or nil object
+ next if url_body.nil? or url_body.empty?
+ url_stores[url]=true unless url_stores.key?(url)
+ @discovered_urls_by_crawler[url]=true unless @discovered_urls_by_crawler.key?(url)
+ # $discovered_urls[url]=true unless $discovered_urls.key?(url)
+ doc = parse_html(url_body)
+ next if doc == nil
+ if url_stores.size >= @crawl_page_limit
+ #@visited_urls_by_crawler.merge!(url_stores)
+ @discovered_urls_by_crawler.merge!(url_stores)
+ # $discovered_urls.merge!(url_stores)
+ puts "Finish web crawling the url: #{url0}"
+ return url_stores
+ end
+ page_urls = find_urls_on_page(doc, url)
+ page_urls.uniq!
+ page_urls.map do |y|
+ y=normalize_url(y)
+ url_stores[y]=true unless url_stores.key?(y)
+ @discovered_urls_by_crawler[y]=true unless @discovered_urls_by_crawler.key?(y)
+ # $discovered_urls[y]=true unless $discovered_urls.key?(y)
+ end
+ end
+ end
+ puts "Finish web crawling on: #{url0}"
+ log_info[2]="Finish working on: #{url0}"
+ wlog(log_info, "UrlCrawler", @log_file)
+ @crawl_done[url0]=true unless @crawl_done.key?(url0)
+ return url_stores
+ #rescue => ee
+ #puts "Exception on method #{__method__} for URL #{url0}: #{ee}" if @verbose
+ #log_info[3]="Exception on #{url0}"
+ #wlog(log_info,"UrlCrawler",@log_file)
+ #return url_stores
+ #end
+ end
+
+ # Fast crawling by utilizing the fork manager 'parallel' to spawn a number of child processes at the same time;
+ # each child process will continuously work on the target pool until all the work is done
+ def crawl_workers (targets,num=@max_parallel)
+ begin
+ raise "Input error - expecting targets in an array format: #{targets}" unless targets.kind_of? Array
+ puts "Sanitize the URL seeds to eliminate the unnecessary duplication(s) ..." if @verbose
+ #puts "This could be awhile depending on the list size. Please be patient ..."
+ # 09/30/2013 Add additional logic to eliminate the duplicate target site(s) before the crawlers are invoked.
+ targets -= ["", nil]
+ uniq_sites=Hash.new
+ targets.dup.map do |target|
+ if is_url?(target)
+ host=url_2_host(target)
+ ip=host_2_ip(host).to_s
+ next if ip.nil?
+ port=url_2_port(target).to_s
+ next if port.nil?
+ site_key=ip+":"+port
+ unless uniq_sites.key?(site_key)
+ uniq_sites[site_key]=target
+ end
+ end
+ end
+ puts "Sanitization done! " if @verbose
+ puts "Start the parallel engine on the normalized crawling list:\n #{targets} "
+ puts "Maximum number of web crawling sessions allowed: #{num}" #if @verbose
+ raise "Error: target list is empty!" if targets.size < 1
+ Parallel.map(uniq_sites.values, :in_processes => num) { |target|
+ puts "Working on #{target} ..." if @verbose
+ crawl(target)
+ }.dup.each do |process|
+ puts "process.inspect: #{process}" if @verbose
+ urls=process
+ urls-=["",nil] unless urls.nil?
+ if urls.nil?
+ next
+ elsif urls.empty?
+ next
+ #do nothing
+ else
+ urls.map do |url|
+ url.strip!
+ @discovered_urls_by_crawler[url]=true unless @discovered_urls_by_crawler.key?(url)
+ #$discovered_urls[url]=true unless $discovered_urls.key?(url)
+ end
+ end
+ end
+ #return sites
+ return @discovered_urls_by_crawler.keys
+ rescue Exception => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
+ end
+ end
+ alias_method :crawls, :crawl_workers
+
+ # Fast crawling method - build the target pool from the input file
+ def crawl_workers_on_file (file)
+ puts "Web crawl the list of targets from file: #{file}"
+ begin
+ targets=file_2_list(file)
+ sites=crawl_workers(targets,num=@max_parallel)
+ return sites
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
+ end
+ end
+ alias_method :query_file, :crawl_workers_on_file
+ alias_method :crawl_file, :crawl_workers_on_file
+
+ # Wrapper for the OpenURI open method - create an open_uri object and return the reference upon success
+ def open_url(url)
+ puts "Open url #{url} by creating an open_uri object. Return the reference upon success." if @verbose
+ #url_object = nil
+ begin
+ if url =~ /http\:/i
+ # patch to allow the 'unsafe' URL redirection, i.e. https://www.example.com -> http://www.example.com
+ url_object = open(url, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000)
+ #url_object = open(url)
+ elsif url =~ /https\:/i
+ url_object = open(url,:ssl_verify_mode => 0, :allow_redirections =>:safe, :read_timeout=>Max_http_timeout/1000)
+ #url_object = open(url,:ssl_verify_mode => 0)
+ else
+ raise "Invalid URL format - please specify the protocol prefix http(s) in the URL: #{url}"
+ end
+ return url_object
+ rescue => ee
+ puts "Exception on method #{__method__} for #{url}: #{ee}" if @verbose
+ return nil
+ end
+ end
+
+ # Wrapper to use OpenURI method 'read' to return url body contents
+ def read_url(url)
+ puts "Wrapper to return the OpenURI object for url: #{url}" if @verbose
+ begin
+ url_object=open_url(url)
+ @visited_urls_by_crawler[url]=true unless @visited_urls_by_crawler.key?(url)
+ body=url_object.read
+ return body
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
+ end
+ end
+
+ # Return the destination url in case of url re-direct
+ def update_url_if_redirected(url, url_object)
+ #puts "Comparing the original URL with the return object base_uri. Return the one where the true content is found. " if @verbose
+ begin
+ if url != url_object.base_uri.to_s
+ return url_object.base_uri.to_s
+ end
+ return url
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
+ end
+ end
+
+ # Wrapper for the Nokogiri DOM parser
+ def parse_html(html_body)
+ #puts "Parsing the html content: #{html_body}. Return DOM " if @verbose
+ begin
+ doc = Nokogiri::HTML(html_body)
+ #puts "Successfully crawling the url: #{url_object.base_uri.to_s}" if @verbose
+ #puts "doc: #{doc}" if @verbose
+ return doc
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
+ end
+ end
+
+ # Search 'current_url' and return found URLs under the same domain
+ def find_urls_on_page(doc, current_url)
+ #puts "Search and return URLs within the doc: #{doc}" if @verbose
+ begin
+ urls_list = []
+ # case 1 - search embedded HTML tag <a href='url'> for the url elements
+ links=doc.css('a')
+ links.map do |x|
+ #puts "x: #{x}"
+ new_url = x.attribute('href').to_s
+ unless new_url == nil
+ if new_url.match("http")
+ #if urls_on_same_domain?(new_url,current_url)
+ urls_list.push(new_url)
+ #end
+ else
+ new_url = make_absolute(current_url, new_url)
+ urls_list.push(new_url)
+ end
+ end
+ end
+ # case 2 - search client side redirect - <meta http-equiv="refresh" content="5;URL='http://example.com/'">
+ elements=doc.css("meta[http-equiv]")
+ unless elements.size == 0
+ link=elements.attr("content").value.split(/url\=/i)[1]
+ unless link.nil?
+ new_url = make_absolute(current_url, link)
+ urls_list.push(new_url) unless new_url.nil?
+ end
+ end
+ #puts "Found URLs under page #{current_url}:\n#{urls_list}" if @verbose
+ return urls_list.uniq-["",nil]
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
+ end
+ end
+
+ # Method to print out discovery URL result
+ def print_discovered_urls_by_crawler
+ puts "Print discovered url by the crawler. " if @verbose
+ begin
+ puts "\nSummary Report of Discovered URLs from the Crawler:"
+ @discovered_urls_by_crawler.keys.each do |url|
+ puts url
+ end
+ puts "Total: #{@discovered_urls_by_crawler.keys.size}"
+ puts "End of the summary"
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
+ end
+ end
+ alias_method :print, :print_discovered_urls_by_crawler
+
+ # Method to save URL discovery result
+ def save_discovered_urls (file)
+ puts "Save discovered urls by the crawler to file: #{file} "
+ begin
+ list_2_file(@discovered_urls_by_crawler.keys, file)
+ puts "Done!"
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
+ end
+ end
+ alias_method :save, :save_discovered_urls
+
+ # Method to retrieve discovery site result
+ def get_discovered_sites_by_crawler
+ puts "Print summary report of discovered sites. " if @verbose
+ begin
+ puts "\nSummary Report of Discovered Sites from the Crawler:"
+ sites = Hash.new
+ @discovered_urls_by_crawler.keys.each do |url|
+ site=url_2_site(url)
+ sites[site]=true unless sites.key?(site)
+ end
+ sites.keys.map { |site| puts site }
+ puts "Total: #{sites.size}"
+ puts "End of the summary"
+ return sites.keys
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
+ end
+ end
+ alias_method :get_sites, :get_discovered_sites_by_crawler
+
+ private :open_url, :read_url, :update_url_if_redirected, :parse_html, :find_urls_on_page
+ end
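As with the checker above, here is a minimal usage sketch of the Wmap::UrlCrawler class added in this file. It is not part of the package diff: the seed URLs and the output path are hypothetical examples.

  require 'wmap'

  # Options mirror the initializer above (:crawl_depth, :crawl_page_limit, :max_parallel, :verbose, ...)
  crawler = Wmap::UrlCrawler.new(:crawl_depth => 2, :max_parallel => 10)

  # Crawl a single site (alias :query) - returns an array of URLs found under the same site
  urls = crawler.crawl("https://www.example.com/")

  # Crawl a list of seed sites in parallel child processes (alias :crawls)
  crawler.crawls(["https://www.example.com/", "http://www.example.com/"])

  # Inspect and persist what was discovered
  sites = crawler.get_sites
  crawler.save("/tmp/discovered_urls.txt")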