RubyGems - wmap - Versions diffs - 2.4.4 - Mend

wmap 2.4.4

Files changed (141) hide show

checksums.yaml +7 -0
data/CHANGELOG.md +141 -0
data/LICENSE.txt +15 -0
data/README.rdoc +98 -0
data/TODO +13 -0
data/bin/deprime +21 -0
data/bin/distrust +38 -0
data/bin/googleBot +23 -0
data/bin/prime +21 -0
data/bin/refresh +26 -0
data/bin/run_tests +16 -0
data/bin/spiderBot +26 -0
data/bin/trust +38 -0
data/bin/updateAll +57 -0
data/bin/wadd +25 -0
data/bin/wadds +26 -0
data/bin/wcheck +28 -0
data/bin/wdel +25 -0
data/bin/wdump +21 -0
data/bin/wmap +151 -0
data/bin/wscan +32 -0
data/data/cidrs +2 -0
data/data/deactivated_sites +1 -0
data/data/domains +2 -0
data/data/hosts +1 -0
data/data/prime_hosts +1 -0
data/data/sites +2 -0
data/data/sub_domains +2 -0
data/demos/bruter.rb +27 -0
data/demos/dns_brutes.rb +28 -0
data/demos/filter_cidr.rb +18 -0
data/demos/filter_crawls.rb +5 -0
data/demos/filter_domain.rb +25 -0
data/demos/filter_geoip.rb +26 -0
data/demos/filter_known_services.rb +59 -0
data/demos/filter_netinfo.rb +23 -0
data/demos/filter_prime.rb +25 -0
data/demos/filter_profiler.rb +3 -0
data/demos/filter_redirection.rb +19 -0
data/demos/filter_site.rb +40 -0
data/demos/filter_siteip.rb +31 -0
data/demos/filter_status.rb +17 -0
data/demos/filter_timestamp.rb +23 -0
data/demos/filter_url.rb +19 -0
data/demos/new_fnd.rb +66 -0
data/demos/nmap_parser.pl +138 -0
data/demos/site_format.rb +18 -0
data/demos/whois_domain.rb +78 -0
data/dicts/GeoIP.dat +0 -0
data/dicts/GeoIPASNum.dat +0 -0
data/dicts/GeoLiteCity.dat +0 -0
data/dicts/ccsld.txt +2646 -0
data/dicts/cctld.txt +243 -0
data/dicts/gtld.txt +25 -0
data/dicts/hostnames-dict.big +1402 -0
data/dicts/hostnames-dict.txt +101 -0
data/lib/wmap/cidr_tracker.rb +327 -0
data/lib/wmap/dns_bruter.rb +308 -0
data/lib/wmap/domain_tracker/sub_domain.rb +142 -0
data/lib/wmap/domain_tracker.rb +342 -0
data/lib/wmap/geoip_tracker.rb +72 -0
data/lib/wmap/google_search_scraper.rb +177 -0
data/lib/wmap/host_tracker/primary_host.rb +130 -0
data/lib/wmap/host_tracker.rb +550 -0
data/lib/wmap/network_profiler.rb +144 -0
data/lib/wmap/port_scanner.rb +208 -0
data/lib/wmap/site_tracker/deactivated_site.rb +85 -0
data/lib/wmap/site_tracker.rb +937 -0
data/lib/wmap/url_checker.rb +314 -0
data/lib/wmap/url_crawler.rb +381 -0
data/lib/wmap/utils/domain_root.rb +184 -0
data/lib/wmap/utils/logger.rb +53 -0
data/lib/wmap/utils/url_magic.rb +343 -0
data/lib/wmap/utils/utils.rb +333 -0
data/lib/wmap/whois.rb +76 -0
data/lib/wmap.rb +227 -0
data/logs/wmap.log +17 -0
data/ruby_whois_patches/base_cocca2.rb +149 -0
data/ruby_whois_patches/kero.yachay.pe.rb +120 -0
data/ruby_whois_patches/whois.PublicDomainRegistry.com.rb +124 -0
data/ruby_whois_patches/whois.above.com.rb +61 -0
data/ruby_whois_patches/whois.adamsnames.tc.rb +107 -0
data/ruby_whois_patches/whois.aeda.net.ae.rb +105 -0
data/ruby_whois_patches/whois.ai.rb +112 -0
data/ruby_whois_patches/whois.arnes.si.rb +121 -0
data/ruby_whois_patches/whois.ascio.com.rb +91 -0
data/ruby_whois_patches/whois.cnnic.cn.rb +123 -0
data/ruby_whois_patches/whois.corporatedomains.com.rb +67 -0
data/ruby_whois_patches/whois.crsnic.net.rb +108 -0
data/ruby_whois_patches/whois.denic.de.rb +174 -0
data/ruby_whois_patches/whois.dk-hostmaster.dk.rb +120 -0
data/ruby_whois_patches/whois.dns.be.rb +134 -0
data/ruby_whois_patches/whois.dns.lu.rb +129 -0
data/ruby_whois_patches/whois.dns.pl.rb +150 -0
data/ruby_whois_patches/whois.dns.pt.rb +119 -0
data/ruby_whois_patches/whois.domain.kg.rb +126 -0
data/ruby_whois_patches/whois.domainregistry.my.rb +123 -0
data/ruby_whois_patches/whois.domreg.lt.rb +110 -0
data/ruby_whois_patches/whois.dot.tk.rb +140 -0
data/ruby_whois_patches/whois.hkirc.hk.rb +121 -0
data/ruby_whois_patches/whois.isnic.is.rb +130 -0
data/ruby_whois_patches/whois.je.rb +119 -0
data/ruby_whois_patches/whois.jprs.jp.rb +137 -0
data/ruby_whois_patches/whois.kenic.or.ke.rb +140 -0
data/ruby_whois_patches/whois.markmonitor.com.rb +118 -0
data/ruby_whois_patches/whois.melbourneit.com.rb +58 -0
data/ruby_whois_patches/whois.nic.as.rb +96 -0
data/ruby_whois_patches/whois.nic.at.rb +109 -0
data/ruby_whois_patches/whois.nic.ch.rb +141 -0
data/ruby_whois_patches/whois.nic.cl.rb +117 -0
data/ruby_whois_patches/whois.nic.ec.rb +157 -0
data/ruby_whois_patches/whois.nic.im.rb +120 -0
data/ruby_whois_patches/whois.nic.it.rb +170 -0
data/ruby_whois_patches/whois.nic.lv.rb +116 -0
data/ruby_whois_patches/whois.nic.ly.rb +127 -0
data/ruby_whois_patches/whois.nic.mu.rb +27 -0
data/ruby_whois_patches/whois.nic.mx.rb +123 -0
data/ruby_whois_patches/whois.nic.net.sa.rb +111 -0
data/ruby_whois_patches/whois.nic.or.kr.rb +101 -0
data/ruby_whois_patches/whois.nic.tel.rb +129 -0
data/ruby_whois_patches/whois.nic.tr.rb +133 -0
data/ruby_whois_patches/whois.nic.us.rb +129 -0
data/ruby_whois_patches/whois.nic.ve.rb +135 -0
data/ruby_whois_patches/whois.norid.no.rb +127 -0
data/ruby_whois_patches/whois.pandi.or.id.rb +118 -0
data/ruby_whois_patches/whois.psi-usa.info.rb +63 -0
data/ruby_whois_patches/whois.registro.br.rb +109 -0
data/ruby_whois_patches/whois.registrygate.com.rb +55 -0
data/ruby_whois_patches/whois.rrpproxy.net.rb +61 -0
data/ruby_whois_patches/whois.sgnic.sg.rb +130 -0
data/ruby_whois_patches/whois.srs.net.nz.rb +166 -0
data/ruby_whois_patches/whois.tucows.com.rb +70 -0
data/ruby_whois_patches/whois.twnic.net.tw.rb +133 -0
data/settings/discovery_ports +24 -0
data/settings/google_keywords.txt +9 -0
data/settings/google_locator.txt +23 -0
data/test/domain_tracker_test.rb +31 -0
data/test/utils_test.rb +168 -0
data/version.txt +13 -0
data/wmap.gemspec +49 -0
metadata +202 -0

data/lib/wmap/utils/domain_root.rb ADDED Viewed

@@ -0,0 +1,184 @@
+#--
+# Wmap
+#
+# A pure Ruby library for Internet web application discovery and tracking.
+#
+# Copyright (c) 2012-2015 Yang Li <yang.li@owasp.org>
+#++
+module Wmap
+ module Utils
+  # Module to validate and retrieve the top or second level domain name from a host-name (FQDN).
+  module DomainRoot
+	extend self
+	# Internet Domain Architecture Definitions
+	File_ccsld=File.dirname(__FILE__)+'/../../../dicts/ccsld.txt'
+	File_cctld=File.dirname(__FILE__)+'/../../../dicts/cctld.txt'
+	File_gtld=File.dirname(__FILE__)+'/../../../dicts/gtld.txt'
+	# Main function to retrieve the registered domain ('domain root' from the 'registrant' perspective) from a hostname, for example, "www.telegraph.co.uk" -> "telegraph.co.uk"
+	# Returns the registered domain as a lower-case String, or nil when it cannot be determined:
+	# nil / blank input, an unknown top level domain, or a host with fewer labels than its
+	# registered domain would need (for example a bare "co.uk").
+	def get_domain_root (host)
+		puts "Retrieve the root domain for host: #{host}" if @verbose
+		begin
+			# The three dictionaries are loaded on first use and cached in instance variables
+			@gtld=file_2_hash(File_gtld) if @gtld.nil?
+			@cctld=file_2_hash(File_cctld) if @cctld.nil?
+			@ccsld=load_ccsld_from_file(File_ccsld) if @ccsld.nil?
+			# String#strip never returns nil, so blank input has to be detected explicitly
+			if host.nil? || host.strip.empty?
+				puts "Error: empty record found. Please check your input and remove any empty line." if @verbose
+				return nil
+			end
+			host=host.downcase.strip
+			root_domain=""
+			dn=host.split(".")
+			tld=dn.last
+			# Only read the second label when it exists; a negative index would wrap around to the TLD
+			sld=(dn.length > 1) ? dn[dn.length-2] : nil
+			found_tld=@gtld.key?(tld)
+			found_cctld=@cctld.key?(tld)
+			unless (found_tld or found_cctld)
+				puts "#{host} - the top level domain is unknown. Please check out your record #{root_domain} " if @verbose
+				return nil
+			end
+			if found_cctld
+				# The country code rules take precedence when a label is in both TLD lists.
+				# A three-label root is needed when the second label is a generic TLD ("com.au")
+				# or a known second level domain of that country code ("co.uk"); the lookup is an
+				# exact match, not a regular expression built from the input.
+				three_labels=@gtld.key?(sld) || (@ccsld.key?(tld) && @ccsld[tld].include?(sld))
+			else
+				# generic TLD preceded by a country code, e.g. "uk.com"
+				three_labels=@cctld.key?(sld)
+			end
+			labels=three_labels ? 3 : 2
+			root=dn.last(labels)
+			if dn.length < labels || root.any? { |label| label.empty? }
+				puts "#{host} - not enough labels to form a registered domain." if @verbose
+				return nil
+			end
+			root_domain=root.join(".")
+			puts "Domain root found: #{root_domain}" if @verbose
+			return root_domain
+		rescue => ee
+			puts "Exception on method #{__method__}: #{ee}" if @verbose
+			return nil
+		end
+	end
+	alias_method :get_root_domain, :get_domain_root
+	alias_method :root_domain, :get_domain_root
+	alias_method :domain_root, :get_domain_root
+	alias_method :host_2_domain, :get_domain_root
+	# 'setter' to parse and load the known country code second level domain table from the file
+	# data structure example: {"uk" =>["co","plc"],"za"=>["mil","nom","org"]}
+	def load_ccsld_from_file (file_ccsld)
+		begin
+			ccsld=Hash.new
+			puts "Loading known country code second level domain list from file: #{file_ccsld}" if @verbose
+			f=File.open(file_ccsld, 'r:ISO-8859-1:UTF-8')   # transcoded magic bit
+			f.each do |line|
+				next unless line =~ /^\s+\.\w/
+				line=line.chomp.strip.downcase
+				entry=line.split(' ')[0].split('.')
+				if entry.length > 2
+					key=entry.last
+					ccsld[key] = Array.new if not ccsld.key?(key)
+					val=entry[entry.length-2]
+					#puts "Loading country code second level domain table with - Country code: #{key}, Second level domain: #{val}" if @verbose
+					ccsld[key].push(val) unless key.nil?
+				end
+			end
+			f.close
+			# Sort the blocks once in descendant order once for better performance
+			return ccsld
+		rescue => ee
+			puts "Exception on method #{__method__}: #{ee}" if @verbose
+		end
+	end
+	# Test a host string to see if it's a valid Internet root domain
+	def is_domain_root? (domain)
+		puts "Validate the domain name is valid: #{domain}" if @verbose
+		begin
+			domain=domain.strip.downcase
+			return domain == get_domain_root(domain)
+		rescue => ee
+			puts "Exception on method #{__method__} for #{domain}: #{ee}" if @verbose
+			return false
+		end
+	end
+	alias_method :is_root_domain?, :is_domain_root?
+	alias_method :is_domain?, :is_domain_root?
+	alias_method :is_root?, :is_domain_root?
+	# Function to retrieve the sub-domain from a Fully Qualified Domain Name(FQDN), for example, "www.secure.telegraph.co.uk" -> "secure.telegraph.co.uk"
+	def get_sub_domain (host)
+		puts "Retrieve sub-domain from host: #{host}" if @verbose
+		begin
+			subdomain=String.new
+			host=host.strip.downcase
+			domain=get_domain_root(host)
+			record_h=host.split(".")
+			record_d=domain.split(".")
+			if (record_h.length - record_d.length) >= 2
+				subdomain=record_h[record_h.length-record_d.length-1]+"."+domain
+				puts "Sub domain found: #{subdomain}" if @verbose
+				return subdomain
+			else
+				return nil
+			end
+		rescue Exception => ee
+			puts "Exception on method #{__method__} for #{host}: #{ee}" if @verbose
+			return nil
+		end
+	end
+	alias_method :get_subdomain, :get_sub_domain
+	# Function to print instance variable - General top level domain list
+	def print_gtld
+		puts @gtld
+	end
+	# Function to print instance variable - Country code top-level domain list
+	def print_cctld
+		puts @cctld
+	end
+	# Function to print instance variable - Country code second-level domain list
+	def print_ccsld
+		puts @ccsld
+	end
+	private :load_ccsld_from_file
+  end
+ end
+end

data/lib/wmap/utils/logger.rb ADDED Viewed

@@ -0,0 +1,53 @@
+#--
+# Wmap
+#
+# A pure Ruby library for Internet web application discovery and tracking.
+#
+# Copyright (c) 2012-2015 Yang Li <yang.li@owasp.org>
+#++
+module Wmap
+ module Utils
+  # Module to log debugging and other messages
+  module Logger
+	extend self
+	# Append information into the log file for the trouble-shooting purpose
+	def wlog (obj, agent, file)
+		puts "Writing #{obj} into log file: #{file}" if @verbose
+		begin
+			return false if obj.nil?
+			# 01/27/2015, implementing singleton pattern for the logger
+			@@f=File.open(file,'a')
+			timestamp=Time.now
+			case obj
+			when Array
+				if obj.size >= 0
+					@@f.write "#{timestamp}: #{agent}: \n"
+					obj.map { |x| @@f.write "  #{x}\n" }
+					puts "The list is successfully saved into the log file: #{file} " if @verbose
+				end
+			when Hash
+				if obj.length >= 0
+					@@f.write "#{timestamp}: #{agent}: \n"
+					obj.each_value { |value| @@f.write "  #{value}\n" }
+					puts "The hash is successfully saved into the log file: #{file} " if @verbose
+				end
+			when String
+				@@f.write "#{timestamp}: #{agent}: #{obj}\n"
+				puts "The string is successfully saved into the log file: #{file} " if @verbose
+			else
+				#do nothing
+				puts "Un-handled exception on: #{obj}" if @verbose
+			end
+			@@f.close
+			return true
+		rescue => ee
+			puts "Exception on method #{__method__}: #{ee}" if @verbose
+			return false
+		end
+	end
+  end
+ end
+end

data/lib/wmap/utils/url_magic.rb ADDED Viewed

@@ -0,0 +1,343 @@
+#--
+# Wmap
+#
+# A pure Ruby library for Internet web application discovery and tracking.
+#
+# Copyright (c) 2012-2015 Yang Li <yang.li@owasp.org>
+#++
+# require "uri"
+module Wmap
+ module Utils
+  module UrlMagic
+	extend self
+	# Simple sanity check on a 'claimed' URL string.
+	def is_url?(url)
+		puts "Validate the URL format is valid: #{url}" if @verbose
+		begin
+			if url =~ /(http|https)\:\/\/((.)+)/i
+				host=$2.split('/')[0]
+				host=host.split(':')[0]
+				if is_ip?(host) or is_fqdn?(host)
+					return true
+				else
+					return false
+				end
+			else
+				puts "Unknown URL format: #{url}" if @verbose
+				return false
+			end
+		rescue => ee
+			puts "Exception on method #{__method__}: #{ee}" if @verbose
+			return false
+		end
+	end
+	# Simple sanity check on a 'claimed' SSL enabled URL string
+	def is_ssl?(url)
+		puts "Validate if SSL is enabled on: #{url}" if @verbose
+		begin
+			url=url.strip
+			if is_url?(url) && url =~ /https/i
+				return true
+			else
+				return false
+			end
+		rescue => ee
+			puts "Exception on method #{__method__}: #{ee}" if @verbose
+			return false
+		end
+	end
+	alias_method :is_https?, :is_ssl?
+	# Simple sanity check on a 'claimed' web site base string.
+	def is_site?(url)
+		puts "Validate the website string format for: #{url}" if @verbose
+		begin
+			url=url.strip.downcase
+			if is_url?(url)
+				if url == url_2_site(url)
+					return true
+				else
+					return false
+				end
+			else
+				puts "Unknown site format: #{url}" if @verbose
+				return false
+			end
+		rescue => ee
+			puts "Exception on method #{__method__}: #{ee}" if @verbose
+			return nil
+		end
+	end
+	# Check if URL is an absolute one
+	#def is_absolute?(url)
+	#	puts "Validate if the url is absolute: #{url}" if @verbose
+	#	begin
+	#		url.strip!
+	#		URI.absolute?(url)
+	#	rescue => ee
+	#		puts "Exception on method #{__method__} for #{url}: #{ee}" if @verbose
+	#		return false
+	#	end
+	#end
+	# Check if URL is relative one
+	#def is_relative?(url)
+	#	begin
+	#		url.strip!
+	#		!is_absolute?(url)
+	#	rescue => ee
+	#		puts "Exception on method #{__method__} for #{url}: #{ee}" if @verbose
+	#		return false
+	#	end
+	#end
+	# Extract the web server host's Fully Qualified Domain Name (FQDN) from the url. For example: "https://login.yahoo.com/email/help" -> "login.yahoo.com"
+	# Returns the lower-cased host part (no scheme, port or path), or nil when none can be extracted.
+	def url_2_host (url)
+		begin
+			url = url.strip.downcase.gsub(/(http:\/\/|https:\/\/)/, "")
+			# whatever precedes the first "/" is the host[:port] pair
+			host_port = url.split('/').first
+			unless host_port.nil?
+				return host_port.split(':').first
+			end
+			puts "Error process url: #{url}"
+			return nil
+		rescue => ee
+			puts "Exception on method #{__method__}: #{ee}" if @verbose
+			return nil
+		end
+	end
+	# Extract web service port from the url. For example: "https://login.yahoo.com/email/help" -> 443
+	def url_2_port (url)
+		puts "Retrieve service port on URL: #{url}" if @verbose
+		begin
+			ssl = (url =~ /https/i)
+			url = url.downcase.gsub(/(http:\/\/|https:\/\/)/, "")
+			record1 = url.split('/')
+			record2 = record1[0].split(':')
+			if (record2.length == 2)
+				puts "The service port: #{record2[1]}" if @verbose
+				return record2[1].to_i
+			elsif ssl
+				puts "The service port: 443" if @verbose
+				return 443
+			else
+				puts "The service port: 80" if @verbose
+				return 80
+			end
+		rescue => ee
+			puts "Exception on method #{__method__}: #{ee}" if @verbose
+			return nil
+		end
+	end
+	# Extract the site base (scheme://host[:port]/) from a url: "https://login.yahoo.com:8443/email/help" -> "https://login.yahoo.com:8443/"
+	def url_2_site (url)
+		puts "Retrieve the web site base for url: #{url}" if @verbose
+		begin
+			url = url.downcase
+			url = url.sub(/^(.*?)http/i,'http')
+			entry = url.split(%r{\/\/})
+			prot=entry[0]
+			# step 1, extract the host:port pair from the url
+			host_port=entry[1].split(%r{\/})[0]
+			if host_port =~ /\:/
+				host=host_port.split(%r{\:})[0]
+				port=host_port.split(%r{\:})[1].to_i
+			elsif prot =~ /https/i
+				host=host_port
+				port=443
+			elsif prot =~ /http/i
+				host=host_port
+				port=80
+			else
+				host=host_port
+				#raise "Unknown url format: #{url}"
+			end
+			# additional logic to handle uncommon url base structures
+			unless is_fqdn?(host)
+				case host
+					# "https://letmechoose.barclays.co.uk?source=btorganic/" => "https://letmechoose.barclays.co.uk"
+					when /\?|\#/
+						host=host.split(%r{\?|\#})[0]
+					else
+						#do nothing
+				end
+			end
+			# step 2, put the host:port pair back to the normal site format
+			prot="https:" if port==443
+			if port==80 || port==443
+				site=prot+"//"+host+"/"
+			else
+				site=prot+"//"+host+":"+port.to_s+"/"
+			end
+			if site=~ /http/i
+				#puts "Base found: #{site}" if @verbose
+				return site
+			else
+				raise "Problem encountered on method url_2_site: Unable to convert #{url}"
+				return nil
+			end
+		rescue => ee
+			puts "Exception on method #{__method__}: #{ee}" if @verbose
+			return nil
+		end
+	end
+	# Wrapper to return relative path component of the URL. i.e. http://www.yahoo.com/login.html => /login.html
+	def url_2_path(url)
+		#puts "Retrieve the relative path component of the url: #{url}" if @verbose
+		begin
+			url.strip!
+			base = url_2_site(url).chop
+			path=url.sub(base,'')
+			#puts "Path component found: #{path}" if @verbose
+			return path
+		rescue => ee
+			puts "Exception on method #{__method__} for #{url}: #{ee}" if @verbose
+		end
+	end
+	# Test if the two URLs are both under the same domain: http://login.yahoo.com, http://mail.yahoo.com => true
+	# True when both hosts resolve to the same domain root, false when they differ or when
+	# either domain root is unknown; nil when an exception is raised.
+	def urls_on_same_domain?(url1, url2)
+		puts "Determine if two URLs under the same domain: #{url1}, #{url2}" if @verbose
+		begin
+			root1=get_domain_root(url_2_host(url1))
+			root2=get_domain_root(url_2_host(url2))
+			# Two unresolvable hosts both yield nil; nil == nil must not count as "same domain"
+			return false if root1.nil? || root2.nil?
+			return root1 == root2
+		rescue => ee
+			puts "Error searching the object content: #{ee}" if @verbose
+			return nil
+		end
+	end
+	# Input is host and open port, output is a URL for valid http response code or nil
+	def host_2_url (host,port=80)
+		puts "Perform simple http(s) service detection on host #{host}, port #{port}" if @verbose
+		begin
+			host=host.strip
+			if port.to_i == 80
+				url_1 = "http://" + host + "/"
+			elsif port.to_i ==443
+				url_1 = "https://" + host + "/"
+			else
+				url_1 = "http://" + host + ":" + port.to_s + "/"
+				url_2 = "https://" + host + ":" + port.to_s + "/"
+			end
+			puts "Please ensure your internet connection is active before running this method: #{__method__}" if @verbose
+			checker=Wmap::UrlChecker.new
+			if checker.response_code(url_1) != 10000
+				puts "Found URL: #{url_1}" if @verbose
+				return url_1
+			elsif checker.response_code(url_2) != 10000
+				puts "Found URL: #{url_2}" if @verbose
+				return url_2
+			else
+				puts "No http(s) service found on: #{host}:#{port}" if @verbose
+				return nil
+			end
+		rescue => ee
+			puts "Exception on method #{__method__}: #{ee}" if @verbose
+			return nil
+		end
+	end
+	# Convert a relative URL to an absolute one. For example, from URL base 'http://games.yahoo.com/' and file path '/game/the-magic-snowman-flash.html' => 'http://games.yahoo.com/game/the-magic-snowman-flash.html'
+	def make_absolute(base, relative_url)
+        puts "Determine and return the absolute URL:\n Base: #{base}, Relative: #{relative_url} " if @verbose
+		begin
+			absolute_url = nil;
+			if relative_url =~ /^\//
+				absolute_url = create_absolute_url_from_base(base, relative_url)
+			else
+				absolute_url = create_absolute_url_from_context(base, relative_url)
+			end
+			puts "Found absolute URL: #{absolute_url}" if @verbose
+			return absolute_url
+        rescue => ee
+			puts "Exception on method #{__method__}: #{ee}" if @verbose
+            return nil
+        end
+    end
+	# Create / construct the absolute URL from a known URL and relative file path. For example, 'http://images.search.yahoo.com/images' + '/search/images?p=raiders' => 'http://images.search.yahoo.com/search/images?p=raiders'
+	def create_absolute_url_from_base(potential_base, relative_url)
+        begin
+			#puts "Determine the absolute URL from potential base #{potential_base} and relative URL #{relative_url}" if @verbose
+			naked_base = url_2_site(potential_base).strip.chop
+			puts "Found absolute URL: #{naked_base+relative_url}" if @verbose
+			return naked_base + relative_url
+        rescue => ee
+			puts "Exception on method #{__method__}: #{ee}" if @verbose
+            return nil
+        end
+    end
+    # Construct the absolute URL by comparing a known URL and the relative file path
+	def create_absolute_url_from_context(potential_base, relative_url)
+        puts "Determine the absolute URL from context:\n Known base: #{potential_base}, Relative path: #{relative_url}" if @verbose
+		begin
+			absolute_url = nil
+			# make relative URL naked by removing the beginning '/'
+			relative_url.sub!(/^\//,'')
+			if potential_base =~ /\/$/
+				absolute_url = potential_base+relative_url.strip
+			else
+				last_index_of_slash = potential_base.rindex('/')
+				if potential_base[last_index_of_slash-2, 2] == ':/'
+					absolute_url = potential_base+relative_url
+				else
+					last_index_of_dot = potential_base.rindex('.')
+					if last_index_of_dot < last_index_of_slash
+						absolute_url = potential_base.strip.chop+relative_url
+					else
+						absolute_url = potential_base[0, last_index_of_slash+1] + relative_url
+					end
+				end
+			end
+			puts "Found absolute URL: #{absolute_url}" if @verbose
+			return absolute_url
+        rescue => ee
+			puts "Exception on method #{__method__}: #{ee}" if @verbose
+            return nil
+        end
+    end
+	# Normalize the URL to a consistent manner in order to determine if a link has been visited or cached before
+	# See http://en.wikipedia.org/wiki/URL_normalization for more explanation
+	def normalize_url(url)
+		begin
+			url.strip!
+			# Converting the scheme and host to lower case in the process, i.e. 'HTTP://www.Example.com/' => 'http://www.example.com/'
+			# Normalize the base
+			base=url_2_site(url)
+			# Case#1, remove the trailing dot after the hostname, i.e, 'http://www.yahoo.com./' => 'http://www.yahoo.com/'
+			base=base.sub(/\.\/$/,'/')
+			# Normalize the relative path, case#1
+			# retrieve the file path and remove the first '/' or '.',
+			# i.e. 'http://www.example.com/mypath' or 'http://www.example.com/./mypath' => 'mypath'
+			path=url_2_path(url).sub(/^(\/|\.)*/,'')
+			# Normalize the relative path, case#2
+			# Replace dot-segments. "/../" and "/./" with "/", i.e. 'http://www.example.com/../a/b/../c/./d.html" => 'http://www.example.com/a/c/d.html'
+			path=path.gsub(/\/\.{1,2}\//,'/')
+			if path.nil?
+				return base
+			else
+				return base+path
+			end
+		rescue => ee
+			puts "Exception on method #{__method__} for #{url}: #{ee}" if @verbose
+			return url
+		end
+	end
+  end
+ end
+end