wmap 2.6.6 → 2.6.7

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: c4f814d2c0a04e5aedf5314a05a6de6ea59ff1a9cc6a71296bfcf94c301f9e67
- data.tar.gz: 641c3dab030ff37bd0292f0ab2bfc2750c94d72051ddb46891ce490d917fb3b9
+ metadata.gz: 2e4f2a2dfe9b4b119331eefffc7b9b025d9953c2ce5f7255e4d2a08929a591c3
+ data.tar.gz: 3d018d69469cf4e4551b38397657341661fd95c3f59bebe8bb21405d4e107881
  SHA512:
- metadata.gz: 977683b1f6a166ce1da6036e465b88e71aa9445b20e9c80048a640f6bf1dacee26b3503123529b0e5241c862370b0bc03c108c94ccfdf2e63c4af809af28a5ea
- data.tar.gz: bac9ed1cd639750040f035b88001244751d7a1e627594c750d38962c4cb8625035cd2e2ec64d40b9464989b33d33a8cc70b702d7f5d737e0373d104b3ada443e
+ metadata.gz: 7e7d27b4d4abfc34ab3df0933412b4c99e94af93f71251a2e7a0706b4782ad62a2541dbf7c5f391f47d5a3b9eae9feb45ce1690b2e43fddab4f52a14e7bb334b
+ data.tar.gz: 0117422a9eac9f1c7a66783a0a4ca870711aeaa95252a9df8fe4f9ce2f8f10fd4ba461b2146a681f3b867becb9bace05c61a3e11b284714732432c95e12dc983
data/bin/wmap CHANGED
@@ -6,13 +6,8 @@
  require "wmap"
  require "optparse"

- # program helper
- def print_usage
- abort "Program to perform website asset discovery and tracking. \nUsage: wmap -t <Target Host | URL | IP | CIDR | or a seed file with any of the above combo> -d <Optional Discovery Result Directory>"
- end
-
  # program command line options
- options = {:data_dir => nil, :target => nil}
+ options = {:data_dir => nil, :target => nil, :verbose => false}
  parser = OptionParser.new do|opts|
  opts.banner = Wmap.banner
  opts.on('-d', '--data_dir data_dir', 'Web Mapper local cache data directory') do |data_dir|
@@ -21,9 +16,11 @@ parser = OptionParser.new do|opts|
  opts.on('-t', '--target target', 'Web Mapper target') do |target|
  options[:target] = target;
  end
+ opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
+ options[:verbose] = v;
+ end
  opts.on('-h', '--help', 'Displays Help') do
- print Wmap.banner,"\n"
- print_usage
+ puts opts
  exit 0
  end
  end
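Note on the two hunks above: 2.6.7 drops the hand-rolled print_usage helper in favor of OptionParser's generated help text (puts opts prints the banner plus every registered option) and adds a verbose switch. The --[no-]verbose form is standard OptionParser behavior: -v or --verbose yields true, --no-verbose yields false. A minimal standalone sketch of the same idiom (illustrative, not wmap code):

    require "optparse"

    options = {:verbose => false}
    parser = OptionParser.new do |opts|
      opts.on("-v", "--[no-]verbose", "Run verbosely") { |v| options[:verbose] = v }
    end
    parser.parse!(["--verbose"])    # options[:verbose] => true
    parser.parse!(["--no-verbose"]) # options[:verbose] => false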
@@ -47,7 +44,7 @@ Dir.mkdir(Log_dir) unless Dir.exist?(Log_dir)
  Wmap.wlog("Execute the command: wmap -t #{options[:target]}","wmap",Log_dir.join("wmap.log").to_s)
  urls = Array.new
  # first step - construct the host list
- scanner = Wmap::PortScanner.new(:verbose=>false, :socket_timeout=>600) # default time-out of 600 milliseconds
+ scanner = Wmap::PortScanner.new(:verbose=>options[:verbose], :socket_timeout=>600) # default time-out of 600 milliseconds
  hosts=Array.new
  if File.exist?(options[:target])
  puts "Parsing the discovery seed file: \"#{options[:target]}\" "
@@ -65,18 +62,18 @@ if File.exist?(options[:target])
  cidrs.push(x) if scanner.is_cidr?(x)
  end
  puts "Parsing done. "
- hosts+=Wmap::DnsBruter.new(:verbose=>false).dns_brute_workers(domains.uniq).values.flatten if domains.size > 0
+ hosts+=Wmap::DnsBruter.new(:verbose=>options[:verbose]).dns_brute_workers(domains.uniq).values.flatten if domains.size > 0
  cidrs.map { |x| hosts+= scanner.cidr_2_ips(x) } if cidrs.size > 0
  elsif scanner.is_url?(options[:target])
  puts "Processing the URL: #{options[:target]}"
  urls.push(options[:target])
  elsif Wmap.domain_known?(options[:target]) or Wmap.sub_domain_known?(options[:target])
  puts "Processing the domain: #{options[:target]}"
- hosts+=Wmap::DnsBruter.new(:verbose=>false).dns_brute_worker(options[:target]).values.flatten
+ hosts+=Wmap::DnsBruter.new(:verbose=>options[:verbose]).dns_brute_worker(options[:target]).values.flatten
  elsif scanner.is_fqdn?(options[:target])
  puts "Processing the host: #{options[:target]}"
  hosts.push(options[:target])
- my_hosts=Wmap::DnsBruter.new(:verbose=>false).dns_brute_worker(options[:target]).values.flatten if (options[:target].split('.')[0] =~ /\d+/)
+ my_hosts=Wmap::DnsBruter.new(:verbose=>options[:verbose]).dns_brute_worker(options[:target]).values.flatten if (options[:target].split('.')[0] =~ /\d+/)
  hosts+=my_hosts unless my_hosts.nil?
  elsif scanner.is_cidr?(options[:target])
  puts "Processing the network block: #{options[:target]}"
@@ -102,7 +99,7 @@ if options[:target] && options[:data_dir]
  crawler = Wmap::UrlCrawler.new(:data_dir => options[:data_dir])
  elsif options[:target]
  puts "Fire up the crawler."
- crawler = Wmap::UrlCrawler.new(:verbose=>false)
+ crawler = Wmap::UrlCrawler.new(:verbose=>options[:verbose])
  else
  abort "Error firing up UrlCrawler instance!"
  end
@@ -168,14 +165,14 @@ end
  if options[:target] && options[:data_dir]
  puts "Invoke the HostTracker with optional directory setter."
  host_tracker = Wmap::HostTracker.instance
- host_tracker.verbose=false
+ host_tracker.verbose=options[:verbose]
  host_tracker.data_dir = options[:data_dir]
  host_tracker.hosts_file = host_tracker.data_dir + "/" + "hosts"
  host_tracker.load_known_hosts_from_file(host_tracker.hosts_file)
  elsif options[:target]
  puts puts "Invoke the HostTracker."
  host_tracker = Wmap::HostTracker.instance
- host_tracker.verbose=false
+ host_tracker.verbose=options[:verbose]
  else
  abort "Error firing up HostTracker instance!"
  end
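The bin/wmap hunks above thread the new flag through every component that previously hard-coded :verbose=>false, so a single -v on the command line now controls diagnostic output across the whole discovery pipeline. The constructor calls, as they appear in the hunks:

    scanner = Wmap::PortScanner.new(:verbose=>options[:verbose], :socket_timeout=>600)
    bruter  = Wmap::DnsBruter.new(:verbose=>options[:verbose])
    crawler = Wmap::UrlCrawler.new(:verbose=>options[:verbose])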
@@ -57,9 +57,9 @@ class Wmap::HostTracker
  end
  f.close
  return @known_hosts
- #rescue => ee
- # puts "Exception on method #{__method__}: #{ee}"
- # return known_hosts
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}"
+ return known_hosts
  end

  # Save the current local hosts hash table into a (random) data repository file
@@ -282,8 +282,8 @@ class Wmap::SiteTracker
  puts "No new entry added. "
  end
  return results
- #rescue => ee
- #puts "Exception on method #{__method__}: #{ee}" if @verbose
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
  end
  alias_method :adds, :bulk_add
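The two library hunks above re-enable rescue handlers that had been commented out, so load_known_hosts_from_file and bulk_add now log the exception (the latter only when @verbose) instead of letting it propagate. A bare rescue => ee clause catches StandardError and its descendants only, while rescue Exception => ee (used later in UrlCrawler#crawl_workers) also traps interrupts and exit attempts. A standalone sketch of the distinction (illustrative, not wmap code):

    begin
      raise Interrupt            # Interrupt < SignalException < Exception
    rescue => ee
      puts "not reached"         # a bare rescue catches only StandardError
    rescue Exception => ee
      puts "reached"             # rescue Exception traps everything, including interrupts
    end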
 
@@ -12,7 +12,7 @@ module Wmap

  # Class to identify and track adware within the site store
  include Wmap::Utils
- attr_accessor :signature_file, :tag_file, :verbose, :data_dir, :data_store
+ attr_accessor :signature_file, :tag_file, :verbose, :data_dir
  attr_reader :tag_signatures, :tag_store

@@ -26,7 +26,7 @@ module Wmap
  # Set default instance variables
  @signature_file=File.dirname(__FILE__) + '/../../../settings/' + 'tag_signatures'
  file=params.fetch(:signature_file, @signature_file)
- @tag_signatures=load_from_file(file)
+ @tag_signatures=load_sig_from_file(file)
  @tag_file=params.fetch(:tag_file, @data_dir + 'tag_sites')
  File.write(@tag_file, "") unless File.exist?(@tag_file)
  # load the known tag store
@@ -34,9 +34,8 @@ module Wmap
  @landings = Hash.new # cache landing page to reduce redundant browsing
  end

-
  # load the known tag signatures into an instance variable
- def load_from_file (file, lc=true)
+ def load_sig_from_file (file, lc=true)
  puts "Loading data file: #{file}" if @verbose
  data_store=Hash.new
  f = File.open(file, 'r')
@@ -53,7 +52,6 @@ module Wmap
  else
  data_store[entry[0]]=entry[1].strip
  end
-
  end
  f.close
  return data_store
@@ -105,11 +103,11 @@ module Wmap
  end
  alias_method :save!, :save_to_file!

- # add tag entries (from the sitetracker list)
+ # Refresh adware tag store signatures
  def refresh (num=@max_parallel,use_cache=true)
  puts "Add entries to the local cache table from site tracker: " if @verbose
  results = Hash.new
- tags = Wmap::SiteTracker.instance.known_sites.keys
+ tags = @tag_store.keys
  if tags.size > 0
  Parallel.map(tags, :in_processes => num) { |target|
  check_adware(target,use_cache)
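In the adware-tag tracker hunks above, the :data_store accessor is dropped, the signature loader is renamed from load_from_file to load_sig_from_file (presumably to keep it distinct from same-named loaders elsewhere in the gem), and refresh now re-checks only sites already present in the local tag store rather than every site known to Wmap::SiteTracker, matching its updated comment. Condensed from the hunk (all names as they appear in the diff):

    tags = @tag_store.keys   # 2.6.7: re-validate only locally tracked tag sites
    # tags = Wmap::SiteTracker.instance.known_sites.keys   # 2.6.6: scan every known site
    Parallel.map(tags, :in_processes => num) { |target|
      check_adware(target, use_cache)
    }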
@@ -66,210 +66,196 @@ class Wmap::UrlCrawler
  # A web crawler to crawl a known website and search for html links within the same root domain. For example,
  # by crawling 'http://www.yahoo.com/' it could discover 'http://login.yahoo.com/'
  def crawl(url)
- begin
- puts "Start web crawling on #{url}"
- result=Array.new
- url=url.chomp.strip
- result.push(url_2_site(url))
- raise "Error! Invalid url format: #{urls}" unless is_url?(url)
- # Add logic to profile the web server before crawling; this is used to optimize the crawling speed
- pre_crawl(url)
- status = Timeout::timeout(Crawl_timeout/1000) {
- result+=crawl_worker(url).keys
- }
- puts "Web crawling time-out on #{url}: #{status}" if @verbose
- return result
- rescue => ee
- puts "Exception on method #{__method__} for URL #{url}: #{ee}"
- return result
- end
+ puts "Start web crawling on #{url}"
+ result=Array.new
+ url=url.chomp.strip
+ result.push(url_2_site(url))
+ raise "Error! Invalid url format: #{urls}" unless is_url?(url)
+ # Add logic to profile the web server before crawling; this is used to optimize the crawling speed
+ pre_crawl(url)
+ status = Timeout::timeout(Crawl_timeout/1000) {
+ result+=crawl_worker(url).keys
+ }
+ puts "Web crawling time-out on #{url}: #{status}" if @verbose
+ return result
+ rescue => ee
+ puts "Exception on method #{__method__} for URL #{url}: #{ee}"
+ return result
  end
  alias_method :query, :crawl

  # The worker instance of crawler who perform the labour work
  def crawl_worker(url0)
- begin
- puts "Please be aware that it may take a while to crawl #{url0}, depending on the site's responsiveness and the amount of contents."
- # Input URL sanity check first
- if is_url?(url0)
- host=url_2_host(url0)
- ip=host_2_ip(host).to_s
- raise "Invalid IP address: #{url0}" if ip.nil?
- port=url_2_port(url0).to_s
- raise "Invalid port number: #{url0}" if port.nil?
- else
- raise "Invalid URL: #{url0}. Please check it out with your browser again."
- end
- log_info=Hash.new
- log_info[1]="Start working on #{url0}"
- url_stores=Hash.new
- url_stores[url0]=true unless url_stores.key?(url0)
- @discovered_urls_by_crawler[url0]=true unless @discovered_urls_by_crawler.key?(url0)
- @crawl_start[url0]=true unless @crawl_start.key?(url0)
+ puts "Please be aware that it may take a while to crawl #{url0}, depending on the site's responsiveness and the amount of contents."
+ # Input URL sanity check first
+ if is_url?(url0)
+ host=url_2_host(url0)
+ ip=host_2_ip(host).to_s
+ raise "Invalid IP address: #{url0}" if ip.nil?
+ port=url_2_port(url0).to_s
+ raise "Invalid port number: #{url0}" if port.nil?
+ else
+ raise "Invalid URL: #{url0}. Please check it out with your browser again."
+ end
+ log_info=Hash.new
+ log_info[1]="Start working on #{url0}"
+ url_stores=Hash.new
+ url_stores[url0]=true unless url_stores.key?(url0)
+ @discovered_urls_by_crawler[url0]=true unless @discovered_urls_by_crawler.key?(url0)
+ @crawl_start[url0]=true unless @crawl_start.key?(url0)
  # $discovered_urls[url0]=true unless $discovered_urls.key?(url0)
- @crawl_depth.times do
- url_stores.keys.each do |url|
- # 10/01/2013 add logic to avoid unnecessary crawling within the same child instance
- next if @visited_urls_by_crawler.key?(url)
- url_object = open_url(url)
- next if url_object == nil
- url = update_url_if_redirected(url, url_object)
- url_body = read_url(url)
- # Protection code - to avoid parsing failure on the empty or nil object
- next if url_body.nil? or url_body.empty?
- url_stores[url]=true unless url_stores.key?(url)
- @discovered_urls_by_crawler[url]=true unless @discovered_urls_by_crawler.key?(url)
+ @crawl_depth.times do
+ url_stores.keys.each do |url|
+ # 10/01/2013 add logic to avoid unnecessary crawling within the same child instance
+ next if @visited_urls_by_crawler.key?(url)
+ url_object = open_url(url)
+ next if url_object == nil
+ url = update_url_if_redirected(url, url_object)
+ url_body = read_url(url)
+ # Protection code - to avoid parsing failure on the empty or nil object
+ next if url_body.nil? or url_body.empty?
+ url_stores[url]=true unless url_stores.key?(url)
+ @discovered_urls_by_crawler[url]=true unless @discovered_urls_by_crawler.key?(url)
  # $discovered_urls[url]=true unless $discovered_urls.key?(url)
- doc = Nokogiri::HTML(url_body)
- next if doc == nil
- if url_stores.size >= @crawl_page_limit
- #@visited_urls_by_crawler.merge!(url_stores)
- @discovered_urls_by_crawler.merge!(url_stores)
+ doc = Nokogiri::HTML(url_body)
+ next if doc == nil
+ if url_stores.size >= @crawl_page_limit
+ #@visited_urls_by_crawler.merge!(url_stores)
+ @discovered_urls_by_crawler.merge!(url_stores)
  # $discovered_urls.merge!(url_stores)
- puts "Finish web crawling the url: #{url0}"
- return url_stores
- end
- page_urls = find_urls_on_page(doc, url)
- page_urls.uniq!
- page_urls.map do |y|
- y=normalize_url(y)
- url_stores[y]=true unless url_stores.key?(y)
- @discovered_urls_by_crawler[y]=true unless @discovered_urls_by_crawler.key?(y)
+ puts "Finish web crawling the url: #{url0}"
+ return url_stores
+ end
+ page_urls = find_urls_on_page(doc, url)
+ page_urls.uniq!
+ page_urls.map do |y|
+ y=normalize_url(y)
+ url_stores[y]=true unless url_stores.key?(y)
+ @discovered_urls_by_crawler[y]=true unless @discovered_urls_by_crawler.key?(y)
  # $discovered_urls[y]=true unless $discovered_urls.key?(y)
- end
  end
  end
- puts "Finish web crawling on: #{url0}"
- log_info[2]="Finish working on: #{url0}"
- wlog(log_info, "UrlCrawler", @log_file)
- @crawl_done[url0]=true unless @crawl_done.key?(url0)
- return url_stores
- rescue => ee
- puts "Exception on method #{__method__} for URL #{url0}: #{ee}" if @verbose
- log_info[3]="Exception on #{url0}"
- wlog(log_info,"UrlCrawler",@log_file)
- return url_stores
  end
+ puts "Finish web crawling on: #{url0}"
+ log_info[2]="Finish working on: #{url0}"
+ wlog(log_info, "UrlCrawler", @log_file)
+ @crawl_done[url0]=true unless @crawl_done.key?(url0)
+ return url_stores
+ rescue => ee
+ puts "Exception on method #{__method__} for URL #{url0}: #{ee}" if @verbose
+ log_info[3]="Exception on #{url0}"
+ wlog(log_info,"UrlCrawler",@log_file)
+ return url_stores
  end

  # Fast crawling by utilizing fork manager parallel to spawn numbers of child processes at the same time
  # each child process will continuously work on the target pool until all the works are done
  def crawl_workers (targets,num=@max_parallel)
- begin
- raise "Input error - expecting targets in an array format: #{targets}" unless targets.kind_of? Array
- puts "Sanitize the URL seeds to eliminate the unnecessary duplication(s) ..." if @verbose
- #puts "This could be awhile depending on the list size. Please be patient ..."
- # 09/30/2013 Add additional logic to eliminate the duplicate target site(s) before the crawlers are invoked.
- targets -= ["", nil]
- uniq_sites=Hash.new
- targets.dup.map do |target|
- if is_url?(target)
- host=url_2_host(target)
- ip=host_2_ip(host).to_s
- next if ip.nil?
- port=url_2_port(target).to_s
- next if port.nil?
- site_key=ip+":"+port
- unless uniq_sites.key?(site_key)
- uniq_sites[site_key]=target
- end
+ raise "Input error - expecting targets in an array format: #{targets}" unless targets.kind_of? Array
+ puts "Sanitize the URL seeds to eliminate the unnecessary duplication(s) ..." if @verbose
+ #puts "This could be awhile depending on the list size. Please be patient ..."
+ # 09/30/2013 Add additional logic to eliminate the duplicate target site(s) before the crawlers are invoked.
+ targets -= ["", nil]
+ uniq_sites=Hash.new
+ targets.dup.map do |target|
+ if is_url?(target)
+ host=url_2_host(target)
+ ip=host_2_ip(host).to_s
+ next if ip.nil?
+ port=url_2_port(target).to_s
+ next if port.nil?
+ site_key=ip+":"+port
+ unless uniq_sites.key?(site_key)
+ uniq_sites[site_key]=target
  end
  end
- puts "Sanitization done! " if @verbose
- puts "Start the parallel engine on the normalized crawling list:\n #{targets} "
- puts "Maximum number of web crawling sessions allowed: #{num}" #if @verbose
- raise "Error: target list is empty!" if targets.size < 1
- Parallel.map(uniq_sites.values, :in_processes => num) { |target|
- puts "Working on #{target} ..." if @verbose
- crawl(target)
- }.dup.each do |process|
- puts "process.inspect: #{process}" if @verbose
- urls=process
- urls-=["",nil] unless urls.nil?
- if urls.nil?
- next
- elsif urls.empty?
- next
- #do nothing
- else
- urls.map do |url|
- url.strip!
- @discovered_urls_by_crawler[url]=true unless @discovered_urls_by_crawler.key?(url)
- #$discovered_urls[url]=true unless $discovered_urls.key?(url)
- end
+ end
+ puts "Sanitization done! " if @verbose
+ puts "Start the parallel engine on the normalized crawling list:\n #{targets} "
+ puts "Maximum number of web crawling sessions allowed: #{num}" #if @verbose
+ raise "Error: target list is empty!" if targets.size < 1
+ Parallel.map(uniq_sites.values, :in_processes => num) { |target|
+ puts "Working on #{target} ..." if @verbose
+ crawl(target)
+ }.dup.each do |process|
+ puts "process.inspect: #{process}" if @verbose
+ urls=process
+ urls-=["",nil] unless urls.nil?
+ if urls.nil?
+ next
+ elsif urls.empty?
+ next
+ #do nothing
+ else
+ urls.map do |url|
+ url.strip!
+ @discovered_urls_by_crawler[url]=true unless @discovered_urls_by_crawler.key?(url)
+ #$discovered_urls[url]=true unless $discovered_urls.key?(url)
  end
  end
- #return sites
- return @discovered_urls_by_crawler.keys
- rescue Exception => ee
- puts "Exception on method #{__method__}: #{ee}" if @verbose
- return nil
  end
+ #return sites
+ return @discovered_urls_by_crawler.keys
+ rescue Exception => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
  end
  alias_method :crawls, :crawl_workers

  # Fast crawling method - build the target pool from the input file
  def crawl_workers_on_file (file)
- begin
- puts "Web crawl the list of targets from file: #{file}"
- targets=file_2_list(file)
- sites=crawl_workers(targets,num=@max_parallel)
- return sites
- rescue => ee
- puts "Exception on method #{__method__}: #{ee}" if @verbose
- return nil
- end
+ puts "Web crawl the list of targets from file: #{file}"
+ targets=file_2_list(file)
+ sites=crawl_workers(targets,num=@max_parallel)
+ return sites
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
  end
  alias_method :query_file, :crawl_workers_on_file
  alias_method :crawl_file, :crawl_workers_on_file

  # Wrapper for the OpenURI open method - create an open_uri object and return the reference upon success
  def open_url(url)
- begin
- puts "Open url #{url} by creating an open_uri object. Return the reference upon success." if @verbose
- if url =~ /http\:/i
- # patch for allow the 'un-safe' URL redirection i.e. https://www.example.com -> http://www.example.com
- url_object = open(url, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000)
- #url_object = open(url)
- elsif url =~ /https\:/i
- url_object = open(url,:ssl_verify_mode => 0, :allow_redirections =>:safe, :read_timeout=>Max_http_timeout/1000)
- #url_object = open(url,:ssl_verify_mode => 0)
- else
- raise "Invalid URL format - please specify the protocol prefix http(s) in the URL: #{url}"
- end
- return url_object
- rescue => ee
- puts "Exception on method #{__method__} for #{url}: #{ee}" if @verbose
- return nil
- end
+ puts "Open url #{url} by creating an open_uri object. Return the reference upon success." if @verbose
+ if url =~ /http\:/i
+ # patch for allow the 'un-safe' URL redirection i.e. https://www.example.com -> http://www.example.com
+ url_object = open(url, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000)
+ #url_object = open(url)
+ elsif url =~ /https\:/i
+ url_object = open(url,:ssl_verify_mode => 0, :allow_redirections =>:safe, :read_timeout=>Max_http_timeout/1000)
+ #url_object = open(url,:ssl_verify_mode => 0)
+ else
+ raise "Invalid URL format - please specify the protocol prefix http(s) in the URL: #{url}"
+ end
+ return url_object
+ rescue => ee
+ puts "Exception on method #{__method__} for #{url}: #{ee}" if @verbose
+ return nil
  end

  # Wrapper to use OpenURI method 'read' to return url body contents
  def read_url(url)
- begin
- puts "Wrapper to return the OpenURI object for url: #{url}" if @verbose
- url_object=open_url(url)
- @visited_urls_by_crawler[url]=true unless @visited_urls_by_crawler.key?(url)
- body=url_object.read
- return body
- rescue => ee
- puts "Exception on method #{__method__}: #{ee}" if @verbose
- return nil
- end
+ puts "Wrapper to return the OpenURI object for url: #{url}" if @verbose
+ url_object=open_url(url)
+ @visited_urls_by_crawler[url]=true unless @visited_urls_by_crawler.key?(url)
+ body=url_object.read
+ return body
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
  end

  # Return the destination url in case of url re-direct
  def update_url_if_redirected(url, url_object)
- begin
- #puts "Comparing the original URL with the return object base_uri. Return the one where the true content is found. " if @verbose
- if url != url_object.base_uri.to_s
- return url_object.base_uri.to_s
- end
- return url
- rescue => ee
- puts "Exception on method #{__method__}: #{ee}" if @verbose
- return nil
- end
+ #puts "Comparing the original URL with the return object base_uri. Return the one where the true content is found. " if @verbose
+ if url != url_object.base_uri.to_s
+ return url_object.base_uri.to_s
+ end
+ return url
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
  end

  =begin
@@ -290,90 +276,82 @@ class Wmap::UrlCrawler

  # Search 'current_url' and return found URLs under the same domain
  def find_urls_on_page(doc, current_url)
- begin
- puts "Search and return URLs within the doc: #{doc}" if @verbose
- urls_list = []
- # case 1 - search embedded HTML tag <a href='url'> for the url elements
- links=doc.css('a')
- links.map do |x|
- #puts "x: #{x}"
- new_url = x.attribute('href').to_s
- unless new_url == nil
- if new_url.match("http")
- #if urls_on_same_domain?(new_url,current_url)
- urls_list.push(new_url)
- #end
- else
- new_url = make_absolute(current_url, new_url)
+ puts "Search and return URLs within the doc: #{doc}" if @verbose
+ urls_list = []
+ # case 1 - search embedded HTML tag <a href='url'> for the url elements
+ links=doc.css('a')
+ links.map do |x|
+ #puts "x: #{x}"
+ new_url = x.attribute('href').to_s
+ unless new_url == nil
+ if new_url.match("http")
+ #if urls_on_same_domain?(new_url,current_url)
  urls_list.push(new_url)
- end
+ #end
+ else
+ new_url = make_absolute(current_url, new_url)
+ urls_list.push(new_url)
  end
  end
- # case 2 - search client side redirect - <meta http-equiv="refresh" content="5;URL='http://example.com/'">
- elements=doc.css("meta[http-equiv]")
- unless elements.size == 0
- link=elements.attr("content").value.split(/url\=/i)[1]
- unless link.nil?
- new_url = make_absolute(current_url, link)
- urls_list.push(new_url) unless new_url.nil?
- end
+ end
+ # case 2 - search client side redirect - <meta http-equiv="refresh" content="5;URL='http://example.com/'">
+ elements=doc.css("meta[http-equiv]")
+ unless elements.size == 0
+ link=elements.attr("content").value.split(/url\=/i)[1]
+ unless link.nil?
+ new_url = make_absolute(current_url, link)
+ urls_list.push(new_url) unless new_url.nil?
  end
- #puts "Found URLs under page #{current_url}:\n#{urls_list}" if @verbose
- return urls_list.uniq-["",nil]
- rescue => ee
- puts "Exception on method #{__method__}: #{ee}" if @verbose
- return nil
  end
+ #puts "Found URLs under page #{current_url}:\n#{urls_list}" if @verbose
+ return urls_list.uniq-["",nil]
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
  end

  # Method to print out discovery URL result
  def print_discovered_urls_by_crawler
- begin
- puts "Print discovered url by the crawler. " if @verbose
- puts "\nSummary Report of Discovered URLs from the Crawler:"
- @discovered_urls_by_crawler.keys.each do |url|
- puts url
- end
- puts "Total: #{@discovered_urls_by_crawler.keys.size}"
- puts "End of the summary"
- rescue => ee
- puts "Exception on method #{__method__}: #{ee}" if @verbose
- return nil
- end
+ puts "Print discovered url by the crawler. " if @verbose
+ puts "\nSummary Report of Discovered URLs from the Crawler:"
+ @discovered_urls_by_crawler.keys.each do |url|
+ puts url
+ end
+ puts "Total: #{@discovered_urls_by_crawler.keys.size}"
+ puts "End of the summary"
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
  end
  alias_method :print, :print_discovered_urls_by_crawler

  # Method to save URL discovery result
  def save_discovered_urls (file)
- begin
- puts "Save discovered urls by the crawler to file: #{file} "
- list_2_file(@discovered_urls_by_crawler.keys, file)
- puts "Done!"
- rescue => ee
- puts "Exception on method #{__method__}: #{ee}" if @verbose
- return nil
- end
+ puts "Save discovered urls by the crawler to file: #{file} "
+ list_2_file(@discovered_urls_by_crawler.keys, file)
+ puts "Done!"
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
  end
  alias_method :save, :save_discovered_urls

  # Method to retrieve discovery site result
  def get_discovered_sites_by_crawler
- begin
- puts "Print summary report of discovered sites. " if @verbose
- puts "\nSummary Report of Discovered Sites from the Crawler:"
- sites = Hash.new
- @discovered_urls_by_crawler.keys.each do |url|
- site=url_2_site(url)
- sites[site]=true unless sites.key?(site)
- end
- sites.keys.map { |site| puts site }
- puts "Total: #{sites.size}"
- puts "End of the summary"
- return sites.keys
- rescue => ee
- puts "Exception on method #{__method__}: #{ee}" if @verbose
- return nil
- end
+ puts "Print summary report of discovered sites. " if @verbose
+ puts "\nSummary Report of Discovered Sites from the Crawler:"
+ sites = Hash.new
+ @discovered_urls_by_crawler.keys.each do |url|
+ site=url_2_site(url)
+ sites[site]=true unless sites.key?(site)
+ end
+ sites.keys.map { |site| puts site }
+ puts "Total: #{sites.size}"
+ puts "End of the summary"
+ return sites.keys
+ rescue => ee
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
+ return nil
  end
  alias_method :get_sites, :get_discovered_sites_by_crawler
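The Wmap::UrlCrawler changes above are a mechanical restyle rather than a behavior change: each method's begin ... end wrapper is removed in favor of a method-level rescue clause, which is idiomatic because a Ruby method body already forms an implicit begin block. Schematically (condensed from the crawl hunk):

    # Before (2.6.6): explicit wrapper inside the method
    def crawl(url)
      begin
        # ... crawl work ...
      rescue => ee
        puts "Exception on method #{__method__} for URL #{url}: #{ee}"
        return result
      end
    end

    # After (2.6.7): the method body itself is the begin block
    def crawl(url)
      # ... crawl work ...
    rescue => ee
      puts "Exception on method #{__method__} for URL #{url}: #{ee}"
      return result
    end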
 
data/version.txt CHANGED
@@ -3,8 +3,8 @@
  ###############################################################################
  package = wmap
  # wmap version 2.0 == web_discovery version 1.5.3
- version = 2.6.6
- date = 2019-11-12
+ version = 2.6.7
+ date = 2019-11-19

  author = Sam (Yang) Li
  email = yang.li@owasp.org
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: wmap
  version: !ruby/object:Gem::Version
- version: 2.6.6
+ version: 2.6.7
  platform: ruby
  authors:
  - Sam (Yang) Li
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2019-11-12 00:00:00.000000000 Z
+ date: 2019-11-19 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: dnsruby