wmap 2.6.6 → 2.6.7
This diff shows the contents of publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- checksums.yaml +4 -4
- data/bin/wmap +12 -15
- data/lib/wmap/host_tracker.rb +3 -3
- data/lib/wmap/site_tracker.rb +2 -2
- data/lib/wmap/url_crawler/adware_tag.rb +5 -7
- data/lib/wmap/url_crawler.rb +206 -228
- data/version.txt +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2e4f2a2dfe9b4b119331eefffc7b9b025d9953c2ce5f7255e4d2a08929a591c3
+  data.tar.gz: 3d018d69469cf4e4551b38397657341661fd95c3f59bebe8bb21405d4e107881
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7e7d27b4d4abfc34ab3df0933412b4c99e94af93f71251a2e7a0706b4782ad62a2541dbf7c5f391f47d5a3b9eae9feb45ce1690b2e43fddab4f52a14e7bb334b
+  data.tar.gz: 0117422a9eac9f1c7a66783a0a4ca870711aeaa95252a9df8fe4f9ce2f8f10fd4ba461b2146a681f3b867becb9bace05c61a3e11b284714732432c95e12dc983
data/bin/wmap
CHANGED
@@ -6,13 +6,8 @@
 require "wmap"
 require "optparse"
 
-# program helper
-def print_usage
-  abort "Program to perform website asset discovery and tracking. \nUsage: wmap -t <Target Host | URL | IP | CIDR | or a seed file with any of the above combo> -d <Optional Discovery Result Directory>"
-end
-
 # program command line options
-options = {:data_dir => nil, :target => nil}
+options = {:data_dir => nil, :target => nil, :verbose => false}
 parser = OptionParser.new do|opts|
   opts.banner = Wmap.banner
   opts.on('-d', '--data_dir data_dir', 'Web Mapper local cache data directory') do |data_dir|
@@ -21,9 +16,11 @@ parser = OptionParser.new do|opts|
   opts.on('-t', '--target target', 'Web Mapper target') do |target|
     options[:target] = target;
   end
+  opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
+    options[:verbose] = v;
+  end
   opts.on('-h', '--help', 'Displays Help') do
-
-    print_usage
+    puts opts
     exit 0
   end
 end
@@ -47,7 +44,7 @@ Dir.mkdir(Log_dir) unless Dir.exist?(Log_dir)
 Wmap.wlog("Execute the command: wmap -t #{options[:target]}","wmap",Log_dir.join("wmap.log").to_s)
 urls = Array.new
 # first step - construct the host list
-scanner = Wmap::PortScanner.new(:verbose=>
+scanner = Wmap::PortScanner.new(:verbose=>options[:verbose], :socket_timeout=>600) # default time-out of 600 milliseconds
 hosts=Array.new
 if File.exist?(options[:target])
   puts "Parsing the discovery seed file: \"#{options[:target]}\" "
@@ -65,18 +62,18 @@ if File.exist?(options[:target])
     cidrs.push(x) if scanner.is_cidr?(x)
   end
   puts "Parsing done. "
-  hosts+=Wmap::DnsBruter.new(:verbose=>
+  hosts+=Wmap::DnsBruter.new(:verbose=>options[:verbose]).dns_brute_workers(domains.uniq).values.flatten if domains.size > 0
   cidrs.map { |x| hosts+= scanner.cidr_2_ips(x) } if cidrs.size > 0
 elsif scanner.is_url?(options[:target])
   puts "Processing the URL: #{options[:target]}"
   urls.push(options[:target])
 elsif Wmap.domain_known?(options[:target]) or Wmap.sub_domain_known?(options[:target])
   puts "Processing the domain: #{options[:target]}"
-  hosts+=Wmap::DnsBruter.new(:verbose=>
+  hosts+=Wmap::DnsBruter.new(:verbose=>options[:verbose]).dns_brute_worker(options[:target]).values.flatten
 elsif scanner.is_fqdn?(options[:target])
   puts "Processing the host: #{options[:target]}"
   hosts.push(options[:target])
-  my_hosts=Wmap::DnsBruter.new(:verbose=>
+  my_hosts=Wmap::DnsBruter.new(:verbose=>options[:verbose]).dns_brute_worker(options[:target]).values.flatten if (options[:target].split('.')[0] =~ /\d+/)
   hosts+=my_hosts unless my_hosts.nil?
 elsif scanner.is_cidr?(options[:target])
   puts "Processing the network block: #{options[:target]}"
@@ -102,7 +99,7 @@ if options[:target] && options[:data_dir]
   crawler = Wmap::UrlCrawler.new(:data_dir => options[:data_dir])
 elsif options[:target]
   puts "Fire up the crawler."
-  crawler = Wmap::UrlCrawler.new(:verbose=>
+  crawler = Wmap::UrlCrawler.new(:verbose=>options[:verbose])
 else
   abort "Error firing up UrlCrawler instance!"
 end
@@ -168,14 +165,14 @@ end
 if options[:target] && options[:data_dir]
   puts "Invoke the HostTracker with optional directory setter."
   host_tracker = Wmap::HostTracker.instance
-  host_tracker.verbose=
+  host_tracker.verbose=options[:verbose]
   host_tracker.data_dir = options[:data_dir]
   host_tracker.hosts_file = host_tracker.data_dir + "/" + "hosts"
   host_tracker.load_known_hosts_from_file(host_tracker.hosts_file)
 elsif options[:target]
   puts puts "Invoke the HostTracker."
   host_tracker = Wmap::HostTracker.instance
-  host_tracker.verbose=
+  host_tracker.verbose=options[:verbose]
 else
   abort "Error firing up HostTracker instance!"
 end
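The new -v/--[no-]verbose switch defaults to false and is threaded through to the PortScanner, DnsBruter, UrlCrawler and HostTracker instances shown above, replacing the old print_usage helper with OptionParser's built-in help output. A hypothetical invocation, assuming the gem's wmap executable is on the PATH (the target, seed file and cache directory below are placeholders, not values from the gem):

    wmap -t www.example.com -v
    wmap -t seed_targets.txt -d ./wmap_cache --no-verbose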
data/lib/wmap/host_tracker.rb
CHANGED
@@ -57,9 +57,9 @@ class Wmap::HostTracker
     end
     f.close
     return @known_hosts
-
-
-
+  rescue => ee
+    puts "Exception on method #{__method__}: #{ee}"
+    return known_hosts
   end
 
   # Save the current local hosts hash table into a (random) data repository file
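This change is one instance of a pattern repeated throughout 2.6.7: the blank padding left at the end of several methods is replaced with a method-level rescue that reports the exception and returns whatever was collected so far. A minimal illustrative sketch of that shape, where the method name and body are hypothetical and only the rescue layout mirrors the gem:

    # Hypothetical method illustrating the 2.6.7 rescue pattern
    def load_entries(file)
      entries = Hash.new
      File.readlines(file).each { |line| entries[line.strip] = true }
      return entries
    rescue => ee
      # Report the failure and fall back to whatever was loaded before it
      puts "Exception on method #{__method__}: #{ee}"
      return entries
    end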
data/lib/wmap/site_tracker.rb
CHANGED
@@ -282,8 +282,8 @@ class Wmap::SiteTracker
       puts "No new entry added. "
     end
     return results
-
-
+  rescue => ee
+    puts "Exception on method #{__method__}: #{ee}" if @verbose
   end
   alias_method :adds, :bulk_add
 
data/lib/wmap/url_crawler/adware_tag.rb
CHANGED
@@ -12,7 +12,7 @@ module Wmap
 
   # Class to identify and track adware within the site store
   include Wmap::Utils
-  attr_accessor :signature_file, :tag_file, :verbose, :data_dir
+  attr_accessor :signature_file, :tag_file, :verbose, :data_dir
   attr_reader :tag_signatures, :tag_store
 
 
@@ -26,7 +26,7 @@ module Wmap
     # Set default instance variables
     @signature_file=File.dirname(__FILE__) + '/../../../settings/' + 'tag_signatures'
     file=params.fetch(:signature_file, @signature_file)
-    @tag_signatures=
+    @tag_signatures=load_sig_from_file(file)
     @tag_file=params.fetch(:tag_file, @data_dir + 'tag_sites')
     File.write(@tag_file, "") unless File.exist?(@tag_file)
     # load the known tag store
@@ -34,9 +34,8 @@ module Wmap
     @landings = Hash.new # cache landing page to reduce redundant browsing
   end
 
-
   # load the known tag signatures into an instance variable
-  def
+  def load_sig_from_file (file, lc=true)
     puts "Loading data file: #{file}" if @verbose
     data_store=Hash.new
     f = File.open(file, 'r')
@@ -53,7 +52,6 @@ module Wmap
       else
        data_store[entry[0]]=entry[1].strip
       end
-
     end
     f.close
     return data_store
@@ -105,11 +103,11 @@ module Wmap
   end
   alias_method :save!, :save_to_file!
 
-  #
+  # Refresh adware tag store signatures
   def refresh (num=@max_parallel,use_cache=true)
     puts "Add entries to the local cache table from site tracker: " if @verbose
     results = Hash.new
-    tags =
+    tags = @tag_store.keys
     if tags.size > 0
       Parallel.map(tags, :in_processes => num) { |target|
         check_adware(target,use_cache)
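Note that refresh now derives its work list from @tag_store.keys and fans it out with Parallel.map from the parallel gem, the same mechanism crawl_workers uses in the next file. A small self-contained sketch of that fan-out pattern, where the target list and the block body are stand-ins rather than the gem's check_adware logic:

    require 'parallel'

    targets = ["https://a.example.com", "https://b.example.com"]
    # Each element is handed to a forked worker process; Parallel.map returns
    # the block results in the same order as the input list.
    results = Parallel.map(targets, :in_processes => 2) do |target|
      [target, target.length]   # placeholder for check_adware(target, use_cache)
    end
    results.each { |target, value| puts "#{target} => #{value}" }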
data/lib/wmap/url_crawler.rb
CHANGED
@@ -66,210 +66,196 @@ class Wmap::UrlCrawler
   # A web crawler to crawl a known website and search for html links within the same root domain. For example,
   # by crawling 'http://www.yahoo.com/' it could discover 'http://login.yahoo.com/'
   def crawl(url)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    return result
-    end
+    puts "Start web crawling on #{url}"
+    result=Array.new
+    url=url.chomp.strip
+    result.push(url_2_site(url))
+    raise "Error! Invalid url format: #{urls}" unless is_url?(url)
+    # Add logic to profile the web server before crawling; this is used to optimize the crawling speed
+    pre_crawl(url)
+    status = Timeout::timeout(Crawl_timeout/1000) {
+      result+=crawl_worker(url).keys
+    }
+    puts "Web crawling time-out on #{url}: #{status}" if @verbose
+    return result
+  rescue => ee
+    puts "Exception on method #{__method__} for URL #{url}: #{ee}"
+    return result
   end
   alias_method :query, :crawl
 
   # The worker instance of crawler who perform the labour work
   def crawl_worker(url0)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    @crawl_start[url0]=true unless @crawl_start.key?(url0)
+    puts "Please be aware that it may take a while to crawl #{url0}, depending on the site's responsiveness and the amount of contents."
+    # Input URL sanity check first
+    if is_url?(url0)
+      host=url_2_host(url0)
+      ip=host_2_ip(host).to_s
+      raise "Invalid IP address: #{url0}" if ip.nil?
+      port=url_2_port(url0).to_s
+      raise "Invalid port number: #{url0}" if port.nil?
+    else
+      raise "Invalid URL: #{url0}. Please check it out with your browser again."
+    end
+    log_info=Hash.new
+    log_info[1]="Start working on #{url0}"
+    url_stores=Hash.new
+    url_stores[url0]=true unless url_stores.key?(url0)
+    @discovered_urls_by_crawler[url0]=true unless @discovered_urls_by_crawler.key?(url0)
+    @crawl_start[url0]=true unless @crawl_start.key?(url0)
     # $discovered_urls[url0]=true unless $discovered_urls.key?(url0)
-
-
-
-
-
-
-
-
-
-
-
-
+    @crawl_depth.times do
+      url_stores.keys.each do |url|
+        # 10/01/2013 add logic to avoid unnecessary crawling within the same child instance
+        next if @visited_urls_by_crawler.key?(url)
+        url_object = open_url(url)
+        next if url_object == nil
+        url = update_url_if_redirected(url, url_object)
+        url_body = read_url(url)
+        # Protection code - to avoid parsing failure on the empty or nil object
+        next if url_body.nil? or url_body.empty?
+        url_stores[url]=true unless url_stores.key?(url)
+        @discovered_urls_by_crawler[url]=true unless @discovered_urls_by_crawler.key?(url)
         # $discovered_urls[url]=true unless $discovered_urls.key?(url)
-
-
-
-
-
+        doc = Nokogiri::HTML(url_body)
+        next if doc == nil
+        if url_stores.size >= @crawl_page_limit
+          #@visited_urls_by_crawler.merge!(url_stores)
+          @discovered_urls_by_crawler.merge!(url_stores)
           # $discovered_urls.merge!(url_stores)
-
-
-
-
-
-
-
-
-
+          puts "Finish web crawling the url: #{url0}"
+          return url_stores
+        end
+        page_urls = find_urls_on_page(doc, url)
+        page_urls.uniq!
+        page_urls.map do |y|
+          y=normalize_url(y)
+          url_stores[y]=true unless url_stores.key?(y)
+          @discovered_urls_by_crawler[y]=true unless @discovered_urls_by_crawler.key?(y)
           # $discovered_urls[y]=true unless $discovered_urls.key?(y)
-        end
         end
       end
-    puts "Finish web crawling on: #{url0}"
-    log_info[2]="Finish working on: #{url0}"
-    wlog(log_info, "UrlCrawler", @log_file)
-    @crawl_done[url0]=true unless @crawl_done.key?(url0)
-    return url_stores
-    rescue => ee
-    puts "Exception on method #{__method__} for URL #{url0}: #{ee}" if @verbose
-    log_info[3]="Exception on #{url0}"
-    wlog(log_info,"UrlCrawler",@log_file)
-    return url_stores
     end
+    puts "Finish web crawling on: #{url0}"
+    log_info[2]="Finish working on: #{url0}"
+    wlog(log_info, "UrlCrawler", @log_file)
+    @crawl_done[url0]=true unless @crawl_done.key?(url0)
+    return url_stores
+  rescue => ee
+    puts "Exception on method #{__method__} for URL #{url0}: #{ee}" if @verbose
+    log_info[3]="Exception on #{url0}"
+    wlog(log_info,"UrlCrawler",@log_file)
+    return url_stores
   end
 
   # Fast crawling by utilizing fork manager parallel to spawn numbers of child processes at the same time
   # each child process will continuously work on the target pool until all the works are done
   def crawl_workers (targets,num=@max_parallel)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-          uniq_sites[site_key]=target
-        end
+    raise "Input error - expecting targets in an array format: #{targets}" unless targets.kind_of? Array
+    puts "Sanitize the URL seeds to eliminate the unnecessary duplication(s) ..." if @verbose
+    #puts "This could be awhile depending on the list size. Please be patient ..."
+    # 09/30/2013 Add additional logic to eliminate the duplicate target site(s) before the crawlers are invoked.
+    targets -= ["", nil]
+    uniq_sites=Hash.new
+    targets.dup.map do |target|
+      if is_url?(target)
+        host=url_2_host(target)
+        ip=host_2_ip(host).to_s
+        next if ip.nil?
+        port=url_2_port(target).to_s
+        next if port.nil?
+        site_key=ip+":"+port
+        unless uniq_sites.key?(site_key)
+          uniq_sites[site_key]=target
         end
       end
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    end
+    puts "Sanitization done! " if @verbose
+    puts "Start the parallel engine on the normalized crawling list:\n #{targets} "
+    puts "Maximum number of web crawling sessions allowed: #{num}" #if @verbose
+    raise "Error: target list is empty!" if targets.size < 1
+    Parallel.map(uniq_sites.values, :in_processes => num) { |target|
+      puts "Working on #{target} ..." if @verbose
+      crawl(target)
+    }.dup.each do |process|
+      puts "process.inspect: #{process}" if @verbose
+      urls=process
+      urls-=["",nil] unless urls.nil?
+      if urls.nil?
+        next
+      elsif urls.empty?
+        next
+        #do nothing
+      else
+        urls.map do |url|
+          url.strip!
+          @discovered_urls_by_crawler[url]=true unless @discovered_urls_by_crawler.key?(url)
+          #$discovered_urls[url]=true unless $discovered_urls.key?(url)
         end
       end
-    #return sites
-    return @discovered_urls_by_crawler.keys
-    rescue Exception => ee
-    puts "Exception on method #{__method__}: #{ee}" if @verbose
-    return nil
     end
+    #return sites
+    return @discovered_urls_by_crawler.keys
+  rescue Exception => ee
+    puts "Exception on method #{__method__}: #{ee}" if @verbose
+    return nil
   end
   alias_method :crawls, :crawl_workers
 
   # Fast crawling method - build the target pool from the input file
   def crawl_workers_on_file (file)
-
-
-
-
-
-
-
-    return nil
-  end
+    puts "Web crawl the list of targets from file: #{file}"
+    targets=file_2_list(file)
+    sites=crawl_workers(targets,num=@max_parallel)
+    return sites
+  rescue => ee
+    puts "Exception on method #{__method__}: #{ee}" if @verbose
+    return nil
   end
   alias_method :query_file, :crawl_workers_on_file
   alias_method :crawl_file, :crawl_workers_on_file
 
   # Wrapper for the OpenURI open method - create an open_uri object and return the reference upon success
   def open_url(url)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    return nil
-  end
+    puts "Open url #{url} by creating an open_uri object. Return the reference upon success." if @verbose
+    if url =~ /http\:/i
+      # patch for allow the 'un-safe' URL redirection i.e. https://www.example.com -> http://www.example.com
+      url_object = open(url, :allow_redirections=>:safe, :read_timeout=>Max_http_timeout/1000)
+      #url_object = open(url)
+    elsif url =~ /https\:/i
+      url_object = open(url,:ssl_verify_mode => 0, :allow_redirections =>:safe, :read_timeout=>Max_http_timeout/1000)
+      #url_object = open(url,:ssl_verify_mode => 0)
+    else
+      raise "Invalid URL format - please specify the protocol prefix http(s) in the URL: #{url}"
+    end
+    return url_object
+  rescue => ee
+    puts "Exception on method #{__method__} for #{url}: #{ee}" if @verbose
+    return nil
   end
 
   # Wrapper to use OpenURI method 'read' to return url body contents
   def read_url(url)
-
-
-
-
-
-
-
-
-    return nil
-  end
+    puts "Wrapper to return the OpenURI object for url: #{url}" if @verbose
+    url_object=open_url(url)
+    @visited_urls_by_crawler[url]=true unless @visited_urls_by_crawler.key?(url)
+    body=url_object.read
+    return body
+  rescue => ee
+    puts "Exception on method #{__method__}: #{ee}" if @verbose
+    return nil
   end
 
   # Return the destination url in case of url re-direct
   def update_url_if_redirected(url, url_object)
-
-
-
-
-
-
-
-
-    return nil
-  end
+    #puts "Comparing the original URL with the return object base_uri. Return the one where the true content is found. " if @verbose
+    if url != url_object.base_uri.to_s
+      return url_object.base_uri.to_s
+    end
+    return url
+  rescue => ee
+    puts "Exception on method #{__method__}: #{ee}" if @verbose
+    return nil
   end
 
 =begin
@@ -290,90 +276,82 @@
 
   # Search 'current_url' and return found URLs under the same domain
   def find_urls_on_page(doc, current_url)
-
-
-
-
-
-
-
-
-
-        if new_url
-          #if urls_on_same_domain?(new_url,current_url)
-          urls_list.push(new_url)
-          #end
-        else
-          new_url = make_absolute(current_url, new_url)
+    puts "Search and return URLs within the doc: #{doc}" if @verbose
+    urls_list = []
+    # case 1 - search embedded HTML tag <a href='url'> for the url elements
+    links=doc.css('a')
+    links.map do |x|
+      #puts "x: #{x}"
+      new_url = x.attribute('href').to_s
+      unless new_url == nil
+        if new_url.match("http")
+          #if urls_on_same_domain?(new_url,current_url)
           urls_list.push(new_url)
-        end
+          #end
+        else
+          new_url = make_absolute(current_url, new_url)
+          urls_list.push(new_url)
         end
       end
-
-
-
-
-
-
-
-
+    end
+    # case 2 - search client side redirect - <meta http-equiv="refresh" content="5;URL='http://example.com/'">
+    elements=doc.css("meta[http-equiv]")
+    unless elements.size == 0
+      link=elements.attr("content").value.split(/url\=/i)[1]
+      unless link.nil?
+        new_url = make_absolute(current_url, link)
+        urls_list.push(new_url) unless new_url.nil?
       end
-    #puts "Found URLs under page #{current_url}:\n#{urls_list}" if @verbose
-    return urls_list.uniq-["",nil]
-    rescue => ee
-    puts "Exception on method #{__method__}: #{ee}" if @verbose
-    return nil
     end
+    #puts "Found URLs under page #{current_url}:\n#{urls_list}" if @verbose
+    return urls_list.uniq-["",nil]
+  rescue => ee
+    puts "Exception on method #{__method__}: #{ee}" if @verbose
+    return nil
   end
 
   # Method to print out discovery URL result
   def print_discovered_urls_by_crawler
-
-
-
-
-
-
-
-
-
-
-    return nil
-  end
+    puts "Print discovered url by the crawler. " if @verbose
+    puts "\nSummary Report of Discovered URLs from the Crawler:"
+    @discovered_urls_by_crawler.keys.each do |url|
+      puts url
+    end
+    puts "Total: #{@discovered_urls_by_crawler.keys.size}"
+    puts "End of the summary"
+  rescue => ee
+    puts "Exception on method #{__method__}: #{ee}" if @verbose
+    return nil
   end
   alias_method :print, :print_discovered_urls_by_crawler
 
   # Method to save URL discovery result
   def save_discovered_urls (file)
-
-
-
-
-
-
-    return nil
-  end
+    puts "Save discovered urls by the crawler to file: #{file} "
+    list_2_file(@discovered_urls_by_crawler.keys, file)
+    puts "Done!"
+  rescue => ee
+    puts "Exception on method #{__method__}: #{ee}" if @verbose
+    return nil
   end
   alias_method :save, :save_discovered_urls
 
   # Method to retrieve discovery site result
   def get_discovered_sites_by_crawler
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    return nil
-  end
+    puts "Print summary report of discovered sites. " if @verbose
+    puts "\nSummary Report of Discovered Sites from the Crawler:"
+    sites = Hash.new
+    @discovered_urls_by_crawler.keys.each do |url|
+      site=url_2_site(url)
+      sites[site]=true unless sites.key?(site)
+    end
+    sites.keys.map { |site| puts site }
+    puts "Total: #{sites.size}"
+    puts "End of the summary"
+    return sites.keys
+  rescue => ee
+    puts "Exception on method #{__method__}: #{ee}" if @verbose
+    return nil
   end
   alias_method :get_sites, :get_discovered_sites_by_crawler
 
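Taken together, the rewritten methods restore the bodies of crawl, crawl_workers, crawl_workers_on_file, open_url, read_url and the reporting helpers while keeping the public names and aliases (query, crawls, crawl_file, print, save, get_sites) unchanged. A minimal driving sketch based only on the method names visible in this diff, where the URL and output path are placeholders:

    require 'wmap'

    crawler = Wmap::UrlCrawler.new(:verbose => true)
    urls = crawler.crawl("https://www.example.com")          # single-site crawl
    puts urls
    crawler.crawl_workers(["https://www.example.com"], 2)    # parallel crawl of a target list
    crawler.print_discovered_urls_by_crawler                 # summary report of discovered URLs
    crawler.save_discovered_urls("discovered_urls.txt")      # persist the results to a file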
data/version.txt
CHANGED
@@ -3,8 +3,8 @@
 ###############################################################################
 package = wmap
 # wmap version 2.0 == web_discovery version 1.5.3
-version = 2.6.6
-date = 2019-11-
+version = 2.6.7
+date = 2019-11-19
 
 author = Sam (Yang) Li
 email = yang.li@owasp.org
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: wmap
 version: !ruby/object:Gem::Version
-  version: 2.6.6
+  version: 2.6.7
 platform: ruby
 authors:
 - Sam (Yang) Li
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-11-
+date: 2019-11-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: dnsruby