wmap 2.5.0 → 2.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/wmap.rb +1 -0
- data/lib/wmap/domain_tracker.rb +1 -1
- data/lib/wmap/site_tracker.rb +2 -2
- data/lib/wmap/url_checker.rb +24 -4
- data/lib/wmap/url_crawler.rb +49 -49
- data/lib/wmap/url_crawler/adware_tag.rb +281 -0
- data/lib/wmap/wp_tracker.rb +5 -5
- data/logs/wmap.log +1 -0
- data/settings/tag_signatures +6 -0
- data/version.txt +2 -2
- data/wmap.gemspec +2 -2
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6f2042c146724dcfa9852bcb1920a2d9baded35fb8ba589d3af277277d678d36
+  data.tar.gz: d0ae5c5f90a4707eddbb91119b71ae2e9dc1c9ace6631e3f66e03d892ef3d8f1
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2a543f31d23e42604a0c86445eb3c2b469485527c0897a7495a8d74f593be19a1e915cf38e8ae03246012d5bb21734f8d2f93b0180863383a3a5260030fb1336
+  data.tar.gz: 830a9645c9633f0cd396cf9dfc40654a902ad3275ec37870f2fa1e06f64365bf9e0229f5acb0972af8569497c253e13d88b87c5d86710d324cb1510e3cd6679c

data/lib/wmap.rb
CHANGED
@@ -20,6 +20,7 @@ require 'wmap/wp_tracker'
 require 'wmap/network_profiler'
 require 'wmap/port_scanner'
 require 'wmap/url_crawler'
+require 'wmap/url_crawler/adware_tag'
 require 'wmap/dns_bruter'
 require 'wmap/site_tracker'
 require 'wmap/site_tracker/deactivated_site'

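The one-line change above wires the new adware-tag crawler into the gem's top-level entry point, so a plain require of wmap also loads the new class. A quick sanity check, as a sketch (assuming the 2.5.1 gem is installed):

require 'wmap'

# Prints "constant" once the library has loaded the new class
puts defined?(Wmap::UrlCrawler::AdwareTag)
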
data/lib/wmap/domain_tracker.rb
CHANGED
@@ -143,7 +143,7 @@ class Wmap::DomainTracker
         @known_internet_domains.merge!(record)
         return record
       else
-        puts "Problem add domain #{host} - please use legal root domain or sub domain only."
+        puts "Problem add domain #{host}: unknown domain format - please use legal root domain or sub domain only."
       end
     end
   rescue => ee

data/lib/wmap/site_tracker.rb
CHANGED
@@ -108,7 +108,7 @@ class Wmap::SiteTracker
     # Preliminary sanity check
     site=site.strip.downcase unless site.nil?
     if site_known?(site)
-      puts "Site
+      puts "Site already exists. Skip it: #{site}"
       return nil
     end
     site=normalize_url(site) if is_url?(site)
@@ -242,7 +242,7 @@ class Wmap::SiteTracker
       puts "Exception on method #{__method__}: #{ee}"
       checker=nil
       deact=nil
-      host_tracker=nil
+      host_tracker=nil
       return nil
     end
   end

data/lib/wmap/url_checker.rb
CHANGED
@@ -32,8 +32,8 @@ class Wmap::UrlChecker

   # Main worker method to perform various checks on the URL / site
   def url_worker (url)
-    puts "Checking out an unknown URL: #{url}" if @verbose
     begin
+      puts "Checking out an unknown URL: #{url}" if @verbose
       url=url.strip.downcase
       raise "Invalid URL format: #{url}" unless is_url?(url)
       timestamp=Time.now
@@ -46,10 +46,10 @@ class Wmap::UrlChecker
       else
         code=response_code(url)
       end
-      if
-        loc
+      if code>=300 && code < 400
+        loc=landing_location(4,url)
       else
-        loc=
+        loc=nil
       end
       if @url_finger_print.key?(url)
         fp=@url_finger_print[url]
@@ -212,6 +212,26 @@ class Wmap::UrlChecker
   end
   alias_method :location, :redirect_location

+  # Test the URL / Site and return the landing url location (recursive with the depth = 4 )
+  def landing_location (depth=4, url)
+    begin
+      depth -= 1
+      return url if depth < 1
+      timeo = @http_timeout/1000.0
+      uri = URI.parse(url)
+      code = response_code (url)
+      if code >= 300 && code < 400
+        url = redirect_location (url)
+        url = landing_location(depth,url)
+      else
+        return url
+      end
+      return url
+    rescue Exception => ee
+      puts "Exception on method #{__method__} on URL #{url}: #{ee}" if @verbose
+    end
+  end
+
   # Test the URL / site and return the web server type from the HTTP header "server" field
   def get_server_header (url)
     begin

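The new landing_location method added above follows a chain of HTTP 3xx redirects until it reaches a non-redirect response or exhausts its depth budget, and url_worker now records that landing URL whenever a site answers with a 3xx code. A minimal usage sketch (the target URL is a placeholder; assumes the gem is installed and the host is reachable):

require 'wmap'

checker = Wmap::UrlChecker.new
# Follow up to 4 redirects and return the final landing URL
puts checker.landing_location(4, "http://www.example.com")
# The depth argument is optional and defaults to 4
puts checker.landing_location("http://www.example.com")

Note that Ruby permits the optional depth parameter before the required url parameter, which is why the single-argument call works.
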
data/lib/wmap/url_crawler.rb
CHANGED
@@ -21,7 +21,6 @@ class Wmap::UrlCrawler
   attr_reader :discovered_urls_by_crawler, :visited_urls_by_crawler, :crawl_start, :crawl_done
   # Global variable used to store the combined result of all the forked child processes. Note that class variable
   # would not be able to pass the result due the limitation of IO Pipe communication mechanism used by 'parallel' fork manager
-  # $discovered_urls=Hash.new

   # set hard stop limit of http time-out to 8 seconds, in order to avoid severe performance penalty for certain 'weird' site(s)
   Max_http_timeout=8000
@@ -47,8 +46,8 @@ class Wmap::UrlCrawler

   # Pre-crawl profiler, to be used for network profiling to maximum the crawler performance.
   def pre_crawl(url)
-    puts "Perform network profiling works on the web server before the web crawling: #{url}" if @verbose
     begin
+      puts "Perform network profiling works on the web server before the web crawling: #{url}" if @verbose
       host=url_2_host(url)
       # Use the following formula to 'guess' the right http time-out threshold for the scanner
       nwk_to=Wmap::NetworkProfiler.new.profile(host).to_i
@@ -67,8 +66,8 @@ class Wmap::UrlCrawler
   # A web crawler to crawl a known website and search for html links within the same root domain. For example,
   # by crawling 'http://www.yahoo.com/' it could discover 'http://login.yahoo.com/'
   def crawl(url)
-
-
+    begin
+      puts "Start web crawling on #{url}"
       result=Array.new
       url=url.chomp.strip
       result.push(url_2_site(url))
@@ -80,17 +79,17 @@ class Wmap::UrlCrawler
       }
       puts "Web crawling time-out on #{url}: #{status}" if @verbose
       return result
-
-
-
-
+    rescue => ee
+      puts "Exception on method #{__method__} for URL #{url}: #{ee}"
+      return result
+    end
   end
   alias_method :query, :crawl

   # The worker instance of crawler who perform the labour work
   def crawl_worker(url0)
-
-
+    begin
+      puts "Please be aware that it may take a while to crawl #{url0}, depending on the site's responsiveness and the amount of contents."
       # Input URL sanity check first
       if is_url?(url0)
         host=url_2_host(url0)
@@ -121,7 +120,7 @@ class Wmap::UrlCrawler
         url_stores[url]=true unless url_stores.key?(url)
         @discovered_urls_by_crawler[url]=true unless @discovered_urls_by_crawler.key?(url)
         # $discovered_urls[url]=true unless $discovered_urls.key?(url)
-        doc =
+        doc = Nokogiri::HTML(url_body)
         next if doc == nil
         if url_stores.size >= @crawl_page_limit
           #@visited_urls_by_crawler.merge!(url_stores)
@@ -145,12 +144,12 @@ class Wmap::UrlCrawler
       wlog(log_info, "UrlCrawler", @log_file)
       @crawl_done[url0]=true unless @crawl_done.key?(url0)
       return url_stores
-
-
-
-
-
-
+    rescue => ee
+      puts "Exception on method #{__method__} for URL #{url0}: #{ee}" if @verbose
+      log_info[3]="Exception on #{url0}"
+      wlog(log_info,"UrlCrawler",@log_file)
+      return url_stores
+    end
   end

   # Fast crawling by utilizing fork manager parallel to spawn numbers of child processes at the same time
@@ -211,14 +210,14 @@ class Wmap::UrlCrawler

   # Fast crawling method - build the target pool from the input file
   def crawl_workers_on_file (file)
-    puts "Web crawl the list of targets from file: #{file}"
     begin
+      puts "Web crawl the list of targets from file: #{file}"
       targets=file_2_list(file)
      sites=crawl_workers(targets,num=@max_parallel)
       return sites
     rescue => ee
-
-
+      puts "Exception on method #{__method__}: #{ee}" if @verbose
+      return nil
     end
   end
   alias_method :query_file, :crawl_workers_on_file
@@ -226,7 +225,6 @@ class Wmap::UrlCrawler

   # Wrapper for the OpenURI open method - create an open_uri object and return the reference upon success
   def open_url(url)
-    #url_object = nil
     begin
       puts "Open url #{url} by creating an open_uri object. Return the reference upon success." if @verbose
       if url =~ /http\:/i
@@ -262,8 +260,8 @@ class Wmap::UrlCrawler

   # Return the destination url in case of url re-direct
   def update_url_if_redirected(url, url_object)
-    #puts "Comparing the original URL with the return object base_uri. Return the one where the true content is found. " if @verbose
     begin
+      #puts "Comparing the original URL with the return object base_uri. Return the one where the true content is found. " if @verbose
       if url != url_object.base_uri.to_s
         return url_object.base_uri.to_s
       end
@@ -274,24 +272,26 @@ class Wmap::UrlCrawler
     end
   end

+=begin
   # Wrapper for the Nokogiri DOM parser
   def parse_html(html_body)
-    #puts "Parsing the html content: #{html_body}. Return DOM " if @verbose
     begin
-
+      #puts "Parsing the html content: #{html_body}. Return DOM " if @verbose
+      doc = Nokogiri::HTML(html_body)
       #puts "Successfully crawling the url: #{url_object.base_uri.to_s}" if @verbose
       #puts "doc: #{doc}" if @verbose
       return doc
-
-
-
-
+    rescue => ee
+      puts "Exception on method #{__method__}: #{ee}" if @verbose
+      return nil
+    end
   end
+=end

-
+  # Search 'current_url' and return found URLs under the same domain
   def find_urls_on_page(doc, current_url)
-    #puts "Search and return URLs within the doc: #{doc}" if @verbose
     begin
+      puts "Search and return URLs within the doc: #{doc}" if @verbose
       urls_list = []
       # case 1 - search embedded HTML tag <a href='url'> for the url elements
       links=doc.css('a')
@@ -320,46 +320,46 @@ class Wmap::UrlCrawler
       end
       #puts "Found URLs under page #{current_url}:\n#{urls_list}" if @verbose
       return urls_list.uniq-["",nil]
-
-
-
+    rescue => ee
+      puts "Exception on method #{__method__}: #{ee}" if @verbose
+      return nil
     end
-
+  end

   # Method to print out discovery URL result
   def print_discovered_urls_by_crawler
-    puts "Print discovered url by the crawler. " if @verbose
     begin
+      puts "Print discovered url by the crawler. " if @verbose
       puts "\nSummary Report of Discovered URLs from the Crawler:"
       @discovered_urls_by_crawler.keys.each do |url|
         puts url
       end
       puts "Total: #{@discovered_urls_by_crawler.keys.size}"
       puts "End of the summary"
-
-
-
-
+    rescue => ee
+      puts "Exception on method #{__method__}: #{ee}" if @verbose
+      return nil
+    end
   end
   alias_method :print, :print_discovered_urls_by_crawler

   # Method to save URL discovery result
   def save_discovered_urls (file)
-    puts "Save discovered urls by the crawler to file: #{file} "
     begin
+      puts "Save discovered urls by the crawler to file: #{file} "
       list_2_file(@discovered_urls_by_crawler.keys, file)
       puts "Done!"
-
-
-
-
+    rescue => ee
+      puts "Exception on method #{__method__}: #{ee}" if @verbose
+      return nil
+    end
   end
   alias_method :save, :save_discovered_urls

   # Method to retrieve discovery site result
   def get_discovered_sites_by_crawler
-    puts "Print summary report of discovered sites. " if @verbose
     begin
+      puts "Print summary report of discovered sites. " if @verbose
       puts "\nSummary Report of Discovered Sites from the Crawler:"
       sites = Hash.new
       @discovered_urls_by_crawler.keys.each do |url|
@@ -370,12 +370,12 @@ class Wmap::UrlCrawler
       puts "Total: #{sites.size}"
       puts "End of the summary"
       return sites.keys
-
+    rescue => ee
       puts "Exception on method #{__method__}: #{ee}" if @verbose
-
-
+      return nil
+    end
   end
   alias_method :get_sites, :get_discovered_sites_by_crawler

-  private :open_url, :read_url, :update_url_if_redirected, :
+  private :open_url, :read_url, :update_url_if_redirected, :find_urls_on_page
 end

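The pattern across all of these hunks is the same: each public method now opens its begin block first and prints its banner inside it, so an exception in any step is caught by the method's own rescue clause and logged instead of escaping the call. A short sketch of the crawler entry points touched above (hypothetical target and output file name):

require 'wmap'

crawler = Wmap::UrlCrawler.new
crawler.crawl("http://www.example.com")          # single-site crawl; alias: query
crawler.print_discovered_urls_by_crawler         # summary report; alias: print
crawler.save_discovered_urls("found_urls.txt")   # persist results; alias: save
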
data/lib/wmap/url_crawler/adware_tag.rb
ADDED
@@ -0,0 +1,281 @@
+#--
+# Wmap
+#
+# A pure Ruby library for Internet web application discovery and tracking.
+#
+# Copyright (c) 2012-2015 Yang Li <yang.li@owasp.org>
+#++
+
+
+module Wmap
+  class UrlCrawler
+
+    # Class to identify and track adware within the site store
+    include Wmap::Utils
+    attr_accessor :signature_file, :tag_file, :verbose, :data_dir, :data_store
+    attr_reader :tag_store, :tag_signatures
+
+
+    class AdwareTag < Wmap::UrlCrawler
+
+      # Initialize the instance variables
+      def initialize (params = {})
+        @verbose=params.fetch(:verbose, false)
+        @data_dir=params.fetch(:data_dir, File.dirname(__FILE__)+'/../../../data/')
+        @tag_file=@data_dir + 'tag_sites'
+        # Set default instance variables
+        @signature_file=File.dirname(__FILE__) + '/../../../settings/' + 'tag_signatures'
+        file=params.fetch(:signature_file, @signature_file)
+        @tag_signatures=load_from_file(file)
+        file2=params.fetch(:tag_file, @tag_file)
+        File.write(file2, "") unless File.exist?(@tag_file)
+        # load the known tag store
+        @tag_store=load_tag_from_file(file2)
+      end
+
+
+      # load the known tag signatures into an instance variable
+      def load_from_file (file, lc=true)
+        begin
+          puts "Loading data file: #{file}" if @verbose
+          data_store=Hash.new
+          f = File.open(file, 'r')
+          f.each_line do |line|
+            puts "Processing line: #{line}" if @verbose
+            line=line.chomp.strip
+            next if line.nil?
+            next if line.empty?
+            next if line =~ /^\s*#/
+            line=line.downcase if lc==true
+            entry=line.split(',')
+            if data_store.key?(entry[0])
+              next
+            else
+              data_store[entry[0]]=entry[1].strip
+            end
+
+          end
+          f.close
+          return data_store
+        rescue => ee
+          puts "Exception on method #{__method__}: #{ee}" if @verbose
+          return nil
+        end
+      end
+
+      # load the known tag store cache into an instance variable
+      def load_tag_from_file (file, lc=true)
+        begin
+          puts "Loading tag data file: #{file}" if @verbose
+          data_store=Hash.new
+          f = File.open(file, 'r')
+          f.each_line do |line|
+            puts "Processing line: #{line}" if @verbose
+            line=line.chomp.strip
+            next if line.nil?
+            next if line.empty?
+            next if line =~ /^\s*#/
+            line=line.downcase if lc==true
+            entry=line.split(',')
+            if data_store.key?(entry[0])
+              next
+            else
+              data_store[entry[0]]=[entry[1].strip, entry[2].strip, entry[3]]
+            end
+          end
+          f.close
+          return data_store
+        rescue => ee
+          puts "Exception on method #{__method__}: #{ee}" if @verbose
+          return nil
+        end
+      end
+
+      # Save the current tag store hash table into a file
+      def save_to_file!(file_tag=@tag_file, tags=@tag_store)
+        begin
+          puts "Saving the current wordpress site table from memory to file: #{file_tag} ..." if @verbose
+          timestamp=Time.now
+          f=File.open(file_tag, 'w')
+          f.write "# Local tag file created by class #{self.class} method #{__method__} at: #{timestamp}\n"
+          f.write "# Site, Landing URL, Detected Adware Tag, Tag Version, Tag Description\n"
+          tags.each do |key, val|
+            f.write "#{key}, #{val[0]}, #{val[1]}, #{val[2]}, #{val[3]}\n"
+          end
+          f.close
+          puts "Tag store cache table is successfully saved: #{file_tag}"
+        rescue => ee
+          puts "Exception on method #{__method__}: #{ee}" if @verbose
+        end
+      end
+      alias_method :save!, :save_to_file!
+
+      # add tag entries (from the sitetracker list)
+      def refresh (num=@max_parallel,use_cache=true)
+        #begin
+          puts "Add entries to the local cache table from site tracker: " if @verbose
+          results=Hash.new
+          tags=Wmap::SiteTracker.instance.known_sites.keys
+          if tags.size > 0
+            Parallel.map(tags, :in_processes => num) { |target|
+              check_adware(target,use_cache)
+            }.each do |process|
+              if !process
+                next
+              else
+                results.merge!(process)
+              end
+            end
+            @tag_store.merge!(results)
+            puts "Done loading entries."
+            tags=nil
+            return results
+          else
+            puts "Error: no entry is loaded. Please check your list and try again."
+          end
+          tags=nil
+          return results
+        #rescue => ee
+        #  puts "Exception on method #{__method__}: #{ee}" if @verbose
+        #end
+      end
+
+      # Give a site, locate the landing page, then sift out the adware tag if found
+      def check_adware(site,use_cache=true)
+        #begin
+          puts "Check the site for known Adware tags: #{site}" if @verbose
+          record = Hash.new
+          if use_cache && @tag_store.key?(site)
+            puts "Site entry already exist. Skipping: #{site}" if @verbose
+          else
+            url = fast_landing(site)
+            tags = find_tags(url)
+            return record if tags.size==0
+            tag_vers=tags.map do |tag|
+              get_ver(url,tag)
+            end
+            tag_descs=tags.map do |tag|
+              Base64.urlsafe_encode64(get_desc(url,tag))
+            end
+            if tags
+              record[site]=[url, tags.join("|"), tag_vers.join("|"), tag_descs.join("|")]
+              @tag_store.merge!(record)
+              puts "Tag entry loaded: #{record}" if @verbose
+            else
+              puts "No tag found. Skip site #{site}" if @verbose
+            end
+          end
+          return record
+        #rescue => ee
+        #  puts "Exception on method #{__method__}: #{ee}: #{site}" if @verbose
+        #end
+      end
+
+      # Given a site, determine the landing url
+      def fast_landing(site)
+        puts "Locate the landing url for: #{site}" if @verbose
+        my_tracker=Wmap::SiteTracker.instance
+        if my_tracker.known_sites.key?(site)
+          # looking into the cache first
+          if my_tracker.known_sites[site]['code'] >= 300 && my_tracker.known_sites[site]['code'] < 400
+            url = my_tracker.known_sites[site]['redirection']
+          else
+            url = site
+          end
+          my_tracker = nil
+        else
+          # no cache, then need to do it fresh
+          my_checker = Wmap::UrlChecker.new
+          url = my_checker.landing_location(site)
+          my_checker = nil
+        end
+        puts "Landing url found: #{url}" if @verbose
+        return url
+      end
+
+      # Search the page for known tag signatures. If found return them in an array
+      def find_tags(url)
+        begin
+          puts "Search and return tags within the url payload: #{url}" if @verbose
+          tag_list = []
+          doc = Nokogiri::HTML(open(url))
+          doc.text.each_line do |line|
+            my_line = line.downcase
+            @tag_signatures.keys.map do |tag|
+              tag_list.push(tag) if my_line.include?(tag)
+            end
+          end
+          doc = nil
+          return tag_list
+        rescue => ee
+          puts "Exception on method #{__method__}: #{ee}" if @verbose
+          return []
+        end
+      end
+
+      # Search the url payload for known tag version identifier. If found return a string, else empty string.
+      def get_ver(url,tag)
+        puts "Search and return tag version within the url payload: #{url}, #{tag}" if @verbose
+        tag_ver=""
+        doc = Nokogiri::HTML(open(url))
+        case tag
+        when "utag.js" # sample: ...,"code_release_version":"cb20190312032612",...
+          doc.text.each_line do |line|
+            my_line = line.downcase
+            if my_line.include?("code_release_version")
+              puts "Extract tag version from line: #{my_line}" if @verbose
+              m = my_line.match(/\"code\_release\_version\"\:\"(?<ver>[a-z]+\d+)\"/)
+              tag_ver = m[:ver]
+              break
+            end
+          end
+        when "analytics.js" # sample: ga('create', 'UA-19175804-2', 'knopfdoubleday.com');
+          doc.text.each_line do |line|
+            my_line = line.downcase
+            if my_line.include?("ga(") && my_line.include?("create")
+              puts "Extract tag version from line: #{my_line}" if @verbose
+              m = my_line.match(/[\'|\"]create[\'|\"]\s*\,\s*[\'|\"](?<ver>\w+\-\d+\-\d+)[\'|\"]\s*\,/)
+              tag_ver = m[:ver]
+              break
+            end
+          end
+        when "all.js" # sample: appId : '749936668352954',
+          doc.text.each_line do |line|
+            my_line = line.downcase
+            if my_line.include?("appid") && my_line.include?(":")
+              puts "Extract tag version from line: #{my_line}" if @verbose
+              m = my_line.match(/appid\s+\:\s+[\'|\"](?<ver>\d+)[\'|\"]\s*\,/)
+              tag_ver = m[:ver]
+              break
+            end
+          end
+
+        else
+          puts "Unknown Adware Tag: #{tag}"
+          # do nothing
+        end
+        doc = nil
+        return tag_ver
+      end
+
+      # Search the url payload for known tag. If found return the base64 encode whole script snippet.
+      def get_desc(url,tag)
+        puts "Search and return tag script in url payload: #{url}, #{tag}" if @verbose
+        recording=false
+        tag_found=false
+        tag_desc=""
+        doc = Nokogiri::HTML(open(url))
+        doc.search('script').map do |script|
+          if script.text.include?(tag)
+            return script.text
+          end
+        end
+        doc = nil
+        return tag_desc
+      end
+
+
+
+    end
+  end
+end

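Putting the new file together: AdwareTag loads the comma-separated signature list from settings/tag_signatures (also added in this release), resolves a site's landing page via the site tracker cache or the new Wmap::UrlChecker#landing_location, greps the page text for signature names such as utag.js, analytics.js, and all.js, and caches one record per site (landing URL, tags, versions, Base64-encoded script snippets) in data/tag_sites. A usage sketch (hypothetical site; assumes network access):

require 'wmap'

tag_tracker = Wmap::UrlCrawler::AdwareTag.new(:verbose => true)
record = tag_tracker.check_adware("http://www.example.com")  # one site, cache honored
puts record.inspect
tag_tracker.save!    # flush the in-memory tag store back to the cache file

One design note: find_tags, get_ver, and get_desc each re-fetch and re-parse the page with Nokogiri::HTML(open(url)), so a site carrying several tags is downloaded multiple times per check.
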
data/lib/wmap/wp_tracker.rb
CHANGED
@@ -71,7 +71,7 @@ class Wmap::WpTracker
     end
   end

-  # Save the current
+  # Save the current hash table into a file
   def save_to_file!(file_wps=@file_wps, wps=@known_wp_sites)
     puts "Saving the current wordpress site table from memory to file: #{file_wps} ..." if @verbose
     begin
@@ -95,11 +95,11 @@ class Wmap::WpTracker
   alias_method :save!, :save_to_file!

   # 'setter' to add wordpress entry to the cache one at a time
-  def add(url)
+  def add(url, use_cache=true)
     begin
       puts "Add entry to the local cache table: #{url}" if @verbose
       site=url_2_site(url)
-      if @known_wp_sites.key?(site)
+      if use_cache && @known_wp_sites.key?(site)
         puts "Site is already exist. Skipping: #{site}"
       else
         record=Hash.new
@@ -141,14 +141,14 @@ class Wmap::WpTracker
   end

   # add wordpress site entries (from a sitetracker list)
-  def refresh (num=@max_parallel)
+  def refresh (num=@max_parallel,use_cache=true)
     #begin
       puts "Add entries to the local cache table from site tracker: " if @verbose
       results=Hash.new
       wps=Wmap::SiteTracker.instance.known_sites.keys
       if wps.size > 0
         Parallel.map(wps, :in_processes => num) { |target|
-          add(target)
+          add(target,use_cache)
         }.each do |process|
           if process.nil?
             next

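The use_cache flag introduced here threads from refresh down into add, so a bulk refresh can now force re-evaluation of sites that are already in the local cache rather than skipping them. A sketch (assuming wp is an existing Wmap::WpTracker instance):

# wp is assumed to be a Wmap::WpTracker instance
wp.add("http://blog.example.com")          # default: skip the site if already cached
wp.add("http://blog.example.com", false)   # bypass the cache and re-check the site
wp.refresh(10, false)                      # re-evaluate all tracked sites, 10 processes
wp.save!                                   # persist the updated table
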
data/logs/wmap.log
CHANGED
@@ -1550,3 +1550,4 @@
 2019-02-20 21:10:59 -0500: googleBot: Execute the command: googleBot
 2019-02-20 21:17:21 -0500: googleBot: Execute the command: googleBot
 2019-02-20 21:26:13 -0500: googleBot: Execute the command: googleBot
+2019-02-22 09:59:09 -0500: wmap: Execute the command: wmap /Users/sli/prh_wmap/shared/data/seed

data/version.txt
CHANGED
@@ -3,8 +3,8 @@
 ###############################################################################
 package = wmap
 # wmap version 2.0 == web_discovery version 1.5.3
-version = 2.5.
-date = 2019-
+version = 2.5.1
+date = 2019-03-17

 author = Sam (Yang) Li
 email = yang.li@owasp.org

data/wmap.gemspec
CHANGED
@@ -36,9 +36,9 @@ Gem::Specification.new do |s|
   s.description = "wmap is written to perform Internet web application / service discovery. The discovery results are designed to be automatically tracked by the software."
   s.email = info["email"]
   s.executables = ["wmap","wscan","wadd","wadds","wdel","wcheck","wdump","spiderBot","googleBot","updateAll","prime","deprime","refresh","trust","distrust","run_tests"]
-  s.files = ["CHANGELOG.md", "TODO", "settings/discovery_ports","
+  s.files = ["CHANGELOG.md", "TODO", "settings/discovery_ports","data/","LICENSE.txt",
              "version.txt","README.rdoc", "wmap.gemspec"]
-  s.files += Dir['lib/*.rb'] + Dir['lib/wmap/*.rb'] + Dir['lib/wmap
+  s.files += Dir['lib/*.rb'] + Dir['lib/wmap/*.rb'] + Dir['lib/wmap/**/*'] + Dir['bin/*'] + Dir['settings/*'] + Dir['demos/*'] + Dir['test/*'] + Dir['ruby_whois_patches/*'] + Dir['dicts/*'] + Dir['logs/wmap.log']
   #s.homepage = "none"
   s.post_install_message = "*"*80 + "\n\nThank you for installing the wmap gem - a pure Ruby library for Internet web application discovery and tracking. Please refer to the README.rdoc for more information of using this gem. \n\n" + "*"*80 + "\n"
   s.require_paths = ["lib"]

metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: wmap
 version: !ruby/object:Gem::Version
-  version: 2.5.
+  version: 2.5.1
 platform: ruby
 authors:
 - Sam (Yang) Li
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-
+date: 2019-03-17 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: dnsruby
@@ -268,6 +268,7 @@ files:
 - lib/wmap/site_tracker/deactivated_site.rb
 - lib/wmap/url_checker.rb
 - lib/wmap/url_crawler.rb
+- lib/wmap/url_crawler/adware_tag.rb
 - lib/wmap/utils/domain_root.rb
 - lib/wmap/utils/logger.rb
 - lib/wmap/utils/url_magic.rb
@@ -334,6 +335,7 @@ files:
 - settings/discovery_ports
 - settings/google_keywords.txt
 - settings/google_locator.txt
+- settings/tag_signatures
 - test/domain_tracker_test.rb
 - test/utils_test.rb
 - version.txt