wmap 2.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +141 -0
- data/LICENSE.txt +15 -0
- data/README.rdoc +98 -0
- data/TODO +13 -0
- data/bin/deprime +21 -0
- data/bin/distrust +38 -0
- data/bin/googleBot +23 -0
- data/bin/prime +21 -0
- data/bin/refresh +26 -0
- data/bin/run_tests +16 -0
- data/bin/spiderBot +26 -0
- data/bin/trust +38 -0
- data/bin/updateAll +57 -0
- data/bin/wadd +25 -0
- data/bin/wadds +26 -0
- data/bin/wcheck +28 -0
- data/bin/wdel +25 -0
- data/bin/wdump +21 -0
- data/bin/wmap +151 -0
- data/bin/wscan +32 -0
- data/data/cidrs +2 -0
- data/data/deactivated_sites +1 -0
- data/data/domains +2 -0
- data/data/hosts +1 -0
- data/data/prime_hosts +1 -0
- data/data/sites +2 -0
- data/data/sub_domains +2 -0
- data/demos/bruter.rb +27 -0
- data/demos/dns_brutes.rb +28 -0
- data/demos/filter_cidr.rb +18 -0
- data/demos/filter_crawls.rb +5 -0
- data/demos/filter_domain.rb +25 -0
- data/demos/filter_geoip.rb +26 -0
- data/demos/filter_known_services.rb +59 -0
- data/demos/filter_netinfo.rb +23 -0
- data/demos/filter_prime.rb +25 -0
- data/demos/filter_profiler.rb +3 -0
- data/demos/filter_redirection.rb +19 -0
- data/demos/filter_site.rb +40 -0
- data/demos/filter_siteip.rb +31 -0
- data/demos/filter_status.rb +17 -0
- data/demos/filter_timestamp.rb +23 -0
- data/demos/filter_url.rb +19 -0
- data/demos/new_fnd.rb +66 -0
- data/demos/nmap_parser.pl +138 -0
- data/demos/site_format.rb +18 -0
- data/demos/whois_domain.rb +78 -0
- data/dicts/GeoIP.dat +0 -0
- data/dicts/GeoIPASNum.dat +0 -0
- data/dicts/GeoLiteCity.dat +0 -0
- data/dicts/ccsld.txt +2646 -0
- data/dicts/cctld.txt +243 -0
- data/dicts/gtld.txt +25 -0
- data/dicts/hostnames-dict.big +1402 -0
- data/dicts/hostnames-dict.txt +101 -0
- data/lib/wmap/cidr_tracker.rb +327 -0
- data/lib/wmap/dns_bruter.rb +308 -0
- data/lib/wmap/domain_tracker/sub_domain.rb +142 -0
- data/lib/wmap/domain_tracker.rb +342 -0
- data/lib/wmap/geoip_tracker.rb +72 -0
- data/lib/wmap/google_search_scraper.rb +177 -0
- data/lib/wmap/host_tracker/primary_host.rb +130 -0
- data/lib/wmap/host_tracker.rb +550 -0
- data/lib/wmap/network_profiler.rb +144 -0
- data/lib/wmap/port_scanner.rb +208 -0
- data/lib/wmap/site_tracker/deactivated_site.rb +85 -0
- data/lib/wmap/site_tracker.rb +937 -0
- data/lib/wmap/url_checker.rb +314 -0
- data/lib/wmap/url_crawler.rb +381 -0
- data/lib/wmap/utils/domain_root.rb +184 -0
- data/lib/wmap/utils/logger.rb +53 -0
- data/lib/wmap/utils/url_magic.rb +343 -0
- data/lib/wmap/utils/utils.rb +333 -0
- data/lib/wmap/whois.rb +76 -0
- data/lib/wmap.rb +227 -0
- data/logs/wmap.log +17 -0
- data/ruby_whois_patches/base_cocca2.rb +149 -0
- data/ruby_whois_patches/kero.yachay.pe.rb +120 -0
- data/ruby_whois_patches/whois.PublicDomainRegistry.com.rb +124 -0
- data/ruby_whois_patches/whois.above.com.rb +61 -0
- data/ruby_whois_patches/whois.adamsnames.tc.rb +107 -0
- data/ruby_whois_patches/whois.aeda.net.ae.rb +105 -0
- data/ruby_whois_patches/whois.ai.rb +112 -0
- data/ruby_whois_patches/whois.arnes.si.rb +121 -0
- data/ruby_whois_patches/whois.ascio.com.rb +91 -0
- data/ruby_whois_patches/whois.cnnic.cn.rb +123 -0
- data/ruby_whois_patches/whois.corporatedomains.com.rb +67 -0
- data/ruby_whois_patches/whois.crsnic.net.rb +108 -0
- data/ruby_whois_patches/whois.denic.de.rb +174 -0
- data/ruby_whois_patches/whois.dk-hostmaster.dk.rb +120 -0
- data/ruby_whois_patches/whois.dns.be.rb +134 -0
- data/ruby_whois_patches/whois.dns.lu.rb +129 -0
- data/ruby_whois_patches/whois.dns.pl.rb +150 -0
- data/ruby_whois_patches/whois.dns.pt.rb +119 -0
- data/ruby_whois_patches/whois.domain.kg.rb +126 -0
- data/ruby_whois_patches/whois.domainregistry.my.rb +123 -0
- data/ruby_whois_patches/whois.domreg.lt.rb +110 -0
- data/ruby_whois_patches/whois.dot.tk.rb +140 -0
- data/ruby_whois_patches/whois.hkirc.hk.rb +121 -0
- data/ruby_whois_patches/whois.isnic.is.rb +130 -0
- data/ruby_whois_patches/whois.je.rb +119 -0
- data/ruby_whois_patches/whois.jprs.jp.rb +137 -0
- data/ruby_whois_patches/whois.kenic.or.ke.rb +140 -0
- data/ruby_whois_patches/whois.markmonitor.com.rb +118 -0
- data/ruby_whois_patches/whois.melbourneit.com.rb +58 -0
- data/ruby_whois_patches/whois.nic.as.rb +96 -0
- data/ruby_whois_patches/whois.nic.at.rb +109 -0
- data/ruby_whois_patches/whois.nic.ch.rb +141 -0
- data/ruby_whois_patches/whois.nic.cl.rb +117 -0
- data/ruby_whois_patches/whois.nic.ec.rb +157 -0
- data/ruby_whois_patches/whois.nic.im.rb +120 -0
- data/ruby_whois_patches/whois.nic.it.rb +170 -0
- data/ruby_whois_patches/whois.nic.lv.rb +116 -0
- data/ruby_whois_patches/whois.nic.ly.rb +127 -0
- data/ruby_whois_patches/whois.nic.mu.rb +27 -0
- data/ruby_whois_patches/whois.nic.mx.rb +123 -0
- data/ruby_whois_patches/whois.nic.net.sa.rb +111 -0
- data/ruby_whois_patches/whois.nic.or.kr.rb +101 -0
- data/ruby_whois_patches/whois.nic.tel.rb +129 -0
- data/ruby_whois_patches/whois.nic.tr.rb +133 -0
- data/ruby_whois_patches/whois.nic.us.rb +129 -0
- data/ruby_whois_patches/whois.nic.ve.rb +135 -0
- data/ruby_whois_patches/whois.norid.no.rb +127 -0
- data/ruby_whois_patches/whois.pandi.or.id.rb +118 -0
- data/ruby_whois_patches/whois.psi-usa.info.rb +63 -0
- data/ruby_whois_patches/whois.registro.br.rb +109 -0
- data/ruby_whois_patches/whois.registrygate.com.rb +55 -0
- data/ruby_whois_patches/whois.rrpproxy.net.rb +61 -0
- data/ruby_whois_patches/whois.sgnic.sg.rb +130 -0
- data/ruby_whois_patches/whois.srs.net.nz.rb +166 -0
- data/ruby_whois_patches/whois.tucows.com.rb +70 -0
- data/ruby_whois_patches/whois.twnic.net.tw.rb +133 -0
- data/settings/discovery_ports +24 -0
- data/settings/google_keywords.txt +9 -0
- data/settings/google_locator.txt +23 -0
- data/test/domain_tracker_test.rb +31 -0
- data/test/utils_test.rb +168 -0
- data/version.txt +13 -0
- data/wmap.gemspec +49 -0
- metadata +202 -0
|
@@ -0,0 +1,937 @@
|
|
|
1
|
+
#--
|
|
2
|
+
# Wmap
|
|
3
|
+
#
|
|
4
|
+
# A pure Ruby library for the Internet web application discovery and tracking.
|
|
5
|
+
#
|
|
6
|
+
# Copyright (c) 2012-2015 Yang Li <yang.li@owasp.org>
|
|
7
|
+
#++
|
|
8
|
+
require "parallel"
|
|
9
|
+
#require "singleton"
|
|
10
|
+
require "nokogiri"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Main class to automatically track the site inventory
|
|
14
|
+
class Wmap::SiteTracker
|
|
15
|
+
include Wmap::Utils
|
|
16
|
+
#include Singleton
|
|
17
|
+
|
|
18
|
+
attr_accessor :sites_file, :max_parallel, :verbose, :data_dir
|
|
19
|
+
attr_reader :known_sites
|
|
20
|
+
|
|
21
|
+
# Set default instance variables
|
|
22
|
+
# Build a tracker and load the site store from disk.
#
# params - options hash; recognised keys:
#   :data_dir     - directory holding the data files (default: ../../data/ relative to this file)
#   :sites_file   - path of the site store file (default: the 'sites' file under :data_dir)
#   :verbose      - print progress messages when true (default: false)
#   :max_parallel - default worker count used by bulk_add / bulk_refresh (default: 30)
def initialize (params = {})
  @data_dir=params.fetch(:data_dir, File.dirname(__FILE__)+'/../../data/')
  # File.join tolerates a :data_dir passed without a trailing separator
  @file_sites=File.join(@data_dir, 'sites')
  @file_stores=params.fetch(:sites_file, @file_sites)
  # Back the public sites_file reader, which previously always returned nil
  @sites_file=@file_stores
  @verbose=params.fetch(:verbose, false)
  @max_parallel=params.fetch(:max_parallel, 30)
  # Create an empty store file on first use so the load below has something to open
  File.write(@file_stores, "") unless File.exist?(@file_stores)
  # Hash table to hold the site store (nil when the file could not be loaded)
  @known_sites=load_site_stores_from_file(@file_stores)
end
|
|
33
|
+
|
|
34
|
+
# Load the known sites from the site store file and return them as a hash table
|
|
35
|
+
# Read the site store file into a hash keyed by the lower-cased site URL.
#
# Blank lines and lines starting with '#' are ignored. Every other line is
# split on tabs or commas into up to nine fields: site, ip, port, status,
# server, response code, md5, redirection, timestamp. Missing trailing fields
# are stored as nil (the response code as 0).
#
# file - path of the site store file
#
# Returns the hash, or nil when the file cannot be read.
def load_site_stores_from_file (file)
  puts "Loading the site store data repository from file: #{file} " if @verbose
  begin
    known_sites=Hash.new
    # File.foreach closes the handle even if a line raises
    File.foreach(file) do |line|
      line=line.strip
      next if line.empty? || line.start_with?('#')
      entry=line.split(%r{\t+|\,})
      site=entry[0]
      # A line made only of separators splits to nothing; skip it rather than abort the whole load
      next if site.nil? || site.empty?
      site=site.downcase
      puts "Loading entry: #{site} - #{entry[1]} - #{entry[3]}" if @verbose
      # A later line for the same site replaces the earlier one
      known_sites[site]={
        'ip'=>entry[1],
        'port'=>entry[2],
        'status'=>entry[3],
        'server'=>entry[4],
        'code'=>entry[5].to_i,
        'md5'=>entry[6],
        'redirection'=>entry[7],
        'timestamp'=>entry[8]
      }
    end
    puts "Successfully loading file: #{file}" if @verbose
    return known_sites
  rescue => ee
    puts "Exception on method #{__method__} for file #{file}: #{ee}"
    return nil
  end
end
|
|
73
|
+
|
|
74
|
+
# Save the current site store hash table into a file
|
|
75
|
+
# Write the in-memory site store to a file, one comma-separated line per site,
# sorted by site URL and preceded by two '#' header lines.
#
# file_sites - destination path (defaults to the store file this tracker was loaded from)
#
# Returns nil. Errors are reported on stdout rather than raised.
def save_sites_to_file!(file_sites=@file_stores)
  puts "Saving the current site store table from memory to file: #{file_sites}"
  begin
    timestamp=Time.now
    # Block form guarantees the handle is closed even when a write fails
    File.open(file_sites, 'w') do |f|
      f.write "# Local site store created by class #{self.class} method #{__method__} at: #{timestamp}\n"
      f.write "# Website,Primary IP,Port,Hosting Status,Server,Response Code,MD5 Finger-print,Redirection,Timestamp\n"
      @known_sites.keys.sort.each do |key|
        info=@known_sites[key]
        f.write "#{key},#{info['ip']},#{info['port']},#{info['status']},#{info['server']},#{info['code']},#{info['md5']},#{info['redirection']},#{info['timestamp']}\n"
      end
    end
    puts "site store table is successfully saved: #{file_sites}"
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}"
  end
end
|
|
91
|
+
alias_method :save!, :save_sites_to_file!
|
|
92
|
+
|
|
93
|
+
# Count numbers of entries in the site store table
|
|
94
|
+
# Report how many sites the in-memory store currently holds.
#
# Returns the entry count, or nil (after printing the error) when the store
# cannot be measured.
def count
  puts "Counting number of entries in the site store table ..."
  begin
    total=@known_sites.length
    return total
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}"
  end
end
|
|
102
|
+
|
|
103
|
+
# Setter to add site entry to the cache one at a time
|
|
104
|
+
# Add one site to the site store after validating, trust-checking and probing it.
#
# The site is accepted only when its IP is trusted by Wmap::CidrTracker (site
# given by IP) or its domain root is known to Wmap::DomainTracker (site given
# by host name), and Wmap::UrlChecker returns a result for it. A site given by
# IP is rewritten to a host name when one is found; the local hosts table is
# then brought in line with the resolved IP.
#
# site - site or URL string
#
# Returns the Wmap::UrlChecker result stored for the site, or nil when the
# site is untrusted or any check raises (already known, down, unresolvable...).
def add(site)
  puts "Add entry to the site store: #{site}"
  begin
    # Preliminary sanity check
    site=site.strip.downcase unless site.nil?
    raise "Site is already exist. Skip #{site}" if site_known?(site)
    site=normalize_url(site) if is_url?(site)
    site=url_2_site(site) if is_url?(site)
    puts "Site in standard format: #{site}" if @verbose
    raise "Exception on method #{__method__}: invalid site format of #{site}. Expected format is: http://your_website_name/" unless is_site?(site)
    host=url_2_host(site)
    ip=host_2_ip(host)
    # Additional logic to refresh deactivated site, 02/12/2014
    deact=Wmap::SiteTracker::DeactivatedSite.new(:data_dir=>@data_dir)
    # Remember how the site was given: host may be replaced by a host name below
    ip_site=is_ip?(host)
    # only trust either the domain or IP we know
    if ip_site
      trusted=Wmap::CidrTracker.new(:data_dir=>@data_dir).ip_trusted?(ip)
    else
      root=get_domain_root(host)
      raise "Invalid web site format. Please check your record again." if root.nil?
      trusted=Wmap::DomainTracker.new(:data_dir=>@data_dir).domain_known?(root)
    end
    # add record only if trusted
    unless trusted
      puts "Problem found: untrusted Internet domain or IP. Skip #{site}"
      return nil
    end
    # Check the site status before adding it
    checker=Wmap::UrlChecker.new(:data_dir=>@data_dir).check(site)
    raise "Site is currently down. Skip #{site}" if checker.nil?
    # Skip the http site if it's un-responsive; for the https we'll keep it because we're interested in analysing the SSL layer later
    raise "Site is currently down. Skip #{site}" if !is_https?(site) && checker['code']==10000
    raise "Exception on add method - Fail to resolve the host-name: Host - #{host}, IP - #{ip}. Skip #{site}" unless is_ip?(ip)
    my_tracker = Wmap::HostTracker.new(:data_dir=>@data_dir)
    if ip_site
      # Case #1: Trusted site contains IP
      if my_tracker.ip_known?(host)
        # Try local reverse DNS lookup first
        puts "Local hosts table lookup for IP: #{ip}" if @verbose
        host=my_tracker.local_ip_2_host(host)
        puts "Host found from the local hosts table for #{ip}: #{host}" if @verbose
        site.sub!(/\d+\.\d+\.\d+\.\d+/,host)
      else
        # Try reverse DNS lookup over Internet as secondary precaution
        puts "Reverse DNS lookup for IP: #{ip}" if @verbose
        host1=ip_2_host(host)
        puts "host1: #{host1}" if @verbose
        # NOTE(review): domain_known? is called on HostTracker here while the trust check above uses DomainTracker - confirm which is intended
        if is_fqdn?(host1) && Wmap::HostTracker.new(:data_dir=>@data_dir).domain_known?(host1)
          # replace IP with host-name only if domain root is known
          puts "Host found from the Internet reverse DNS lookup for #{ip}: #{host1}" if @verbose
          host=host1
          site.sub!(/\d+\.\d+\.\d+\.\d+/,host)
        end
      end
      # The rewritten site may collide with an entry that is already stored
      raise "Site already exist! Skip #{site}" if @known_sites.key?(site)
    end
    # Case #2 (trusted site contains a valid FQDN) joins the common path here
    puts "Adding site: #{site}" if @verbose
    @known_sites[site]=checker
    # A site that comes back is no longer deactivated
    if deact.site_known?(site)
      deact.delete(site)
      deact.save!
    end
    puts "Site entry loaded: #{checker}"
    # An IP site only touches the hosts table when a host name was found for it
    update_local_host_entry(my_tracker, host, ip) if !ip_site || is_fqdn?(host)
    return checker
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}"
    return nil
  end
end

# Bring the local hosts table in line with the IP a host currently resolves to:
# refresh the entry when the stored IP differs, add it when the host is unknown.
def update_local_host_entry(my_tracker, host, ip)
  puts "Update local hosts table for host: #{host}"
  if my_tracker.host_known?(host)
    if my_tracker.local_host_2_ip(host) != ip
      # NOTE(review): the original called `efresh`, which is not defined in this file; `refresh` is assumed to be the intended Wmap::HostTracker method - confirm
      my_tracker.refresh(host)
      my_tracker.save!
    elsif @verbose
      puts "Host resolve to the same IP #{ip} - no need to update the local host table."
    end
  else
    my_tracker.add(host)
    my_tracker.save!
  end
end
private :update_local_host_entry
|
|
233
|
+
|
|
234
|
+
# Setter to add site entry to the cache table in batch (from a file)
|
|
235
|
+
# Add every site listed in a file to the site store.
#
# file - path of a file that file_2_list can turn into a list of sites
#
# Returns the hash produced by bulk_add (empty when the file lists nothing),
# or nil when the file is missing or reading it fails.
def file_add(file)
  puts "Add entries to the local site store from file: #{file}"
  begin
    unless File.exist?(file)
      raise "File non-exist. Please check your file path and name again: #{file}"
    end
    entries=file_2_list(file)
    nothing_listed=entries.nil? || entries.empty?
    added=nothing_listed ? Hash.new : bulk_add(entries)
    puts "Done loading file #{file}. "
    return added
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}"
  end
end
|
|
248
|
+
|
|
249
|
+
# Setter to add site entry to the cache in batch (from a list)
|
|
250
|
+
# Add a list of sites to the site store, running add once per site through
# Parallel.map with the :in_processes option.
#
# list - list of sites
# num  - value passed as :in_processes (defaults to @max_parallel)
#
# Returns a hash of the non-empty add results keyed by each result's 'url';
# those results are also merged into the in-memory store. Exceptions are not
# rescued here.
def bulk_add(list,num=@max_parallel)
  puts "Add entries to the local site store from list:\n #{list}"
  results=Hash.new
  if list.size > 0
    puts "Start parallel adding on the sites:\n #{list}"
    outcomes=Parallel.map(list, :in_processes => num) { |target| add(target) }
    outcomes.each do |entry|
      # add returns nil for a skipped site; an empty result carries nothing to store
      next if entry.nil? || entry.empty?
      results[entry['url']]=entry
    end
    @known_sites.merge!(results)
  else
    puts "Error: no entry is added. Please check your list and try again."
  end
  puts "Done adding site entries."
  summary=results.size>0 ? "New entries added: #{results}" : "No new entry added. "
  puts summary
  return results
end
|
|
283
|
+
alias_method :adds, :bulk_add
|
|
284
|
+
|
|
285
|
+
# Setter to remove entry from the site store one at a time
|
|
286
|
+
# Remove one site from the site store, first recording it (with its stored
# details) in the Wmap::SiteTracker::DeactivatedSite list and saving that list.
#
# site - site or URL string
#
# Returns the removed entry, or nil when the site is not in the store or an
# error occurs (the error is printed only in verbose mode).
def delete(site)
  puts "Remove entry from the site store: #{site} " if @verbose
  begin
    # Additional logic to deactivate the site properly, by moving it to the DeactivatedSite list, 02/07/2014
    deact=Wmap::SiteTracker::DeactivatedSite.new(:data_dir=>@data_dir)
    target=url_2_site(site.strip.downcase)
    unless @known_sites.key?(target)
      puts "Entry not fund. Skip #{target}"
      return nil
    end
    deact.add(target,@known_sites[target])
    deact.save!
    removed=@known_sites.delete(target)
    puts "Entry cleared: #{target}"
    return removed
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    nil
  end
end
|
|
311
|
+
alias_method :del, :delete
|
|
312
|
+
|
|
313
|
+
# Setter to delete site entries from the cache in batch (from a file)
|
|
314
|
+
# Delete every site listed in a file from the site store.
#
# file - path of a file that file_2_list can turn into a list of sites
#
# Returns whatever bulk_delete returns for the listed sites, an empty array
# when the file lists nothing, or nil when the file is missing or reading it
# fails (the error is printed only in verbose mode).
def file_delete(file)
  begin
    puts "Delete entries to the local site store from file: #{file}" if @verbose
    raise "File non-exist. Please check your file path and name again: #{file}" unless File.exist?(file)
    sites=file_2_list(file)
    changes=Array.new
    changes=bulk_delete(sites) unless sites.nil? or sites.empty?
    # Return the list explicitly: the modifier-unless above evaluates to nil
    # when nothing is listed, which used to leak out as the result
    return changes
  rescue => ee
    puts "Exception on method #{__method__}: #{ee} for file: #{file}" if @verbose
    return nil
  end
end
|
|
325
|
+
alias_method :file_del, :file_delete
|
|
326
|
+
|
|
327
|
+
# Setter to delete site entries from the cache in batch (from a list)
|
|
328
|
+
# Delete a list of sites from the site store, one delete call per site.
#
# list - list of sites or URLs
#
# Returns an array of the entries delete actually removed, or nil when the
# list is empty or an error occurs (the error is printed only in verbose mode).
def bulk_delete(list)
  puts "Delete entries to the local site store from list:\n #{list}" if @verbose
  begin
    if list.size > 0
      removed=list.each_with_object(Array.new) do |entry, acc|
        gone=delete(url_2_site(entry))
        acc.push(gone) unless gone.nil?
      end
      puts "Done deleting sites from the list:\n #{list}"
      return removed
    else
      puts "Error: no entry is loaded. Please check your list and try again."
    end
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
  end
end
|
|
348
|
+
alias_method :dels, :bulk_delete
|
|
349
|
+
|
|
350
|
+
# Setter to refresh the entry in the site store one at a time
|
|
351
|
+
# Refresh one stored site by deleting it and adding it again.
#
# site - site string; it must match a key of the store after strip/downcase
#
# Returns the result of add, or nil when the site is blank, not in the store,
# or an error occurs (the error is printed only in verbose mode).
def refresh(site)
  puts "Refresh the local site store for site: #{site} "
  begin
    raise "Invalid site: #{site}" if site.nil? || site.empty?
    target=site.strip.downcase
    unless @known_sites.key?(target)
      puts "Error entry non exist: #{target}"
      return nil
    end
    delete(target)
    fresh=add(target)
    puts "Done refresh entry: #{target}"
    return fresh
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end
|
|
370
|
+
|
|
371
|
+
# Refresh sites in the site store in batch (from a file)
|
|
372
|
+
# Refresh every site listed in a file.
#
# file - path of a file that file_2_list can turn into a list of sites
#
# Returns the hash produced by bulk_refresh (empty when the file lists
# nothing), or nil when an error occurs (printed only in verbose mode).
def file_refresh(file)
  puts "Refresh entries in the site store from file: #{file}" if @verbose
  begin
    sites=file_2_list(file)
    nothing_listed=sites.nil? || sites.empty?
    return nothing_listed ? Hash.new : bulk_refresh(sites)
  rescue => ee
    puts "Exception on method #{__method__}: #{ee} for file: #{file}" if @verbose
  end
end
|
|
383
|
+
|
|
384
|
+
# Refresh unique sites in the site store only
|
|
385
|
+
# Refresh only the sites returned by get_uniq_sites.
#
# Returns the hash produced by bulk_refresh, an empty hash when there are no
# unique sites, or nil when an error occurs (printed only in verbose mode).
def refresh_uniq_sites
  puts "Refresh unique site entries in the site store. " if @verbose
  begin
    uniques=get_uniq_sites
    return bulk_refresh(uniques) if uniques.size > 0
    puts "Error: no entry is refreshed. Please check your site store and try again."
    return Hash.new
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
  end
end
|
|
400
|
+
|
|
401
|
+
# Refresh sites in the site store in batch (from a list)
|
|
402
|
+
# Refresh a list of sites, running refresh once per site through Parallel.map
# with the :in_processes option.
#
# list - list of sites
# num  - value passed as :in_processes (defaults to @max_parallel)
#
# Every listed site is removed from the in-memory store and the non-empty
# refresh results are merged back in, keyed by each result's 'url'.
#
# Returns the hash of merged results, or nil when an error occurs (printed
# only in verbose mode).
def bulk_refresh(list,num=@max_parallel)
  puts "Refresh entries in the site store from list:\n #{list}" if @verbose
  begin
    results=Hash.new
    if list.size > 0
      puts "Start parallel refreshing on the sites:\n #{list}"
      outcomes=Parallel.map(list, :in_processes => num) { |target| refresh(target) }
      outcomes.each do |entry|
        # refresh returns nil for a site it could not refresh; an empty result carries nothing to store
        next if entry.nil? || entry.empty?
        results[entry['url']]=entry
      end
      # Clean up old entries, by Y.L. 03/30/2015
      list.each { |stale| @known_sites.delete(stale) }
      # Add back fresh entries
      @known_sites.merge!(results)
      puts "Done refresh sites."
    else
      puts "Error: no entry is loaded. Please check your list and try again."
    end
    return results
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
  end
end
|
|
433
|
+
alias_method :refreshs, :bulk_refresh
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
# Refresh all site entries in the stores in one shot
|
|
437
|
+
# Refresh every site currently held in the store.
#
# Returns the hash produced by bulk_refresh (also merged into the store), or
# nil when an error occurs (printed only in verbose mode).
def refresh_all
  puts "Refresh all the entries within the local site store ... "
  begin
    return bulk_refresh(@known_sites.keys).tap do |refreshed|
      @known_sites.merge!(refreshed)
      puts "Done refresh all entries."
    end
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
  end
end
|
|
449
|
+
|
|
450
|
+
# Refresh all site entries in the stores that contains an IP instead of a hostname
|
|
451
|
+
# Refresh the sites whose URL holds an IP address, leaving out those whose
# stored response code is 10000 or 20000.
#
# Returns the hash produced by bulk_refresh (also merged into the store), or
# nil when an error occurs (printed only in verbose mode).
def refresh_ip_sites
  puts "Refresh all entries that contain an IP address instead of a FQDN ... "
  begin
    skipped_codes=[10000, 20000]
    live_sites=get_ip_sites.reject { |url| skipped_codes.include?(@known_sites[url]['code']) }
    changes=bulk_refresh(live_sites)
    @known_sites.merge!(changes)
    puts "Done refresh IP sites."
    return changes
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
  end
end
|
|
465
|
+
|
|
466
|
+
# Quick validation if a site is already covered under the site store
|
|
467
|
+
# Tell whether a site is already a key of the site store.
#
# site - site or URL string (stripped, lower-cased and passed through url_2_site first)
#
# Returns true or false; false as well when the store is not loaded or the
# lookup raises (the error is printed).
def site_known?(site)
  begin
    raise "Web site store not loaded properly! " if @known_sites.nil?
    site=site.strip.downcase unless site.nil?
    site=url_2_site(site)
    found=site.nil? ? false : @known_sites.key?(site)
    return found
  rescue => ee
    puts "Error checking web site #{site} against the site store: #{ee}"
  end
  false
end
|
|
478
|
+
alias_method :is_known?, :site_known?
|
|
479
|
+
|
|
480
|
+
# Quick validation check on an IP is already part of the site store
|
|
481
|
+
# Tell whether any stored site has the given address as its 'ip' value.
#
# ip - IP address string
#
# Returns true or false; false as well when the argument is not an IP or the
# lookup raises (the error is printed).
def site_ip_known?(ip)
  begin
    addr=ip.chomp.strip
    return false unless is_ip?(addr)
    return @known_sites.values.any? { |info| info['ip']==addr }
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}"
    return false
  end
end
|
|
499
|
+
alias_method :siteip_known?, :site_ip_known?
|
|
500
|
+
|
|
501
|
+
# Quick check of the stored information of a site within the store
|
|
502
|
+
# Look up the stored details of one site.
#
# site - site or URL string (stripped, lower-cased and passed through url_2_site first)
#
# Returns the stored entry, or nil when the site is unknown, the store is not
# loaded, or the lookup raises (the error is printed).
def site_check(site)
  begin
    raise "Web site store not loaded properly! " if @known_sites.nil?
    normalized=url_2_site(site.nil? ? site : site.strip.downcase)
    return normalized.nil? ? nil : @known_sites[normalized]
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}"
    return nil
  end
end
|
|
513
|
+
alias_method :check, :site_check
|
|
514
|
+
|
|
515
|
+
# Retrieve external hosted sites into a list
|
|
516
|
+
# List the sites whose stored 'status' is "ext_hosted".
#
# Returns the sorted list of site URLs, or nil when an error occurs (printed
# only in verbose mode).
def get_ext_sites
  puts "getter to retrieve all the external hosted sites. " if @verbose
  begin
    hosted=@known_sites.keys.select { |url| @known_sites[url]['status']=="ext_hosted" }
    return hosted.sort
  rescue Exception => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end
|
|
532
|
+
alias_method :get_ext, :get_ext_sites
|
|
533
|
+
|
|
534
|
+
# Retrieve a list of internal hosted site URLs
|
|
535
|
+
# List the sites whose stored 'status' is "int_hosted".
#
# Returns the sorted list of site URLs, or nil when an error occurs (printed
# only in verbose mode).
def get_int_sites
  puts "getter to retrieve all the internal hosted sites." if @verbose
  begin
    hosted=@known_sites.keys.select { |url| @known_sites[url]['status']=="int_hosted" }
    return hosted.sort
  rescue Exception => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end
|
|
551
|
+
alias_method :get_int, :get_int_sites
|
|
552
|
+
|
|
553
|
+
# Retrieve a list of sites that contain an IP in the site URL
|
|
554
|
+
# List the sites whose URL host part is an IP address rather than a host name.
#
# Returns the sorted list of site URLs, or nil when an error occurs (printed
# only in verbose mode).
def get_ip_sites
  puts "Getter to retrieve sites contain an IP instead of a host-name ." if @verbose
  begin
    ip_sites=@known_sites.keys.select { |url| is_ip?(url_2_host(url)) }
    return ip_sites.sort
  rescue Exception => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end
|
|
571
|
+
|
|
572
|
+
# Retrieve a list of unique sites within the known site store
|
|
573
|
+
# List one site per unique IP:PORT pair.
#
# Sites whose stored code is 10000 or 20000 ('un-reachable'), whose md5 is
# blank ('empty'), or whose host cannot be resolved to an IP are left out.
# The IP comes from the local hosts table first and from host_2_ip otherwise.
# When several sites share an IP:PORT pair the first one in store order wins.
#
# Returns the list of site URLs, or nil when an error occurs (printed only in
# verbose mode).
def get_uniq_sites
  puts "Getter to retrieve unique sites containing unique IP:PORT key identifier." if @verbose
  begin
    sites=Hash.new
    my_tracker=Wmap::HostTracker.new(:data_dir=>@data_dir)
    @known_sites.each do |key, info|
      code=info['code']
      md5=info['md5']
      # Apply the cheap filters first so skipped sites never trigger a lookup
      # filtering out 'un-reachable' sites
      next if code == 10000 || code == 20000
      # filtering out 'empty' sites
      next if md5.nil? || md5.empty?
      host=url_2_host(key)
      # The original read `my_trakcer` here, which raised NameError on every call
      ip=my_tracker.local_host_2_ip(host)
      ip=host_2_ip(host) if ip.nil?
      next if ip.nil?
      id="#{ip}:#{url_2_port(key)}"
      # filtering out duplicates by 'IP:PORT' key pair
      sites[id]=key unless sites.key?(id)
    end
    return sites.values
  rescue => ee
    # StandardError only: an interrupt during the lookups should not be swallowed
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end
|
|
619
|
+
alias_method :uniq_sites, :get_uniq_sites
|
|
620
|
+
|
|
621
|
+
# Retrieve a list of https (SSL) sites from the site store
|
|
622
|
+
# List the sites whose URL uses the https scheme.
#
# Returns the sorted list of site URLs, or nil when an error occurs (printed
# only in verbose mode).
def get_ssl_sites
  puts "getter to retrieve https sites from the site store." if @verbose
  begin
    # The original evaluated the match but pushed every key regardless; the
    # pattern is anchored so a host that merely contains "https" does not qualify
    sites=@known_sites.keys.select { |key| key =~ /\Ahttps:\/\//i }
    return sites.sort
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end
|
|
637
|
+
|
|
638
|
+
# Retrieve a list of redirection URLs from the site store
|
|
639
|
+
# Collect the non-nil 'redirection' value of every stored site.
#
# Returns the sorted list of redirection URLs, or nil when an error occurs
# (printed only in verbose mode).
def get_redirection_urls
  puts "getter to retrieve all the redirection URLs from the site store." if @verbose
  begin
    targets=@known_sites.keys.map { |url| @known_sites[url]['redirection'] }
    return targets.compact.sort
  rescue Exception => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end
|
|
655
|
+
|
|
656
|
+
# Retrieve redirection URL if available
|
|
657
|
+
# Look up the redirection URL stored for a single site.
# The site argument is stripped and lower-cased before it is used as the key.
# Returns the stored 'redirection' value, or nil when the site is unknown
# or the lookup raises.
def get_redirection_url(site)
  puts "getter to retrieve the redirection URL from the site store." if @verbose
  begin
    lookup = site.strip.downcase
    unless @known_sites.key?(lookup)
      puts "Unknown site: #{lookup}" if @verbose
      return nil
    end
    @known_sites[lookup]['redirection']
  rescue Exception => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    nil
  end
end
|
|
672
|
+
|
|
673
|
+
# Perform local host table reverse lookup for the IP sites, in hope that the hostname could now be resolved since the site was discovered
|
|
674
|
+
# Try to resolve IP-only sites through the local host table.
# For every site returned by get_ip_sites, the IP is extracted with url_2_host
# and looked up via Wmap::HostTracker#local_ip_2_host. When a host-name is found
# the site is passed to refresh.
# Returns the sorted list of sites that were refreshed, or nil when an exception occurs.
def resolve_ip_sites
  puts "Resolve sites that contain an IP address. Update the site cache table once a hostname is found in the local host table." if @verbose
  begin
    ip_sites = get_ip_sites
    tracker = Wmap::HostTracker.new(:data_dir=>@data_dir)
    refreshed = []
    ip_sites.each do |site|
      puts "Work on resolve the IP site: #{site}" if @verbose
      ip = url_2_host(site)
      hostname = tracker.local_ip_2_host(ip)
      if hostname.nil?
        puts "Can't resolve #{ip} from the local host store. Skip #{site}" if @verbose
        next
      end
      puts "Host-name found for IP #{ip}: #{hostname}" if @verbose
      refreshed << site
      refresh(site)
    end
    refreshed.sort!
    puts "The following sites are now refreshed: #{refreshed}" if @verbose
    refreshed
  rescue Exception => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
  end
end
|
|
700
|
+
|
|
701
|
+
# Search potential matching sites from the site store by using a simple regular expression.
#
# The pattern is stripped of surrounding white space and matched
# case-insensitively against every key of @known_sites. The pattern itself is
# no longer lower-cased: doing so silently changed the meaning of escapes such
# as \A, \D, \S or \W, and added nothing because the match already ignores case.
#
# Returns the matching site keys in store order, or nil when the pattern is
# nil, is not a valid regular expression, or the lookup otherwise fails.
def search(pattern)
  puts "Search site store based on the regular expression: #{pattern}" if @verbose
  begin
    # Compile once instead of once per key
    matcher = Regexp.new(pattern.strip, Regexp::IGNORECASE)
    @known_sites.keys.select { |key| key =~ matcher }
  rescue StandardError => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    nil
  end
end
|
|
718
|
+
|
|
719
|
+
# Print summary report on all sites that contain an IP in the site URL
|
|
720
|
+
# Write a report of the sites returned by get_ip_sites to standard output,
# one site per line, framed by a header and a footer line.
def print_ip_sites
  puts "Print sites contain an IP instead of a host-name."
  get_ip_sites.each { |entry| puts entry }
  puts "End of report. "
end
|
|
726
|
+
|
|
727
|
+
# Retrieve and print specific information of a site in the site store
|
|
728
|
+
# Print one comma-separated line describing a site in the site store.
# Column order: site, ip, port, status, server, response code, md5, redirection, timestamp.
# The site argument is stripped first. An unknown site (or any other standard
# error) is reported on standard output instead of being raised.
def print_site(site)
  puts "Site Information Report for: #{site}" if @verbose
  begin
    site = site.nil? ? site : site.strip
    raise "Unknown site: #{site}" unless @known_sites.key?(site)
    record = @known_sites[site]
    # Note: the response code is printed ahead of the fingerprint and redirection
    columns = %w[ip port status server code md5 redirection timestamp].map { |field| record[field] }
    puts [site, *columns].map(&:to_s).join(",")
  rescue => ee
    puts "Exception on method #{__method__} for #{site}: #{ee}"
  end
end
|
|
746
|
+
alias_method :print, :print_site
|
|
747
|
+
|
|
748
|
+
|
|
749
|
+
# Print summary report of all sites URL in the site store
|
|
750
|
+
# Print every site key of the site store in sorted order, one per line,
# between a header and a footer line.
def print_all_sites
  puts "\nSummary Report of the site store:"
  @known_sites.keys.sort.each { |url| puts url }

  puts "End of the summary"
end
|
|
760
|
+
alias_method :print_all, :print_all_sites
|
|
761
|
+
|
|
762
|
+
# Retrieve and save unique sites information for the quarterly scan into a plain local file
|
|
763
|
+
# Save the primed unique sites into a plain comma-separated text file.
#
# The list comes from get_prim_uniq_sites; nil entries are skipped and each
# remaining entry is stripped before being looked up in @known_sites. The file
# starts with a title line and a column header, followed by one row per site:
# site, ip, port, server, hosting status, response code, md5, redirection, timestamp.
#
# The file is written inside a File.open block so that the handle is always
# closed, including when a site is missing from the store or the site list
# could not be retrieved (previously the handle was leaked in those cases).
#
# Returns true on success, false when any standard error occurs.
def save_uniq_sites(file)
  puts "Save unique sites information into a flat file: #{file}\nThis may take a long while as it go through a lengthy self correction check process, please be patient ..."
  begin
    prime_sites = get_prim_uniq_sites
    puts "Primary Sites: #{prime_sites}" if @verbose
    File.open(file, "w") do |f|
      f.write "Unique Sites Information Report\n"
      f.write "Site, IP, Port, Server, Hosting, Response Code, MD5, Redirect, Timestamps\n"
      prime_sites.each do |key|
        next if key.nil?
        site = key.strip
        raise "Unknown site: #{site}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\twadd #{site}\n" unless @known_sites.key?(site)
        record = @known_sites[site]
        # Server comes before the hosting status here, matching the header line above
        fields = record.values_at('ip', 'port', 'server', 'status', 'code', 'md5', 'redirection', 'timestamp')
        f.write "#{[site, *fields].map(&:to_s).join(',')}\n"
      end
    end
    puts "Done!"
    true # success
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}"
    false # fail
  end
end
|
|
793
|
+
alias_method :dump, :save_uniq_sites
|
|
794
|
+
|
|
795
|
+
# Retrieve and save unique sites information for the quarterly scan into a XML file
|
|
796
|
+
# Save the primed unique sites into an XML file.
#
# The list comes from get_prim_uniq_sites; nil entries are skipped and each
# remaining entry is stripped before being looked up in @known_sites. The
# document is built with Nokogiri::XML::Builder as root > websites > site, with
# one child element per stored attribute of the site.
#
# The document is serialised once and written inside a File.open block, so the
# handle is closed even if the write fails. Because the document is built
# before the file is opened, an unknown site leaves the target file untouched.
#
# Returns true on success, false when any standard error occurs.
def save_uniq_sites_xml(file)
  puts "Save unique sites information into XML file: #{file}\nThis may take a long while as it go through lengthy self correctness check, please be patient ..."
  begin
    prime_sites = get_prim_uniq_sites
    builder = Nokogiri::XML::Builder.new do |xml|
      xml.root {
        xml.websites {
          prime_sites.each do |key|
            next if key.nil?
            site = key.strip
            raise "Unknown site: #{site}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\twmap #{site}\n" unless @known_sites.key?(site)
            record = @known_sites[site]
            xml.site {
              xml.name site
              xml.ip_ record['ip']
              xml.port_ record['port']
              xml.status_ record['status']
              xml.server_ record['server']
              xml.fingerprint_ record['md5']
              xml.redirection_ record['redirection']
              xml.responsecode_ record['code']
              xml.timestamp_ record['timestamp']
            }
          end
        }
      }
    end
    document = builder.to_xml
    puts document if @verbose
    File.open(file, 'w') { |f| f.write(document) }
    puts "Done!"
    true
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}"
    false
  end
end
|
|
833
|
+
alias_method :dump_xml, :save_uniq_sites_xml
|
|
834
|
+
|
|
835
|
+
# Retrieve the unique sites from the local site store in the primary host format
|
|
836
|
+
# Return the unique sites from get_uniq_sites, swapping a site for its
# "primary host" form where that applies.
#
# A site is kept unchanged when its host is an IP address, or when the host
# tracker's alias table holds 1 or nothing for the site's stored IP.
# Otherwise, if the primary host tracker knows the IP and more than one alias
# exists, the host is primed; the primed site replaces the original only when
# it is present in @known_sites with the same stored IP.
#
# Returns the resulting array. On a standard error (including a primed site
# that is missing from the store) the error is printed and nil is returned.
def get_prim_uniq_sites
  puts "Retrieve and prime unique sites in the site store. " if @verbose
  begin
    hosts = Wmap::HostTracker.new(:data_dir=>@data_dir)
    primaries = Wmap::HostTracker::PrimaryHost.new(:data_dir=>@data_dir)
    get_uniq_sites.map do |candidate|
      puts "Work on priming unique site: #{candidate}" if @verbose
      host = url_2_host(candidate)
      # IP-only sites are passed through untouched
      next candidate if is_ip?(host)
      ip = @known_sites[candidate]['ip']
      # NOTE(review): assumes 'alias' is a plain IP => count lookup table, so reading it once is equivalent - confirm
      entries = hosts.alias[ip]
      puts "Local hosts table entry count for #{ip}: #{entries}" if @verbose
      # A single alias, or no alias record at all, means there is nothing to prime
      next candidate if entries == 1 || entries.nil?
      chosen = candidate
      if primaries.known_hosts.key?(ip) && entries > 1
        preferred = primaries.prime(host)
        puts "Host: #{host}, New host:#{preferred}" if @verbose
        unless host == preferred
          swapped = candidate.sub(host, preferred)
          raise "Site not found in the site tracking data repository: #{swapped}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\twadd #{swapped}\n" unless @known_sites.key?(swapped)
          # Consistency check: only switch when both entries carry the same stored IP
          chosen = swapped if @known_sites[swapped]['ip'] == ip
        end
      end
      chosen
    end
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}"
  end
end
|
|
891
|
+
alias_method :get_prime, :get_prim_uniq_sites
|
|
892
|
+
|
|
893
|
+
# Print summary report of external hosted site URLs in the site store
|
|
894
|
+
# Print the sites returned by get_ext_sites, one per line, under a report header.
# Always returns nil.
def print_ext_sites
  puts "\nSummary Report of the External Hosted Site"
  get_ext_sites.each { |entry| puts entry }
  nil
end
|
|
902
|
+
alias_method :print_ext, :print_ext_sites
|
|
903
|
+
|
|
904
|
+
# Print summary report of internal hosted site URLs
|
|
905
|
+
# Print the sites returned by get_int_sites, one per line, under a report header.
# Always returns nil.
def print_int_sites
  puts "\nSummary Report of the Internal Hosted Site"
  get_int_sites.each { |entry| puts entry }
  nil
end
|
|
913
|
+
alias_method :print_int, :print_int_sites
|
|
914
|
+
|
|
915
|
+
# Print summary report of the HTTPS sites in the site store
|
|
916
|
+
# Print the sites returned by get_ssl_sites, one per line, under a report header.
# Always returns nil.
def print_ssl_sites
  puts "\nSummary Report of the HTTPS Sites from the Site Store"
  get_ssl_sites.each { |entry| puts entry }
  nil
end
|
|
924
|
+
|
|
925
|
+
# Print summary report of unique sites in the site store
|
|
926
|
+
# Print a report of the unique sites: a title, a column header, then one
# print_site line for each entry returned by get_uniq_sites.
# Returns the list that was iterated.
def print_uniq_sites
  puts "Summary Report for the Unique sites:"
  puts "Website,Primary IP,Port,Hosting Status,Server,Response Code,Site MD5 Finger-print,Site Redirection,Timestamp"
  get_uniq_sites.each { |entry| print_site(entry) }
end
|
|
934
|
+
|
|
935
|
+
private
|
|
936
|
+
|
|
937
|
+
end
|