wmap 2.5.5 → 2.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/{README.rdoc → README.md} +47 -33
- data/bin/wmap +46 -32
- data/lib/wmap/domain_tracker/sub_domain.rb +67 -76
- data/lib/wmap/domain_tracker.rb +176 -208
- data/lib/wmap/host_tracker/primary_host.rb +9 -9
- data/lib/wmap/host_tracker.rb +314 -361
- data/lib/wmap/site_tracker/deactivated_site.rb +3 -4
- data/lib/wmap/site_tracker.rb +586 -640
- data/lib/wmap/utils/url_magic.rb +1 -1
- data/version.txt +2 -2
- data/wmap.gemspec +2 -2
- metadata +4 -5
- data/logs/wmap.log +0 -17
data/lib/wmap/site_tracker.rb
CHANGED
@@ -15,204 +15,176 @@ class Wmap::SiteTracker
|
|
15
15
|
include Wmap::Utils
|
16
16
|
include Singleton
|
17
17
|
|
18
|
-
attr_accessor :sites_file, :max_parallel, :verbose, :data_dir
|
19
|
-
attr_reader :known_sites
|
18
|
+
attr_accessor :sites_file, :max_parallel, :verbose, :data_dir, :known_sites
|
20
19
|
|
21
20
|
# Set default instance variables
|
22
21
|
def initialize (params = {})
|
23
22
|
# Initialize the instance variables
|
24
23
|
@data_dir=params.fetch(:data_dir, File.dirname(__FILE__)+'/../../data/')
|
25
24
|
Dir.mkdir(@data_dir) unless Dir.exist?(@data_dir)
|
26
|
-
@
|
27
|
-
@file_stores=params.fetch(:sites_file, @file_sites)
|
25
|
+
@sites_file=params.fetch(:sites_file, @data_dir+'sites')
|
28
26
|
@verbose=params.fetch(:verbose, false)
|
29
27
|
@max_parallel=params.fetch(:max_parallel, 30)
|
28
|
+
File.write(@sites_file, "") unless File.exist?(@sites_file)
|
30
29
|
# Hash table to hold the site store
|
31
|
-
|
32
|
-
@known_sites=load_site_stores_from_file(@file_stores)
|
30
|
+
load_site_stores_from_file(@sites_file)
|
33
31
|
end
|
34
32
|
|
35
33
|
# Setter to load the known hosts into an instance variable
|
36
|
-
def load_site_stores_from_file (file)
|
34
|
+
def load_site_stores_from_file (file=@sites_file)
|
37
35
|
puts "Loading the site store data repository from file: #{file} " if @verbose
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
puts "Exception on method #{__method__} for file #{file}: #{ee}"
|
72
|
-
end
|
36
|
+
@known_sites=Hash.new
|
37
|
+
f=File.open(file, 'r')
|
38
|
+
f.each do |line|
|
39
|
+
line=line.chomp.strip
|
40
|
+
next if line.nil?
|
41
|
+
next if line.empty?
|
42
|
+
next if line =~ /^\s*#/
|
43
|
+
entry=line.split(%r{\t+|\,})
|
44
|
+
site=entry[0].downcase
|
45
|
+
ip=entry[1]
|
46
|
+
port=entry[2]
|
47
|
+
status=entry[3]
|
48
|
+
server=entry[4]
|
49
|
+
res=entry[5].to_i
|
50
|
+
fp=entry[6]
|
51
|
+
loc=entry[7]
|
52
|
+
timestamp=entry[8]
|
53
|
+
puts "Loading entry: #{site} - #{ip} - #{status}" if @verbose
|
54
|
+
@known_sites[site]= Hash.new unless @known_sites.key?(site)
|
55
|
+
@known_sites[site]['ip']=ip
|
56
|
+
@known_sites[site]['port']=port
|
57
|
+
@known_sites[site]['status']=status
|
58
|
+
@known_sites[site]['server']=server
|
59
|
+
@known_sites[site]['code']=res
|
60
|
+
@known_sites[site]['md5']=fp
|
61
|
+
@known_sites[site]['redirection']=loc
|
62
|
+
@known_sites[site]['timestamp']=timestamp
|
63
|
+
end
|
64
|
+
f.close
|
65
|
+
puts "Successfully loading file: #{file}" if @verbose
|
66
|
+
return @known_sites
|
67
|
+
rescue => ee
|
68
|
+
puts "Exception on method #{__method__} for file #{file}: #{ee}"
|
73
69
|
end
|
74
70
|
|
75
71
|
# Save the current site store hash table into a file
|
76
|
-
def save_sites_to_file!(file_sites=@
|
72
|
+
def save_sites_to_file!(file_sites=@sites_file)
|
77
73
|
puts "Saving the current site store table from memory to file: #{file_sites}"
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
puts "Exception on method #{__method__}: #{ee}"
|
90
|
-
end
|
74
|
+
timestamp=Time.now
|
75
|
+
f=File.open(file_sites, 'w')
|
76
|
+
f.write "# Local site store created by class #{self.class} method #{__method__} at: #{timestamp}\n"
|
77
|
+
f.write "# Website,Primary IP,Port,Hosting Status,Server,Response Code,MD5 Finger-print,Redirection,Timestamp\n"
|
78
|
+
@known_sites.keys.sort.map do |key|
|
79
|
+
f.write "#{key},#{@known_sites[key]['ip']},#{@known_sites[key]['port']},#{@known_sites[key]['status']},#{@known_sites[key]['server']},#{@known_sites[key]['code']},#{@known_sites[key]['md5']},#{@known_sites[key]['redirection']},#{@known_sites[key]['timestamp']}\n"
|
80
|
+
end
|
81
|
+
f.close
|
82
|
+
puts "site store table is successfully saved: #{file_sites}"
|
83
|
+
rescue => ee
|
84
|
+
puts "Exception on method #{__method__}: #{ee}"
|
91
85
|
end
|
92
86
|
alias_method :save!, :save_sites_to_file!
|
93
87
|
|
94
88
|
# Count numbers of entries in the site store table
|
95
89
|
def count
|
96
90
|
puts "Counting number of entries in the site store table ..."
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
puts "Exception on method #{__method__}: #{ee}"
|
101
|
-
end
|
91
|
+
return @known_sites.size
|
92
|
+
rescue => ee
|
93
|
+
puts "Exception on method #{__method__}: #{ee}"
|
102
94
|
end
|
103
95
|
|
104
96
|
# Setter to add site entry to the cache one at a time
|
105
97
|
def add(site)
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
98
|
+
puts "Add entry to the site store: #{site}"
|
99
|
+
# Preliminary sanity check
|
100
|
+
site=site.strip.downcase unless site.nil?
|
101
|
+
if site_known?(site)
|
102
|
+
puts "Site already exists. Skip it: #{site}"
|
103
|
+
return nil
|
104
|
+
end
|
105
|
+
site=normalize_url(site) if is_url?(site)
|
106
|
+
site=url_2_site(site) if is_url?(site)
|
107
|
+
puts "Site in standard format: #{site}" if @verbose
|
108
|
+
raise "Exception on method #{__method__}: invalid site format of #{site}. Expected format is: http://your_website_name/" unless is_site?(site)
|
109
|
+
trusted=false
|
110
|
+
host=url_2_host(site)
|
111
|
+
ip=host_2_ip(host)
|
112
|
+
# Additional logic to refresh deactivated site, 02/12/2014
|
113
|
+
deact=Wmap::SiteTracker::DeactivatedSite.instance
|
114
|
+
deact.sites_file=@data_dir + 'deactivated_sites'
|
115
|
+
File.write(deact.sites_file, "") unless File.exist?(deact.sites_file)
|
116
|
+
deact.load_site_stores_from_file
|
117
|
+
# only trust either the domain or IP we know
|
118
|
+
if is_ip?(host)
|
119
|
+
trusted=Wmap::CidrTracker.new(:data_dir=>@data_dir).ip_trusted?(ip)
|
120
|
+
else
|
121
|
+
root=get_domain_root(host)
|
122
|
+
if root.nil?
|
123
|
+
raise "Invalid web site format. Please check your record again."
|
124
|
+
else
|
125
|
+
domain_tracker=Wmap::DomainTracker.instance
|
126
|
+
domain_tracker.domains_file=@data_dir+'domains'
|
127
|
+
File.write(domain_tracker.domains_file, "") unless File.exist?(domain_tracker.domains_file)
|
128
|
+
domain_tracker.load_domains_from_file
|
129
|
+
trusted=domain_tracker.domain_known?(root)
|
130
|
+
domain_tracker=nil
|
113
131
|
end
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
deact.data_dir=@data_dir
|
124
|
-
# only trust either the domain or IP we know
|
125
|
-
if is_ip?(host)
|
126
|
-
trusted=Wmap::CidrTracker.new(:data_dir=>@data_dir).ip_trusted?(ip)
|
132
|
+
end
|
133
|
+
# add record only if trusted
|
134
|
+
if trusted
|
135
|
+
# Add logic to check site status before adding it
|
136
|
+
checker=Wmap::UrlChecker.new(:data_dir=>@data_dir).check(site)
|
137
|
+
raise "Site is currently down. Skip #{site}" if checker.nil?
|
138
|
+
# Skip the http site if it's un-responsive; for the https we'll keep it because we're interested in analysing the SSL layer later
|
139
|
+
if is_https?(site)
|
140
|
+
# do nothing
|
127
141
|
else
|
128
|
-
|
129
|
-
if root.nil?
|
130
|
-
raise "Invalid web site format. Please check your record again."
|
131
|
-
else
|
132
|
-
domain_tracker=Wmap::DomainTracker.instance
|
133
|
-
domain_tracker.data_dir=@data_dir
|
134
|
-
trusted=domain_tracker.domain_known?(root)
|
135
|
-
domain_tracker=nil
|
136
|
-
end
|
142
|
+
raise "Site is currently down. Skip #{site}" if checker['code']==10000
|
137
143
|
end
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
#
|
144
|
-
if
|
145
|
-
#
|
144
|
+
raise "Exception on add method - Fail to resolve the host-name: Host - #{host}, IP - #{ip}. Skip #{site}" unless is_ip?(ip)
|
145
|
+
my_tracker = Wmap::HostTracker.instance
|
146
|
+
my_tracker.data_dir=@data_dir
|
147
|
+
# Update the local host table when necessary
|
148
|
+
if is_ip?(host)
|
149
|
+
# Case #1: Trusted site contains IP
|
150
|
+
if my_tracker.ip_known?(host)
|
151
|
+
# Try local reverse DNS lookup first
|
152
|
+
puts "Local hosts table lookup for IP: #{ip}" if @verbose
|
153
|
+
host=my_tracker.local_ip_2_host(host)
|
154
|
+
puts "Host found from the local hosts table for #{ip}: #{host}" if @verbose
|
155
|
+
site.sub!(/\d+\.\d+\.\d+\.\d+/,host)
|
146
156
|
else
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
else
|
162
|
-
# Try reverse DNS lookup over Internet as secondary precaution
|
163
|
-
puts "Reverse DNS lookup for IP: #{ip}" if @verbose
|
164
|
-
host1=ip_2_host(host)
|
165
|
-
puts "host1: #{host1}" if @verbose
|
166
|
-
if is_fqdn?(host1)
|
167
|
-
host_tracker=Wmap::HostTracker.instance
|
168
|
-
host_tracker.data_dir=@data_dir
|
169
|
-
if host_tracker.domain_known?(host1)
|
170
|
-
# replace IP with host-name only if domain root is known
|
171
|
-
puts "Host found from the Internet reverse DNS lookup for #{ip}: #{host1}" if @verbose
|
172
|
-
host=host1
|
173
|
-
site.sub!(/\d+\.\d+\.\d+\.\d+/,host)
|
174
|
-
end
|
175
|
-
host_tracker=nil
|
176
|
-
end
|
177
|
-
end
|
178
|
-
# Adding site for Case #1
|
179
|
-
raise "Site already exist! Skip #{site}" if @known_sites.key?(site)
|
180
|
-
puts "Adding site: #{site}" if @verbose
|
181
|
-
@known_sites[site]=Hash.new
|
182
|
-
@known_sites[site]=checker
|
183
|
-
if deact.site_known?(site)
|
184
|
-
deact.delete(site)
|
185
|
-
deact.save!
|
186
|
-
end
|
187
|
-
puts "Site entry loaded: #{checker}"
|
188
|
-
if is_fqdn?(host)
|
189
|
-
# Add logic to update the hosts table for case #1 variance
|
190
|
-
# - case that reverse DNS lookup successful
|
191
|
-
puts "Update local hosts table for host: #{host}"
|
192
|
-
if my_tracker.host_known?(host)
|
193
|
-
old_ip=my_tracker.local_host_2_ip(host)
|
194
|
-
if old_ip != ip
|
195
|
-
my_tracker.refresh(host)
|
196
|
-
my_tracker.save!
|
197
|
-
else
|
198
|
-
puts "Host resolve to the same IP #{ip} - no need to update the local host table." if @verbose
|
199
|
-
end
|
200
|
-
else
|
201
|
-
my_tracker.add(host)
|
202
|
-
my_tracker.save!
|
157
|
+
# Try reverse DNS lookup over Internet as secondary precaution
|
158
|
+
puts "Reverse DNS lookup for IP: #{ip}" if @verbose
|
159
|
+
host1=ip_2_host(host)
|
160
|
+
puts "host1: #{host1}" if @verbose
|
161
|
+
if is_fqdn?(host1)
|
162
|
+
host_tracker=Wmap::HostTracker.instance
|
163
|
+
host_tracker.data_dir=@data_dir
|
164
|
+
host_tracker.hosts_file=host_tracker.data_dir + "hosts"
|
165
|
+
host_tracker.load_known_hosts_from_file
|
166
|
+
if host_tracker.domain_known?(host1)
|
167
|
+
# replace IP with host-name only if domain root is known
|
168
|
+
puts "Host found from the Internet reverse DNS lookup for #{ip}: #{host1}" if @verbose
|
169
|
+
host=host1
|
170
|
+
site.sub!(/\d+\.\d+\.\d+\.\d+/,host)
|
203
171
|
end
|
172
|
+
host_tracker=nil
|
204
173
|
end
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
174
|
+
end
|
175
|
+
# Adding site for Case #1
|
176
|
+
raise "Site already exist! Skip #{site}" if @known_sites.key?(site)
|
177
|
+
puts "Adding site: #{site}" if @verbose
|
178
|
+
@known_sites[site]=Hash.new
|
179
|
+
@known_sites[site]=checker
|
180
|
+
if deact.site_known?(site)
|
181
|
+
deact.delete(site)
|
182
|
+
deact.save!
|
183
|
+
end
|
184
|
+
puts "Site entry loaded: #{checker}"
|
185
|
+
if is_fqdn?(host)
|
186
|
+
# Add logic to update the hosts table for case #1 variance
|
187
|
+
# - case that reverse DNS lookup successful
|
216
188
|
puts "Update local hosts table for host: #{host}"
|
217
189
|
if my_tracker.host_known?(host)
|
218
190
|
old_ip=my_tracker.local_host_2_ip(host)
|
@@ -220,232 +192,239 @@ class Wmap::SiteTracker
|
|
220
192
|
my_tracker.refresh(host)
|
221
193
|
my_tracker.save!
|
222
194
|
else
|
223
|
-
#
|
195
|
+
puts "Host resolve to the same IP #{ip} - no need to update the local host table." if @verbose
|
224
196
|
end
|
225
197
|
else
|
226
198
|
my_tracker.add(host)
|
227
199
|
my_tracker.save!
|
228
200
|
end
|
229
201
|
end
|
230
|
-
deact=nil
|
231
|
-
my_tracker=nil
|
232
|
-
host_tracker=nil
|
233
|
-
return checker
|
234
202
|
else
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
203
|
+
# Case #2: Trusted site contains valid FQDN
|
204
|
+
puts "Ading site: #{site}" if @verbose
|
205
|
+
@known_sites[site]=Hash.new
|
206
|
+
@known_sites[site]=checker
|
207
|
+
if deact.site_known?(site)
|
208
|
+
deact.delete(site)
|
209
|
+
deact.save!
|
210
|
+
end
|
211
|
+
puts "Site entry loaded: #{checker}"
|
212
|
+
# Add logic to update the hosts table for case #2
|
213
|
+
puts "Update local hosts table for host: #{host}"
|
214
|
+
if my_tracker.host_known?(host)
|
215
|
+
old_ip=my_tracker.local_host_2_ip(host)
|
216
|
+
if old_ip != ip
|
217
|
+
my_tracker.refresh(host)
|
218
|
+
my_tracker.save!
|
219
|
+
else
|
220
|
+
# Skip - no need to update the local hosts table
|
221
|
+
end
|
222
|
+
else
|
223
|
+
my_tracker.add(host)
|
224
|
+
my_tracker.save!
|
225
|
+
end
|
240
226
|
end
|
241
|
-
rescue => ee
|
242
|
-
puts "Exception on method #{__method__}: #{ee}"
|
243
|
-
checker=nil
|
244
227
|
deact=nil
|
228
|
+
my_tracker=nil
|
229
|
+
host_tracker=nil
|
230
|
+
return site
|
231
|
+
else
|
232
|
+
puts "Problem found: untrusted Internet domain or IP. Skip #{site}"
|
233
|
+
deact=nil
|
234
|
+
my_tracker=nil
|
245
235
|
host_tracker=nil
|
246
236
|
return nil
|
247
237
|
end
|
238
|
+
#rescue => ee
|
239
|
+
# puts "Exception on method #{__method__}: #{ee}"
|
240
|
+
# checker=nil
|
241
|
+
# deact=nil
|
242
|
+
# host_tracker=nil
|
243
|
+
# return nil
|
248
244
|
end
|
249
245
|
|
250
246
|
# Setter to add site entry to the cache table in batch (from a file)
|
251
247
|
def file_add(file)
|
252
248
|
puts "Add entries to the local site store from file: #{file}"
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
puts "Exception on method #{__method__}: #{ee}"
|
262
|
-
end
|
249
|
+
raise "File non-exist. Please check your file path and name again: #{file}" unless File.exist?(file)
|
250
|
+
changes=Hash.new
|
251
|
+
sites=file_2_list(file)
|
252
|
+
changes=bulk_add(sites) unless sites.nil? or sites.empty?
|
253
|
+
puts "Done loading file #{file}. "
|
254
|
+
return changes
|
255
|
+
rescue => ee
|
256
|
+
puts "Exception on method #{__method__}: #{ee}"
|
263
257
|
end
|
264
258
|
|
265
259
|
# Setter to add site entry to the cache in batch (from a list)
|
266
260
|
def bulk_add(list,num=@max_parallel)
|
267
261
|
puts "Add entries to the local site store from list:\n #{list}"
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
results[process['url']]=process
|
282
|
-
end
|
262
|
+
results=Hash.new
|
263
|
+
if list.size > 0
|
264
|
+
puts "Start parallel adding on the sites:\n #{list}"
|
265
|
+
Parallel.map(list, :in_processes => num) { |target|
|
266
|
+
add(target)
|
267
|
+
}.each do |process|
|
268
|
+
if process.nil?
|
269
|
+
next
|
270
|
+
elsif process.empty?
|
271
|
+
next #do nothing
|
272
|
+
else
|
273
|
+
results[process['url']]=Hash.new
|
274
|
+
results[process['url']]=process
|
283
275
|
end
|
284
|
-
@known_sites.merge!(results)
|
285
|
-
else
|
286
|
-
puts "Error: no entry is added. Please check your list and try again."
|
287
276
|
end
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
277
|
+
@known_sites.merge!(results)
|
278
|
+
else
|
279
|
+
puts "Error: no entry is added. Please check your list and try again."
|
280
|
+
end
|
281
|
+
puts "Done adding site entries."
|
282
|
+
if results.size>0
|
283
|
+
puts "New entries added: #{results}"
|
284
|
+
else
|
285
|
+
puts "No new entry added. "
|
286
|
+
end
|
287
|
+
return results
|
288
|
+
#rescue => ee
|
289
|
+
#puts "Exception on method #{__method__}: #{ee}" if @verbose
|
298
290
|
end
|
299
291
|
alias_method :adds, :bulk_add
|
300
292
|
|
301
293
|
# Setter to remove entry from the site store one at a time
|
302
294
|
def delete(site)
|
303
295
|
puts "Remove entry from the site store: #{site} " if @verbose
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
deact=nil
|
315
|
-
del=@known_sites.delete(site)
|
316
|
-
puts "Entry cleared: #{site}"
|
317
|
-
return del
|
318
|
-
else
|
319
|
-
puts "Entry not fund. Skip #{site}"
|
320
|
-
deact=nil
|
321
|
-
return nil
|
322
|
-
end
|
323
|
-
rescue => ee
|
324
|
-
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
296
|
+
# Additional logic to deactivate the site properly, by moving it to the DeactivatedSite list, 02/07/2014
|
297
|
+
deact=Wmap::SiteTracker::DeactivatedSite.instance
|
298
|
+
deact.sites_file=@data_dir + 'deactivated_sites'
|
299
|
+
File.write(deact.sites_file, "") unless File.exist?(deact.sites_file)
|
300
|
+
site=site.strip.downcase
|
301
|
+
site=url_2_site(site)
|
302
|
+
if @known_sites.key?(site)
|
303
|
+
site_info=@known_sites[site]
|
304
|
+
deact.add(site,site_info)
|
305
|
+
deact.save!
|
325
306
|
deact=nil
|
307
|
+
del=@known_sites.delete(site)
|
308
|
+
puts "Entry cleared: #{site}"
|
309
|
+
return del
|
310
|
+
else
|
311
|
+
puts "Entry not fund. Skip #{site}"
|
312
|
+
deact=nil
|
313
|
+
return nil
|
326
314
|
end
|
315
|
+
rescue => ee
|
316
|
+
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
317
|
+
deact=nil
|
327
318
|
end
|
328
319
|
alias_method :del, :delete
|
329
320
|
|
330
321
|
# Setter to delete site entry to the cache in batch (from a file)
|
331
322
|
def file_delete(file)
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
puts "Exception on method file_delete: #{ee} for file: #{file}" if @verbose
|
340
|
-
end
|
323
|
+
puts "Delete entries to the local site store from file: #{file}" if @verbose
|
324
|
+
raise "File non-exist. Please check your file path and name again: #{file}" unless File.exist?(file)
|
325
|
+
sites=file_2_list(file)
|
326
|
+
changes=Array.new
|
327
|
+
changes=bulk_delete(sites) unless sites.nil? or sites.empty?
|
328
|
+
rescue => ee
|
329
|
+
puts "Exception on method file_delete: #{ee} for file: #{file}" if @verbose
|
341
330
|
end
|
342
331
|
alias_method :file_del, :file_delete
|
343
332
|
|
344
333
|
# Setter to delete site entry to the cache in batch (from a list)
|
345
334
|
def bulk_delete(list)
|
346
335
|
puts "Delete entries to the local site store from list:\n #{list}" if @verbose
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
changes.push(site) unless site.nil?
|
355
|
-
end
|
356
|
-
puts "Done deleting sites from the list:\n #{list}"
|
357
|
-
return changes
|
358
|
-
else
|
359
|
-
puts "Error: no entry is loaded. Please check your list and try again."
|
336
|
+
sites=list
|
337
|
+
changes=Array.new
|
338
|
+
if sites.size > 0
|
339
|
+
sites.map do |x|
|
340
|
+
x=url_2_site(x)
|
341
|
+
site=delete(x)
|
342
|
+
changes.push(site) unless site.nil?
|
360
343
|
end
|
361
|
-
|
362
|
-
|
344
|
+
puts "Done deleting sites from the list:\n #{list}"
|
345
|
+
return changes
|
346
|
+
else
|
347
|
+
puts "Error: no entry is loaded. Please check your list and try again."
|
363
348
|
end
|
349
|
+
rescue => ee
|
350
|
+
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
364
351
|
end
|
365
352
|
alias_method :dels, :bulk_delete
|
366
353
|
|
367
354
|
# Setter to refresh the entry in the site store one at a time
|
368
355
|
def refresh(site)
|
369
356
|
puts "Refresh the local site store for site: #{site} "
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
puts "Error entry non exist: #{site}"
|
380
|
-
end
|
381
|
-
return nil
|
382
|
-
rescue => ee
|
383
|
-
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
384
|
-
return nil
|
357
|
+
raise "Invalid site: #{site}" if site.nil? or site.empty?
|
358
|
+
site=site.strip.downcase
|
359
|
+
if @known_sites.key?(site)
|
360
|
+
delete(site)
|
361
|
+
site_info=add(site)
|
362
|
+
puts "Done refresh entry: #{site}"
|
363
|
+
return site_info
|
364
|
+
else
|
365
|
+
puts "Error entry non exist: #{site}"
|
385
366
|
end
|
367
|
+
return nil
|
368
|
+
rescue => ee
|
369
|
+
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
370
|
+
return nil
|
386
371
|
end
|
387
372
|
|
388
373
|
# 'Refresh sites in the site store in batch (from a file)
|
389
374
|
def file_refresh(file)
|
390
375
|
puts "Refresh entries in the site store from file: #{file}" if @verbose
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
puts "Exception on method #{__method__}: #{ee} for file: #{file}" if @verbose
|
398
|
-
end
|
376
|
+
changes=Hash.new
|
377
|
+
sites=file_2_list(file)
|
378
|
+
changes=bulk_refresh(sites) unless sites.nil? or sites.empty?
|
379
|
+
return changes
|
380
|
+
rescue => ee
|
381
|
+
puts "Exception on method #{__method__}: #{ee} for file: #{file}" if @verbose
|
399
382
|
end
|
400
383
|
|
401
384
|
# 'Refresh unique sites in the site store only
|
402
385
|
def refresh_uniq_sites
|
403
386
|
puts "Refresh unique site entries in the site store. " if @verbose
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
puts "Error: no entry is refreshed. Please check your site store and try again."
|
411
|
-
end
|
412
|
-
return changes
|
413
|
-
rescue => ee
|
414
|
-
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
387
|
+
changes=Hash.new
|
388
|
+
sites=get_uniq_sites
|
389
|
+
if sites.size > 0
|
390
|
+
changes=bulk_refresh(sites)
|
391
|
+
else
|
392
|
+
puts "Error: no entry is refreshed. Please check your site store and try again."
|
415
393
|
end
|
394
|
+
return changes
|
395
|
+
rescue => ee
|
396
|
+
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
416
397
|
end
|
417
398
|
|
418
399
|
# 'Refresh sites in the site store in batch (from a list)
|
419
400
|
def bulk_refresh(list,num=@max_parallel)
|
420
401
|
puts "Refresh entries in the site store from list:\n #{list}" if @verbose
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
results[process['url']]=process
|
435
|
-
end
|
402
|
+
results=Hash.new
|
403
|
+
if list.size > 0
|
404
|
+
puts "Start parallel refreshing on the sites:\n #{list}"
|
405
|
+
Parallel.map(list, :in_processes => num) { |target|
|
406
|
+
refresh(target)
|
407
|
+
}.each do |process|
|
408
|
+
if process.nil?
|
409
|
+
next
|
410
|
+
elsif process.empty?
|
411
|
+
#do nothing
|
412
|
+
else
|
413
|
+
results[process['url']]=Hash.new
|
414
|
+
results[process['url']]=process
|
436
415
|
end
|
437
|
-
# Clean up old entries, by Y.L. 03/30/2015
|
438
|
-
list.map {|x| @known_sites.delete(x)}
|
439
|
-
# Add back fresh entries
|
440
|
-
@known_sites.merge!(results)
|
441
|
-
puts "Done refresh sites."
|
442
|
-
else
|
443
|
-
puts "Error: no entry is loaded. Please check your list and try again."
|
444
416
|
end
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
417
|
+
# Clean up old entries, by Y.L. 03/30/2015
|
418
|
+
list.map {|x| @known_sites.delete(x)}
|
419
|
+
# Add back fresh entries
|
420
|
+
@known_sites.merge!(results)
|
421
|
+
puts "Done refresh sites."
|
422
|
+
else
|
423
|
+
puts "Error: no entry is loaded. Please check your list and try again."
|
424
|
+
end
|
425
|
+
return results
|
426
|
+
rescue => ee
|
427
|
+
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
449
428
|
end
|
450
429
|
alias_method :refreshs, :bulk_refresh
|
451
430
|
|
@@ -453,286 +432,259 @@ class Wmap::SiteTracker
|
|
453
432
|
# Refresh all site entries in the stores in one shot
|
454
433
|
def refresh_all
|
455
434
|
puts "Refresh all the entries within the local site store ... "
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
464
|
-
end
|
435
|
+
changes=Hash.new
|
436
|
+
changes=bulk_refresh(@known_sites.keys)
|
437
|
+
@known_sites.merge!(changes)
|
438
|
+
puts "Done refresh all entries."
|
439
|
+
return changes
|
440
|
+
rescue => ee
|
441
|
+
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
465
442
|
end
|
466
443
|
|
467
444
|
# Refresh all site entries in the stores that contains an IP instead of a hostname
|
468
445
|
def refresh_ip_sites
|
469
446
|
puts "Refresh all entries that contain an IP address instead of a FQDN ... "
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
480
|
-
end
|
447
|
+
sites=get_ip_sites
|
448
|
+
live_sites=sites.delete_if { |x| @known_sites[x]['code'] == 10000 or @known_sites[x]['code'] == 20000 }
|
449
|
+
changes=Hash.new
|
450
|
+
changes=bulk_refresh(live_sites)
|
451
|
+
@known_sites.merge!(changes)
|
452
|
+
puts "Done refresh IP sites."
|
453
|
+
return changes
|
454
|
+
rescue => ee
|
455
|
+
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
481
456
|
end
|
482
457
|
|
483
458
|
# Quick validation if a site is already covered under the site store
|
484
459
|
def site_known?(site)
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
puts "Error checking web site #{site} against the site store: #{ee}"
|
492
|
-
end
|
460
|
+
raise "Web site store not loaded properly! " if @known_sites.nil?
|
461
|
+
site=site.strip.downcase unless site.nil?
|
462
|
+
site=url_2_site(site)
|
463
|
+
return @known_sites.key?(site) unless site.nil?
|
464
|
+
rescue => ee
|
465
|
+
puts "Error checking web site #{site} against the site store: #{ee}"
|
493
466
|
return false
|
494
467
|
end
|
495
468
|
alias_method :is_known?, :site_known?
|
496
469
|
|
497
470
|
# Quick validation check on an IP is already part of the site store
|
498
471
|
def site_ip_known?(ip)
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
@known_sites
|
504
|
-
|
505
|
-
return true
|
506
|
-
end
|
472
|
+
ip=ip.chomp.strip
|
473
|
+
known=false
|
474
|
+
if is_ip?(ip)
|
475
|
+
@known_sites.keys.map do |site|
|
476
|
+
if @known_sites[site]['ip']==ip
|
477
|
+
return true
|
507
478
|
end
|
508
479
|
end
|
509
|
-
myDis=nil
|
510
|
-
return known
|
511
|
-
rescue => ee
|
512
|
-
puts "Exception on method #{__method__}: #{ee}"
|
513
|
-
return false
|
514
480
|
end
|
481
|
+
myDis=nil
|
482
|
+
return known
|
483
|
+
rescue => ee
|
484
|
+
puts "Exception on method #{__method__}: #{ee}"
|
485
|
+
return false
|
515
486
|
end
|
516
487
|
alias_method :siteip_known?, :site_ip_known?
|
517
488
|
|
518
489
|
# Quick check of the stored information of a site within the store
|
519
490
|
def site_check(site)
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
return nil
|
528
|
-
end
|
491
|
+
raise "Web site store not loaded properly! " if @known_sites.nil?
|
492
|
+
site=site.strip.downcase unless site.nil?
|
493
|
+
site=url_2_site(site)
|
494
|
+
return @known_sites[site] unless site.nil?
|
495
|
+
rescue => ee
|
496
|
+
puts "Exception on method #{__method__}: #{ee}"
|
497
|
+
return nil
|
529
498
|
end
|
530
499
|
alias_method :check, :site_check
|
531
500
|
|
532
501
|
# Retrieve external hosted sites into a list
|
533
502
|
def get_ext_sites
|
534
503
|
puts "getter to retrieve all the external hosted sites. " if @verbose
|
535
|
-
|
536
|
-
|
537
|
-
@known_sites
|
538
|
-
|
539
|
-
sites.push(key)
|
540
|
-
end
|
504
|
+
sites=Array.new
|
505
|
+
@known_sites.keys.map do |key|
|
506
|
+
if @known_sites[key]['status']=="ext_hosted"
|
507
|
+
sites.push(key)
|
541
508
|
end
|
542
|
-
sites.sort!
|
543
|
-
return sites
|
544
|
-
rescue Exception => ee
|
545
|
-
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
546
|
-
return nil
|
547
509
|
end
|
510
|
+
sites.sort!
|
511
|
+
return sites
|
512
|
+
rescue Exception => ee
|
513
|
+
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
514
|
+
return nil
|
548
515
|
end
|
549
516
|
alias_method :get_ext, :get_ext_sites
|
550
517
|
|
551
518
|
# Retrieve a list of internal hosted site URLs
|
552
519
|
def get_int_sites
|
553
520
|
puts "getter to retrieve all the internal hosted sites." if @verbose
|
554
|
-
|
555
|
-
|
556
|
-
@known_sites
|
557
|
-
|
558
|
-
sites.push(key)
|
559
|
-
end
|
521
|
+
sites=Array.new
|
522
|
+
@known_sites.keys.map do |key|
|
523
|
+
if @known_sites[key]['status']=="int_hosted"
|
524
|
+
sites.push(key)
|
560
525
|
end
|
561
|
-
sites.sort!
|
562
|
-
return sites
|
563
|
-
rescue Exception => ee
|
564
|
-
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
565
|
-
return nil
|
566
526
|
end
|
527
|
+
sites.sort!
|
528
|
+
return sites
|
529
|
+
rescue Exception => ee
|
530
|
+
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
531
|
+
return nil
|
567
532
|
end
|
568
533
|
alias_method :get_int, :get_int_sites
|
569
534
|
|
570
535
|
# Retrieve a list of sites that contain an IP in the site URL
|
571
536
|
def get_ip_sites
|
572
537
|
puts "Getter to retrieve sites contain an IP instead of a host-name ." if @verbose
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
sites.push(key)
|
579
|
-
end
|
538
|
+
sites=Array.new
|
539
|
+
@known_sites.keys.map do |key|
|
540
|
+
host=url_2_host(key)
|
541
|
+
if is_ip?(host)
|
542
|
+
sites.push(key)
|
580
543
|
end
|
581
|
-
sites.sort!
|
582
|
-
return sites
|
583
|
-
rescue Exception => ee
|
584
|
-
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
585
|
-
return nil
|
586
544
|
end
|
545
|
+
sites.sort!
|
546
|
+
return sites
|
547
|
+
rescue Exception => ee
|
548
|
+
puts "Exception on method #{__method__}: #{ee}" if @verbose
|
549
|
+
return nil
|
587
550
|
end
|
588
551
|
|
589
552
|
# Retrieve a list of unique sites within the known site store
|
590
553
|
# Build the list of "unique" sites: one site URL per distinct IP:PORT pair.
#
# A site is skipped when its stored response code is 10000 or 20000 (the
# 'un-reachable' markers), when its MD5 finger-print is missing or empty, or
# when its host cannot be resolved to an IP. Hosts are resolved through the
# local host tracker first, with host_2_ip as the fall-back. The first site
# seen for a given IP:PORT pair wins. (A further de-duplication by MD5 was
# already disabled in the previous revision and is not performed.)
#
# Returns an Array of site URLs, or nil when an error is raised (the error is
# printed in verbose mode).
def get_uniq_sites
  # The previous revision ended this line with "if @verbose=", which absorbed
  # the following assignment and overwrote @verbose with a Hash.
  puts "Getter to retrieve unique sites containing unique IP:PORT key identifier." if @verbose
  my_tracker = Wmap::HostTracker.instance
  my_tracker.hosts_file = @data_dir + 'hosts'
  my_tracker.load_known_hosts_from_file
  sites = {}
  @known_sites.keys.each do |key|
    port = url_2_port(key).to_s
    host = url_2_host(key)
    md5 = @known_sites[key]['md5']
    code = @known_sites[key]['code']
    # filtering out 'un-reachable' sites
    next if code == 10000 || code == 20000
    # filtering out 'empty' sites
    next if md5.nil? || md5.empty?
    # Resolve only after the cheap filters, so skipped sites cost no lookup.
    ip = my_tracker.local_host_2_ip(host)
    ip = host_2_ip(host) if ip.nil?
    next if ip.nil?
    id = ip + ":" + port
    # filtering out duplicates by 'IP:PORT' key pair
    sites[id] = key unless sites.key?(id)
  end
  sites.values
rescue StandardError => ee
  # StandardError rather than Exception, so Interrupt / SystemExit still propagate.
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  nil
end
|
637
599
|
alias_method :uniq_sites, :get_uniq_sites
|
638
600
|
|
639
601
|
# Retrieve a list of HTTPS (SSL/TLS) sites from the site store
|
640
602
|
# Collect the HTTPS sites from the site store.
#
# The previous revision evaluated `key =~ /https/i` but ignored the result and
# pushed every key, so all sites were returned. Only keys whose scheme is
# https (case-insensitive, anchored at the start of the URL) are kept now.
#
# Returns the matching site URLs as a sorted Array, or nil when an error is
# raised (the error is printed in verbose mode).
def get_ssl_sites
  puts "getter to retrieve https sites from the site store." if @verbose
  @known_sites.keys.select { |site| site =~ /\Ahttps:/i }.sort
rescue StandardError => ee
  # StandardError rather than Exception, so Interrupt / SystemExit still propagate.
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  nil
end
|
655
615
|
|
656
616
|
# Retrieve a list of redirection URLs from the site store
|
657
617
|
# Collect the redirection URL recorded for each site in the site store.
#
# Reads the 'redirection' entry of every record in @known_sites and drops the
# nil ones.
#
# Returns the redirection URLs as a sorted Array, or nil when an error is
# raised (the error is printed in verbose mode).
def get_redirection_urls
  puts "getter to retrieve all the redirection URLs from the site store." if @verbose
  @known_sites.keys.map { |site| @known_sites[site]['redirection'] }.compact.sort
rescue StandardError => ee
  # StandardError rather than Exception, so Interrupt / SystemExit still propagate.
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  nil
end
|
673
631
|
|
674
632
|
# Retrieve redirection URL if available
|
675
633
|
# Look up the redirection URL stored for a single site.
#
# site - the site URL; it is stripped and down-cased before the lookup.
#
# Returns the record's 'redirection' value, or nil when the site is not in
# @known_sites or when an error is raised (both reported in verbose mode only).
def get_redirection_url(site)
  puts "getter to retrieve the redirection URL from the site store." if @verbose
  site = site.strip.downcase
  return @known_sites[site]['redirection'] if @known_sites.key?(site)
  puts "Unknown site: #{site}" if @verbose
  nil
rescue StandardError => ee
  # StandardError rather than Exception, so Interrupt / SystemExit still propagate.
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  nil
end
|
690
646
|
|
691
647
|
# Perform local host table reverse lookup for the IP sites, in hope that the hostname could now be resolved since the site was discovered
|
692
648
|
# Re-check the IP-only sites against the local host table.
#
# For every site returned by get_ip_sites, the IP is looked up with the host
# tracker's local_ip_2_host. When a host-name is found the site is passed to
# refresh and recorded as updated.
#
# Returns the sorted Array of sites that were refreshed, or nil when an error
# is raised (the error is printed in verbose mode).
def resolve_ip_sites
  puts "Resolve sites that contain an IP address. Update the site cache table once a hostname is found in the local host table." if @verbose
  updates = []
  sites = get_ip_sites
  my_tracker = Wmap::HostTracker.instance
  my_tracker.data_dir = @data_dir
  sites.each do |site|
    puts "Work on resolve the IP site: #{site}" if @verbose
    ip = url_2_host(site)
    hostname = my_tracker.local_ip_2_host(ip)
    if hostname.nil?
      puts "Can't resolve #{ip} from the local host store. Skip #{site}" if @verbose
      next
    end
    puts "Host-name found for IP #{ip}: #{hostname}" if @verbose
    updates.push(site)
    refresh(site)
  end
  updates.sort!
  puts "The following sites are now refreshed: #{updates}" if @verbose
  updates
rescue StandardError => ee
  # StandardError rather than Exception, so Interrupt / SystemExit still propagate.
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  nil # the failure result is nil, as before, but now explicit
end
|
719
673
|
|
720
674
|
# Search potential matching sites from the site store by using simple regular expression. Note that any upper-case char in the search string will be automatically converted into lower case
|
721
675
|
# Search the site store with a simple regular expression.
#
# pattern - regular expression source as a String; it is stripped, down-cased
#           and matched case-insensitively against every key of @known_sites.
#
# Returns an Array of matching site URLs in store order, or nil when an error
# is raised, e.g. for an invalid pattern (the error is printed in verbose mode).
def search(pattern)
  puts "Search site store based on the regular expression: #{pattern}" if @verbose
  # Compile once instead of once per key.
  matcher = Regexp.new(pattern.strip.downcase, Regexp::IGNORECASE)
  @known_sites.keys.select { |site| site =~ matcher }
rescue StandardError => ee
  # StandardError rather than Exception, so Interrupt / SystemExit still propagate.
  puts "Exception on method search: #{ee}" if @verbose
  nil
end
|
737
689
|
|
738
690
|
# Print summary report on all sites that contain an IP in the site URL
|
@@ -741,26 +693,26 @@ class Wmap::SiteTracker
|
|
741
693
|
sites=get_ip_sites
|
742
694
|
sites.map { |x| puts x }
|
743
695
|
puts "End of report. "
|
696
|
+
rescue => ee
|
697
|
+
puts "Exception on method #{__method__} "
|
744
698
|
end
|
745
699
|
|
746
700
|
# Retrieve and print specific information of a site in the site store
|
747
701
|
# Print one comma-separated line describing a site in the site store.
#
# site - the site URL; surrounding white-space is stripped before the lookup.
#
# Output columns: site, ip, port, status, server, response code, MD5,
# redirection, timestamp. An unknown site raises internally and is reported
# on standard output by the rescue clause. Returns nil.
def print_site(site)
  puts "Site Information Report for: #{site}" if @verbose
  site = site.strip unless site.nil?
  raise "Unknown site: #{site}" unless @known_sites.key?(site)
  # Fetch the record once instead of repeating the lookup for every column.
  ip, port, status, server, fp, loc, res, timestamp =
    @known_sites[site].values_at('ip', 'port', 'status', 'server', 'md5', 'redirection', 'code', 'timestamp')
  puts "#{site},#{ip},#{port},#{status},#{server},#{res},#{fp},#{loc},#{timestamp}"
rescue => ee
  puts "Exception on method #{__method__} for #{site}: #{ee}"
end
|
765
717
|
alias_method :print, :print_site
|
766
718
|
|
@@ -772,142 +724,136 @@ class Wmap::SiteTracker
|
|
772
724
|
sites.each do |site|
|
773
725
|
puts site
|
774
726
|
end
|
775
|
-
|
776
727
|
puts "End of the summary"
|
777
|
-
|
728
|
+
rescue => ee
|
729
|
+
puts "Exception on method #{__method__} "
|
778
730
|
end
|
779
731
|
alias_method :print_all, :print_all_sites
|
780
732
|
|
781
733
|
# Retrieve and save unique sites information for the quarterly scan into a plain local file
|
782
734
|
# Write the primary unique sites report into a plain text file.
#
# file - path of the report file; it is created or truncated.
#
# The site list comes from get_prim_uniq_sites. nil entries are skipped and a
# site that is missing from @known_sites aborts the report with an error.
# The file is opened in block form so the handle is closed even when the loop
# raises (the previous revision leaked it in that case).
#
# Returns true on success, false when an error is raised (the error is printed).
def save_uniq_sites(file)
  puts "Save unique sites information into a flat file: #{file}\nThis may take a long while as it go through a lengthy self correction check process, please be patient ..."
  prime_sites = get_prim_uniq_sites
  puts "Primary Sites: #{prime_sites}" if @verbose
  File.open(file, "w") do |f|
    f.write "Unique Sites Information Report\n"
    f.write "Site, IP, Port, Server, Hosting, Response Code, MD5, Redirect, Timestamps\n"
    prime_sites.each do |key|
      next if key.nil?
      site = key.strip
      # "\n\twadd": the previous "\n\wadd" was a typo that dropped the tab.
      raise "Unknown site: #{site}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\twadd #{site}\n" unless @known_sites.key?(site)
      ip, port, status, server, fp, loc, res, timestamp =
        @known_sites[site].values_at('ip', 'port', 'status', 'server', 'md5', 'redirection', 'code', 'timestamp')
      f.write "#{site},#{ip},#{port},#{server},#{status},#{res},#{fp},#{loc},#{timestamp}\n"
    end
  end
  puts "Done!"
  true # success
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
  false # fail
end
|
812
762
|
alias_method :dump, :save_uniq_sites
|
813
763
|
|
814
764
|
# Retrieve and save unique sites information for the quarterly scan into an XML file
|
815
765
|
# Write the primary unique sites report into an XML file.
#
# file - path of the XML file; it is created or truncated.
#
# The site list comes from get_prim_uniq_sites. The document is built with
# Nokogiri::XML::Builder as root > websites > site, one site element per
# entry. nil entries are skipped and a site that is missing from @known_sites
# aborts the report. The document is serialized once and written through a
# block-form File.open, so the handle is always closed.
#
# Returns true on success, false when an error is raised (the error is printed).
def save_uniq_sites_xml(file)
  puts "Save unique sites information into XML file: #{file}\nThis may take a long while as it go through lengthy self correctness check, please be patient ..."
  prime_sites = get_prim_uniq_sites
  builder = Nokogiri::XML::Builder.new do |xml|
    xml.root {
      xml.websites {
        prime_sites.each do |key|
          next if key.nil?
          site = key.strip
          raise "Unknown site: #{site}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\twmap #{site}\n" unless @known_sites.key?(site)
          record = @known_sites[site]
          xml.site {
            xml.name site
            xml.ip_ record['ip']
            xml.port_ record['port']
            xml.status_ record['status']
            xml.server_ record['server']
            xml.fingerprint_ record['md5']
            xml.redirection_ record['redirection']
            xml.responsecode_ record['code']
            xml.timestamp_ record['timestamp']
          }
        end
      }
    }
  end
  document = builder.to_xml
  puts document if @verbose
  File.open(file, 'w') { |f| f.write(document) }
  puts "Done!"
  true
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
  false
end
|
852
800
|
alias_method :dump_xml, :save_uniq_sites_xml
|
853
801
|
|
854
802
|
# Retrieve the unique sites from the local site store in the primary host format
|
855
803
|
# Retrieve the unique sites and express each one in its primary host form.
#
# Starting from get_uniq_sites, every site is kept as is unless ALL of the
# following hold, in which case the primary-host variant of the URL is used:
#   * the URL host is not a literal IP,
#   * the host tracker's alias count for the site's stored IP is greater than 1,
#   * the primary host tracker knows that IP,
#   * prime(host) returns a different host, and
#   * the primed site is recorded in @known_sites with the same IP.
#
# Raises RuntimeError when the primed site is not in @known_sites. Errors are
# not rescued here; they propagate to the caller.
#
# Returns an Array with exactly one entry per site returned by get_uniq_sites.
def get_prim_uniq_sites
  puts "Retrieve and prime unique sites in the site store. " if @verbose
  host_tracker = Wmap::HostTracker.instance
  host_tracker.data_dir = @data_dir
  primary_host_tracker = Wmap::HostTracker::PrimaryHost.instance
  primary_host_tracker.data_dir = @data_dir
  # Step 1. Retrieve the unique site list first
  sites = get_uniq_sites
  # Step 2. Iterate on the unique site list, spit out the site in the primary host format one at a time
  sites.map do |site|
    puts "Work on priming unique site: #{site}" if @verbose
    host = url_2_host(site)
    # case#1, IP only site: keep as is
    next site if is_ip?(host)
    ip = @known_sites[site]['ip']
    # Read the alias count once; the previous revision evaluated it up to four times.
    alias_count = host_tracker.alias[ip]
    puts "Local hosts table entry count for #{ip}: #{alias_count}" if @verbose
    # case#2 (unique IP) and case#3 (no alias entry, e.g. multiple IPs for one A record): keep as is
    next site if alias_count.nil? || alias_count == 1
    # case#4, the IP is shared with other sites: try to determine the primary site
    next site unless primary_host_tracker.known_hosts.key?(ip) && alias_count > 1
    new_host = primary_host_tracker.prime(host)
    puts "Host: #{host}, New host:#{new_host}" if @verbose
    next site if host == new_host
    new_site = site.sub(host, new_host)
    raise "Site not found in the site tracking data repository: #{new_site}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\twadd #{new_site}\n" unless @known_sites.key?(new_site)
    # consistency check: only switch when both records point at the same IP
    # (TBD in the original - case of multiple IPs for A DNS record)
    @known_sites[new_site]['ip'] == ip ? new_site : site
  end
end
|
912
858
|
alias_method :get_prime, :get_prim_uniq_sites
|
913
859
|
|