wmap 2.5.5 → 2.5.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -15,204 +15,176 @@ class Wmap::SiteTracker
15
15
  include Wmap::Utils
16
16
  include Singleton
17
17
 
18
- attr_accessor :sites_file, :max_parallel, :verbose, :data_dir
19
- attr_reader :known_sites
18
+ attr_accessor :sites_file, :max_parallel, :verbose, :data_dir, :known_sites
20
19
 
21
20
  # Set default instance variables
22
21
  def initialize (params = {})
23
22
  # Initialize the instance variables
24
23
  @data_dir=params.fetch(:data_dir, File.dirname(__FILE__)+'/../../data/')
25
24
  Dir.mkdir(@data_dir) unless Dir.exist?(@data_dir)
26
- @file_sites=@data_dir+'sites'
27
- @file_stores=params.fetch(:sites_file, @file_sites)
25
+ @sites_file=params.fetch(:sites_file, @data_dir+'sites')
28
26
  @verbose=params.fetch(:verbose, false)
29
27
  @max_parallel=params.fetch(:max_parallel, 30)
28
+ File.write(@sites_file, "") unless File.exist?(@sites_file)
30
29
  # Hash table to hold the site store
31
- File.write(@file_stores, "") unless File.exist?(@file_stores)
32
- @known_sites=load_site_stores_from_file(@file_stores)
30
+ load_site_stores_from_file(@sites_file)
33
31
  end
34
32
 
35
33
  # Setter to load the known hosts into an instance variable
36
- def load_site_stores_from_file (file)
34
+ def load_site_stores_from_file (file=@sites_file)
37
35
  puts "Loading the site store data repository from file: #{file} " if @verbose
38
- begin
39
- known_sites=Hash.new
40
- f=File.open(file, 'r')
41
- f.each do |line|
42
- line=line.chomp.strip
43
- next if line.nil?
44
- next if line.empty?
45
- next if line =~ /^\s*#/
46
- entry=line.split(%r{\t+|\,})
47
- site=entry[0].downcase
48
- ip=entry[1]
49
- port=entry[2]
50
- status=entry[3]
51
- server=entry[4]
52
- res=entry[5].to_i
53
- fp=entry[6]
54
- loc=entry[7]
55
- timestamp=entry[8]
56
- puts "Loading entry: #{site} - #{ip} - #{status}" if @verbose
57
- known_sites[site]= Hash.new unless known_sites.key?(site)
58
- known_sites[site]['ip']=ip
59
- known_sites[site]['port']=port
60
- known_sites[site]['status']=status
61
- known_sites[site]['server']=server
62
- known_sites[site]['code']=res
63
- known_sites[site]['md5']=fp
64
- known_sites[site]['redirection']=loc
65
- known_sites[site]['timestamp']=timestamp
66
- end
67
- f.close
68
- puts "Successfully loading file: #{file}" if @verbose
69
- return known_sites
70
- rescue => ee
71
- puts "Exception on method #{__method__} for file #{file}: #{ee}"
72
- end
36
+ @known_sites=Hash.new
37
+ f=File.open(file, 'r')
38
+ f.each do |line|
39
+ line=line.chomp.strip
40
+ next if line.nil?
41
+ next if line.empty?
42
+ next if line =~ /^\s*#/
43
+ entry=line.split(%r{\t+|\,})
44
+ site=entry[0].downcase
45
+ ip=entry[1]
46
+ port=entry[2]
47
+ status=entry[3]
48
+ server=entry[4]
49
+ res=entry[5].to_i
50
+ fp=entry[6]
51
+ loc=entry[7]
52
+ timestamp=entry[8]
53
+ puts "Loading entry: #{site} - #{ip} - #{status}" if @verbose
54
+ @known_sites[site]= Hash.new unless @known_sites.key?(site)
55
+ @known_sites[site]['ip']=ip
56
+ @known_sites[site]['port']=port
57
+ @known_sites[site]['status']=status
58
+ @known_sites[site]['server']=server
59
+ @known_sites[site]['code']=res
60
+ @known_sites[site]['md5']=fp
61
+ @known_sites[site]['redirection']=loc
62
+ @known_sites[site]['timestamp']=timestamp
63
+ end
64
+ f.close
65
+ puts "Successfully loading file: #{file}" if @verbose
66
+ return @known_sites
67
+ rescue => ee
68
+ puts "Exception on method #{__method__} for file #{file}: #{ee}"
73
69
  end
74
70
 
75
71
  # Save the current site store hash table into a file
76
- def save_sites_to_file!(file_sites=@file_stores)
72
+ def save_sites_to_file!(file_sites=@sites_file)
77
73
  puts "Saving the current site store table from memory to file: #{file_sites}"
78
- begin
79
- timestamp=Time.now
80
- f=File.open(file_sites, 'w')
81
- f.write "# Local site store created by class #{self.class} method #{__method__} at: #{timestamp}\n"
82
- f.write "# Website,Primary IP,Port,Hosting Status,Server,Response Code,MD5 Finger-print,Redirection,Timestamp\n"
83
- @known_sites.keys.sort.map do |key|
84
- f.write "#{key},#{@known_sites[key]['ip']},#{@known_sites[key]['port']},#{@known_sites[key]['status']},#{@known_sites[key]['server']},#{@known_sites[key]['code']},#{@known_sites[key]['md5']},#{@known_sites[key]['redirection']},#{@known_sites[key]['timestamp']}\n"
85
- end
86
- f.close
87
- puts "site store table is successfully saved: #{file_sites}"
88
- rescue => ee
89
- puts "Exception on method #{__method__}: #{ee}"
90
- end
74
+ timestamp=Time.now
75
+ f=File.open(file_sites, 'w')
76
+ f.write "# Local site store created by class #{self.class} method #{__method__} at: #{timestamp}\n"
77
+ f.write "# Website,Primary IP,Port,Hosting Status,Server,Response Code,MD5 Finger-print,Redirection,Timestamp\n"
78
+ @known_sites.keys.sort.map do |key|
79
+ f.write "#{key},#{@known_sites[key]['ip']},#{@known_sites[key]['port']},#{@known_sites[key]['status']},#{@known_sites[key]['server']},#{@known_sites[key]['code']},#{@known_sites[key]['md5']},#{@known_sites[key]['redirection']},#{@known_sites[key]['timestamp']}\n"
80
+ end
81
+ f.close
82
+ puts "site store table is successfully saved: #{file_sites}"
83
+ rescue => ee
84
+ puts "Exception on method #{__method__}: #{ee}"
91
85
  end
92
86
  alias_method :save!, :save_sites_to_file!
93
87
 
94
88
  # Count numbers of entries in the site store table
95
89
  def count
96
90
  puts "Counting number of entries in the site store table ..."
97
- begin
98
- return @known_sites.size
99
- rescue => ee
100
- puts "Exception on method #{__method__}: #{ee}"
101
- end
91
+ return @known_sites.size
92
+ rescue => ee
93
+ puts "Exception on method #{__method__}: #{ee}"
102
94
  end
103
95
 
104
96
  # Setter to add site entry to the cache one at a time
105
97
  def add(site)
106
- begin
107
- puts "Add entry to the site store: #{site}"
108
- # Preliminary sanity check
109
- site=site.strip.downcase unless site.nil?
110
- if site_known?(site)
111
- puts "Site already exists. Skip it: #{site}"
112
- return nil
98
+ puts "Add entry to the site store: #{site}"
99
+ # Preliminary sanity check
100
+ site=site.strip.downcase unless site.nil?
101
+ if site_known?(site)
102
+ puts "Site already exists. Skip it: #{site}"
103
+ return nil
104
+ end
105
+ site=normalize_url(site) if is_url?(site)
106
+ site=url_2_site(site) if is_url?(site)
107
+ puts "Site in standard format: #{site}" if @verbose
108
+ raise "Exception on method #{__method__}: invalid site format of #{site}. Expected format is: http://your_website_name/" unless is_site?(site)
109
+ trusted=false
110
+ host=url_2_host(site)
111
+ ip=host_2_ip(host)
112
+ # Additional logic to refresh deactivated site, 02/12/2014
113
+ deact=Wmap::SiteTracker::DeactivatedSite.instance
114
+ deact.sites_file=@data_dir + 'deactivated_sites'
115
+ File.write(deact.sites_file, "") unless File.exist?(deact.sites_file)
116
+ deact.load_site_stores_from_file
117
+ # only trust either the domain or IP we know
118
+ if is_ip?(host)
119
+ trusted=Wmap::CidrTracker.new(:data_dir=>@data_dir).ip_trusted?(ip)
120
+ else
121
+ root=get_domain_root(host)
122
+ if root.nil?
123
+ raise "Invalid web site format. Please check your record again."
124
+ else
125
+ domain_tracker=Wmap::DomainTracker.instance
126
+ domain_tracker.domains_file=@data_dir+'domains'
127
+ File.write(domain_tracker.domains_file, "") unless File.exist?(domain_tracker.domains_file)
128
+ domain_tracker.load_domains_from_file
129
+ trusted=domain_tracker.domain_known?(root)
130
+ domain_tracker=nil
113
131
  end
114
- site=normalize_url(site) if is_url?(site)
115
- site=url_2_site(site) if is_url?(site)
116
- puts "Site in standard format: #{site}" if @verbose
117
- raise "Exception on method #{__method__}: invalid site format of #{site}. Expected format is: http://your_website_name/" unless is_site?(site)
118
- trusted=false
119
- host=url_2_host(site)
120
- ip=host_2_ip(host)
121
- # Additional logic to refresh deactivated site, 02/12/2014
122
- deact=Wmap::SiteTracker::DeactivatedSite.instance
123
- deact.data_dir=@data_dir
124
- # only trust either the domain or IP we know
125
- if is_ip?(host)
126
- trusted=Wmap::CidrTracker.new(:data_dir=>@data_dir).ip_trusted?(ip)
132
+ end
133
+ # add record only if trusted
134
+ if trusted
135
+ # Add logic to check site status before adding it
136
+ checker=Wmap::UrlChecker.new(:data_dir=>@data_dir).check(site)
137
+ raise "Site is currently down. Skip #{site}" if checker.nil?
138
+ # Skip the http site if it's un-responsive; for the https we'll keep it because we're interested in analysing the SSL layer later
139
+ if is_https?(site)
140
+ # do nothing
127
141
  else
128
- root=get_domain_root(host)
129
- if root.nil?
130
- raise "Invalid web site format. Please check your record again."
131
- else
132
- domain_tracker=Wmap::DomainTracker.instance
133
- domain_tracker.data_dir=@data_dir
134
- trusted=domain_tracker.domain_known?(root)
135
- domain_tracker=nil
136
- end
142
+ raise "Site is currently down. Skip #{site}" if checker['code']==10000
137
143
  end
138
- # add record only if trusted
139
- if trusted
140
- # Add logic to check site status before adding it
141
- checker=Wmap::UrlChecker.new(:data_dir=>@data_dir).check(site)
142
- raise "Site is currently down. Skip #{site}" if checker.nil?
143
- # Skip the http site if it's un-responsive; for the https we'll keep it because we're interested in analysing the SSL layer later
144
- if is_https?(site)
145
- # do nothing
144
+ raise "Exception on add method - Fail to resolve the host-name: Host - #{host}, IP - #{ip}. Skip #{site}" unless is_ip?(ip)
145
+ my_tracker = Wmap::HostTracker.instance
146
+ my_tracker.data_dir=@data_dir
147
+ # Update the local host table when necessary
148
+ if is_ip?(host)
149
+ # Case #1: Trusted site contains IP
150
+ if my_tracker.ip_known?(host)
151
+ # Try local reverse DNS lookup first
152
+ puts "Local hosts table lookup for IP: #{ip}" if @verbose
153
+ host=my_tracker.local_ip_2_host(host)
154
+ puts "Host found from the local hosts table for #{ip}: #{host}" if @verbose
155
+ site.sub!(/\d+\.\d+\.\d+\.\d+/,host)
146
156
  else
147
- raise "Site is currently down. Skip #{site}" if checker['code']==10000
148
- end
149
- raise "Exception on add method - Fail to resolve the host-name: Host - #{host}, IP - #{ip}. Skip #{site}" unless is_ip?(ip)
150
- my_tracker = Wmap::HostTracker.instance
151
- my_tracker.data_dir=@data_dir
152
- # Update the local host table when necessary
153
- if is_ip?(host)
154
- # Case #1: Trusted site contains IP
155
- if my_tracker.ip_known?(host)
156
- # Try local reverse DNS lookup first
157
- puts "Local hosts table lookup for IP: #{ip}" if @verbose
158
- host=my_tracker.local_ip_2_host(host)
159
- puts "Host found from the local hosts table for #{ip}: #{host}" if @verbose
160
- site.sub!(/\d+\.\d+\.\d+\.\d+/,host)
161
- else
162
- # Try reverse DNS lookup over Internet as secondary precaution
163
- puts "Reverse DNS lookup for IP: #{ip}" if @verbose
164
- host1=ip_2_host(host)
165
- puts "host1: #{host1}" if @verbose
166
- if is_fqdn?(host1)
167
- host_tracker=Wmap::HostTracker.instance
168
- host_tracker.data_dir=@data_dir
169
- if host_tracker.domain_known?(host1)
170
- # replace IP with host-name only if domain root is known
171
- puts "Host found from the Internet reverse DNS lookup for #{ip}: #{host1}" if @verbose
172
- host=host1
173
- site.sub!(/\d+\.\d+\.\d+\.\d+/,host)
174
- end
175
- host_tracker=nil
176
- end
177
- end
178
- # Adding site for Case #1
179
- raise "Site already exist! Skip #{site}" if @known_sites.key?(site)
180
- puts "Adding site: #{site}" if @verbose
181
- @known_sites[site]=Hash.new
182
- @known_sites[site]=checker
183
- if deact.site_known?(site)
184
- deact.delete(site)
185
- deact.save!
186
- end
187
- puts "Site entry loaded: #{checker}"
188
- if is_fqdn?(host)
189
- # Add logic to update the hosts table for case #1 variance
190
- # - case that reverse DNS lookup successful
191
- puts "Update local hosts table for host: #{host}"
192
- if my_tracker.host_known?(host)
193
- old_ip=my_tracker.local_host_2_ip(host)
194
- if old_ip != ip
195
- my_tracker.refresh(host)
196
- my_tracker.save!
197
- else
198
- puts "Host resolve to the same IP #{ip} - no need to update the local host table." if @verbose
199
- end
200
- else
201
- my_tracker.add(host)
202
- my_tracker.save!
157
+ # Try reverse DNS lookup over Internet as secondary precaution
158
+ puts "Reverse DNS lookup for IP: #{ip}" if @verbose
159
+ host1=ip_2_host(host)
160
+ puts "host1: #{host1}" if @verbose
161
+ if is_fqdn?(host1)
162
+ host_tracker=Wmap::HostTracker.instance
163
+ host_tracker.data_dir=@data_dir
164
+ host_tracker.hosts_file=host_tracker.data_dir + "hosts"
165
+ host_tracker.load_known_hosts_from_file
166
+ if host_tracker.domain_known?(host1)
167
+ # replace IP with host-name only if domain root is known
168
+ puts "Host found from the Internet reverse DNS lookup for #{ip}: #{host1}" if @verbose
169
+ host=host1
170
+ site.sub!(/\d+\.\d+\.\d+\.\d+/,host)
203
171
  end
172
+ host_tracker=nil
204
173
  end
205
- else
206
- # Case #2: Trusted site contains valid FQDN
207
- puts "Ading site: #{site}" if @verbose
208
- @known_sites[site]=Hash.new
209
- @known_sites[site]=checker
210
- if deact.site_known?(site)
211
- deact.delete(site)
212
- deact.save!
213
- end
214
- puts "Site entry loaded: #{checker}"
215
- # Add logic to update the hosts table for case #2
174
+ end
175
+ # Adding site for Case #1
176
+ raise "Site already exist! Skip #{site}" if @known_sites.key?(site)
177
+ puts "Adding site: #{site}" if @verbose
178
+ @known_sites[site]=Hash.new
179
+ @known_sites[site]=checker
180
+ if deact.site_known?(site)
181
+ deact.delete(site)
182
+ deact.save!
183
+ end
184
+ puts "Site entry loaded: #{checker}"
185
+ if is_fqdn?(host)
186
+ # Add logic to update the hosts table for case #1 variance
187
+ # - case that reverse DNS lookup successful
216
188
  puts "Update local hosts table for host: #{host}"
217
189
  if my_tracker.host_known?(host)
218
190
  old_ip=my_tracker.local_host_2_ip(host)
@@ -220,232 +192,239 @@ class Wmap::SiteTracker
220
192
  my_tracker.refresh(host)
221
193
  my_tracker.save!
222
194
  else
223
- # Skip - no need to update the local hosts table
195
+ puts "Host resolve to the same IP #{ip} - no need to update the local host table." if @verbose
224
196
  end
225
197
  else
226
198
  my_tracker.add(host)
227
199
  my_tracker.save!
228
200
  end
229
201
  end
230
- deact=nil
231
- my_tracker=nil
232
- host_tracker=nil
233
- return checker
234
202
  else
235
- puts "Problem found: untrusted Internet domain or IP. Skip #{site}"
236
- deact=nil
237
- my_tracker=nil
238
- host_tracker=nil
239
- return nil
203
+ # Case #2: Trusted site contains valid FQDN
204
+ puts "Ading site: #{site}" if @verbose
205
+ @known_sites[site]=Hash.new
206
+ @known_sites[site]=checker
207
+ if deact.site_known?(site)
208
+ deact.delete(site)
209
+ deact.save!
210
+ end
211
+ puts "Site entry loaded: #{checker}"
212
+ # Add logic to update the hosts table for case #2
213
+ puts "Update local hosts table for host: #{host}"
214
+ if my_tracker.host_known?(host)
215
+ old_ip=my_tracker.local_host_2_ip(host)
216
+ if old_ip != ip
217
+ my_tracker.refresh(host)
218
+ my_tracker.save!
219
+ else
220
+ # Skip - no need to update the local hosts table
221
+ end
222
+ else
223
+ my_tracker.add(host)
224
+ my_tracker.save!
225
+ end
240
226
  end
241
- rescue => ee
242
- puts "Exception on method #{__method__}: #{ee}"
243
- checker=nil
244
227
  deact=nil
228
+ my_tracker=nil
229
+ host_tracker=nil
230
+ return site
231
+ else
232
+ puts "Problem found: untrusted Internet domain or IP. Skip #{site}"
233
+ deact=nil
234
+ my_tracker=nil
245
235
  host_tracker=nil
246
236
  return nil
247
237
  end
238
+ #rescue => ee
239
+ # puts "Exception on method #{__method__}: #{ee}"
240
+ # checker=nil
241
+ # deact=nil
242
+ # host_tracker=nil
243
+ # return nil
248
244
  end
249
245
 
250
246
  # Setter to add site entry to the cache table in batch (from a file)
251
247
  def file_add(file)
252
248
  puts "Add entries to the local site store from file: #{file}"
253
- begin
254
- raise "File non-exist. Please check your file path and name again: #{file}" unless File.exist?(file)
255
- changes=Hash.new
256
- sites=file_2_list(file)
257
- changes=bulk_add(sites) unless sites.nil? or sites.empty?
258
- puts "Done loading file #{file}. "
259
- return changes
260
- rescue => ee
261
- puts "Exception on method #{__method__}: #{ee}"
262
- end
249
+ raise "File non-exist. Please check your file path and name again: #{file}" unless File.exist?(file)
250
+ changes=Hash.new
251
+ sites=file_2_list(file)
252
+ changes=bulk_add(sites) unless sites.nil? or sites.empty?
253
+ puts "Done loading file #{file}. "
254
+ return changes
255
+ rescue => ee
256
+ puts "Exception on method #{__method__}: #{ee}"
263
257
  end
264
258
 
265
259
  # Setter to add site entry to the cache in batch (from a list)
266
260
  def bulk_add(list,num=@max_parallel)
267
261
  puts "Add entries to the local site store from list:\n #{list}"
268
- #begin
269
- results=Hash.new
270
- if list.size > 0
271
- puts "Start parallel adding on the sites:\n #{list}"
272
- Parallel.map(list, :in_processes => num) { |target|
273
- add(target)
274
- }.each do |process|
275
- if process.nil?
276
- next
277
- elsif process.empty?
278
- #do nothing
279
- else
280
- results[process['url']]=Hash.new
281
- results[process['url']]=process
282
- end
262
+ results=Hash.new
263
+ if list.size > 0
264
+ puts "Start parallel adding on the sites:\n #{list}"
265
+ Parallel.map(list, :in_processes => num) { |target|
266
+ add(target)
267
+ }.each do |process|
268
+ if process.nil?
269
+ next
270
+ elsif process.empty?
271
+ next #do nothing
272
+ else
273
+ results[process['url']]=Hash.new
274
+ results[process['url']]=process
283
275
  end
284
- @known_sites.merge!(results)
285
- else
286
- puts "Error: no entry is added. Please check your list and try again."
287
276
  end
288
- puts "Done adding site entries."
289
- if results.size>0
290
- puts "New entries added: #{results}"
291
- else
292
- puts "No new entry added. "
293
- end
294
- return results
295
- #rescue => ee
296
- #puts "Exception on method #{__method__}: #{ee}" if @verbose
297
- #end
277
+ @known_sites.merge!(results)
278
+ else
279
+ puts "Error: no entry is added. Please check your list and try again."
280
+ end
281
+ puts "Done adding site entries."
282
+ if results.size>0
283
+ puts "New entries added: #{results}"
284
+ else
285
+ puts "No new entry added. "
286
+ end
287
+ return results
288
+ #rescue => ee
289
+ #puts "Exception on method #{__method__}: #{ee}" if @verbose
298
290
  end
299
291
  alias_method :adds, :bulk_add
300
292
 
301
293
  # Setter to remove entry from the site store one at a time
302
294
  def delete(site)
303
295
  puts "Remove entry from the site store: #{site} " if @verbose
304
- begin
305
- # Additional logic to deactivate the site properly, by moving it to the DeactivatedSite list, 02/07/2014
306
- deact=Wmap::SiteTracker::DeactivatedSite.instance
307
- deact.data_dir=@data_dir
308
- site=site.strip.downcase
309
- site=url_2_site(site)
310
- if @known_sites.key?(site)
311
- site_info=@known_sites[site]
312
- deact.add(site,site_info)
313
- deact.save!
314
- deact=nil
315
- del=@known_sites.delete(site)
316
- puts "Entry cleared: #{site}"
317
- return del
318
- else
319
- puts "Entry not fund. Skip #{site}"
320
- deact=nil
321
- return nil
322
- end
323
- rescue => ee
324
- puts "Exception on method #{__method__}: #{ee}" if @verbose
296
+ # Additional logic to deactivate the site properly, by moving it to the DeactivatedSite list, 02/07/2014
297
+ deact=Wmap::SiteTracker::DeactivatedSite.instance
298
+ deact.sites_file=@data_dir + 'deactivated_sites'
299
+ File.write(deact.sites_file, "") unless File.exist?(deact.sites_file)
300
+ site=site.strip.downcase
301
+ site=url_2_site(site)
302
+ if @known_sites.key?(site)
303
+ site_info=@known_sites[site]
304
+ deact.add(site,site_info)
305
+ deact.save!
325
306
  deact=nil
307
+ del=@known_sites.delete(site)
308
+ puts "Entry cleared: #{site}"
309
+ return del
310
+ else
311
+ puts "Entry not fund. Skip #{site}"
312
+ deact=nil
313
+ return nil
326
314
  end
315
+ rescue => ee
316
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
317
+ deact=nil
327
318
  end
328
319
  alias_method :del, :delete
329
320
 
330
321
  # Setter to delete site entry to the cache in batch (from a file)
331
322
  def file_delete(file)
332
- begin
333
- puts "Delete entries to the local site store from file: #{file}" if @verbose
334
- raise "File non-exist. Please check your file path and name again: #{file}" unless File.exist?(file)
335
- sites=file_2_list(file)
336
- changes=Array.new
337
- changes=bulk_delete(sites) unless sites.nil? or sites.empty?
338
- rescue => ee
339
- puts "Exception on method file_delete: #{ee} for file: #{file}" if @verbose
340
- end
323
+ puts "Delete entries to the local site store from file: #{file}" if @verbose
324
+ raise "File non-exist. Please check your file path and name again: #{file}" unless File.exist?(file)
325
+ sites=file_2_list(file)
326
+ changes=Array.new
327
+ changes=bulk_delete(sites) unless sites.nil? or sites.empty?
328
+ rescue => ee
329
+ puts "Exception on method file_delete: #{ee} for file: #{file}" if @verbose
341
330
  end
342
331
  alias_method :file_del, :file_delete
343
332
 
344
333
  # Setter to delete site entry to the cache in batch (from a list)
345
334
  def bulk_delete(list)
346
335
  puts "Delete entries to the local site store from list:\n #{list}" if @verbose
347
- begin
348
- sites=list
349
- changes=Array.new
350
- if sites.size > 0
351
- sites.map do |x|
352
- x=url_2_site(x)
353
- site=delete(x)
354
- changes.push(site) unless site.nil?
355
- end
356
- puts "Done deleting sites from the list:\n #{list}"
357
- return changes
358
- else
359
- puts "Error: no entry is loaded. Please check your list and try again."
336
+ sites=list
337
+ changes=Array.new
338
+ if sites.size > 0
339
+ sites.map do |x|
340
+ x=url_2_site(x)
341
+ site=delete(x)
342
+ changes.push(site) unless site.nil?
360
343
  end
361
- rescue => ee
362
- puts "Exception on method #{__method__}: #{ee}" if @verbose
344
+ puts "Done deleting sites from the list:\n #{list}"
345
+ return changes
346
+ else
347
+ puts "Error: no entry is loaded. Please check your list and try again."
363
348
  end
349
+ rescue => ee
350
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
364
351
  end
365
352
  alias_method :dels, :bulk_delete
366
353
 
367
354
  # Setter to refresh the entry in the site store one at a time
368
355
  def refresh(site)
369
356
  puts "Refresh the local site store for site: #{site} "
370
- begin
371
- raise "Invalid site: #{site}" if site.nil? or site.empty?
372
- site=site.strip.downcase
373
- if @known_sites.key?(site)
374
- delete(site)
375
- site_info=add(site)
376
- puts "Done refresh entry: #{site}"
377
- return site_info
378
- else
379
- puts "Error entry non exist: #{site}"
380
- end
381
- return nil
382
- rescue => ee
383
- puts "Exception on method #{__method__}: #{ee}" if @verbose
384
- return nil
357
+ raise "Invalid site: #{site}" if site.nil? or site.empty?
358
+ site=site.strip.downcase
359
+ if @known_sites.key?(site)
360
+ delete(site)
361
+ site_info=add(site)
362
+ puts "Done refresh entry: #{site}"
363
+ return site_info
364
+ else
365
+ puts "Error entry non exist: #{site}"
385
366
  end
367
+ return nil
368
+ rescue => ee
369
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
370
+ return nil
386
371
  end
387
372
 
388
373
  # 'Refresh sites in the site store in batch (from a file)
389
374
  def file_refresh(file)
390
375
  puts "Refresh entries in the site store from file: #{file}" if @verbose
391
- begin
392
- changes=Hash.new
393
- sites=file_2_list(file)
394
- changes=bulk_refresh(sites) unless sites.nil? or sites.empty?
395
- return changes
396
- rescue => ee
397
- puts "Exception on method #{__method__}: #{ee} for file: #{file}" if @verbose
398
- end
376
+ changes=Hash.new
377
+ sites=file_2_list(file)
378
+ changes=bulk_refresh(sites) unless sites.nil? or sites.empty?
379
+ return changes
380
+ rescue => ee
381
+ puts "Exception on method #{__method__}: #{ee} for file: #{file}" if @verbose
399
382
  end
400
383
 
401
384
  # 'Refresh unique sites in the site store only
402
385
  def refresh_uniq_sites
403
386
  puts "Refresh unique site entries in the site store. " if @verbose
404
- begin
405
- changes=Hash.new
406
- sites=get_uniq_sites
407
- if sites.size > 0
408
- changes=bulk_refresh(sites)
409
- else
410
- puts "Error: no entry is refreshed. Please check your site store and try again."
411
- end
412
- return changes
413
- rescue => ee
414
- puts "Exception on method #{__method__}: #{ee}" if @verbose
387
+ changes=Hash.new
388
+ sites=get_uniq_sites
389
+ if sites.size > 0
390
+ changes=bulk_refresh(sites)
391
+ else
392
+ puts "Error: no entry is refreshed. Please check your site store and try again."
415
393
  end
394
+ return changes
395
+ rescue => ee
396
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
416
397
  end
417
398
 
418
399
  # 'Refresh sites in the site store in batch (from a list)
419
400
  def bulk_refresh(list,num=@max_parallel)
420
401
  puts "Refresh entries in the site store from list:\n #{list}" if @verbose
421
- begin
422
- results=Hash.new
423
- if list.size > 0
424
- puts "Start parallel refreshing on the sites:\n #{list}"
425
- Parallel.map(list, :in_processes => num) { |target|
426
- refresh(target)
427
- }.each do |process|
428
- if process.nil?
429
- next
430
- elsif process.empty?
431
- #do nothing
432
- else
433
- results[process['url']]=Hash.new
434
- results[process['url']]=process
435
- end
402
+ results=Hash.new
403
+ if list.size > 0
404
+ puts "Start parallel refreshing on the sites:\n #{list}"
405
+ Parallel.map(list, :in_processes => num) { |target|
406
+ refresh(target)
407
+ }.each do |process|
408
+ if process.nil?
409
+ next
410
+ elsif process.empty?
411
+ #do nothing
412
+ else
413
+ results[process['url']]=Hash.new
414
+ results[process['url']]=process
436
415
  end
437
- # Clean up old entries, by Y.L. 03/30/2015
438
- list.map {|x| @known_sites.delete(x)}
439
- # Add back fresh entries
440
- @known_sites.merge!(results)
441
- puts "Done refresh sites."
442
- else
443
- puts "Error: no entry is loaded. Please check your list and try again."
444
416
  end
445
- return results
446
- rescue => ee
447
- puts "Exception on method #{__method__}: #{ee}" if @verbose
448
- end
417
+ # Clean up old entries, by Y.L. 03/30/2015
418
+ list.map {|x| @known_sites.delete(x)}
419
+ # Add back fresh entries
420
+ @known_sites.merge!(results)
421
+ puts "Done refresh sites."
422
+ else
423
+ puts "Error: no entry is loaded. Please check your list and try again."
424
+ end
425
+ return results
426
+ rescue => ee
427
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
449
428
  end
450
429
  alias_method :refreshs, :bulk_refresh
451
430
 
@@ -453,286 +432,259 @@ class Wmap::SiteTracker
453
432
  # Refresh all site entries in the stores in one shot
454
433
  def refresh_all
455
434
  puts "Refresh all the entries within the local site store ... "
456
- begin
457
- changes=Hash.new
458
- changes=bulk_refresh(@known_sites.keys)
459
- @known_sites.merge!(changes)
460
- puts "Done refresh all entries."
461
- return changes
462
- rescue => ee
463
- puts "Exception on method #{__method__}: #{ee}" if @verbose
464
- end
435
+ changes=Hash.new
436
+ changes=bulk_refresh(@known_sites.keys)
437
+ @known_sites.merge!(changes)
438
+ puts "Done refresh all entries."
439
+ return changes
440
+ rescue => ee
441
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
465
442
  end
466
443
 
467
444
  # Refresh all site entries in the stores that contains an IP instead of a hostname
468
445
  def refresh_ip_sites
469
446
  puts "Refresh all entries that contain an IP address instead of a FQDN ... "
470
- begin
471
- sites=get_ip_sites
472
- live_sites=sites.delete_if { |x| @known_sites[x]['code'] == 10000 or @known_sites[x]['code'] == 20000 }
473
- changes=Hash.new
474
- changes=bulk_refresh(live_sites)
475
- @known_sites.merge!(changes)
476
- puts "Done refresh IP sites."
477
- return changes
478
- rescue => ee
479
- puts "Exception on method #{__method__}: #{ee}" if @verbose
480
- end
447
+ sites=get_ip_sites
448
+ live_sites=sites.delete_if { |x| @known_sites[x]['code'] == 10000 or @known_sites[x]['code'] == 20000 }
449
+ changes=Hash.new
450
+ changes=bulk_refresh(live_sites)
451
+ @known_sites.merge!(changes)
452
+ puts "Done refresh IP sites."
453
+ return changes
454
+ rescue => ee
455
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
481
456
  end
482
457
 
483
458
  # Quick validation if a site is already covered under the site store
484
459
  def site_known?(site)
485
- begin
486
- raise "Web site store not loaded properly! " if @known_sites.nil?
487
- site=site.strip.downcase unless site.nil?
488
- site=url_2_site(site)
489
- return @known_sites.key?(site) unless site.nil?
490
- rescue => ee
491
- puts "Error checking web site #{site} against the site store: #{ee}"
492
- end
460
+ raise "Web site store not loaded properly! " if @known_sites.nil?
461
+ site=site.strip.downcase unless site.nil?
462
+ site=url_2_site(site)
463
+ return @known_sites.key?(site) unless site.nil?
464
+ rescue => ee
465
+ puts "Error checking web site #{site} against the site store: #{ee}"
493
466
  return false
494
467
  end
495
468
  alias_method :is_known?, :site_known?
496
469
 
497
470
  # Quick validation check on an IP is already part of the site store
498
471
  def site_ip_known?(ip)
499
- begin
500
- ip=ip.chomp.strip
501
- known=false
502
- if is_ip?(ip)
503
- @known_sites.keys.map do |site|
504
- if @known_sites[site]['ip']==ip
505
- return true
506
- end
472
+ ip=ip.chomp.strip
473
+ known=false
474
+ if is_ip?(ip)
475
+ @known_sites.keys.map do |site|
476
+ if @known_sites[site]['ip']==ip
477
+ return true
507
478
  end
508
479
  end
509
- myDis=nil
510
- return known
511
- rescue => ee
512
- puts "Exception on method #{__method__}: #{ee}"
513
- return false
514
480
  end
481
+ myDis=nil
482
+ return known
483
+ rescue => ee
484
+ puts "Exception on method #{__method__}: #{ee}"
485
+ return false
515
486
  end
516
487
  alias_method :siteip_known?, :site_ip_known?
517
488
 
518
489
  # Quick check of the stored information of a site within the store
519
490
  def site_check(site)
520
- begin
521
- raise "Web site store not loaded properly! " if @known_sites.nil?
522
- site=site.strip.downcase unless site.nil?
523
- site=url_2_site(site)
524
- return @known_sites[site] unless site.nil?
525
- rescue => ee
526
- puts "Exception on method #{__method__}: #{ee}"
527
- return nil
528
- end
491
+ raise "Web site store not loaded properly! " if @known_sites.nil?
492
+ site=site.strip.downcase unless site.nil?
493
+ site=url_2_site(site)
494
+ return @known_sites[site] unless site.nil?
495
+ rescue => ee
496
+ puts "Exception on method #{__method__}: #{ee}"
497
+ return nil
529
498
  end
530
499
  alias_method :check, :site_check
531
500
 
532
501
  # Retrieve external hosted sites into a list
533
502
  def get_ext_sites
534
503
  puts "getter to retrieve all the external hosted sites. " if @verbose
535
- begin
536
- sites=Array.new
537
- @known_sites.keys.map do |key|
538
- if @known_sites[key]['status']=="ext_hosted"
539
- sites.push(key)
540
- end
504
+ sites=Array.new
505
+ @known_sites.keys.map do |key|
506
+ if @known_sites[key]['status']=="ext_hosted"
507
+ sites.push(key)
541
508
  end
542
- sites.sort!
543
- return sites
544
- rescue Exception => ee
545
- puts "Exception on method #{__method__}: #{ee}" if @verbose
546
- return nil
547
509
  end
510
+ sites.sort!
511
+ return sites
512
+ rescue Exception => ee
513
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
514
+ return nil
548
515
  end
549
516
  alias_method :get_ext, :get_ext_sites
550
517
 
551
518
  # Retrieve a list of internal hosted site URLs
552
519
  def get_int_sites
553
520
  puts "getter to retrieve all the internal hosted sites." if @verbose
554
- begin
555
- sites=Array.new
556
- @known_sites.keys.map do |key|
557
- if @known_sites[key]['status']=="int_hosted"
558
- sites.push(key)
559
- end
521
+ sites=Array.new
522
+ @known_sites.keys.map do |key|
523
+ if @known_sites[key]['status']=="int_hosted"
524
+ sites.push(key)
560
525
  end
561
- sites.sort!
562
- return sites
563
- rescue Exception => ee
564
- puts "Exception on method #{__method__}: #{ee}" if @verbose
565
- return nil
566
526
  end
527
+ sites.sort!
528
+ return sites
529
+ rescue Exception => ee
530
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
531
+ return nil
567
532
  end
568
533
  alias_method :get_int, :get_int_sites
569
534
 
570
535
  # Retrieve a list of sites that contain an IP in the site URL
571
536
  def get_ip_sites
572
537
  puts "Getter to retrieve sites contain an IP instead of a host-name ." if @verbose
573
- begin
574
- sites=Array.new
575
- @known_sites.keys.map do |key|
576
- host=url_2_host(key)
577
- if is_ip?(host)
578
- sites.push(key)
579
- end
538
+ sites=Array.new
539
+ @known_sites.keys.map do |key|
540
+ host=url_2_host(key)
541
+ if is_ip?(host)
542
+ sites.push(key)
580
543
  end
581
- sites.sort!
582
- return sites
583
- rescue Exception => ee
584
- puts "Exception on method #{__method__}: #{ee}" if @verbose
585
- return nil
586
544
  end
545
+ sites.sort!
546
+ return sites
547
+ rescue Exception => ee
548
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
549
+ return nil
587
550
  end
588
551
 
589
552
  # Retrieve a list of unique sites within the known site store
590
553
  def get_uniq_sites
591
- puts "Getter to retrieve unique sites containing unique IP:PORT key identifier." if @verbose
592
- begin
593
- #primary_host_tracker=Wmap::HostTracker::PrimaryHost.instance
594
- sites=Hash.new
595
- #uniqueness=Hash.new
596
- my_tracker=Wmap::HostTracker.instance
597
- my_tracker.data_dir=@data_dir
598
- @known_sites.keys.map do |key|
599
- port=url_2_port(key).to_s
600
- host=url_2_host(key)
601
- md5=@known_sites[key]['md5']
602
- code=@known_sites[key]['code']
603
- ip=my_tracker.local_host_2_ip(host)
604
- ip=host_2_ip(host) if ip.nil?
605
- # filtering out 'un-reachable' sites
606
- next if (code == 10000 or code == 20000)
607
- # filtering out 'empty' sites
608
- next if (md5.nil? or md5.empty?)
609
- next if ip.nil?
610
- # url_new=key
611
- #if primary_host_tracker.ip_known?(ip)
612
- # p_host=primary_host_tracker.known_hosts[ip]
613
- # url_new=key.sub(host,p_host)
614
- #end
615
- id=ip+":"+port
616
- # filtering out duplicates by 'IP:PORT' key pair
617
- unless sites.key?(id)
618
- #if @known_sites.key?(key)
619
- # sites[id]=url_new
620
- #else
621
- # Further filtering out redundant site by checking MD5 finger-print
622
- #unless uniqueness.key?(md5)
623
- sites[id]=key
624
- # uniqueness[md5]=true
625
- #end
554
+ puts "Getter to retrieve unique sites containing unique IP:PORT key identifier." if @verbose=
555
+ #primary_host_tracker=Wmap::HostTracker::PrimaryHost.instance
556
+ sites=Hash.new
557
+ #uniqueness=Hash.new
558
+ my_tracker=Wmap::HostTracker.instance
559
+ my_tracker.hosts_file=@data_dir + 'hosts'
560
+ my_tracker.load_known_hosts_from_file
561
+ @known_sites.keys.map do |key|
562
+ port=url_2_port(key).to_s
563
+ host=url_2_host(key)
564
+ md5=@known_sites[key]['md5']
565
+ code=@known_sites[key]['code']
566
+ ip=my_tracker.local_host_2_ip(host)
567
+ ip=host_2_ip(host) if ip.nil?
568
+ # filtering out 'un-reachable' sites
569
+ next if (code == 10000 or code == 20000)
570
+ # filtering out 'empty' sites
571
+ next if (md5.nil? or md5.empty?)
572
+ next if ip.nil?
573
+ # url_new=key
574
+ #if primary_host_tracker.ip_known?(ip)
575
+ # p_host=primary_host_tracker.known_hosts[ip]
576
+ # url_new=key.sub(host,p_host)
577
+ #end
578
+ id=ip+":"+port
579
+ # filtering out duplicates by 'IP:PORT' key pair
580
+ unless sites.key?(id)
581
+ #if @known_sites.key?(key)
582
+ # sites[id]=url_new
583
+ #else
584
+ # Further filtering out redundant site by checking MD5 finger-print
585
+ #unless uniqueness.key?(md5)
586
+ sites[id]=key
587
+ # uniqueness[md5]=true
626
588
  #end
627
- end
589
+ #end
628
590
  end
629
- #primary_host_tracker=nil
630
- my_tracker=nil
631
- return sites.values
632
- rescue Exception => ee
633
- puts "Exception on method #{__method__}: #{ee}" if @verbose
634
- return nil
635
591
  end
592
+ #primary_host_tracker=nil
593
+ my_tracker=nil
594
+ return sites.values
595
+ rescue Exception => ee
596
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
597
+ return nil
636
598
  end
637
599
  alias_method :uniq_sites, :get_uniq_sites
638
600
 
639
601
  # Retrieve a list of HTTPS sites from the site store
640
602
  def get_ssl_sites
641
603
  puts "getter to retrieve https sites from the site store." if @verbose
642
- begin
643
- sites=Array.new
644
- @known_sites.keys.map do |key|
645
- key =~ /https/i
646
- sites.push(key)
647
- end
648
- sites.sort!
649
- return sites
650
- rescue Exception => ee
651
- puts "Exception on method #{__method__}: #{ee}" if @verbose
652
- return nil
653
- end
604
+ sites=Array.new
605
+ @known_sites.keys.map do |key|
606
+ key =~ /https/i
607
+ sites.push(key)
608
+ end
609
+ sites.sort!
610
+ return sites
611
+ rescue Exception => ee
612
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
613
+ return nil
654
614
  end
655
615
 
656
616
  # Retrieve a list of redirection URLs from the site store
657
617
  def get_redirection_urls
658
618
  puts "getter to retrieve all the redirection URLs from the site store." if @verbose
659
- begin
660
- urls=Array.new
661
- @known_sites.keys.map do |key|
662
- unless @known_sites[key]['redirection'].nil?
663
- urls.push(@known_sites[key]['redirection'])
664
- end
619
+ urls=Array.new
620
+ @known_sites.keys.map do |key|
621
+ unless @known_sites[key]['redirection'].nil?
622
+ urls.push(@known_sites[key]['redirection'])
665
623
  end
666
- urls.sort!
667
- return urls
668
- rescue Exception => ee
669
- puts "Exception on method #{__method__}: #{ee}" if @verbose
670
- return nil
671
624
  end
625
+ urls.sort!
626
+ return urls
627
+ rescue Exception => ee
628
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
629
+ return nil
672
630
  end
673
631
 
674
632
  # Retrieve redirection URL if available
675
633
  def get_redirection_url (site)
676
634
  puts "getter to retrieve the redirection URL from the site store." if @verbose
677
- begin
678
- site=site.strip.downcase
679
- if @known_sites.key?(site)
680
- return @known_sites[site]['redirection']
681
- else
682
- puts "Unknown site: #{site}" if @verbose
683
- return nil
684
- end
685
- rescue Exception => ee
686
- puts "Exception on method #{__method__}: #{ee}" if @verbose
635
+ site=site.strip.downcase
636
+ if @known_sites.key?(site)
637
+ return @known_sites[site]['redirection']
638
+ else
639
+ puts "Unknown site: #{site}" if @verbose
687
640
  return nil
688
641
  end
642
+ rescue Exception => ee
643
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
644
+ return nil
689
645
  end
690
646
 
691
647
  # Perform local host table reverse lookup for the IP sites, in hope that the hostname could now be resolved since the site was discovered
692
648
  def resolve_ip_sites
693
649
  puts "Resolve sites that contain an IP address. Update the site cache table once a hostname is found in the local host table." if @verbose
694
- begin
695
- updates=Array.new
696
- sites=get_ip_sites
697
- my_tracker=Wmap::HostTracker.instance
698
- my_tracker.data_dir=@data_dir
699
- sites.map do |site|
700
- puts "Work on resolve the IP site: #{site}" if @verbose
701
- ip=url_2_host(site)
702
- hostname=my_tracker.local_ip_2_host(ip)
703
- if hostname.nil?
704
- puts "Can't resolve #{ip} from the local host store. Skip #{site}" if @verbose
705
- else
706
- puts "Host-name found for IP #{ip}: #{hostname}" if @verbose
707
- updates.push(site)
708
- refresh(site)
709
- end
650
+ updates=Array.new
651
+ sites=get_ip_sites
652
+ my_tracker=Wmap::HostTracker.instance
653
+ my_tracker.data_dir=@data_dir
654
+ sites.map do |site|
655
+ puts "Work on resolve the IP site: #{site}" if @verbose
656
+ ip=url_2_host(site)
657
+ hostname=my_tracker.local_ip_2_host(ip)
658
+ if hostname.nil?
659
+ puts "Can't resolve #{ip} from the local host store. Skip #{site}" if @verbose
660
+ else
661
+ puts "Host-name found for IP #{ip}: #{hostname}" if @verbose
662
+ updates.push(site)
663
+ refresh(site)
710
664
  end
711
- updates.sort!
712
- puts "The following sites are now refreshed: #{updates}" if @verbose
713
- my_tracker=nil
714
- return updates
715
- rescue Exception => ee
716
- puts "Exception on method #{__method__}: #{ee}" if @verbose
717
665
  end
666
+ updates.sort!
667
+ puts "The following sites are now refreshed: #{updates}" if @verbose
668
+ my_tracker=nil
669
+ return updates
670
+ rescue Exception => ee
671
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
718
672
  end
719
673
 
720
674
  # Search potential matching sites from the site store by using simple regular expression. Note that any upper-case char in the search string will be automatically converted into lower case
721
675
  def search (pattern)
722
676
  puts "Search site store based on the regular expression: #{pattern}" if @verbose
723
- begin
724
- pattern=pattern.strip.downcase
725
- results=Array.new
726
- @known_sites.keys.map do |key|
727
- if key =~ /#{pattern}/i
728
- results.push(key)
729
- end
677
+ pattern=pattern.strip.downcase
678
+ results=Array.new
679
+ @known_sites.keys.map do |key|
680
+ if key =~ /#{pattern}/i
681
+ results.push(key)
730
682
  end
731
- return results
732
- rescue Exception => ee
733
- puts "Exception on method search: #{ee}" if @verbose
734
- return nil
735
683
  end
684
+ return results
685
+ rescue Exception => ee
686
+ puts "Exception on method search: #{ee}" if @verbose
687
+ return nil
736
688
  end
737
689
 
738
690
  # Print summary report on all sites that contain an IP in the site URL
@@ -741,26 +693,26 @@ class Wmap::SiteTracker
741
693
  sites=get_ip_sites
742
694
  sites.map { |x| puts x }
743
695
  puts "End of report. "
696
+ rescue => ee
697
+ puts "Exception on method #{__method__} "
744
698
  end
745
699
 
746
700
  # Retrieve and print specific information of a site in the site store
747
701
  def print_site(site)
748
702
  puts "Site Information Report for: #{site}" if @verbose
749
- begin
750
- site=site.strip unless site.nil?
751
- raise "Unknown site: #{site}" unless @known_sites.key?(site)
752
- ip=@known_sites[site]['ip']
753
- port=@known_sites[site]['port']
754
- status=@known_sites[site]['status']
755
- server=@known_sites[site]['server']
756
- fp=@known_sites[site]['md5']
757
- loc=@known_sites[site]['redirection']
758
- res=@known_sites[site]['code']
759
- timestamp=@known_sites[site]['timestamp']
760
- puts "#{site},#{ip},#{port},#{status},#{server},#{res},#{fp},#{loc},#{timestamp}"
761
- rescue => ee
762
- puts "Exception on method #{__method__} for #{site}: #{ee}"
763
- end
703
+ site=site.strip unless site.nil?
704
+ raise "Unknown site: #{site}" unless @known_sites.key?(site)
705
+ ip=@known_sites[site]['ip']
706
+ port=@known_sites[site]['port']
707
+ status=@known_sites[site]['status']
708
+ server=@known_sites[site]['server']
709
+ fp=@known_sites[site]['md5']
710
+ loc=@known_sites[site]['redirection']
711
+ res=@known_sites[site]['code']
712
+ timestamp=@known_sites[site]['timestamp']
713
+ puts "#{site},#{ip},#{port},#{status},#{server},#{res},#{fp},#{loc},#{timestamp}"
714
+ rescue => ee
715
+ puts "Exception on method #{__method__} for #{site}: #{ee}"
764
716
  end
765
717
  alias_method :print, :print_site
766
718
 
@@ -772,142 +724,136 @@ class Wmap::SiteTracker
772
724
  sites.each do |site|
773
725
  puts site
774
726
  end
775
-
776
727
  puts "End of the summary"
777
- #return sites
728
+ rescue => ee
729
+ puts "Exception on method #{__method__} "
778
730
  end
779
731
  alias_method :print_all, :print_all_sites
780
732
 
781
733
  # Retrieve and save unique sites information for the quarterly scan into a plain local file
782
734
  def save_uniq_sites(file)
783
735
  puts "Save unique sites information into a flat file: #{file}\nThis may take a long while as it go through a lengthy self correction check process, please be patient ..."
784
- begin
785
- prime_sites=get_prim_uniq_sites
786
- puts "Primary Sites: #{prime_sites}" if @verbose
787
- f=File.open(file,"w")
788
- f.write "Unique Sites Information Report\n"
789
- f.write "Site, IP, Port, Server, Hosting, Response Code, MD5, Redirect, Timestamps\n"
790
- prime_sites.map do |key|
791
- next if key.nil?
792
- site=key.strip
793
- raise "Unknown site: #{site}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\wadd #{site}\n" unless @known_sites.key?(site)
794
- ip=@known_sites[site]['ip']
795
- port=@known_sites[site]['port']
796
- status=@known_sites[site]['status']
797
- server=@known_sites[site]['server']
798
- fp=@known_sites[site]['md5']
799
- loc=@known_sites[site]['redirection']
800
- res=@known_sites[site]['code']
801
- timestamp=@known_sites[site]['timestamp']
802
- f.write "#{site},#{ip},#{port},#{server},#{status},#{res},#{fp},#{loc},#{timestamp}\n"
803
- end
804
- f.close
805
- puts "Done!"
806
- return true # success
807
- rescue => ee
808
- puts "Exception on method #{__method__}: #{ee}"
809
- return false # fail
736
+ prime_sites=get_prim_uniq_sites
737
+ puts "Primary Sites: #{prime_sites}" if @verbose
738
+ f=File.open(file,"w")
739
+ f.write "Unique Sites Information Report\n"
740
+ f.write "Site, IP, Port, Server, Hosting, Response Code, MD5, Redirect, Timestamps\n"
741
+ prime_sites.map do |key|
742
+ next if key.nil?
743
+ site=key.strip
744
+ raise "Unknown site: #{site}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\wadd #{site}\n" unless @known_sites.key?(site)
745
+ ip=@known_sites[site]['ip']
746
+ port=@known_sites[site]['port']
747
+ status=@known_sites[site]['status']
748
+ server=@known_sites[site]['server']
749
+ fp=@known_sites[site]['md5']
750
+ loc=@known_sites[site]['redirection']
751
+ res=@known_sites[site]['code']
752
+ timestamp=@known_sites[site]['timestamp']
753
+ f.write "#{site},#{ip},#{port},#{server},#{status},#{res},#{fp},#{loc},#{timestamp}\n"
810
754
  end
755
+ f.close
756
+ puts "Done!"
757
+ return true # success
758
+ rescue => ee
759
+ puts "Exception on method #{__method__}: #{ee}"
760
+ return false # fail
811
761
  end
812
762
  alias_method :dump, :save_uniq_sites
813
763
 
814
764
  # Retrieve and save unique sites information for the quarterly scan into a XML file
815
765
  def save_uniq_sites_xml(file)
816
766
  puts "Save unique sites information into XML file: #{file}\nThis may take a long while as it go through lengthy self correctness check, please be patient ..."
817
- begin
818
- prime_sites=get_prim_uniq_sites
819
- builder = Nokogiri::XML::Builder.new do |xml|
820
- xml.root {
821
- xml.websites {
822
- prime_sites.each do |key|
823
- next if key.nil?
824
- site=key.strip
825
- raise "Unknown site: #{site}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\twmap #{site}\n" unless @known_sites.key?(site)
826
- xml.site {
827
- xml.name site
828
- xml.ip_ @known_sites[site]['ip']
829
- xml.port_ @known_sites[site]['port']
830
- xml.status_ @known_sites[site]['status']
831
- xml.server_ @known_sites[site]['server']
832
- xml.fingerprint_ @known_sites[site]['md5']
833
- xml.redirection_ @known_sites[site]['redirection']
834
- xml.responsecode_ @known_sites[site]['code']
835
- xml.timestamp_ @known_sites[site]['timestamp']
836
- }
837
- end
838
- }
767
+ prime_sites=get_prim_uniq_sites
768
+ builder = Nokogiri::XML::Builder.new do |xml|
769
+ xml.root {
770
+ xml.websites {
771
+ prime_sites.each do |key|
772
+ next if key.nil?
773
+ site=key.strip
774
+ raise "Unknown site: #{site}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\twmap #{site}\n" unless @known_sites.key?(site)
775
+ xml.site {
776
+ xml.name site
777
+ xml.ip_ @known_sites[site]['ip']
778
+ xml.port_ @known_sites[site]['port']
779
+ xml.status_ @known_sites[site]['status']
780
+ xml.server_ @known_sites[site]['server']
781
+ xml.fingerprint_ @known_sites[site]['md5']
782
+ xml.redirection_ @known_sites[site]['redirection']
783
+ xml.responsecode_ @known_sites[site]['code']
784
+ xml.timestamp_ @known_sites[site]['timestamp']
785
+ }
786
+ end
839
787
  }
840
- end
841
- puts builder.to_xml if @verbose
842
- f=File.new(file,'w')
843
- f.write(builder.to_xml)
844
- f.close
845
- puts "Done!"
846
- return true
847
- rescue => ee
848
- puts "Exception on method #{__method__}: #{ee}"
849
- return false
850
- end
788
+ }
789
+ end
790
+ puts builder.to_xml if @verbose
791
+ f=File.new(file,'w')
792
+ f.write(builder.to_xml)
793
+ f.close
794
+ puts "Done!"
795
+ return true
796
+ rescue => ee
797
+ puts "Exception on method #{__method__}: #{ee}"
798
+ return false
851
799
  end
852
800
  alias_method :dump_xml, :save_uniq_sites_xml
853
801
 
854
802
  # Retrieve the unique sites from the local site store in the primary host format
855
803
  def get_prim_uniq_sites
856
804
  puts "Retrieve and prime unique sites in the site store. " if @verbose
857
- #begin
858
- host_tracker=Wmap::HostTracker.instance
859
- host_tracker.data_dir=@data_dir
860
- primary_host_tracker=Wmap::HostTracker::PrimaryHost.instance
861
- primary_host_tracker.data_dir=@data_dir
862
- # Step 1. Retrieve the unique site list first
863
- sites=get_uniq_sites
864
- prim_uniq_sites=Array.new
865
- # Step 2. Iterate on the unique site list, spit out the site in the primary host format one at a time
866
- sites.map do |site|
867
- puts "Work on priming unique site: #{site}" if @verbose
868
- host=url_2_host(site)
869
- # case#1, for the IP only site, do nothing (presuming 'refresh_ip_sites' or 'refresh_all' method already take care of the potential discrepancy here).
870
- if is_ip?(host)
871
- prim_uniq_sites.push(site)
872
- next
873
- end
874
- ip=@known_sites[site]['ip']
875
- # case#2, for site with an unique IP, do nothing
876
- puts "Local hosts table entry count for #{ip}: #{host_tracker.alias[ip]}" if @verbose
877
- if host_tracker.alias[ip] == 1
878
- prim_uniq_sites.push(site)
879
- next
880
- end
881
- # case#3, case of multiple IPs for A DNS record, where the site IP may have 0 alias count, do nothing
882
- if host_tracker.alias[ip] == nil
883
- prim_uniq_sites.push(site)
884
- next
885
- end
886
- # case#4, for the site has a duplicate IP with others, we try to determine which one is the primary site
887
- # raise "Error: inconsistency detected on record: #{site}. Please run the following shell command to refresh it first: \n\srefresh #{site}" if tracker1.alias[ip].nil?
888
- if ( primary_host_tracker.known_hosts.key?(ip) and (host_tracker.alias[ip] > 1) )
889
- new_host=primary_host_tracker.prime(host)
890
- puts "Host: #{host}, New host:#{new_host}" if @verbose
891
- unless host==new_host
892
- new_site=site.sub(host,new_host)
893
- raise "Site not found in the site tracking data repository: #{new_site}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\twadd #{new_site}\n" unless @known_sites.key?(new_site)
894
- new_ip=@known_sites[new_site]['ip']
895
- if new_ip==ip # consistency check
896
- site=new_site
897
- else
898
- # TBD - case of multiple IPs for A DNS record
899
- #raise "Inconsistency found on prime host entrance: #{new_ip}, #{ip}; #{new_site}, #{site}. Please refresh your entries by running the following shell command: \n\s refresh #{new_site}"
900
- end
805
+ host_tracker=Wmap::HostTracker.instance
806
+ host_tracker.data_dir=@data_dir
807
+ primary_host_tracker=Wmap::HostTracker::PrimaryHost.instance
808
+ primary_host_tracker.data_dir=@data_dir
809
+ # Step 1. Retrieve the unique site list first
810
+ sites=get_uniq_sites
811
+ prim_uniq_sites=Array.new
812
+ # Step 2. Iterate on the unique site list, spit out the site in the primary host format one at a time
813
+ sites.map do |site|
814
+ puts "Work on priming unique site: #{site}" if @verbose
815
+ host=url_2_host(site)
816
+ # case#1, for the IP only site, do nothing (presuming 'refresh_ip_sites' or 'refresh_all' method already take care of the potential discrepancy here).
817
+ if is_ip?(host)
818
+ prim_uniq_sites.push(site)
819
+ next
820
+ end
821
+ ip=@known_sites[site]['ip']
822
+ # case#2, for site with an unique IP, do nothing
823
+ puts "Local hosts table entry count for #{ip}: #{host_tracker.alias[ip]}" if @verbose
824
+ if host_tracker.alias[ip] == 1
825
+ prim_uniq_sites.push(site)
826
+ next
827
+ end
828
+ # case#3, case of multiple IPs for A DNS record, where the site IP may have 0 alias count, do nothing
829
+ if host_tracker.alias[ip] == nil
830
+ prim_uniq_sites.push(site)
831
+ next
832
+ end
833
+ # case#4, for the site has a duplicate IP with others, we try to determine which one is the primary site
834
+ # raise "Error: inconsistency detected on record: #{site}. Please run the following shell command to refresh it first: \n\srefresh #{site}" if tracker1.alias[ip].nil?
835
+ if ( primary_host_tracker.known_hosts.key?(ip) and (host_tracker.alias[ip] > 1) )
836
+ new_host=primary_host_tracker.prime(host)
837
+ puts "Host: #{host}, New host:#{new_host}" if @verbose
838
+ unless host==new_host
839
+ new_site=site.sub(host,new_host)
840
+ raise "Site not found in the site tracking data repository: #{new_site}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\twadd #{new_site}\n" unless @known_sites.key?(new_site)
841
+ new_ip=@known_sites[new_site]['ip']
842
+ if new_ip==ip # consistency check
843
+ site=new_site
844
+ else
845
+ # TBD - case of multiple IPs for A DNS record
846
+ #raise "Inconsistency found on prime host entrance: #{new_ip}, #{ip}; #{new_site}, #{site}. Please refresh your entries by running the following shell command: \n\s refresh #{new_site}"
901
847
  end
902
848
  end
903
- prim_uniq_sites.push(site)
904
849
  end
905
- primary_host_tracker=nil
906
- host_tracker=nil
907
- return prim_uniq_sites
908
- #rescue => ee
909
- # puts "Exception on method #{__method__}: #{ee}"
910
- #end
850
+ prim_uniq_sites.push(site)
851
+ end
852
+ primary_host_tracker=nil
853
+ host_tracker=nil
854
+ return prim_uniq_sites
855
+ #rescue => ee
856
+ # puts "Exception on method #{__method__}: #{ee}"
911
857
  end
912
858
  alias_method :get_prime, :get_prim_uniq_sites
913
859