wmap 2.5.5 → 2.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,204 +15,176 @@ class Wmap::SiteTracker
15
15
  include Wmap::Utils
16
16
  include Singleton
17
17
 
18
- attr_accessor :sites_file, :max_parallel, :verbose, :data_dir
19
- attr_reader :known_sites
18
+ attr_accessor :sites_file, :max_parallel, :verbose, :data_dir, :known_sites
20
19
 
21
20
  # Set default instance variables
22
21
  def initialize (params = {})
23
22
  # Initialize the instance variables
24
23
  @data_dir=params.fetch(:data_dir, File.dirname(__FILE__)+'/../../data/')
25
24
  Dir.mkdir(@data_dir) unless Dir.exist?(@data_dir)
26
- @file_sites=@data_dir+'sites'
27
- @file_stores=params.fetch(:sites_file, @file_sites)
25
+ @sites_file=params.fetch(:sites_file, @data_dir+'sites')
28
26
  @verbose=params.fetch(:verbose, false)
29
27
  @max_parallel=params.fetch(:max_parallel, 30)
28
+ File.write(@sites_file, "") unless File.exist?(@sites_file)
30
29
  # Hash table to hold the site store
31
- File.write(@file_stores, "") unless File.exist?(@file_stores)
32
- @known_sites=load_site_stores_from_file(@file_stores)
30
+ load_site_stores_from_file(@sites_file)
33
31
  end
34
32
 
35
33
  # Setter to load the known hosts into an instance variable
36
- def load_site_stores_from_file (file)
34
+ def load_site_stores_from_file (file=@sites_file)
37
35
  puts "Loading the site store data repository from file: #{file} " if @verbose
38
- begin
39
- known_sites=Hash.new
40
- f=File.open(file, 'r')
41
- f.each do |line|
42
- line=line.chomp.strip
43
- next if line.nil?
44
- next if line.empty?
45
- next if line =~ /^\s*#/
46
- entry=line.split(%r{\t+|\,})
47
- site=entry[0].downcase
48
- ip=entry[1]
49
- port=entry[2]
50
- status=entry[3]
51
- server=entry[4]
52
- res=entry[5].to_i
53
- fp=entry[6]
54
- loc=entry[7]
55
- timestamp=entry[8]
56
- puts "Loading entry: #{site} - #{ip} - #{status}" if @verbose
57
- known_sites[site]= Hash.new unless known_sites.key?(site)
58
- known_sites[site]['ip']=ip
59
- known_sites[site]['port']=port
60
- known_sites[site]['status']=status
61
- known_sites[site]['server']=server
62
- known_sites[site]['code']=res
63
- known_sites[site]['md5']=fp
64
- known_sites[site]['redirection']=loc
65
- known_sites[site]['timestamp']=timestamp
66
- end
67
- f.close
68
- puts "Successfully loading file: #{file}" if @verbose
69
- return known_sites
70
- rescue => ee
71
- puts "Exception on method #{__method__} for file #{file}: #{ee}"
72
- end
36
+ @known_sites=Hash.new
37
+ f=File.open(file, 'r')
38
+ f.each do |line|
39
+ line=line.chomp.strip
40
+ next if line.nil?
41
+ next if line.empty?
42
+ next if line =~ /^\s*#/
43
+ entry=line.split(%r{\t+|\,})
44
+ site=entry[0].downcase
45
+ ip=entry[1]
46
+ port=entry[2]
47
+ status=entry[3]
48
+ server=entry[4]
49
+ res=entry[5].to_i
50
+ fp=entry[6]
51
+ loc=entry[7]
52
+ timestamp=entry[8]
53
+ puts "Loading entry: #{site} - #{ip} - #{status}" if @verbose
54
+ @known_sites[site]= Hash.new unless @known_sites.key?(site)
55
+ @known_sites[site]['ip']=ip
56
+ @known_sites[site]['port']=port
57
+ @known_sites[site]['status']=status
58
+ @known_sites[site]['server']=server
59
+ @known_sites[site]['code']=res
60
+ @known_sites[site]['md5']=fp
61
+ @known_sites[site]['redirection']=loc
62
+ @known_sites[site]['timestamp']=timestamp
63
+ end
64
+ f.close
65
+ puts "Successfully loading file: #{file}" if @verbose
66
+ return @known_sites
67
+ rescue => ee
68
+ puts "Exception on method #{__method__} for file #{file}: #{ee}"
73
69
  end
74
70
 
75
71
  # Save the current site store hash table into a file
76
- def save_sites_to_file!(file_sites=@file_stores)
72
+ def save_sites_to_file!(file_sites=@sites_file)
77
73
  puts "Saving the current site store table from memory to file: #{file_sites}"
78
- begin
79
- timestamp=Time.now
80
- f=File.open(file_sites, 'w')
81
- f.write "# Local site store created by class #{self.class} method #{__method__} at: #{timestamp}\n"
82
- f.write "# Website,Primary IP,Port,Hosting Status,Server,Response Code,MD5 Finger-print,Redirection,Timestamp\n"
83
- @known_sites.keys.sort.map do |key|
84
- f.write "#{key},#{@known_sites[key]['ip']},#{@known_sites[key]['port']},#{@known_sites[key]['status']},#{@known_sites[key]['server']},#{@known_sites[key]['code']},#{@known_sites[key]['md5']},#{@known_sites[key]['redirection']},#{@known_sites[key]['timestamp']}\n"
85
- end
86
- f.close
87
- puts "site store table is successfully saved: #{file_sites}"
88
- rescue => ee
89
- puts "Exception on method #{__method__}: #{ee}"
90
- end
74
+ timestamp=Time.now
75
+ f=File.open(file_sites, 'w')
76
+ f.write "# Local site store created by class #{self.class} method #{__method__} at: #{timestamp}\n"
77
+ f.write "# Website,Primary IP,Port,Hosting Status,Server,Response Code,MD5 Finger-print,Redirection,Timestamp\n"
78
+ @known_sites.keys.sort.map do |key|
79
+ f.write "#{key},#{@known_sites[key]['ip']},#{@known_sites[key]['port']},#{@known_sites[key]['status']},#{@known_sites[key]['server']},#{@known_sites[key]['code']},#{@known_sites[key]['md5']},#{@known_sites[key]['redirection']},#{@known_sites[key]['timestamp']}\n"
80
+ end
81
+ f.close
82
+ puts "site store table is successfully saved: #{file_sites}"
83
+ rescue => ee
84
+ puts "Exception on method #{__method__}: #{ee}"
91
85
  end
92
86
  alias_method :save!, :save_sites_to_file!
93
87
 
94
88
  # Count numbers of entries in the site store table
95
89
  def count
96
90
  puts "Counting number of entries in the site store table ..."
97
- begin
98
- return @known_sites.size
99
- rescue => ee
100
- puts "Exception on method #{__method__}: #{ee}"
101
- end
91
+ return @known_sites.size
92
+ rescue => ee
93
+ puts "Exception on method #{__method__}: #{ee}"
102
94
  end
103
95
 
104
96
  # Setter to add site entry to the cache one at a time
105
97
  def add(site)
106
- begin
107
- puts "Add entry to the site store: #{site}"
108
- # Preliminary sanity check
109
- site=site.strip.downcase unless site.nil?
110
- if site_known?(site)
111
- puts "Site already exists. Skip it: #{site}"
112
- return nil
98
+ puts "Add entry to the site store: #{site}"
99
+ # Preliminary sanity check
100
+ site=site.strip.downcase unless site.nil?
101
+ if site_known?(site)
102
+ puts "Site already exists. Skip it: #{site}"
103
+ return nil
104
+ end
105
+ site=normalize_url(site) if is_url?(site)
106
+ site=url_2_site(site) if is_url?(site)
107
+ puts "Site in standard format: #{site}" if @verbose
108
+ raise "Exception on method #{__method__}: invalid site format of #{site}. Expected format is: http://your_website_name/" unless is_site?(site)
109
+ trusted=false
110
+ host=url_2_host(site)
111
+ ip=host_2_ip(host)
112
+ # Additional logic to refresh deactivated site, 02/12/2014
113
+ deact=Wmap::SiteTracker::DeactivatedSite.instance
114
+ deact.sites_file=@data_dir + 'deactivated_sites'
115
+ File.write(deact.sites_file, "") unless File.exist?(deact.sites_file)
116
+ deact.load_site_stores_from_file
117
+ # only trust either the domain or IP we know
118
+ if is_ip?(host)
119
+ trusted=Wmap::CidrTracker.new(:data_dir=>@data_dir).ip_trusted?(ip)
120
+ else
121
+ root=get_domain_root(host)
122
+ if root.nil?
123
+ raise "Invalid web site format. Please check your record again."
124
+ else
125
+ domain_tracker=Wmap::DomainTracker.instance
126
+ domain_tracker.domains_file=@data_dir+'domains'
127
+ File.write(domain_tracker.domains_file, "") unless File.exist?(domain_tracker.domains_file)
128
+ domain_tracker.load_domains_from_file
129
+ trusted=domain_tracker.domain_known?(root)
130
+ domain_tracker=nil
113
131
  end
114
- site=normalize_url(site) if is_url?(site)
115
- site=url_2_site(site) if is_url?(site)
116
- puts "Site in standard format: #{site}" if @verbose
117
- raise "Exception on method #{__method__}: invalid site format of #{site}. Expected format is: http://your_website_name/" unless is_site?(site)
118
- trusted=false
119
- host=url_2_host(site)
120
- ip=host_2_ip(host)
121
- # Additional logic to refresh deactivated site, 02/12/2014
122
- deact=Wmap::SiteTracker::DeactivatedSite.instance
123
- deact.data_dir=@data_dir
124
- # only trust either the domain or IP we know
125
- if is_ip?(host)
126
- trusted=Wmap::CidrTracker.new(:data_dir=>@data_dir).ip_trusted?(ip)
132
+ end
133
+ # add record only if trusted
134
+ if trusted
135
+ # Add logic to check site status before adding it
136
+ checker=Wmap::UrlChecker.new(:data_dir=>@data_dir).check(site)
137
+ raise "Site is currently down. Skip #{site}" if checker.nil?
138
+ # Skip the http site if it's un-responsive; for the https we'll keep it because we're interested in analysing the SSL layer later
139
+ if is_https?(site)
140
+ # do nothing
127
141
  else
128
- root=get_domain_root(host)
129
- if root.nil?
130
- raise "Invalid web site format. Please check your record again."
131
- else
132
- domain_tracker=Wmap::DomainTracker.instance
133
- domain_tracker.data_dir=@data_dir
134
- trusted=domain_tracker.domain_known?(root)
135
- domain_tracker=nil
136
- end
142
+ raise "Site is currently down. Skip #{site}" if checker['code']==10000
137
143
  end
138
- # add record only if trusted
139
- if trusted
140
- # Add logic to check site status before adding it
141
- checker=Wmap::UrlChecker.new(:data_dir=>@data_dir).check(site)
142
- raise "Site is currently down. Skip #{site}" if checker.nil?
143
- # Skip the http site if it's un-responsive; for the https we'll keep it because we're interested in analysing the SSL layer later
144
- if is_https?(site)
145
- # do nothing
144
+ raise "Exception on add method - Fail to resolve the host-name: Host - #{host}, IP - #{ip}. Skip #{site}" unless is_ip?(ip)
145
+ my_tracker = Wmap::HostTracker.instance
146
+ my_tracker.data_dir=@data_dir
147
+ # Update the local host table when necessary
148
+ if is_ip?(host)
149
+ # Case #1: Trusted site contains IP
150
+ if my_tracker.ip_known?(host)
151
+ # Try local reverse DNS lookup first
152
+ puts "Local hosts table lookup for IP: #{ip}" if @verbose
153
+ host=my_tracker.local_ip_2_host(host)
154
+ puts "Host found from the local hosts table for #{ip}: #{host}" if @verbose
155
+ site.sub!(/\d+\.\d+\.\d+\.\d+/,host)
146
156
  else
147
- raise "Site is currently down. Skip #{site}" if checker['code']==10000
148
- end
149
- raise "Exception on add method - Fail to resolve the host-name: Host - #{host}, IP - #{ip}. Skip #{site}" unless is_ip?(ip)
150
- my_tracker = Wmap::HostTracker.instance
151
- my_tracker.data_dir=@data_dir
152
- # Update the local host table when necessary
153
- if is_ip?(host)
154
- # Case #1: Trusted site contains IP
155
- if my_tracker.ip_known?(host)
156
- # Try local reverse DNS lookup first
157
- puts "Local hosts table lookup for IP: #{ip}" if @verbose
158
- host=my_tracker.local_ip_2_host(host)
159
- puts "Host found from the local hosts table for #{ip}: #{host}" if @verbose
160
- site.sub!(/\d+\.\d+\.\d+\.\d+/,host)
161
- else
162
- # Try reverse DNS lookup over Internet as secondary precaution
163
- puts "Reverse DNS lookup for IP: #{ip}" if @verbose
164
- host1=ip_2_host(host)
165
- puts "host1: #{host1}" if @verbose
166
- if is_fqdn?(host1)
167
- host_tracker=Wmap::HostTracker.instance
168
- host_tracker.data_dir=@data_dir
169
- if host_tracker.domain_known?(host1)
170
- # replace IP with host-name only if domain root is known
171
- puts "Host found from the Internet reverse DNS lookup for #{ip}: #{host1}" if @verbose
172
- host=host1
173
- site.sub!(/\d+\.\d+\.\d+\.\d+/,host)
174
- end
175
- host_tracker=nil
176
- end
177
- end
178
- # Adding site for Case #1
179
- raise "Site already exist! Skip #{site}" if @known_sites.key?(site)
180
- puts "Adding site: #{site}" if @verbose
181
- @known_sites[site]=Hash.new
182
- @known_sites[site]=checker
183
- if deact.site_known?(site)
184
- deact.delete(site)
185
- deact.save!
186
- end
187
- puts "Site entry loaded: #{checker}"
188
- if is_fqdn?(host)
189
- # Add logic to update the hosts table for case #1 variance
190
- # - case that reverse DNS lookup successful
191
- puts "Update local hosts table for host: #{host}"
192
- if my_tracker.host_known?(host)
193
- old_ip=my_tracker.local_host_2_ip(host)
194
- if old_ip != ip
195
- my_tracker.refresh(host)
196
- my_tracker.save!
197
- else
198
- puts "Host resolve to the same IP #{ip} - no need to update the local host table." if @verbose
199
- end
200
- else
201
- my_tracker.add(host)
202
- my_tracker.save!
157
+ # Try reverse DNS lookup over Internet as secondary precaution
158
+ puts "Reverse DNS lookup for IP: #{ip}" if @verbose
159
+ host1=ip_2_host(host)
160
+ puts "host1: #{host1}" if @verbose
161
+ if is_fqdn?(host1)
162
+ host_tracker=Wmap::HostTracker.instance
163
+ host_tracker.data_dir=@data_dir
164
+ host_tracker.hosts_file=host_tracker.data_dir + "hosts"
165
+ host_tracker.load_known_hosts_from_file
166
+ if host_tracker.domain_known?(host1)
167
+ # replace IP with host-name only if domain root is known
168
+ puts "Host found from the Internet reverse DNS lookup for #{ip}: #{host1}" if @verbose
169
+ host=host1
170
+ site.sub!(/\d+\.\d+\.\d+\.\d+/,host)
203
171
  end
172
+ host_tracker=nil
204
173
  end
205
- else
206
- # Case #2: Trusted site contains valid FQDN
207
- puts "Ading site: #{site}" if @verbose
208
- @known_sites[site]=Hash.new
209
- @known_sites[site]=checker
210
- if deact.site_known?(site)
211
- deact.delete(site)
212
- deact.save!
213
- end
214
- puts "Site entry loaded: #{checker}"
215
- # Add logic to update the hosts table for case #2
174
+ end
175
+ # Adding site for Case #1
176
+ raise "Site already exist! Skip #{site}" if @known_sites.key?(site)
177
+ puts "Adding site: #{site}" if @verbose
178
+ @known_sites[site]=Hash.new
179
+ @known_sites[site]=checker
180
+ if deact.site_known?(site)
181
+ deact.delete(site)
182
+ deact.save!
183
+ end
184
+ puts "Site entry loaded: #{checker}"
185
+ if is_fqdn?(host)
186
+ # Add logic to update the hosts table for case #1 variance
187
+ # - case that reverse DNS lookup successful
216
188
  puts "Update local hosts table for host: #{host}"
217
189
  if my_tracker.host_known?(host)
218
190
  old_ip=my_tracker.local_host_2_ip(host)
@@ -220,232 +192,239 @@ class Wmap::SiteTracker
220
192
  my_tracker.refresh(host)
221
193
  my_tracker.save!
222
194
  else
223
- # Skip - no need to update the local hosts table
195
+ puts "Host resolve to the same IP #{ip} - no need to update the local host table." if @verbose
224
196
  end
225
197
  else
226
198
  my_tracker.add(host)
227
199
  my_tracker.save!
228
200
  end
229
201
  end
230
- deact=nil
231
- my_tracker=nil
232
- host_tracker=nil
233
- return checker
234
202
  else
235
- puts "Problem found: untrusted Internet domain or IP. Skip #{site}"
236
- deact=nil
237
- my_tracker=nil
238
- host_tracker=nil
239
- return nil
203
+ # Case #2: Trusted site contains valid FQDN
204
+ puts "Ading site: #{site}" if @verbose
205
+ @known_sites[site]=Hash.new
206
+ @known_sites[site]=checker
207
+ if deact.site_known?(site)
208
+ deact.delete(site)
209
+ deact.save!
210
+ end
211
+ puts "Site entry loaded: #{checker}"
212
+ # Add logic to update the hosts table for case #2
213
+ puts "Update local hosts table for host: #{host}"
214
+ if my_tracker.host_known?(host)
215
+ old_ip=my_tracker.local_host_2_ip(host)
216
+ if old_ip != ip
217
+ my_tracker.refresh(host)
218
+ my_tracker.save!
219
+ else
220
+ # Skip - no need to update the local hosts table
221
+ end
222
+ else
223
+ my_tracker.add(host)
224
+ my_tracker.save!
225
+ end
240
226
  end
241
- rescue => ee
242
- puts "Exception on method #{__method__}: #{ee}"
243
- checker=nil
244
227
  deact=nil
228
+ my_tracker=nil
229
+ host_tracker=nil
230
+ return site
231
+ else
232
+ puts "Problem found: untrusted Internet domain or IP. Skip #{site}"
233
+ deact=nil
234
+ my_tracker=nil
245
235
  host_tracker=nil
246
236
  return nil
247
237
  end
238
+ #rescue => ee
239
+ # puts "Exception on method #{__method__}: #{ee}"
240
+ # checker=nil
241
+ # deact=nil
242
+ # host_tracker=nil
243
+ # return nil
248
244
  end
249
245
 
250
246
  # Setter to add site entry to the cache table in batch (from a file)
251
247
  def file_add(file)
252
248
  puts "Add entries to the local site store from file: #{file}"
253
- begin
254
- raise "File non-exist. Please check your file path and name again: #{file}" unless File.exist?(file)
255
- changes=Hash.new
256
- sites=file_2_list(file)
257
- changes=bulk_add(sites) unless sites.nil? or sites.empty?
258
- puts "Done loading file #{file}. "
259
- return changes
260
- rescue => ee
261
- puts "Exception on method #{__method__}: #{ee}"
262
- end
249
+ raise "File non-exist. Please check your file path and name again: #{file}" unless File.exist?(file)
250
+ changes=Hash.new
251
+ sites=file_2_list(file)
252
+ changes=bulk_add(sites) unless sites.nil? or sites.empty?
253
+ puts "Done loading file #{file}. "
254
+ return changes
255
+ rescue => ee
256
+ puts "Exception on method #{__method__}: #{ee}"
263
257
  end
264
258
 
265
259
  # Setter to add site entry to the cache in batch (from a list)
266
260
  def bulk_add(list,num=@max_parallel)
267
261
  puts "Add entries to the local site store from list:\n #{list}"
268
- #begin
269
- results=Hash.new
270
- if list.size > 0
271
- puts "Start parallel adding on the sites:\n #{list}"
272
- Parallel.map(list, :in_processes => num) { |target|
273
- add(target)
274
- }.each do |process|
275
- if process.nil?
276
- next
277
- elsif process.empty?
278
- #do nothing
279
- else
280
- results[process['url']]=Hash.new
281
- results[process['url']]=process
282
- end
262
+ results=Hash.new
263
+ if list.size > 0
264
+ puts "Start parallel adding on the sites:\n #{list}"
265
+ Parallel.map(list, :in_processes => num) { |target|
266
+ add(target)
267
+ }.each do |process|
268
+ if process.nil?
269
+ next
270
+ elsif process.empty?
271
+ next #do nothing
272
+ else
273
+ results[process['url']]=Hash.new
274
+ results[process['url']]=process
283
275
  end
284
- @known_sites.merge!(results)
285
- else
286
- puts "Error: no entry is added. Please check your list and try again."
287
276
  end
288
- puts "Done adding site entries."
289
- if results.size>0
290
- puts "New entries added: #{results}"
291
- else
292
- puts "No new entry added. "
293
- end
294
- return results
295
- #rescue => ee
296
- #puts "Exception on method #{__method__}: #{ee}" if @verbose
297
- #end
277
+ @known_sites.merge!(results)
278
+ else
279
+ puts "Error: no entry is added. Please check your list and try again."
280
+ end
281
+ puts "Done adding site entries."
282
+ if results.size>0
283
+ puts "New entries added: #{results}"
284
+ else
285
+ puts "No new entry added. "
286
+ end
287
+ return results
288
+ #rescue => ee
289
+ #puts "Exception on method #{__method__}: #{ee}" if @verbose
298
290
  end
299
291
  alias_method :adds, :bulk_add
300
292
 
301
293
  # Setter to remove entry from the site store one at a time
302
294
  def delete(site)
303
295
  puts "Remove entry from the site store: #{site} " if @verbose
304
- begin
305
- # Additional logic to deactivate the site properly, by moving it to the DeactivatedSite list, 02/07/2014
306
- deact=Wmap::SiteTracker::DeactivatedSite.instance
307
- deact.data_dir=@data_dir
308
- site=site.strip.downcase
309
- site=url_2_site(site)
310
- if @known_sites.key?(site)
311
- site_info=@known_sites[site]
312
- deact.add(site,site_info)
313
- deact.save!
314
- deact=nil
315
- del=@known_sites.delete(site)
316
- puts "Entry cleared: #{site}"
317
- return del
318
- else
319
- puts "Entry not fund. Skip #{site}"
320
- deact=nil
321
- return nil
322
- end
323
- rescue => ee
324
- puts "Exception on method #{__method__}: #{ee}" if @verbose
296
+ # Additional logic to deactivate the site properly, by moving it to the DeactivatedSite list, 02/07/2014
297
+ deact=Wmap::SiteTracker::DeactivatedSite.instance
298
+ deact.sites_file=@data_dir + 'deactivated_sites'
299
+ File.write(deact.sites_file, "") unless File.exist?(deact.sites_file)
300
+ site=site.strip.downcase
301
+ site=url_2_site(site)
302
+ if @known_sites.key?(site)
303
+ site_info=@known_sites[site]
304
+ deact.add(site,site_info)
305
+ deact.save!
325
306
  deact=nil
307
+ del=@known_sites.delete(site)
308
+ puts "Entry cleared: #{site}"
309
+ return del
310
+ else
311
+ puts "Entry not fund. Skip #{site}"
312
+ deact=nil
313
+ return nil
326
314
  end
315
+ rescue => ee
316
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
317
+ deact=nil
327
318
  end
328
319
  alias_method :del, :delete
329
320
 
330
321
  # Setter to delete site entry to the cache in batch (from a file)
331
322
  def file_delete(file)
332
- begin
333
- puts "Delete entries to the local site store from file: #{file}" if @verbose
334
- raise "File non-exist. Please check your file path and name again: #{file}" unless File.exist?(file)
335
- sites=file_2_list(file)
336
- changes=Array.new
337
- changes=bulk_delete(sites) unless sites.nil? or sites.empty?
338
- rescue => ee
339
- puts "Exception on method file_delete: #{ee} for file: #{file}" if @verbose
340
- end
323
+ puts "Delete entries to the local site store from file: #{file}" if @verbose
324
+ raise "File non-exist. Please check your file path and name again: #{file}" unless File.exist?(file)
325
+ sites=file_2_list(file)
326
+ changes=Array.new
327
+ changes=bulk_delete(sites) unless sites.nil? or sites.empty?
328
+ rescue => ee
329
+ puts "Exception on method file_delete: #{ee} for file: #{file}" if @verbose
341
330
  end
342
331
  alias_method :file_del, :file_delete
343
332
 
344
333
  # Setter to delete site entry to the cache in batch (from a list)
345
334
  def bulk_delete(list)
346
335
  puts "Delete entries to the local site store from list:\n #{list}" if @verbose
347
- begin
348
- sites=list
349
- changes=Array.new
350
- if sites.size > 0
351
- sites.map do |x|
352
- x=url_2_site(x)
353
- site=delete(x)
354
- changes.push(site) unless site.nil?
355
- end
356
- puts "Done deleting sites from the list:\n #{list}"
357
- return changes
358
- else
359
- puts "Error: no entry is loaded. Please check your list and try again."
336
+ sites=list
337
+ changes=Array.new
338
+ if sites.size > 0
339
+ sites.map do |x|
340
+ x=url_2_site(x)
341
+ site=delete(x)
342
+ changes.push(site) unless site.nil?
360
343
  end
361
- rescue => ee
362
- puts "Exception on method #{__method__}: #{ee}" if @verbose
344
+ puts "Done deleting sites from the list:\n #{list}"
345
+ return changes
346
+ else
347
+ puts "Error: no entry is loaded. Please check your list and try again."
363
348
  end
349
+ rescue => ee
350
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
364
351
  end
365
352
  alias_method :dels, :bulk_delete
366
353
 
367
354
  # Setter to refresh the entry in the site store one at a time
368
355
  def refresh(site)
369
356
  puts "Refresh the local site store for site: #{site} "
370
- begin
371
- raise "Invalid site: #{site}" if site.nil? or site.empty?
372
- site=site.strip.downcase
373
- if @known_sites.key?(site)
374
- delete(site)
375
- site_info=add(site)
376
- puts "Done refresh entry: #{site}"
377
- return site_info
378
- else
379
- puts "Error entry non exist: #{site}"
380
- end
381
- return nil
382
- rescue => ee
383
- puts "Exception on method #{__method__}: #{ee}" if @verbose
384
- return nil
357
+ raise "Invalid site: #{site}" if site.nil? or site.empty?
358
+ site=site.strip.downcase
359
+ if @known_sites.key?(site)
360
+ delete(site)
361
+ site_info=add(site)
362
+ puts "Done refresh entry: #{site}"
363
+ return site_info
364
+ else
365
+ puts "Error entry non exist: #{site}"
385
366
  end
367
+ return nil
368
+ rescue => ee
369
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
370
+ return nil
386
371
  end
387
372
 
388
373
  # 'Refresh sites in the site store in batch (from a file)
389
374
  def file_refresh(file)
390
375
  puts "Refresh entries in the site store from file: #{file}" if @verbose
391
- begin
392
- changes=Hash.new
393
- sites=file_2_list(file)
394
- changes=bulk_refresh(sites) unless sites.nil? or sites.empty?
395
- return changes
396
- rescue => ee
397
- puts "Exception on method #{__method__}: #{ee} for file: #{file}" if @verbose
398
- end
376
+ changes=Hash.new
377
+ sites=file_2_list(file)
378
+ changes=bulk_refresh(sites) unless sites.nil? or sites.empty?
379
+ return changes
380
+ rescue => ee
381
+ puts "Exception on method #{__method__}: #{ee} for file: #{file}" if @verbose
399
382
  end
400
383
 
401
384
  # 'Refresh unique sites in the site store only
402
385
  def refresh_uniq_sites
403
386
  puts "Refresh unique site entries in the site store. " if @verbose
404
- begin
405
- changes=Hash.new
406
- sites=get_uniq_sites
407
- if sites.size > 0
408
- changes=bulk_refresh(sites)
409
- else
410
- puts "Error: no entry is refreshed. Please check your site store and try again."
411
- end
412
- return changes
413
- rescue => ee
414
- puts "Exception on method #{__method__}: #{ee}" if @verbose
387
+ changes=Hash.new
388
+ sites=get_uniq_sites
389
+ if sites.size > 0
390
+ changes=bulk_refresh(sites)
391
+ else
392
+ puts "Error: no entry is refreshed. Please check your site store and try again."
415
393
  end
394
+ return changes
395
+ rescue => ee
396
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
416
397
  end
417
398
 
418
399
  # 'Refresh sites in the site store in batch (from a list)
419
400
  def bulk_refresh(list,num=@max_parallel)
420
401
  puts "Refresh entries in the site store from list:\n #{list}" if @verbose
421
- begin
422
- results=Hash.new
423
- if list.size > 0
424
- puts "Start parallel refreshing on the sites:\n #{list}"
425
- Parallel.map(list, :in_processes => num) { |target|
426
- refresh(target)
427
- }.each do |process|
428
- if process.nil?
429
- next
430
- elsif process.empty?
431
- #do nothing
432
- else
433
- results[process['url']]=Hash.new
434
- results[process['url']]=process
435
- end
402
+ results=Hash.new
403
+ if list.size > 0
404
+ puts "Start parallel refreshing on the sites:\n #{list}"
405
+ Parallel.map(list, :in_processes => num) { |target|
406
+ refresh(target)
407
+ }.each do |process|
408
+ if process.nil?
409
+ next
410
+ elsif process.empty?
411
+ #do nothing
412
+ else
413
+ results[process['url']]=Hash.new
414
+ results[process['url']]=process
436
415
  end
437
- # Clean up old entries, by Y.L. 03/30/2015
438
- list.map {|x| @known_sites.delete(x)}
439
- # Add back fresh entries
440
- @known_sites.merge!(results)
441
- puts "Done refresh sites."
442
- else
443
- puts "Error: no entry is loaded. Please check your list and try again."
444
416
  end
445
- return results
446
- rescue => ee
447
- puts "Exception on method #{__method__}: #{ee}" if @verbose
448
- end
417
+ # Clean up old entries, by Y.L. 03/30/2015
418
+ list.map {|x| @known_sites.delete(x)}
419
+ # Add back fresh entries
420
+ @known_sites.merge!(results)
421
+ puts "Done refresh sites."
422
+ else
423
+ puts "Error: no entry is loaded. Please check your list and try again."
424
+ end
425
+ return results
426
+ rescue => ee
427
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
449
428
  end
450
429
  alias_method :refreshs, :bulk_refresh
451
430
 
@@ -453,286 +432,259 @@ class Wmap::SiteTracker
453
432
  # Refresh all site entries in the stores in one shot
454
433
  def refresh_all
455
434
  puts "Refresh all the entries within the local site store ... "
456
- begin
457
- changes=Hash.new
458
- changes=bulk_refresh(@known_sites.keys)
459
- @known_sites.merge!(changes)
460
- puts "Done refresh all entries."
461
- return changes
462
- rescue => ee
463
- puts "Exception on method #{__method__}: #{ee}" if @verbose
464
- end
435
+ changes=Hash.new
436
+ changes=bulk_refresh(@known_sites.keys)
437
+ @known_sites.merge!(changes)
438
+ puts "Done refresh all entries."
439
+ return changes
440
+ rescue => ee
441
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
465
442
  end
466
443
 
467
444
  # Refresh all site entries in the stores that contains an IP instead of a hostname
468
445
  def refresh_ip_sites
469
446
  puts "Refresh all entries that contain an IP address instead of a FQDN ... "
470
- begin
471
- sites=get_ip_sites
472
- live_sites=sites.delete_if { |x| @known_sites[x]['code'] == 10000 or @known_sites[x]['code'] == 20000 }
473
- changes=Hash.new
474
- changes=bulk_refresh(live_sites)
475
- @known_sites.merge!(changes)
476
- puts "Done refresh IP sites."
477
- return changes
478
- rescue => ee
479
- puts "Exception on method #{__method__}: #{ee}" if @verbose
480
- end
447
+ sites=get_ip_sites
448
+ live_sites=sites.delete_if { |x| @known_sites[x]['code'] == 10000 or @known_sites[x]['code'] == 20000 }
449
+ changes=Hash.new
450
+ changes=bulk_refresh(live_sites)
451
+ @known_sites.merge!(changes)
452
+ puts "Done refresh IP sites."
453
+ return changes
454
+ rescue => ee
455
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
481
456
  end
482
457
 
483
458
  # Quick validation if a site is already covered under the site store
484
459
  def site_known?(site)
485
- begin
486
- raise "Web site store not loaded properly! " if @known_sites.nil?
487
- site=site.strip.downcase unless site.nil?
488
- site=url_2_site(site)
489
- return @known_sites.key?(site) unless site.nil?
490
- rescue => ee
491
- puts "Error checking web site #{site} against the site store: #{ee}"
492
- end
460
+ raise "Web site store not loaded properly! " if @known_sites.nil?
461
+ site=site.strip.downcase unless site.nil?
462
+ site=url_2_site(site)
463
+ return @known_sites.key?(site) unless site.nil?
464
+ rescue => ee
465
+ puts "Error checking web site #{site} against the site store: #{ee}"
493
466
  return false
494
467
  end
495
468
  alias_method :is_known?, :site_known?
496
469
 
497
470
  # Quick validation check on an IP is already part of the site store
498
471
  def site_ip_known?(ip)
499
- begin
500
- ip=ip.chomp.strip
501
- known=false
502
- if is_ip?(ip)
503
- @known_sites.keys.map do |site|
504
- if @known_sites[site]['ip']==ip
505
- return true
506
- end
472
+ ip=ip.chomp.strip
473
+ known=false
474
+ if is_ip?(ip)
475
+ @known_sites.keys.map do |site|
476
+ if @known_sites[site]['ip']==ip
477
+ return true
507
478
  end
508
479
  end
509
- myDis=nil
510
- return known
511
- rescue => ee
512
- puts "Exception on method #{__method__}: #{ee}"
513
- return false
514
480
  end
481
+ myDis=nil
482
+ return known
483
+ rescue => ee
484
+ puts "Exception on method #{__method__}: #{ee}"
485
+ return false
515
486
  end
516
487
  alias_method :siteip_known?, :site_ip_known?
517
488
 
518
489
  # Quick check of the stored information of a site within the store
519
490
  def site_check(site)
520
- begin
521
- raise "Web site store not loaded properly! " if @known_sites.nil?
522
- site=site.strip.downcase unless site.nil?
523
- site=url_2_site(site)
524
- return @known_sites[site] unless site.nil?
525
- rescue => ee
526
- puts "Exception on method #{__method__}: #{ee}"
527
- return nil
528
- end
491
+ raise "Web site store not loaded properly! " if @known_sites.nil?
492
+ site=site.strip.downcase unless site.nil?
493
+ site=url_2_site(site)
494
+ return @known_sites[site] unless site.nil?
495
+ rescue => ee
496
+ puts "Exception on method #{__method__}: #{ee}"
497
+ return nil
529
498
  end
530
499
  alias_method :check, :site_check
531
500
 
532
501
  # Retrieve external hosted sites into a list
533
502
  def get_ext_sites
534
503
  puts "getter to retrieve all the external hosted sites. " if @verbose
535
- begin
536
- sites=Array.new
537
- @known_sites.keys.map do |key|
538
- if @known_sites[key]['status']=="ext_hosted"
539
- sites.push(key)
540
- end
504
+ sites=Array.new
505
+ @known_sites.keys.map do |key|
506
+ if @known_sites[key]['status']=="ext_hosted"
507
+ sites.push(key)
541
508
  end
542
- sites.sort!
543
- return sites
544
- rescue Exception => ee
545
- puts "Exception on method #{__method__}: #{ee}" if @verbose
546
- return nil
547
509
  end
510
+ sites.sort!
511
+ return sites
512
+ rescue Exception => ee
513
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
514
+ return nil
548
515
  end
549
516
  alias_method :get_ext, :get_ext_sites
550
517
 
551
518
  # Retrieve a list of internal hosted site URLs
552
519
  def get_int_sites
553
520
  puts "getter to retrieve all the internal hosted sites." if @verbose
554
- begin
555
- sites=Array.new
556
- @known_sites.keys.map do |key|
557
- if @known_sites[key]['status']=="int_hosted"
558
- sites.push(key)
559
- end
521
+ sites=Array.new
522
+ @known_sites.keys.map do |key|
523
+ if @known_sites[key]['status']=="int_hosted"
524
+ sites.push(key)
560
525
  end
561
- sites.sort!
562
- return sites
563
- rescue Exception => ee
564
- puts "Exception on method #{__method__}: #{ee}" if @verbose
565
- return nil
566
526
  end
527
+ sites.sort!
528
+ return sites
529
+ rescue Exception => ee
530
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
531
+ return nil
567
532
  end
568
533
  alias_method :get_int, :get_int_sites
569
534
 
570
535
  # Retrieve a list of sites that contain an IP in the site URL
571
536
  def get_ip_sites
572
537
  puts "Getter to retrieve sites contain an IP instead of a host-name ." if @verbose
573
- begin
574
- sites=Array.new
575
- @known_sites.keys.map do |key|
576
- host=url_2_host(key)
577
- if is_ip?(host)
578
- sites.push(key)
579
- end
538
+ sites=Array.new
539
+ @known_sites.keys.map do |key|
540
+ host=url_2_host(key)
541
+ if is_ip?(host)
542
+ sites.push(key)
580
543
  end
581
- sites.sort!
582
- return sites
583
- rescue Exception => ee
584
- puts "Exception on method #{__method__}: #{ee}" if @verbose
585
- return nil
586
544
  end
545
+ sites.sort!
546
+ return sites
547
+ rescue Exception => ee
548
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
549
+ return nil
587
550
  end
588
551
 
589
552
  # Retrieve a list of unique sites within the known site store
590
553
  def get_uniq_sites
591
- puts "Getter to retrieve unique sites containing unique IP:PORT key identifier." if @verbose
592
- begin
593
- #primary_host_tracker=Wmap::HostTracker::PrimaryHost.instance
594
- sites=Hash.new
595
- #uniqueness=Hash.new
596
- my_tracker=Wmap::HostTracker.instance
597
- my_tracker.data_dir=@data_dir
598
- @known_sites.keys.map do |key|
599
- port=url_2_port(key).to_s
600
- host=url_2_host(key)
601
- md5=@known_sites[key]['md5']
602
- code=@known_sites[key]['code']
603
- ip=my_tracker.local_host_2_ip(host)
604
- ip=host_2_ip(host) if ip.nil?
605
- # filtering out 'un-reachable' sites
606
- next if (code == 10000 or code == 20000)
607
- # filtering out 'empty' sites
608
- next if (md5.nil? or md5.empty?)
609
- next if ip.nil?
610
- # url_new=key
611
- #if primary_host_tracker.ip_known?(ip)
612
- # p_host=primary_host_tracker.known_hosts[ip]
613
- # url_new=key.sub(host,p_host)
614
- #end
615
- id=ip+":"+port
616
- # filtering out duplicates by 'IP:PORT' key pair
617
- unless sites.key?(id)
618
- #if @known_sites.key?(key)
619
- # sites[id]=url_new
620
- #else
621
- # Further filtering out redundant site by checking MD5 finger-print
622
- #unless uniqueness.key?(md5)
623
- sites[id]=key
624
- # uniqueness[md5]=true
625
- #end
554
+ puts "Getter to retrieve unique sites containing unique IP:PORT key identifier." if @verbose=
555
+ #primary_host_tracker=Wmap::HostTracker::PrimaryHost.instance
556
+ sites=Hash.new
557
+ #uniqueness=Hash.new
558
+ my_tracker=Wmap::HostTracker.instance
559
+ my_tracker.hosts_file=@data_dir + 'hosts'
560
+ my_tracker.load_known_hosts_from_file
561
+ @known_sites.keys.map do |key|
562
+ port=url_2_port(key).to_s
563
+ host=url_2_host(key)
564
+ md5=@known_sites[key]['md5']
565
+ code=@known_sites[key]['code']
566
+ ip=my_tracker.local_host_2_ip(host)
567
+ ip=host_2_ip(host) if ip.nil?
568
+ # filtering out 'un-reachable' sites
569
+ next if (code == 10000 or code == 20000)
570
+ # filtering out 'empty' sites
571
+ next if (md5.nil? or md5.empty?)
572
+ next if ip.nil?
573
+ # url_new=key
574
+ #if primary_host_tracker.ip_known?(ip)
575
+ # p_host=primary_host_tracker.known_hosts[ip]
576
+ # url_new=key.sub(host,p_host)
577
+ #end
578
+ id=ip+":"+port
579
+ # filtering out duplicates by 'IP:PORT' key pair
580
+ unless sites.key?(id)
581
+ #if @known_sites.key?(key)
582
+ # sites[id]=url_new
583
+ #else
584
+ # Further filtering out redundant site by checking MD5 finger-print
585
+ #unless uniqueness.key?(md5)
586
+ sites[id]=key
587
+ # uniqueness[md5]=true
626
588
  #end
627
- end
589
+ #end
628
590
  end
629
- #primary_host_tracker=nil
630
- my_tracker=nil
631
- return sites.values
632
- rescue Exception => ee
633
- puts "Exception on method #{__method__}: #{ee}" if @verbose
634
- return nil
635
591
  end
592
+ #primary_host_tracker=nil
593
+ my_tracker=nil
594
+ return sites.values
595
+ rescue Exception => ee
596
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
597
+ return nil
636
598
  end
637
599
  alias_method :uniq_sites, :get_uniq_sites
638
600
 
639
601
  # Retrieve a list of sites that contain an IP in the site URL
640
602
  def get_ssl_sites
641
603
  puts "getter to retrieve https sites from the site store." if @verbose
642
- begin
643
- sites=Array.new
644
- @known_sites.keys.map do |key|
645
- key =~ /https/i
646
- sites.push(key)
647
- end
648
- sites.sort!
649
- return sites
650
- rescue Exception => ee
651
- puts "Exception on method #{__method__}: #{ee}" if @verbose
652
- return nil
653
- end
604
+ sites=Array.new
605
+ @known_sites.keys.map do |key|
606
+ key =~ /https/i
607
+ sites.push(key)
608
+ end
609
+ sites.sort!
610
+ return sites
611
+ rescue Exception => ee
612
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
613
+ return nil
654
614
  end
655
615
 
656
616
  # Retrieve a list of redirection URLs from the site store
657
617
  def get_redirection_urls
658
618
  puts "getter to retrieve all the redirection URLs from the site store." if @verbose
659
- begin
660
- urls=Array.new
661
- @known_sites.keys.map do |key|
662
- unless @known_sites[key]['redirection'].nil?
663
- urls.push(@known_sites[key]['redirection'])
664
- end
619
+ urls=Array.new
620
+ @known_sites.keys.map do |key|
621
+ unless @known_sites[key]['redirection'].nil?
622
+ urls.push(@known_sites[key]['redirection'])
665
623
  end
666
- urls.sort!
667
- return urls
668
- rescue Exception => ee
669
- puts "Exception on method #{__method__}: #{ee}" if @verbose
670
- return nil
671
624
  end
625
+ urls.sort!
626
+ return urls
627
+ rescue Exception => ee
628
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
629
+ return nil
672
630
  end
673
631
 
674
632
  # Retrieve redirection URL if available
675
633
  def get_redirection_url (site)
676
634
  puts "getter to retrieve the redirection URL from the site store." if @verbose
677
- begin
678
- site=site.strip.downcase
679
- if @known_sites.key?(site)
680
- return @known_sites[site]['redirection']
681
- else
682
- puts "Unknown site: #{site}" if @verbose
683
- return nil
684
- end
685
- rescue Exception => ee
686
- puts "Exception on method #{__method__}: #{ee}" if @verbose
635
+ site=site.strip.downcase
636
+ if @known_sites.key?(site)
637
+ return @known_sites[site]['redirection']
638
+ else
639
+ puts "Unknown site: #{site}" if @verbose
687
640
  return nil
688
641
  end
642
+ rescue Exception => ee
643
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
644
+ return nil
689
645
  end
690
646
 
691
647
  # Perform local host table reverse lookup for the IP sites, in hope that the hostname could now be resolved since the site was discovered
692
648
  def resolve_ip_sites
693
649
  puts "Resolve sites that contain an IP address. Update the site cache table once a hostname is found in the local host table." if @verbose
694
- begin
695
- updates=Array.new
696
- sites=get_ip_sites
697
- my_tracker=Wmap::HostTracker.instance
698
- my_tracker.data_dir=@data_dir
699
- sites.map do |site|
700
- puts "Work on resolve the IP site: #{site}" if @verbose
701
- ip=url_2_host(site)
702
- hostname=my_tracker.local_ip_2_host(ip)
703
- if hostname.nil?
704
- puts "Can't resolve #{ip} from the local host store. Skip #{site}" if @verbose
705
- else
706
- puts "Host-name found for IP #{ip}: #{hostname}" if @verbose
707
- updates.push(site)
708
- refresh(site)
709
- end
650
+ updates=Array.new
651
+ sites=get_ip_sites
652
+ my_tracker=Wmap::HostTracker.instance
653
+ my_tracker.data_dir=@data_dir
654
+ sites.map do |site|
655
+ puts "Work on resolve the IP site: #{site}" if @verbose
656
+ ip=url_2_host(site)
657
+ hostname=my_tracker.local_ip_2_host(ip)
658
+ if hostname.nil?
659
+ puts "Can't resolve #{ip} from the local host store. Skip #{site}" if @verbose
660
+ else
661
+ puts "Host-name found for IP #{ip}: #{hostname}" if @verbose
662
+ updates.push(site)
663
+ refresh(site)
710
664
  end
711
- updates.sort!
712
- puts "The following sites are now refreshed: #{updates}" if @verbose
713
- my_tracker=nil
714
- return updates
715
- rescue Exception => ee
716
- puts "Exception on method #{__method__}: #{ee}" if @verbose
717
665
  end
666
+ updates.sort!
667
+ puts "The following sites are now refreshed: #{updates}" if @verbose
668
+ my_tracker=nil
669
+ return updates
670
+ rescue Exception => ee
671
+ puts "Exception on method #{__method__}: #{ee}" if @verbose
718
672
  end
719
673
 
720
674
  # Search potential matching sites from the site store by using simple regular expression. Note that any upper-case char in the search string will be automatically converted into lower case
721
675
  def search (pattern)
722
676
  puts "Search site store based on the regular expression: #{pattern}" if @verbose
723
- begin
724
- pattern=pattern.strip.downcase
725
- results=Array.new
726
- @known_sites.keys.map do |key|
727
- if key =~ /#{pattern}/i
728
- results.push(key)
729
- end
677
+ pattern=pattern.strip.downcase
678
+ results=Array.new
679
+ @known_sites.keys.map do |key|
680
+ if key =~ /#{pattern}/i
681
+ results.push(key)
730
682
  end
731
- return results
732
- rescue Exception => ee
733
- puts "Exception on method search: #{ee}" if @verbose
734
- return nil
735
683
  end
684
+ return results
685
+ rescue Exception => ee
686
+ puts "Exception on method search: #{ee}" if @verbose
687
+ return nil
736
688
  end
737
689
 
738
690
  # Print summary report on all sites that contain an IP in the site URL
@@ -741,26 +693,26 @@ class Wmap::SiteTracker
741
693
  sites=get_ip_sites
742
694
  sites.map { |x| puts x }
743
695
  puts "End of report. "
696
+ rescue => ee
697
+ puts "Exception on method #{__method__} "
744
698
  end
745
699
 
746
700
  # Retrieve and print specific information of a site in the site store
747
701
  def print_site(site)
748
702
  puts "Site Information Report for: #{site}" if @verbose
749
- begin
750
- site=site.strip unless site.nil?
751
- raise "Unknown site: #{site}" unless @known_sites.key?(site)
752
- ip=@known_sites[site]['ip']
753
- port=@known_sites[site]['port']
754
- status=@known_sites[site]['status']
755
- server=@known_sites[site]['server']
756
- fp=@known_sites[site]['md5']
757
- loc=@known_sites[site]['redirection']
758
- res=@known_sites[site]['code']
759
- timestamp=@known_sites[site]['timestamp']
760
- puts "#{site},#{ip},#{port},#{status},#{server},#{res},#{fp},#{loc},#{timestamp}"
761
- rescue => ee
762
- puts "Exception on method #{__method__} for #{site}: #{ee}"
763
- end
703
+ site=site.strip unless site.nil?
704
+ raise "Unknown site: #{site}" unless @known_sites.key?(site)
705
+ ip=@known_sites[site]['ip']
706
+ port=@known_sites[site]['port']
707
+ status=@known_sites[site]['status']
708
+ server=@known_sites[site]['server']
709
+ fp=@known_sites[site]['md5']
710
+ loc=@known_sites[site]['redirection']
711
+ res=@known_sites[site]['code']
712
+ timestamp=@known_sites[site]['timestamp']
713
+ puts "#{site},#{ip},#{port},#{status},#{server},#{res},#{fp},#{loc},#{timestamp}"
714
+ rescue => ee
715
+ puts "Exception on method #{__method__} for #{site}: #{ee}"
764
716
  end
765
717
  alias_method :print, :print_site
766
718
 
@@ -772,142 +724,136 @@ class Wmap::SiteTracker
772
724
  sites.each do |site|
773
725
  puts site
774
726
  end
775
-
776
727
  puts "End of the summary"
777
- #return sites
728
+ rescue => ee
729
+ puts "Exception on method #{__method__} "
778
730
  end
779
731
  alias_method :print_all, :print_all_sites
780
732
 
781
733
  # Retrieve and save unique sites information for the quarterly scan into a plain local file
782
734
  def save_uniq_sites(file)
783
735
  puts "Save unique sites information into a flat file: #{file}\nThis may take a long while as it go through a lengthy self correction check process, please be patient ..."
784
- begin
785
- prime_sites=get_prim_uniq_sites
786
- puts "Primary Sites: #{prime_sites}" if @verbose
787
- f=File.open(file,"w")
788
- f.write "Unique Sites Information Report\n"
789
- f.write "Site, IP, Port, Server, Hosting, Response Code, MD5, Redirect, Timestamps\n"
790
- prime_sites.map do |key|
791
- next if key.nil?
792
- site=key.strip
793
- raise "Unknown site: #{site}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\wadd #{site}\n" unless @known_sites.key?(site)
794
- ip=@known_sites[site]['ip']
795
- port=@known_sites[site]['port']
796
- status=@known_sites[site]['status']
797
- server=@known_sites[site]['server']
798
- fp=@known_sites[site]['md5']
799
- loc=@known_sites[site]['redirection']
800
- res=@known_sites[site]['code']
801
- timestamp=@known_sites[site]['timestamp']
802
- f.write "#{site},#{ip},#{port},#{server},#{status},#{res},#{fp},#{loc},#{timestamp}\n"
803
- end
804
- f.close
805
- puts "Done!"
806
- return true # success
807
- rescue => ee
808
- puts "Exception on method #{__method__}: #{ee}"
809
- return false # fail
736
+ prime_sites=get_prim_uniq_sites
737
+ puts "Primary Sites: #{prime_sites}" if @verbose
738
+ f=File.open(file,"w")
739
+ f.write "Unique Sites Information Report\n"
740
+ f.write "Site, IP, Port, Server, Hosting, Response Code, MD5, Redirect, Timestamps\n"
741
+ prime_sites.map do |key|
742
+ next if key.nil?
743
+ site=key.strip
744
+ raise "Unknown site: #{site}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\wadd #{site}\n" unless @known_sites.key?(site)
745
+ ip=@known_sites[site]['ip']
746
+ port=@known_sites[site]['port']
747
+ status=@known_sites[site]['status']
748
+ server=@known_sites[site]['server']
749
+ fp=@known_sites[site]['md5']
750
+ loc=@known_sites[site]['redirection']
751
+ res=@known_sites[site]['code']
752
+ timestamp=@known_sites[site]['timestamp']
753
+ f.write "#{site},#{ip},#{port},#{server},#{status},#{res},#{fp},#{loc},#{timestamp}\n"
810
754
  end
755
+ f.close
756
+ puts "Done!"
757
+ return true # success
758
+ rescue => ee
759
+ puts "Exception on method #{__method__}: #{ee}"
760
+ return false # fail
811
761
  end
812
762
  alias_method :dump, :save_uniq_sites
813
763
 
814
764
  # Retrieve and save unique sites information for the quarterly scan into a XML file
815
765
  def save_uniq_sites_xml(file)
816
766
  puts "Save unique sites information into XML file: #{file}\nThis may take a long while as it go through lengthy self correctness check, please be patient ..."
817
- begin
818
- prime_sites=get_prim_uniq_sites
819
- builder = Nokogiri::XML::Builder.new do |xml|
820
- xml.root {
821
- xml.websites {
822
- prime_sites.each do |key|
823
- next if key.nil?
824
- site=key.strip
825
- raise "Unknown site: #{site}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\twmap #{site}\n" unless @known_sites.key?(site)
826
- xml.site {
827
- xml.name site
828
- xml.ip_ @known_sites[site]['ip']
829
- xml.port_ @known_sites[site]['port']
830
- xml.status_ @known_sites[site]['status']
831
- xml.server_ @known_sites[site]['server']
832
- xml.fingerprint_ @known_sites[site]['md5']
833
- xml.redirection_ @known_sites[site]['redirection']
834
- xml.responsecode_ @known_sites[site]['code']
835
- xml.timestamp_ @known_sites[site]['timestamp']
836
- }
837
- end
838
- }
767
+ prime_sites=get_prim_uniq_sites
768
+ builder = Nokogiri::XML::Builder.new do |xml|
769
+ xml.root {
770
+ xml.websites {
771
+ prime_sites.each do |key|
772
+ next if key.nil?
773
+ site=key.strip
774
+ raise "Unknown site: #{site}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\twmap #{site}\n" unless @known_sites.key?(site)
775
+ xml.site {
776
+ xml.name site
777
+ xml.ip_ @known_sites[site]['ip']
778
+ xml.port_ @known_sites[site]['port']
779
+ xml.status_ @known_sites[site]['status']
780
+ xml.server_ @known_sites[site]['server']
781
+ xml.fingerprint_ @known_sites[site]['md5']
782
+ xml.redirection_ @known_sites[site]['redirection']
783
+ xml.responsecode_ @known_sites[site]['code']
784
+ xml.timestamp_ @known_sites[site]['timestamp']
785
+ }
786
+ end
839
787
  }
840
- end
841
- puts builder.to_xml if @verbose
842
- f=File.new(file,'w')
843
- f.write(builder.to_xml)
844
- f.close
845
- puts "Done!"
846
- return true
847
- rescue => ee
848
- puts "Exception on method #{__method__}: #{ee}"
849
- return false
850
- end
788
+ }
789
+ end
790
+ puts builder.to_xml if @verbose
791
+ f=File.new(file,'w')
792
+ f.write(builder.to_xml)
793
+ f.close
794
+ puts "Done!"
795
+ return true
796
+ rescue => ee
797
+ puts "Exception on method #{__method__}: #{ee}"
798
+ return false
851
799
  end
852
800
  alias_method :dump_xml, :save_uniq_sites_xml
853
801
 
854
802
  # Retrieve the unique sites from the local site store in the primary host format
855
803
  def get_prim_uniq_sites
856
804
  puts "Retrieve and prime unique sites in the site store. " if @verbose
857
- #begin
858
- host_tracker=Wmap::HostTracker.instance
859
- host_tracker.data_dir=@data_dir
860
- primary_host_tracker=Wmap::HostTracker::PrimaryHost.instance
861
- primary_host_tracker.data_dir=@data_dir
862
- # Step 1. Retrieve the unique site list first
863
- sites=get_uniq_sites
864
- prim_uniq_sites=Array.new
865
- # Step 2. Iterate on the unique site list, spit out the site in the primary host format one at a time
866
- sites.map do |site|
867
- puts "Work on priming unique site: #{site}" if @verbose
868
- host=url_2_host(site)
869
- # case#1, for the IP only site, do nothing (presuming 'refresh_ip_sites' or 'refresh_all' method already take care of the potential discrepancy here).
870
- if is_ip?(host)
871
- prim_uniq_sites.push(site)
872
- next
873
- end
874
- ip=@known_sites[site]['ip']
875
- # case#2, for site with an unique IP, do nothing
876
- puts "Local hosts table entry count for #{ip}: #{host_tracker.alias[ip]}" if @verbose
877
- if host_tracker.alias[ip] == 1
878
- prim_uniq_sites.push(site)
879
- next
880
- end
881
- # case#3, case of multiple IPs for A DNS record, where the site IP may have 0 alias count, do nothing
882
- if host_tracker.alias[ip] == nil
883
- prim_uniq_sites.push(site)
884
- next
885
- end
886
- # case#4, for the site has a duplicate IP with others, we try to determine which one is the primary site
887
- # raise "Error: inconsistency detected on record: #{site}. Please run the following shell command to refresh it first: \n\srefresh #{site}" if tracker1.alias[ip].nil?
888
- if ( primary_host_tracker.known_hosts.key?(ip) and (host_tracker.alias[ip] > 1) )
889
- new_host=primary_host_tracker.prime(host)
890
- puts "Host: #{host}, New host:#{new_host}" if @verbose
891
- unless host==new_host
892
- new_site=site.sub(host,new_host)
893
- raise "Site not found in the site tracking data repository: #{new_site}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\twadd #{new_site}\n" unless @known_sites.key?(new_site)
894
- new_ip=@known_sites[new_site]['ip']
895
- if new_ip==ip # consistency check
896
- site=new_site
897
- else
898
- # TBD - case of multiple IPs for A DNS record
899
- #raise "Inconsistency found on prime host entrance: #{new_ip}, #{ip}; #{new_site}, #{site}. Please refresh your entries by running the following shell command: \n\s refresh #{new_site}"
900
- end
805
+ host_tracker=Wmap::HostTracker.instance
806
+ host_tracker.data_dir=@data_dir
807
+ primary_host_tracker=Wmap::HostTracker::PrimaryHost.instance
808
+ primary_host_tracker.data_dir=@data_dir
809
+ # Step 1. Retrieve the unique site list first
810
+ sites=get_uniq_sites
811
+ prim_uniq_sites=Array.new
812
+ # Step 2. Iterate on the unique site list, spit out the site in the primary host format one at a time
813
+ sites.map do |site|
814
+ puts "Work on priming unique site: #{site}" if @verbose
815
+ host=url_2_host(site)
816
+ # case#1, for the IP only site, do nothing (presuming 'refresh_ip_sites' or 'refresh_all' method already take care of the potential discrepancy here).
817
+ if is_ip?(host)
818
+ prim_uniq_sites.push(site)
819
+ next
820
+ end
821
+ ip=@known_sites[site]['ip']
822
+ # case#2, for site with an unique IP, do nothing
823
+ puts "Local hosts table entry count for #{ip}: #{host_tracker.alias[ip]}" if @verbose
824
+ if host_tracker.alias[ip] == 1
825
+ prim_uniq_sites.push(site)
826
+ next
827
+ end
828
+ # case#3, case of multiple IPs for A DNS record, where the site IP may have 0 alias count, do nothing
829
+ if host_tracker.alias[ip] == nil
830
+ prim_uniq_sites.push(site)
831
+ next
832
+ end
833
+ # case#4, for the site has a duplicate IP with others, we try to determine which one is the primary site
834
+ # raise "Error: inconsistency detected on record: #{site}. Please run the following shell command to refresh it first: \n\srefresh #{site}" if tracker1.alias[ip].nil?
835
+ if ( primary_host_tracker.known_hosts.key?(ip) and (host_tracker.alias[ip] > 1) )
836
+ new_host=primary_host_tracker.prime(host)
837
+ puts "Host: #{host}, New host:#{new_host}" if @verbose
838
+ unless host==new_host
839
+ new_site=site.sub(host,new_host)
840
+ raise "Site not found in the site tracking data repository: #{new_site}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\twadd #{new_site}\n" unless @known_sites.key?(new_site)
841
+ new_ip=@known_sites[new_site]['ip']
842
+ if new_ip==ip # consistency check
843
+ site=new_site
844
+ else
845
+ # TBD - case of multiple IPs for A DNS record
846
+ #raise "Inconsistency found on prime host entrance: #{new_ip}, #{ip}; #{new_site}, #{site}. Please refresh your entries by running the following shell command: \n\s refresh #{new_site}"
901
847
  end
902
848
  end
903
- prim_uniq_sites.push(site)
904
849
  end
905
- primary_host_tracker=nil
906
- host_tracker=nil
907
- return prim_uniq_sites
908
- #rescue => ee
909
- # puts "Exception on method #{__method__}: #{ee}"
910
- #end
850
+ prim_uniq_sites.push(site)
851
+ end
852
+ primary_host_tracker=nil
853
+ host_tracker=nil
854
+ return prim_uniq_sites
855
+ #rescue => ee
856
+ # puts "Exception on method #{__method__}: #{ee}"
911
857
  end
912
858
  alias_method :get_prime, :get_prim_uniq_sites
913
859