wmap 2.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +141 -0
  3. data/LICENSE.txt +15 -0
  4. data/README.rdoc +98 -0
  5. data/TODO +13 -0
  6. data/bin/deprime +21 -0
  7. data/bin/distrust +38 -0
  8. data/bin/googleBot +23 -0
  9. data/bin/prime +21 -0
  10. data/bin/refresh +26 -0
  11. data/bin/run_tests +16 -0
  12. data/bin/spiderBot +26 -0
  13. data/bin/trust +38 -0
  14. data/bin/updateAll +57 -0
  15. data/bin/wadd +25 -0
  16. data/bin/wadds +26 -0
  17. data/bin/wcheck +28 -0
  18. data/bin/wdel +25 -0
  19. data/bin/wdump +21 -0
  20. data/bin/wmap +151 -0
  21. data/bin/wscan +32 -0
  22. data/data/cidrs +2 -0
  23. data/data/deactivated_sites +1 -0
  24. data/data/domains +2 -0
  25. data/data/hosts +1 -0
  26. data/data/prime_hosts +1 -0
  27. data/data/sites +2 -0
  28. data/data/sub_domains +2 -0
  29. data/demos/bruter.rb +27 -0
  30. data/demos/dns_brutes.rb +28 -0
  31. data/demos/filter_cidr.rb +18 -0
  32. data/demos/filter_crawls.rb +5 -0
  33. data/demos/filter_domain.rb +25 -0
  34. data/demos/filter_geoip.rb +26 -0
  35. data/demos/filter_known_services.rb +59 -0
  36. data/demos/filter_netinfo.rb +23 -0
  37. data/demos/filter_prime.rb +25 -0
  38. data/demos/filter_profiler.rb +3 -0
  39. data/demos/filter_redirection.rb +19 -0
  40. data/demos/filter_site.rb +40 -0
  41. data/demos/filter_siteip.rb +31 -0
  42. data/demos/filter_status.rb +17 -0
  43. data/demos/filter_timestamp.rb +23 -0
  44. data/demos/filter_url.rb +19 -0
  45. data/demos/new_fnd.rb +66 -0
  46. data/demos/nmap_parser.pl +138 -0
  47. data/demos/site_format.rb +18 -0
  48. data/demos/whois_domain.rb +78 -0
  49. data/dicts/GeoIP.dat +0 -0
  50. data/dicts/GeoIPASNum.dat +0 -0
  51. data/dicts/GeoLiteCity.dat +0 -0
  52. data/dicts/ccsld.txt +2646 -0
  53. data/dicts/cctld.txt +243 -0
  54. data/dicts/gtld.txt +25 -0
  55. data/dicts/hostnames-dict.big +1402 -0
  56. data/dicts/hostnames-dict.txt +101 -0
  57. data/lib/wmap/cidr_tracker.rb +327 -0
  58. data/lib/wmap/dns_bruter.rb +308 -0
  59. data/lib/wmap/domain_tracker/sub_domain.rb +142 -0
  60. data/lib/wmap/domain_tracker.rb +342 -0
  61. data/lib/wmap/geoip_tracker.rb +72 -0
  62. data/lib/wmap/google_search_scraper.rb +177 -0
  63. data/lib/wmap/host_tracker/primary_host.rb +130 -0
  64. data/lib/wmap/host_tracker.rb +550 -0
  65. data/lib/wmap/network_profiler.rb +144 -0
  66. data/lib/wmap/port_scanner.rb +208 -0
  67. data/lib/wmap/site_tracker/deactivated_site.rb +85 -0
  68. data/lib/wmap/site_tracker.rb +937 -0
  69. data/lib/wmap/url_checker.rb +314 -0
  70. data/lib/wmap/url_crawler.rb +381 -0
  71. data/lib/wmap/utils/domain_root.rb +184 -0
  72. data/lib/wmap/utils/logger.rb +53 -0
  73. data/lib/wmap/utils/url_magic.rb +343 -0
  74. data/lib/wmap/utils/utils.rb +333 -0
  75. data/lib/wmap/whois.rb +76 -0
  76. data/lib/wmap.rb +227 -0
  77. data/logs/wmap.log +17 -0
  78. data/ruby_whois_patches/base_cocca2.rb +149 -0
  79. data/ruby_whois_patches/kero.yachay.pe.rb +120 -0
  80. data/ruby_whois_patches/whois.PublicDomainRegistry.com.rb +124 -0
  81. data/ruby_whois_patches/whois.above.com.rb +61 -0
  82. data/ruby_whois_patches/whois.adamsnames.tc.rb +107 -0
  83. data/ruby_whois_patches/whois.aeda.net.ae.rb +105 -0
  84. data/ruby_whois_patches/whois.ai.rb +112 -0
  85. data/ruby_whois_patches/whois.arnes.si.rb +121 -0
  86. data/ruby_whois_patches/whois.ascio.com.rb +91 -0
  87. data/ruby_whois_patches/whois.cnnic.cn.rb +123 -0
  88. data/ruby_whois_patches/whois.corporatedomains.com.rb +67 -0
  89. data/ruby_whois_patches/whois.crsnic.net.rb +108 -0
  90. data/ruby_whois_patches/whois.denic.de.rb +174 -0
  91. data/ruby_whois_patches/whois.dk-hostmaster.dk.rb +120 -0
  92. data/ruby_whois_patches/whois.dns.be.rb +134 -0
  93. data/ruby_whois_patches/whois.dns.lu.rb +129 -0
  94. data/ruby_whois_patches/whois.dns.pl.rb +150 -0
  95. data/ruby_whois_patches/whois.dns.pt.rb +119 -0
  96. data/ruby_whois_patches/whois.domain.kg.rb +126 -0
  97. data/ruby_whois_patches/whois.domainregistry.my.rb +123 -0
  98. data/ruby_whois_patches/whois.domreg.lt.rb +110 -0
  99. data/ruby_whois_patches/whois.dot.tk.rb +140 -0
  100. data/ruby_whois_patches/whois.hkirc.hk.rb +121 -0
  101. data/ruby_whois_patches/whois.isnic.is.rb +130 -0
  102. data/ruby_whois_patches/whois.je.rb +119 -0
  103. data/ruby_whois_patches/whois.jprs.jp.rb +137 -0
  104. data/ruby_whois_patches/whois.kenic.or.ke.rb +140 -0
  105. data/ruby_whois_patches/whois.markmonitor.com.rb +118 -0
  106. data/ruby_whois_patches/whois.melbourneit.com.rb +58 -0
  107. data/ruby_whois_patches/whois.nic.as.rb +96 -0
  108. data/ruby_whois_patches/whois.nic.at.rb +109 -0
  109. data/ruby_whois_patches/whois.nic.ch.rb +141 -0
  110. data/ruby_whois_patches/whois.nic.cl.rb +117 -0
  111. data/ruby_whois_patches/whois.nic.ec.rb +157 -0
  112. data/ruby_whois_patches/whois.nic.im.rb +120 -0
  113. data/ruby_whois_patches/whois.nic.it.rb +170 -0
  114. data/ruby_whois_patches/whois.nic.lv.rb +116 -0
  115. data/ruby_whois_patches/whois.nic.ly.rb +127 -0
  116. data/ruby_whois_patches/whois.nic.mu.rb +27 -0
  117. data/ruby_whois_patches/whois.nic.mx.rb +123 -0
  118. data/ruby_whois_patches/whois.nic.net.sa.rb +111 -0
  119. data/ruby_whois_patches/whois.nic.or.kr.rb +101 -0
  120. data/ruby_whois_patches/whois.nic.tel.rb +129 -0
  121. data/ruby_whois_patches/whois.nic.tr.rb +133 -0
  122. data/ruby_whois_patches/whois.nic.us.rb +129 -0
  123. data/ruby_whois_patches/whois.nic.ve.rb +135 -0
  124. data/ruby_whois_patches/whois.norid.no.rb +127 -0
  125. data/ruby_whois_patches/whois.pandi.or.id.rb +118 -0
  126. data/ruby_whois_patches/whois.psi-usa.info.rb +63 -0
  127. data/ruby_whois_patches/whois.registro.br.rb +109 -0
  128. data/ruby_whois_patches/whois.registrygate.com.rb +55 -0
  129. data/ruby_whois_patches/whois.rrpproxy.net.rb +61 -0
  130. data/ruby_whois_patches/whois.sgnic.sg.rb +130 -0
  131. data/ruby_whois_patches/whois.srs.net.nz.rb +166 -0
  132. data/ruby_whois_patches/whois.tucows.com.rb +70 -0
  133. data/ruby_whois_patches/whois.twnic.net.tw.rb +133 -0
  134. data/settings/discovery_ports +24 -0
  135. data/settings/google_keywords.txt +9 -0
  136. data/settings/google_locator.txt +23 -0
  137. data/test/domain_tracker_test.rb +31 -0
  138. data/test/utils_test.rb +168 -0
  139. data/version.txt +13 -0
  140. data/wmap.gemspec +49 -0
  141. metadata +202 -0
@@ -0,0 +1,130 @@
1
+ #--
2
+ # Wmap
3
+ #
4
+ # A pure Ruby library for Internet web application discovery and tracking.
5
+ #
6
+ # Copyright (c) 2012-2015 Yang Li <yang.li@owasp.org>
7
+ #++
8
+ #require "singleton" # Implement singleton pattern to avoid race condition under parallel engine
9
+
10
+
11
+ module Wmap
12
+ class HostTracker
13
+
14
+ # Class to differentiate the primary host-name from the potential aliases. This is needed in order to minimize the confusion on our final site inventory list, as it contains a large number of duplicates (aliases). More specifically, a filter could be built by using this class to track the primary url of a website.
15
+ class PrimaryHost < Wmap::HostTracker
16
+ include Wmap::Utils
17
+ include Singleton
18
+
19
+ attr_accessor :hosts_file, :verbose, :data_dir
20
+ attr_reader :known_hosts, :known_ips
21
+
22
+ # Initialize the instance variables
23
# Build the primary-host table from the 'prime_hosts' data file.
#
# params - Hash of options:
#   :verbose    - print diagnostic messages (default: false)
#   :data_dir   - directory holding the data files, with trailing slash
#                 (default: the 'data/' directory three levels above this file)
#   :hosts_file - file to load the table from (default: <data_dir>prime_hosts)
def initialize(params = {})
  @verbose = params.fetch(:verbose, false)
  @data_dir = params.fetch(:data_dir, File.dirname(__FILE__) + '/../../../data/')
  @file_hosts = @data_dir + 'prime_hosts'
  # Back the public hosts_file accessor, which was previously never assigned.
  @hosts_file = params.fetch(:hosts_file, @file_hosts)
  # Create an empty default store on first use so later saves have a target.
  File.write(@file_hosts, "") unless File.exist?(@file_hosts)
  @known_hosts = load_known_hosts_from_file(@hosts_file)
  @known_ips = {}
  de_duplicate
end
35
+
36
+ # Procedures to identify primary host-name from the site store SSL certificates. The assumption is that the CN used in the cert application must be primary hostname and used by the users.
37
# Add the certificate CN of every SSL site in the site store to the
# primary-host table, then save the table.
#
# Sites whose host is already in @known_hosts are skipped before the
# certificate is pulled. Only CNs that pass is_fqdn? and are not yet known
# are added.
#
# Returns the result of save!, or nil when a StandardError is raised.
# Interrupt and SystemExit are deliberately no longer swallowed, so a long
# scan can be aborted.
def update_from_site_store!
  puts "Invoke internal procedures to update the primary host-name table from the site store."
  checker = Wmap::UrlChecker.new(:data_dir=>@data_dir)
  my_tracker = Wmap::SiteTracker.new(:data_dir=>@data_dir)
  # Step 1 - collect the distinct CNs (hash keys de-duplicate them)
  cns = {}
  my_tracker.get_ssl_sites.each do |site|
    puts "Exam SSL enabled site entry #{site} ..."
    next if @known_hosts.key?(url_2_host(site))
    puts "Pull SSL cert details on site: #{site}"
    cn = checker.get_cert_cn(site)
    cns[cn] = true unless cn.nil?
  end
  cns.each_key do |cn|
    next unless is_fqdn?(cn)
    next if @known_hosts.key?(cn)
    add(cn)
    puts "New entry added: #{cn}\t#{@known_hosts[cn]}"
  end
  # Step 2 - save the cache into the file
  save!
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  nil
end
72
+ alias_method :update!, :update_from_site_store!
73
+
74
+ # Procedures to identify primary host-name from the site store redirection URLs. The assumption is that on site redirection, it must be directed to the well known primary site.
75
# Add the hosts of the site store's redirection URLs to the primary-host
# table, then save the table.
#
# A redirection target is added only when it is a URL, its host passes
# is_fqdn?, and the IP it resolves to is not already a key of @known_hosts.
#
# Returns the result of save!, or nil when a StandardError is raised.
# Interrupt and SystemExit are no longer swallowed.
def update_from_site_redirections!
  puts "Invoke internal procedures to update the primary host-name table from the site store."
  urls = Wmap::SiteTracker.new(:data_dir=>@data_dir).get_redirection_urls
  urls.each do |url|
    next unless is_url?(url)
    host = url_2_host(url)
    next unless is_fqdn?(host)
    # Duplication check: skip hosts whose IP already has a primary entry
    ip = host_2_ip(host)
    add(host) unless @known_hosts.key?(ip)
  end
  save!
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  nil
end
97
+
98
+ # Procedures to remove the redundant entries in the primary hosts data repository
99
# Keep one host-name per IP in @known_hosts and drop the remaining aliases.
#
# @known_hosts stores both host => ip and ip => host entries. Only the
# host => ip entries are examined; the original also walked the reverse
# entries, which filled @known_ips with host-names and could delete reverse
# entries. The first host seen for an IP is kept, and the reverse entry is
# re-pointed at it so that prime returns a host that still exists.
#
# Returns an Array of the alias host-names that were removed.
def de_duplicate
  removed = []
  @known_hosts.keys.each do |key|
    # Skip nil keys and the ip => host reverse entries
    next if key.nil? || key =~ /\A\d+\.\d+\.\d+\.\d+\z/
    ip = @known_hosts[key]
    if @known_ips.key?(ip)
      @known_hosts.delete(key)
      removed << key
    else
      @known_ips[ip] = true
      @known_hosts[ip] = key unless ip.nil?
    end
  end
  removed
end
109
+ alias_method :deduplicate, :de_duplicate
110
+
111
+ # Method to replace hostname with known primary hostname
112
# Replace a host-name with the known primary host-name for its IP.
#
# host - host-name String; must pass is_fqdn?
#
# The IP is taken from the local repository first and resolved through
# host_2_ip only when the local lookup has no answer.
#
# Returns the primary host-name recorded for that IP, or the input host when
# the IP is not tracked, when no reverse entry exists for it, or when a
# StandardError is raised (including a non-FQDN input).
def prime(host)
  raise "Unknown hostname format: #{host}" unless is_fqdn?(host)
  ip = local_host_2_ip(host)
  ip = host_2_ip(host) if ip.nil?
  return host unless @known_ips.key?(ip)
  # Fall back to the input rather than returning nil for a missing reverse entry
  @known_hosts[ip] || host
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  host
end
126
+
127
+ end
128
+
129
+ end
130
+ end
@@ -0,0 +1,550 @@
1
+ #--
2
+ # Wmap
3
+ #
4
+ # A pure Ruby library for Internet web application discovery and tracking.
5
+ #
6
+ # Copyright (c) 2012-2015 Yang Li <yang.li@owasp.org>
7
+ #++
8
+ require "parallel"
9
+ require "singleton" # Implement singleton pattern to avoid race condition under parallel engine
10
+
11
+
12
+ # Class to handle the local host data repository file where lists of known hosts from discovery and past assessment efforts are stored
13
+ class Wmap::HostTracker
14
+ #include Singleton
15
+ include Wmap::Utils
16
+
17
+ attr_accessor :hosts_file, :max_parallel, :verbose, :data_dir
18
+ attr_reader :known_hosts, :alias
19
+
20
+ # Instance default variables
21
# Build the host tracker from the local 'hosts' data file.
#
# params - Hash of options:
#   :verbose      - print diagnostic messages (default: false)
#   :data_dir     - directory holding the data files, with trailing slash
#                   (default: the 'data/' directory two levels above this file)
#   :hosts_file   - file to load the table from (default: <data_dir>hosts)
#   :max_parallel - default worker count used by bulk_add (default: 40)
def initialize(params = {})
  @verbose = params.fetch(:verbose, false)
  @data_dir = params.fetch(:data_dir, File.dirname(__FILE__) + '/../../data/')
  @file_hosts = @data_dir + 'hosts'
  # Back the public hosts_file accessor, which was previously never assigned.
  @hosts_file = params.fetch(:hosts_file, @file_hosts)
  @max_parallel = params.fetch(:max_parallel, 40)
  # Create an empty default store on first use so later saves have a target.
  File.write(@file_hosts, "") unless File.exist?(@file_hosts)
  @known_hosts = load_known_hosts_from_file(@hosts_file)
end
32
+
33
+ # Setter to load the known hosts from the local hosts file into a class instance
34
# Load the host table from a hosts file.
#
# f_hosts - path of the file to read (default: @file_hosts)
#
# Every line containing a dotted-quad is split on whitespace and/or commas;
# the first field is the (lower-cased) host and the second its IP. Both
# host => ip and ip => host entries are recorded, and @alias counts how many
# lines mention each IP. Comment lines and lines with fewer than two fields
# are skipped so they cannot create '#' or nil keys.
#
# Returns the Hash built so far; a read error is reported and the partial
# (possibly empty) table is returned.
def load_known_hosts_from_file(f_hosts=@file_hosts)
  puts "Loading local hosts from file: #{f_hosts} ..." if @verbose
  known_hosts = {}
  @alias = {}
  begin
    # File.foreach closes the handle even when the block raises
    File.foreach(f_hosts) do |line|
      next if line.lstrip.start_with?('#')
      next unless line =~ /\d+\.\d+\.\d+\.\d+/
      entry = line.strip.split(/[\s,]+/)
      next if entry.size < 2
      key = entry[0].downcase
      value = entry[1]
      puts "Loading value pair: #{key} - #{value}" if @verbose
      known_hosts[key] = value
      # For reverse host lookup
      known_hosts[value] = key
      # Count the number of aliases for the recorded IP
      @alias[value] = @alias.fetch(value, 0) + 1
    end
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}"
  end
  known_hosts
end
65
+
66
+ # Save the current local hosts hash table into a (random) data repository file
67
# Write the host => ip entries of @known_hosts to a hosts file.
#
# f_hosts - path of the file to write (default: @file_hosts)
#
# The file starts with a '#' header line followed by one "host<TAB>ip" line
# per host-name, sorted by host-name. The ip => host reverse entries and nil
# keys are not written. The content is built before the file is opened, so a
# failure while building it can no longer leave a truncated, header-only
# file behind.
#
# Returns nil; errors are reported on stdout.
def save_known_hosts_to_file!(f_hosts=@file_hosts)
  puts "Saving the local host repository from memory to file: #{f_hosts} ..."
  # Anchored match: only keys that ARE an IP are reverse entries
  hostnames = @known_hosts.keys.reject { |key| key.nil? || key =~ /\A\d+\.\d+\.\d+\.\d+\z/ }.sort
  lines = ["# local hosts file created by the #{self.class} class #{__method__} method at: #{Time.now}"]
  hostnames.each { |key| lines << "#{key}\t#{@known_hosts[key]}" }
  File.write(f_hosts, lines.join("\n"))
  puts "local host repository is successfully saved to: #{f_hosts}"
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
end
84
+ alias_method :save!, :save_known_hosts_to_file!
85
+
86
+ # Count numbers of entries in the local host repository
87
# Count the host-name entries in the local host repository.
#
# The ip => host reverse entries are not counted, and neither are nil/blank
# keys (consistent with get_root_domains and get_a_records).
#
# Returns the Integer count, or nil when a StandardError is raised.
def count
  puts "Counting number of entries in the local host repository ..."
  cnt = (@known_hosts.keys - ["", nil]).count { |key| !is_ip?(key) }
  puts "Current number of entries: #{cnt}"
  cnt
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
end
102
+
103
+ # Setter to add a host entry to the cache, one at a time
104
# Add one host to the in-memory host repository.
#
# host - host-name String (stripped and lower-cased before use)
#
# The host is added only when it is not yet known, resolves to an IP through
# host_2_ip, and its root domain is known to Wmap::DomainTracker. When the
# host's sub-domain differs from the root and is not yet tracked, it is also
# added to the Wmap::DomainTracker::SubDomain store and saved.
#
# Returns the Hash {host => ip, ip => host} that was merged into
# @known_hosts, or nil when nothing was added or a StandardError was raised.
def add(host)
  puts "Add entry to the local host repository: #{host}"
  host = host.strip.downcase unless host.nil?
  if @known_hosts.key?(host)
    puts "Host is already exist. Skip: #{host}"
    return nil
  end
  ip = host_2_ip(host)
  unless is_ip?(ip)
    puts "Problem resolve host #{host} - unknown IP: #{ip}"
    return nil
  end
  # Filter host to known domains only
  root = get_domain_root(host)
  unless Wmap::DomainTracker.new(:data_dir=>@data_dir).domain_known?(root)
    puts "Error - host #{host} has an untrusted internet root domain: #{root}\nPlease update the trusted domain seeds file first if necessary."
    return nil
  end
  record = { host => ip, ip => host }
  puts "Host data repository entry loaded: #{host} <=> #{ip}"
  # Keep the sub-domain table in step with the host table
  sub = get_sub_domain(host)
  if sub != root
    tracker = Wmap::DomainTracker::SubDomain.new(:data_dir=>@data_dir)
    unless tracker.domain_known?(sub)
      tracker.add(sub)
      tracker.save!
    end
  end
  @known_hosts.merge!(record)
  record
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  nil
end
144
+
145
+ # Setter to add host entry to the local hosts in batch (from an array)
146
# Add a list of hosts to the repository through Parallel worker processes.
#
# list - Array of host-name Strings
# num  - number of worker processes (default: @max_parallel)
#
# Each host is passed to add in a worker; the non-empty records returned by
# the workers are merged into @known_hosts in the parent.
#
# Returns a Hash of all added host <=> ip entries (empty for a nil or empty
# list, which is reported as an error), or nil when a StandardError is raised.
def bulk_add(list, num=@max_parallel)
  puts "Add entries to the local host repository: #{list}"
  results = {}
  if list.nil? || list.empty?
    puts "Error: empty list - no entry is loaded. Please check your input list and try again."
    return results
  end
  puts "Start parallel host update processing on:\n #{list}" if @verbose
  records = Parallel.map(list, :in_processes => num) { |target| add(target) }
  records.each do |record|
    results.merge!(record) unless record.nil? || record.empty?
  end
  @known_hosts.merge!(results)
  puts "Done loading entries."
  results
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
end
174
+ alias_method :adds, :bulk_add
175
+
176
+ # 'setter' to add host entry to the local hosts in batch (from a file)
177
# Add every host listed in a file to the repository.
#
# file - path of a file read through file_2_list
#
# Returns whatever bulk_add returns for the listed hosts, or nil when the
# file does not exist or a StandardError is raised. A missing file is an
# expected condition, so it is reported directly (same message as before)
# instead of being raised and rescued.
def file_add(file)
  puts "Add entries to the local host repository from file: #{file}"
  unless File.exist?(file)
    puts "Exception on method #{__method__}: File non-exist. Please check your file path and name again: #{file}"
    return nil
  end
  bulk_add(file_2_list(file))
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
end
188
+
189
+ # 'setter' to remove entry from the local hosts one at a time
190
# Remove one host from the in-memory host repository.
#
# host - host-name String (stripped and lower-cased before the lookup)
#
# The ip => host reverse entry is kept consistent: when it pointed at the
# deleted host it is re-pointed at another host that still maps to the same
# IP, or removed when no such host is left. Previously it was left dangling,
# so ip_known? and local_ip_2_host kept reporting the deleted host.
#
# Returns the normalized host-name when an entry was removed, otherwise nil
# (unknown host, nil input or any other StandardError).
def delete(host)
  puts "Remove entry from the local host repository: #{host} "
  host = host.strip.downcase
  unless @known_hosts.key?(host)
    puts "Entry not fund. Skip: #{host}"
    return nil
  end
  ip = @known_hosts.delete(host)
  # Only tidy the reverse entry when a host (value is an IP) was deleted
  if ip.to_s =~ /\A\d+\.\d+\.\d+\.\d+\z/ && @known_hosts[ip] == host
    survivor = @known_hosts.key(ip)
    if survivor.nil?
      @known_hosts.delete(ip)
    else
      @known_hosts[ip] = survivor
    end
  end
  puts "Entry cleared."
  host
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
end
205
+
206
+ # 'setter' to delete host entry to the cache in batch (from an array)
207
# Remove a list of hosts from the in-memory host repository.
#
# list - Array of host-name Strings
#
# Returns an Array of the host-names that delete actually removed, or nil
# for a nil/empty list (reported as an error) or when a StandardError is
# raised.
def bulk_delete(list)
  puts "Delete entries to the local host repository from:\n #{list}"
  if list.nil? || list.empty?
    puts "Error: empty list - no entry is loaded. Please check your list and try again."
    return nil
  end
  # delete returns nil for entries it did not remove
  changes = list.map { |x| delete(x) }.compact
  puts "Done deleting hosts."
  changes
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
end
226
+ alias_method :dels, :bulk_delete
227
+
228
+ # Setter to delete host entries in the local hosts in batch (from a file)
229
# Remove every host listed in a file from the repository.
#
# file - path of a file read through file_2_list
#
# Returns whatever bulk_delete returns for the listed hosts, or nil when the
# file does not exist or a StandardError is raised. A missing file is
# reported directly (same message as before) instead of being raised and
# rescued.
def file_delete(file)
  puts "Delete the local host repository entries from file: #{file}"
  unless File.exist?(file)
    puts "Exception on method #{__method__}: File non-exist. Please check your file path and name again: #{file}"
    return nil
  end
  changes = bulk_delete(file_2_list(file))
  puts "Delete done."
  changes
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
end
241
+
242
+ # Setter to refresh the entry from the cache one at a time
243
# Re-resolve one known host and update its entry.
#
# host - host-name String (stripped and lower-cased before the lookup)
#
# When the host resolves to a different IP the entry and its reverse entry
# are updated; when it no longer resolves the entry is removed. In both
# cases the reverse entry of the old IP is now cleaned up as well (re-pointed
# at another host that still maps to it, or removed), where it used to be
# left pointing at this host.
#
# Returns the host-name when the entry changed or was removed, nil when
# nothing changed, the host is unknown, or a StandardError is raised.
def refresh(host)
  puts "Refresh the local host repository for host: #{host} "
  host = host.strip.downcase
  unless @known_hosts.key?(host)
    puts "Error entry non exist: #{host}"
    return nil
  end
  old_ip = @known_hosts[host]
  new_ip = host_2_ip(host)
  # Must run after this host's own entry stopped mapping to old_ip
  release_old_ip = lambda do
    next unless @known_hosts[old_ip] == host
    heir = @known_hosts.key(old_ip)
    if heir.nil?
      @known_hosts.delete(old_ip)
    else
      @known_hosts[old_ip] = heir
    end
  end
  unless is_ip?(new_ip)
    puts "Host can no longer be resolved in the Internet. Entry removed: #{host}\t#{@known_hosts[host]}"
    @known_hosts.delete(host)
    release_old_ip.call
    return host
  end
  if old_ip == new_ip
    puts "No change for the host entry: #{host}\t#{old_ip}"
    return nil
  end
  @known_hosts[host] = new_ip
  release_old_ip.call
  @known_hosts[new_ip] = host
  puts "Entry refreshed: #{host}\t#{@known_hosts[host]}"
  host
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
end
272
+
273
+ # Refresh all the entries in the local hosts by querying the Internet
274
# Re-resolve every host-name in the repository in one shot.
#
# The table is emptied and rebuilt through bulk_add (add skips hosts that are
# already known, so the table has to be empty first). Only host-name keys are
# re-submitted; the ip => host reverse entries and nil/blank keys used to be
# passed to bulk_add as if they were hosts. If bulk_add fails, the previous
# table is restored instead of being left empty.
#
# Returns the Hash of refreshed host <=> ip entries, or nil on failure.
def refresh_all
  puts "Refresh all the entries in the local host repository in one shot."
  snapshot = @known_hosts
  hosts = snapshot.keys.reject do |key|
    key.nil? || key.to_s.strip.empty? || key =~ /\A\d+\.\d+\.\d+\.\d+\z/
  end
  @known_hosts = {}
  changes = bulk_add(hosts)
  if changes.nil?
    # bulk_add reports its own error and returns nil; keep the old table
    @known_hosts = snapshot
    return nil
  end
  @known_hosts.merge!(changes)
  puts "\n#{changes.size} Entries Refreshed:" if changes.size>0
  puts "Done refreshing the local hosts."
  changes
rescue => ee
  @known_hosts = snapshot unless snapshot.nil?
  puts "Exception on method #{__method__}: #{ee}"
end
296
+
297
+ # Extract known root domains from the local host repository @known_hosts
298
# List the distinct root domains of the hosts in @known_hosts.
#
# Returns a sorted Array of unique root-domain Strings, or nil when a
# StandardError is raised.
#
# The original chained `uniq!.sort!`; uniq! returns nil when the list has no
# duplicates, so the method failed exactly when every host had its own
# root domain.
def get_root_domains
  puts "Dump out all active root domains from the cache."
  zones = (@known_hosts.keys - ["", nil]).map do |hostname|
    next if is_ip?(hostname)
    get_domain_root(hostname.strip)
  end
  # compact drops the skipped IP keys and hosts without a root domain
  zones.compact.uniq.sort
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
end
314
+ alias_method :dump_root_domains, :get_root_domains
315
+
316
+ # Extract hostname without the root domain part from the @known_hosts. Data can be used for statistics study.
317
# List the host part (host-name minus its root domain) of every host in
# @known_hosts.
#
# Returns a sorted Array of Strings (duplicates are kept), or nil when a
# StandardError is raised.
#
# A host whose root domain cannot be determined is listed unchanged; it used
# to abort the whole dump because '.'+nil raises. The root is removed only
# as a literal trailing ".<root>".
def get_a_records
  puts "Dump out all known A records from the local hosts."
  records = []
  (@known_hosts.keys - ["", nil]).each do |hostname|
    next if is_ip?(hostname)
    hostname = hostname.strip
    root = get_domain_root(hostname)
    if root.nil? || root.empty?
      records.push(hostname)
    else
      records.push(hostname.sub(/\.#{Regexp.escape(root)}\z/, ''))
    end
  end
  records.sort
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
end
334
+ alias_method :dump_a_records, :get_a_records
335
+
336
+ # Print summary report on the cache
337
# Print a summary of the host table to stdout: the total number of table
# entries (host and reverse entries together) followed by one
# "host<TAB>ip" line per key that passes is_fqdn?, sorted by host-name.
#
# nil and blank keys are removed BEFORE sorting; sorting first raised an
# un-rescued ArgumentError as soon as the table contained a nil key.
#
# Returns nil.
def print_known_hosts
  puts "\nSummary of local hosts Table:"
  puts "Total entries: #{@known_hosts.size}"
  (@known_hosts.keys - ["", nil]).sort.each do |key|
    puts "#{key}\t#{@known_hosts[key]}" if is_fqdn?(key)
  end
  puts "End of the summary"
end
346
+ alias_method :print_all, :print_known_hosts
347
+
348
+ # Print the local host store entry for a single host
349
# Print the local host store entry of a single host to stdout.
#
# host - host-name String; must pass is_fqdn?
#
# The argument is normalized on a copy (strip + downcase, matching how keys
# are stored). The original used strip!, which modified the caller's string
# and failed on frozen strings.
#
# Returns nil; invalid input is reported on stdout.
def print_host(host)
  puts "Local host store entry for #{host}"
  host = host.strip.downcase
  raise "Invalid input: #{host}" unless is_fqdn?(host)
  if @known_hosts.key?(host)
    puts "#{host}\t#{@known_hosts[host]}"
  else
    puts "Unknown host in the local store: #{host}"
  end
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
end
364
+ alias_method :print, :print_host
365
+
366
+ # Check if the specific IP within @known_hosts table
367
# Check whether an IP is a key of the @known_hosts table.
#
# ip - IP String (surrounding whitespace is ignored)
#
# Returns true or false; nil input, a missing table and any StandardError
# all yield false.
def ip_known?(ip)
  return false if @known_hosts.nil? || ip.nil?
  @known_hosts.key?(ip.strip)
rescue => ee
  puts "IP Lookup Error: #{ee}" if @verbose
  false
end
381
+ alias_method :has_a_record?, :ip_known?
382
+
383
+ # Check if the specific host within @known_hosts table
384
# Check whether a host-name is a key of the @known_hosts table.
#
# host - host-name String (stripped and lower-cased before the lookup)
#
# Returns true or false; nil input, a missing table and any StandardError
# all yield false.
def host_known?(host)
  return false if @known_hosts.nil? || host.nil?
  @known_hosts.key?(host.strip.downcase)
rescue => ee
  puts "Host Lookup Error: #{ee}" if @verbose
  false
end
396
+ alias_method :is_known?, :host_known?
397
+
398
+ # Perform reverse DNS lookup on the local host repository. Not to confuse with the reverse DNS lookup from the Internet
399
# Reverse lookup in the local host repository (no DNS query is made).
#
# ip - IP String (surrounding whitespace is ignored)
#
# Returns the host-name recorded for the IP, or nil when the IP is unknown
# or a StandardError is raised.
def local_ip_2_host(ip)
  puts "Reverse DNS lookup from the local host repository" if @verbose
  ip = ip.strip unless ip.nil?
  @known_hosts[ip]
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
  nil
end
413
+
414
+ # Perform DNS lookup on the local host repository. Not to confuse with the DNS lookup from the Internet
415
# Forward lookup in the local host repository (no DNS query is made).
#
# host - host-name String (stripped and lower-cased before the lookup,
#        because keys are stored lower-case; mixed-case input used to miss)
#
# Returns the IP recorded for the host, or nil when the host is unknown or a
# StandardError is raised.
def local_host_2_ip(host)
  puts "DNS lookup from the local host repository" if @verbose
  host = host.strip.downcase unless host.nil?
  @known_hosts[host]
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
  nil
end
429
+
430
+ # Extract a list of sub-domains from the local host repository @known_hosts
431
# List the distinct sub-domains of the hosts in @known_hosts.
#
# Returns a sorted Array of unique sub-domain Strings. When a StandardError
# is raised the error is reported and the entries collected so far are
# returned.
#
# The original chained `uniq!.sort!`; uniq! returns nil when the list has no
# duplicates, so an all-distinct list was returned unsorted via the rescue
# path. Interrupt and SystemExit are no longer swallowed.
#
# NOTE(review): this calls get_subdomain while add calls get_sub_domain;
# confirm that both names are provided by Wmap::Utils.
def dump_sub_domains
  puts "Dump out all active sub domains from the local hosts." if @verbose
  subs = []
  begin
    @known_hosts.keys.each do |hostname|
      next if hostname.nil? || is_ip?(hostname)
      sub = get_subdomain(hostname.strip)
      subs.push(sub) unless sub.nil?
    end
    subs = subs.uniq.sort
    puts "Found sub domains: #{subs}" if @verbose
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}"
  end
  subs
end
449
+ alias_method :get_sub_domains, :dump_sub_domains
450
+
451
+ # Based on the current host store, to determine if an entry is a known sub-domain
452
# Check whether a sub-domain appears among the sub-domains derived from the
# current host store (see dump_sub_domains).
#
# domain - sub-domain String (stripped and lower-cased before the check)
#
# Returns true or false; nil input or any other StandardError yields false
# (the original returned nil there and also swallowed Interrupt/SystemExit).
def sub_domain_known?(domain)
  puts "Validate sub-domain: #{domain}" if @verbose
  target = domain.strip.downcase
  dump_sub_domains.include?(target)
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
  false
end
462
+
463
+ # Search potential matching sites from the host store by using simple regular expression. Note that any upper-case char in the search string will be automatically converted into lower case
464
# Search the keys of the host store (host-names and IPs) with a regular
# expression.
#
# pattern - regular-expression source String; it is stripped and
#           lower-cased, and matching is case-insensitive
#
# Returns an Array of matching keys in table order, or nil when the pattern
# is nil or not a valid regular expression. The expression is compiled once
# instead of once per key, and Interrupt/SystemExit are no longer swallowed.
def search(pattern)
  puts "Search host store based on the regular expression: #{pattern}" if @verbose
  matcher = /#{pattern.strip.downcase}/i
  @known_hosts.keys.select { |key| key =~ matcher }
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
  nil
end
480
+ alias_method :find, :search
481
+
482
+ # Search local host repository and return a list of aliases for the host
483
# List the other host-names in the local repository that share the IP of
# the given host.
#
# host - host-name String; must pass is_fqdn? and be present in the store
#
# The argument is normalized on a copy (strip + downcase, matching how keys
# are stored); the original used strip!, which modified the caller's string
# and failed on frozen strings.
#
# Returns an Array of alias host-names (without the host itself), or nil
# when the input is invalid, unknown, or a StandardError is raised.
def host_aliases(host)
  puts "Search aliases in the local hosts data repository for host: #{host}" if @verbose
  host = host.strip.downcase
  raise "Unknown method input: #{host} We expect a FQDN host-name string from you. " unless is_fqdn?(host)
  raise "Unknown host-name in the local hosts data repository: #{host}" unless @known_hosts.key?(host)
  ip = local_host_2_ip(host)
  @known_hosts.keys.select { |key| local_host_2_ip(key) == ip } - [host]
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
  nil
end
506
+ alias_method :aliases, :host_aliases
507
+
508
+ # Top hostname - sort out the most common host-names in the host store, in descending order
509
# Rank the most common left-most host-name labels (e.g. "www", "mail") in
# the hosts file @file_hosts.
#
# num - maximum number of labels to return
#
# Lines without a dotted-quad are ignored. Each distinct host counts once.
# Domain roots are skipped: the check is now applied to the host-name field;
# it used to receive the whole "host<TAB>ip" line. Ties are ordered
# alphabetically so the result is deterministic.
#
# Returns an Array of at most num labels, most frequent first, or nil when a
# StandardError is raised (e.g. unreadable file or nil num).
def top_hostname(num)
  puts "Sort the host store for the most common hostname. " if @verbose
  # Build a host table from the host file; foreach closes the handle itself
  host_store = {}
  File.foreach(@file_hosts) do |line|
    next unless line =~ /\d+\.\d+\.\d+\.\d+/
    entry = line.chomp.split(%r{\t+|\s+|\,})
    key = entry[0].to_s.downcase
    # Skip the domain roots in the host list
    next if key.empty? || is_domain_root?(key)
    value = entry[1]
    puts "Loading value pair: #{key} - #{value}" if @verbose
    host_store[key] = value
  end
  tally = Hash.new(0)
  host_store.each_key do |name|
    label = name.split('.').first
    tally[label] += 1 unless label.nil?
  end
  # Sort by count, descending; alphabetical within equal counts
  ranked = tally.keys.sort_by { |label| [-tally[label], label] }
  ranked.first([num, 0].max)
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
  nil
end
548
+
549
+ private :load_known_hosts_from_file
550
+ end