wmap 2.4.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +141 -0
- data/LICENSE.txt +15 -0
- data/README.rdoc +98 -0
- data/TODO +13 -0
- data/bin/deprime +21 -0
- data/bin/distrust +38 -0
- data/bin/googleBot +23 -0
- data/bin/prime +21 -0
- data/bin/refresh +26 -0
- data/bin/run_tests +16 -0
- data/bin/spiderBot +26 -0
- data/bin/trust +38 -0
- data/bin/updateAll +57 -0
- data/bin/wadd +25 -0
- data/bin/wadds +26 -0
- data/bin/wcheck +28 -0
- data/bin/wdel +25 -0
- data/bin/wdump +21 -0
- data/bin/wmap +151 -0
- data/bin/wscan +32 -0
- data/data/cidrs +2 -0
- data/data/deactivated_sites +1 -0
- data/data/domains +2 -0
- data/data/hosts +1 -0
- data/data/prime_hosts +1 -0
- data/data/sites +2 -0
- data/data/sub_domains +2 -0
- data/demos/bruter.rb +27 -0
- data/demos/dns_brutes.rb +28 -0
- data/demos/filter_cidr.rb +18 -0
- data/demos/filter_crawls.rb +5 -0
- data/demos/filter_domain.rb +25 -0
- data/demos/filter_geoip.rb +26 -0
- data/demos/filter_known_services.rb +59 -0
- data/demos/filter_netinfo.rb +23 -0
- data/demos/filter_prime.rb +25 -0
- data/demos/filter_profiler.rb +3 -0
- data/demos/filter_redirection.rb +19 -0
- data/demos/filter_site.rb +40 -0
- data/demos/filter_siteip.rb +31 -0
- data/demos/filter_status.rb +17 -0
- data/demos/filter_timestamp.rb +23 -0
- data/demos/filter_url.rb +19 -0
- data/demos/new_fnd.rb +66 -0
- data/demos/nmap_parser.pl +138 -0
- data/demos/site_format.rb +18 -0
- data/demos/whois_domain.rb +78 -0
- data/dicts/GeoIP.dat +0 -0
- data/dicts/GeoIPASNum.dat +0 -0
- data/dicts/GeoLiteCity.dat +0 -0
- data/dicts/ccsld.txt +2646 -0
- data/dicts/cctld.txt +243 -0
- data/dicts/gtld.txt +25 -0
- data/dicts/hostnames-dict.big +1402 -0
- data/dicts/hostnames-dict.txt +101 -0
- data/lib/wmap/cidr_tracker.rb +327 -0
- data/lib/wmap/dns_bruter.rb +308 -0
- data/lib/wmap/domain_tracker/sub_domain.rb +142 -0
- data/lib/wmap/domain_tracker.rb +342 -0
- data/lib/wmap/geoip_tracker.rb +72 -0
- data/lib/wmap/google_search_scraper.rb +177 -0
- data/lib/wmap/host_tracker/primary_host.rb +130 -0
- data/lib/wmap/host_tracker.rb +550 -0
- data/lib/wmap/network_profiler.rb +144 -0
- data/lib/wmap/port_scanner.rb +208 -0
- data/lib/wmap/site_tracker/deactivated_site.rb +85 -0
- data/lib/wmap/site_tracker.rb +937 -0
- data/lib/wmap/url_checker.rb +314 -0
- data/lib/wmap/url_crawler.rb +381 -0
- data/lib/wmap/utils/domain_root.rb +184 -0
- data/lib/wmap/utils/logger.rb +53 -0
- data/lib/wmap/utils/url_magic.rb +343 -0
- data/lib/wmap/utils/utils.rb +333 -0
- data/lib/wmap/whois.rb +76 -0
- data/lib/wmap.rb +227 -0
- data/logs/wmap.log +17 -0
- data/ruby_whois_patches/base_cocca2.rb +149 -0
- data/ruby_whois_patches/kero.yachay.pe.rb +120 -0
- data/ruby_whois_patches/whois.PublicDomainRegistry.com.rb +124 -0
- data/ruby_whois_patches/whois.above.com.rb +61 -0
- data/ruby_whois_patches/whois.adamsnames.tc.rb +107 -0
- data/ruby_whois_patches/whois.aeda.net.ae.rb +105 -0
- data/ruby_whois_patches/whois.ai.rb +112 -0
- data/ruby_whois_patches/whois.arnes.si.rb +121 -0
- data/ruby_whois_patches/whois.ascio.com.rb +91 -0
- data/ruby_whois_patches/whois.cnnic.cn.rb +123 -0
- data/ruby_whois_patches/whois.corporatedomains.com.rb +67 -0
- data/ruby_whois_patches/whois.crsnic.net.rb +108 -0
- data/ruby_whois_patches/whois.denic.de.rb +174 -0
- data/ruby_whois_patches/whois.dk-hostmaster.dk.rb +120 -0
- data/ruby_whois_patches/whois.dns.be.rb +134 -0
- data/ruby_whois_patches/whois.dns.lu.rb +129 -0
- data/ruby_whois_patches/whois.dns.pl.rb +150 -0
- data/ruby_whois_patches/whois.dns.pt.rb +119 -0
- data/ruby_whois_patches/whois.domain.kg.rb +126 -0
- data/ruby_whois_patches/whois.domainregistry.my.rb +123 -0
- data/ruby_whois_patches/whois.domreg.lt.rb +110 -0
- data/ruby_whois_patches/whois.dot.tk.rb +140 -0
- data/ruby_whois_patches/whois.hkirc.hk.rb +121 -0
- data/ruby_whois_patches/whois.isnic.is.rb +130 -0
- data/ruby_whois_patches/whois.je.rb +119 -0
- data/ruby_whois_patches/whois.jprs.jp.rb +137 -0
- data/ruby_whois_patches/whois.kenic.or.ke.rb +140 -0
- data/ruby_whois_patches/whois.markmonitor.com.rb +118 -0
- data/ruby_whois_patches/whois.melbourneit.com.rb +58 -0
- data/ruby_whois_patches/whois.nic.as.rb +96 -0
- data/ruby_whois_patches/whois.nic.at.rb +109 -0
- data/ruby_whois_patches/whois.nic.ch.rb +141 -0
- data/ruby_whois_patches/whois.nic.cl.rb +117 -0
- data/ruby_whois_patches/whois.nic.ec.rb +157 -0
- data/ruby_whois_patches/whois.nic.im.rb +120 -0
- data/ruby_whois_patches/whois.nic.it.rb +170 -0
- data/ruby_whois_patches/whois.nic.lv.rb +116 -0
- data/ruby_whois_patches/whois.nic.ly.rb +127 -0
- data/ruby_whois_patches/whois.nic.mu.rb +27 -0
- data/ruby_whois_patches/whois.nic.mx.rb +123 -0
- data/ruby_whois_patches/whois.nic.net.sa.rb +111 -0
- data/ruby_whois_patches/whois.nic.or.kr.rb +101 -0
- data/ruby_whois_patches/whois.nic.tel.rb +129 -0
- data/ruby_whois_patches/whois.nic.tr.rb +133 -0
- data/ruby_whois_patches/whois.nic.us.rb +129 -0
- data/ruby_whois_patches/whois.nic.ve.rb +135 -0
- data/ruby_whois_patches/whois.norid.no.rb +127 -0
- data/ruby_whois_patches/whois.pandi.or.id.rb +118 -0
- data/ruby_whois_patches/whois.psi-usa.info.rb +63 -0
- data/ruby_whois_patches/whois.registro.br.rb +109 -0
- data/ruby_whois_patches/whois.registrygate.com.rb +55 -0
- data/ruby_whois_patches/whois.rrpproxy.net.rb +61 -0
- data/ruby_whois_patches/whois.sgnic.sg.rb +130 -0
- data/ruby_whois_patches/whois.srs.net.nz.rb +166 -0
- data/ruby_whois_patches/whois.tucows.com.rb +70 -0
- data/ruby_whois_patches/whois.twnic.net.tw.rb +133 -0
- data/settings/discovery_ports +24 -0
- data/settings/google_keywords.txt +9 -0
- data/settings/google_locator.txt +23 -0
- data/test/domain_tracker_test.rb +31 -0
- data/test/utils_test.rb +168 -0
- data/version.txt +13 -0
- data/wmap.gemspec +49 -0
- metadata +202 -0
@@ -0,0 +1,342 @@
|
|
1
|
+
#--
|
2
|
+
# Wmap
|
3
|
+
#
|
4
|
+
# A pure Ruby library for the Internet web application discovery and tracking.
|
5
|
+
#
|
6
|
+
# Copyright (c) 2012-2015 Yang Li <yang.li@owasp.org>
|
7
|
+
#++
|
8
|
+
require "parallel"
|
9
|
+
#require "singleton"
|
10
|
+
|
11
|
+
|
12
|
+
# Class to track the known (trusted) Internet domains
|
13
|
+
class Wmap::DomainTracker
|
14
|
+
include Wmap::Utils
|
15
|
+
#include Singleton
|
16
|
+
|
17
|
+
|
18
|
+
attr_accessor :verbose, :max_parallel, :domains_file, :file_domains, :data_dir
|
19
|
+
attr_reader :known_internet_domains
|
20
|
+
|
21
|
+
# Set default instance variables
|
22
|
+
# Build a tracker backed by a plain-text domains file.
#
# params keys (all optional):
#   :verbose      - print progress messages (default false)
#   :data_dir     - directory holding the data files (default: ../../data/ relative to this file)
#   :domains_file - path of the domains file (default: "domains" inside :data_dir)
#   :max_parallel - process count used by bulk_add (default 40)
def initialize(params = {})
  @verbose = params.fetch(:verbose, false)
  @data_dir = params.fetch(:data_dir, File.dirname(__FILE__) + '/../../data/')
  # File.join tolerates a :data_dir supplied without a trailing slash
  @file_domains = params.fetch(:domains_file, File.join(@data_dir, 'domains'))
  @max_parallel = params.fetch(:max_parallel, 40)
  # Create an empty store on first use so the load below has a file to read
  File.write(@file_domains, "") unless File.exist?(@file_domains)
  # Hash table to hold the trusted domains (nil when the file could not be read)
  @known_internet_domains = load_domains_from_file(@file_domains)
end
|
33
|
+
|
34
|
+
# 'setter' to load the known Internet domains into an instance variable
|
35
|
+
# Parse a domains file into a Hash of { domain name => zone-transfer flag }.
#
# Each data line is "domain, yes|no". Blank lines and lines starting with '#'
# are ignored, and the first occurrence of a domain wins.
#
# file - path of the file to read (defaults to @file_domains)
# lc   - when truthy, lines are lower-cased before parsing
#
# Returns the Hash, or nil when the file cannot be read.
def load_domains_from_file(file=@file_domains, lc=true)
  puts "Loading trusted domain file: #{file}" if @verbose
  known_internet_domains = {}
  # File.foreach closes the handle even if a line raises
  File.foreach(file) do |line|
    puts "Processing line: #{line}" if @verbose
    line = line.strip
    next if line.empty? || line.start_with?('#')
    line = line.downcase if lc
    # Strip each field so "a.com , yes" and "a.com,yes" map to the same key
    name, flag = line.split(',').map(&:strip)
    next if name.nil? || name.empty?
    next if known_internet_domains.key?(name)
    known_internet_domains[name] = !(flag =~ /yes/i).nil?
  end
  known_internet_domains
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  nil
end
|
66
|
+
|
67
|
+
# Save the current domain hash table into a file
|
68
|
+
# Write the domain table to disk as "domain, yes|no" lines, sorted by name,
# preceded by two '#' header lines.
#
# file_domains - destination path (defaults to @file_domains)
# domains      - Hash of { domain => flag } to persist (defaults to the in-memory table)
#
# Returns nil. Errors are reported only in verbose mode.
def save_domains_to_file!(file_domains=@file_domains, domains=@known_internet_domains)
  puts "Saving the current domains cache table from memory to file: #{file_domains} ..." if @verbose
  timestamp = Time.now
  # Render the rows first: a bad `domains` argument must fail before the
  # destination file is opened (and truncated).
  rows = domains.keys.sort.map { |key| "#{key}, #{domains[key] ? 'yes' : 'no'}\n" }
  # Block form guarantees the handle is closed even when a write fails
  File.open(file_domains, 'w') do |f|
    f.write "# Local domains file created by class #{self.class} method #{__method__} at: #{timestamp}\n"
    f.write "# domain name, free zone transfer detected?\n"
    rows.each { |row| f.write row }
  end
  puts "Domain cache table is successfully saved: #{file_domains}"
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
end
|
88
|
+
alias_method :save!, :save_domains_to_file!
|
89
|
+
|
90
|
+
# Count numbers of entries in the domain cache table
|
91
|
+
# Report how many entries the domain cache table holds.
#
# The previous loop iterated the Hash with a single block parameter, so each
# element was a [key, value] Array and the regexp test never matched; every
# entry was counted. On Ruby 3.2+ `Array =~` raises instead. Taking the size
# directly keeps the historical result and works on every Ruby version.
#
# Returns the Integer count, or nil when the table is not loaded.
def count
  puts "Counting number of entries in the domain cache table ..."
  cnt = @known_internet_domains.size
  puts "Current number of entries: #{cnt}"
  cnt
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
end
|
106
|
+
alias_method :size, :count
|
107
|
+
|
108
|
+
# 'setter' to add domain entry to the cache one at a time
|
109
|
+
# Add one domain to the in-memory cache table.
#
# host - domain name; surrounding whitespace and case are ignored
#
# A host equal to its root domain is stored under the root. Otherwise, when
# get_subdomain returns a value different from the host, that sub-domain is
# stored. The stored value is true when zone_transferable? reports the zone
# as transferable, else false.
#
# Returns the { name => flag } record that was added, or nil when nothing was
# added (blank input, already known, or not a usable root / sub-domain).
def add(host)
  puts "Add entry to the local domains cache table: #{host}" if @verbose
  host = host.to_s.strip.downcase
  if host.empty?
    # nil / blank input used to crash on nil.strip
    puts "Problem add domain #{host} - please use legal root domain or sub domain only."
    return nil
  end
  if @known_internet_domains.key?(host)
    puts "Domain is already exist. Skipping: #{host}"
    return nil
  end
  root = get_domain_root(host)
  sub = get_subdomain(host)
  name =
    if host == root
      root
    elsif sub.nil? # 2/10/2014, additional logic to support sub-domains
      return nil
    elsif host != sub
      sub
    else
      puts "Problem add domain #{host} - please use legal root domain or sub domain only."
      return nil
    end
  record = { name => (zone_transferable?(name) ? true : false) }
  puts "Entry loaded: #{record}"
  @known_internet_domains.merge!(record)
  record
end
|
151
|
+
|
152
|
+
# 'setter' to add domain entry to the cache in batch (from a file)
|
153
|
+
# Add every domain listed in a file to the cache table.
#
# file - path to a list file, read through file_2_list
#
# Returns whatever bulk_add returns for the parsed list, or nil when the file
# does not exist or an error occurs (reported only in verbose mode).
def file_add(file)
  puts "Add entries to the local domains cache table from file: #{file}" if @verbose
  raise "File non-exist. Please check your file path and name again: #{file}" unless File.exist?(file)
  bulk_add(file_2_list(file))
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
end
|
164
|
+
|
165
|
+
# 'setter' to add domain entry to the cache in batch (from a list)
|
166
|
+
# Add a list of domains to the cache table, running `add` through Parallel.map.
#
# list - Array of domain names
# num  - value passed to Parallel.map as :in_processes (defaults to @max_parallel)
#
# The records returned by the workers are merged into @known_internet_domains
# here, after Parallel.map returns.
#
# Returns a Hash of the records that were added ({} for a nil or empty list),
# or nil if an error occurs.
def bulk_add(list, num=@max_parallel)
  puts "Add entries to the local domains cache table from list: #{list}" if @verbose
  results = {}
  if list.nil? || list.empty?
    puts "Error: no entry is loaded. Please check your list and try again."
    return results
  end
  records = Parallel.map(list, :in_processes => num) { |target| add(target) }
  records.each do |record|
    # add returns nil for skipped or rejected entries
    results.merge!(record) unless record.nil? || record.empty?
  end
  @known_internet_domains.merge!(results)
  puts "Done loading entries."
  results
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
end
|
194
|
+
alias_method :adds, :bulk_add
|
195
|
+
|
196
|
+
# 'setter' to remove entry from the cache one at a time
|
197
|
+
# Remove one domain from the cache table.
#
# domain - domain name; surrounding whitespace and case are ignored
#
# Returns the normalised domain name when an entry was removed, nil otherwise.
def delete(domain)
  puts "Remove entry from the domains cache table: #{domain} " if @verbose
  # to_s lets a nil argument fall through to the "not found" path instead of
  # relying on a rescued NoMethodError
  domain = domain.to_s.strip.downcase
  if @known_internet_domains.key?(domain)
    @known_internet_domains.delete(domain)
    puts "Entry cleared: #{domain}"
    return domain
  end
  puts "Entry not fund. Skipping: #{domain}"
  nil
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
end
|
212
|
+
|
213
|
+
# 'setter' to delete domain entries from the cache in batch (from a list)
|
214
|
+
# Remove a list of domains from the cache table.
#
# list - Array of domain names
#
# Returns an Array of the domains that were actually removed, or nil when the
# list is nil/empty or an error occurs.
def bulk_delete(list)
  puts "Delete entries to the local domains cache table from list: #{list}" if @verbose
  if list.nil? || list.empty?
    puts "Exception on method bulk_delete: no entry is loaded. Please check your list and try again."
    return nil
  end
  # delete returns nil for unknown domains; keep only the real removals
  changes = list.map { |x| delete(x) }.compact
  puts "Done deleting domains from list: #{list}"
  changes
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
end
|
233
|
+
alias_method :dels, :bulk_delete
|
234
|
+
|
235
|
+
# 'setter' to delete domain entries from the cache in batch (from a file)
|
236
|
+
# Remove every domain listed in a file from the cache table.
#
# file - path to a list file, read through file_2_list
#
# Returns whatever bulk_delete returns for the parsed list, or nil when the
# file does not exist or an error occurs (reported only in verbose mode).
def file_delete(file)
  puts "Delete entries to the local domains cache table from file: #{file}" if @verbose
  raise "File non-exist. Please check your file path and name again: #{file}" unless File.exist?(file)
  bulk_delete(file_2_list(file))
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
end
|
246
|
+
|
247
|
+
# 'setter' to remove all entries from the store
|
248
|
+
# Remove every entry from the domain store.
#
# Each key is passed through `delete` so the usual per-entry message is
# printed. The table is then cleared outright, because `delete` normalises
# its argument and would otherwise leave behind any key that is not already
# stripped and lower-cased.
#
# Returns the Array of per-key results from `delete`, or nil on error.
def delete_all
  puts "Delete all entries in the domain store! " if @verbose
  removed = @known_internet_domains.keys.map { |domain| delete(domain) }
  @known_internet_domains.clear
  removed
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
end
|
258
|
+
|
259
|
+
# Refresh the domain entry one at a time
|
260
|
+
# Re-evaluate one known domain by deleting it and adding it again.
#
# domain - domain name; surrounding whitespace and case are ignored
#
# If the re-add yields nothing (add returns nil or raises), the previous
# entry is put back so a failed refresh never drops a tracked domain.
#
# Returns the normalised domain name, or nil when the domain is unknown or an
# error occurs. Aborts the process when the domain table was never loaded.
def refresh(domain)
  abort "Trusted Internet domain file not loaded properly! " if @known_internet_domains.nil?
  domain = domain.strip.downcase unless domain.nil?
  unless domain_known?(domain)
    puts "Unknown domain: #{domain}"
    return nil
  end
  had_entry = @known_internet_domains.key?(domain)
  previous = @known_internet_domains[domain]
  delete(domain)
  refreshed = nil
  begin
    refreshed = add(domain)
  ensure
    # Restore the old value if nothing replaced it
    if had_entry && refreshed.nil? && !@known_internet_domains.key?(domain)
      @known_internet_domains[domain] = previous
    end
  end
  domain
rescue => ee
  puts "Exception on method #{__method__} for #{domain}: #{ee}" if @verbose
  nil
end
|
277
|
+
|
278
|
+
# Simple method to check if a domain is already within the domain cache table
|
279
|
+
# Check whether a domain is present in the cache table.
#
# domain - domain name; surrounding whitespace and case are ignored
#
# Instances whose ancestry includes Wmap::DomainTracker::SubDomain consult
# @known_internet_sub_domains; everything else consults
# @known_internet_domains. Matching on ancestry rather than the exact class
# name keeps the check working for subclasses, which previously got nil.
#
# Returns true or false; false when the lookup raises.
def domain_known?(domain)
  domain = domain.strip.downcase unless domain.nil?
  lineage = self.class.ancestors.map(&:name)
  store =
    if lineage.include?("Wmap::DomainTracker::SubDomain")
      @known_internet_sub_domains
    else
      @known_internet_domains
    end
  store.key?(domain)
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  false
end
|
296
|
+
alias_method :is_known?, :domain_known?
|
297
|
+
alias_method :is_domain_known?, :domain_known?
|
298
|
+
|
299
|
+
# Dump out the list of known domains
|
300
|
+
# List the known domains.
#
# Returns an Array of the keys of the domain table, or nil when the table is
# not available. Only StandardError is rescued so that interrupts and exit
# requests are not swallowed.
def get_domains
  puts "Retrieve a list of known domain ..." if @verbose
  @known_internet_domains.keys
rescue StandardError => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  nil
end
|
309
|
+
alias_method :dump_domains, :get_domains
|
310
|
+
alias_method :dump, :get_domains
|
311
|
+
|
312
|
+
# Search potential matching domains from the domain store by using simple regular expression. Note that any upper-case char in the search string will be automatically converted into lower case
|
313
|
+
# Find known domains matching a regular expression (case-insensitive).
#
# pattern - String holding the regular expression source; surrounding
#           whitespace is ignored
#
# The pattern is no longer lower-cased: doing so rewrote escapes such as
# \D, \W and \S into their opposites. Matching is case-insensitive anyway.
#
# Returns an Array of matching domain names, or nil when the pattern is
# invalid or the table is unavailable.
def search(pattern)
  puts "Search domain store for the regular expression: #{pattern}" if @verbose
  # Compile once instead of once per key
  matcher = Regexp.new(pattern.strip, Regexp::IGNORECASE)
  @known_internet_domains.keys.select { |key| key =~ matcher }
rescue StandardError => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  nil
end
|
329
|
+
alias_method :find, :search
|
330
|
+
|
331
|
+
# Print summary report on all known / trust domains in the domain cache table
|
332
|
+
# Write a report of every known domain to standard output: a header line,
# one domain per line in sorted order, then a footer line. Returns nil.
def print_known_domains
  puts "\nSummary of known Internet Domains:"
  ordered = @known_internet_domains.keys.sort
  ordered.each { |name| puts name }
  puts "End of the summary"
end
|
339
|
+
alias_method :print, :print_known_domains
|
340
|
+
|
341
|
+
private :load_domains_from_file
|
342
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
#--
|
2
|
+
# Wmap
|
3
|
+
#
|
4
|
+
# A pure Ruby library for Internet web application discovery and tracking.
|
5
|
+
#
|
6
|
+
# Copyright (c) 2012-2015 Yang Li <yang.li@owasp.org>
|
7
|
+
#++
|
8
|
+
require "geoip"
|
9
|
+
|
10
|
+
|
11
|
+
# Wrapper class of the 'GeoIP' library - http://geoip.rubyforge.org/
|
12
|
+
# For detail explanation of Geographic information of an IP address (GeoIP) and its data repository, please refer to the vendor MaxMind (http://www.maxmind.com)
|
13
|
+
class Wmap::GeoIPTracker
|
14
|
+
include Wmap::Utils
|
15
|
+
|
16
|
+
attr_accessor :db, :verbose
|
17
|
+
|
18
|
+
# This product includes GeoLite data created by MaxMind, available from
|
19
|
+
# <a href="http://www.maxmind.com">http://www.maxmind.com</a>.
|
20
|
+
Db_city=File.dirname(__FILE__)+"/../../dicts/GeoLiteCity.dat"
|
21
|
+
Db_asn=File.dirname(__FILE__)+"/../../dicts/GeoIPASNum.dat"
|
22
|
+
Db_country=File.dirname(__FILE__)+"/../../dicts/GeoIP.dat"
|
23
|
+
|
24
|
+
# Set default instance variables
|
25
|
+
# Set up the tracker.
#
# params keys (all optional):
#   :verbose - print progress messages (default false)
#   :db      - path of the GeoIP database file (default Db_city)
def initialize(params = {})
  @verbose = params.fetch(:verbose, false)
  @db = params.fetch(:db, Db_city)
end
|
29
|
+
|
30
|
+
# Wrapper for the Ruby GeoIP City class - return data structure below on successful lookup
|
31
|
+
# Struct.new(:request, :ip, :country_code2, :country_code3, :country_name, :continent_code, :region_name, :city_name, :postal_code, :latitude, :longitude, :dma_code, :area_code, :timezone)
|
32
|
+
# City-level GeoIP lookup.
#
# object - IP address or fully qualified host name; surrounding whitespace
#          is ignored
#
# The lookup uses the database configured through the :db option / `db`
# accessor, falling back to Db_city when none is set. Previously the
# configured path was ignored.
#
# Returns the result of GeoIP#city, or nil when the input is rejected or the
# lookup fails. Only StandardError is rescued so that interrupts and exit
# requests are not swallowed.
def city(object)
  puts "Perform GeoIP city lookup on: #{object}" if @verbose
  object = object.strip
  raise "Unknown object format - only valid hostname or IP is accepted: #{object}" unless is_ip?(object) || is_fqdn?(object)
  GeoIP.new(@db || Db_city).city(object)
rescue StandardError => ee
  # Include the failure reason; it used to be dropped
  puts "Exception on method city: #{object}: #{ee}" if @verbose
  nil
end
|
43
|
+
alias_method :query, :city
|
44
|
+
|
45
|
+
# Wrapper for the Ruby GeoIP Country class - return data structure below on successful lookup
|
46
|
+
# Struct.new(:request, :ip, :country_code, :country_code2, :country_code3, :country_name, :continent_code)
|
47
|
+
# Country-level GeoIP lookup against the Db_country database.
#
# object - IP address or fully qualified host name; surrounding whitespace
#          is ignored
#
# Returns the result of GeoIP#country, or nil when the input is rejected or
# the lookup fails. Only StandardError is rescued so that interrupts and exit
# requests are not swallowed.
def country(object)
  puts "Perform GeoIP country lookup on: #{object}" if @verbose
  object = object.strip
  raise "Unknown object format - only valid hostname or IP is accepted: #{object}" unless is_ip?(object) || is_fqdn?(object)
  GeoIP.new(Db_country).country(object)
rescue StandardError => ee
  # Include the failure reason; it used to be dropped
  puts "Exception on method country: #{object}: #{ee}" if @verbose
  nil
end
|
58
|
+
|
59
|
+
# Wrapper for the Ruby GeoIP ASN class - return data structure below on successful lookup
|
60
|
+
# Struct.new(:number, :asn)
|
61
|
+
# ASN GeoIP lookup against the Db_asn database.
#
# object - IP address or fully qualified host name; surrounding whitespace
#          is ignored
#
# Returns the result of GeoIP#asn, or nil when the input is rejected or the
# lookup fails. Only StandardError is rescued so that interrupts and exit
# requests are not swallowed.
def asn(object)
  puts "Perform GeoIP ASN lookup on: #{object}" if @verbose
  object = object.strip
  raise "Unknown object format - only valid hostname or IP is accepted: #{object}" unless is_ip?(object) || is_fqdn?(object)
  GeoIP.new(Db_asn).asn(object)
rescue StandardError => ee
  # Include the failure reason; it used to be dropped
  puts "Exception on method asn: #{object}: #{ee}" if @verbose
  nil
end
|
72
|
+
end
|
@@ -0,0 +1,177 @@
|
|
1
|
+
#--
|
2
|
+
# Wmap
|
3
|
+
#
|
4
|
+
# A pure Ruby library for Internet web application discovery and tracking.
|
5
|
+
#
|
6
|
+
# Copyright (c) 2012-2015 Yang Li
|
7
|
+
#++
|
8
|
+
require 'open-uri'
|
9
|
+
require 'nokogiri'
|
10
|
+
|
11
|
+
|
12
|
+
# We build our own Google search class by querying Google search engine from its web interface, by simulating
|
13
|
+
# an anonymous web surfer.
|
14
|
+
# Note: we don't use the native Google API due to its pricing structure - We don't have budget for
|
15
|
+
# this project, and we can not use the free version due to the limitation of 100 queries per day for free. See https://github.com/google/google-api-ruby-client for details.
|
16
|
+
class Wmap::GoogleSearchScraper
|
17
|
+
include Wmap::Utils
|
18
|
+
|
19
|
+
attr_accessor :verbose, :http_timeout, :keyword_list
|
20
|
+
attr_reader :discovered_urls_from_scraper, :discovered_sites_from_scraper
|
21
|
+
|
22
|
+
# Google search engine web interface locators
|
23
|
+
File_locator = File.dirname(__FILE__)+'/../../settings/google_locator.txt'
|
24
|
+
# Google search key words
|
25
|
+
File_keywords = File.dirname(__FILE__)+'/../../settings/google_keywords.txt'
|
26
|
+
|
27
|
+
|
28
|
+
# Scraper default variables
|
29
|
+
# Set up the scraper.
#
# params keys (all optional):
#   :verbose      - print progress messages (default false)
#   :http_timeout - stored in @http_timeout (default 5000)
#   :keyword_list - initial value for the keyword_list accessor (default nil)
def initialize(params = {})
  @verbose = params.fetch(:verbose, false)
  @http_timeout = params.fetch(:http_timeout, 5000)
  # Back the keyword_list accessor explicitly instead of leaving it undefined
  @keyword_list = params.fetch(:keyword_list, nil)
  # Discovered data store
  @discovered_urls_from_scraper = {}
  @discovered_sites_from_scraper = {}
end
|
36
|
+
|
37
|
+
# Main worker method to simulate extensive google keyword searches on over 100+ countries and regions. The search will extract known web services related to the keyword by the Google Inc.
|
38
|
+
# Run one keyword against every Google locator listed in File_locator and
# collect the links found on the result pages.
#
# keyword - search keyword; surrounding whitespace is ignored
#
# A locator whose search or link extraction yields nothing is skipped rather
# than aborting the whole sweep.
#
# Returns a sorted Array of unique, non-blank links, or nil on error
# (including a nil keyword).
def google_worker(keyword)
  puts "Start the Google worker for: #{keyword}" if @verbose
  keyword = keyword.strip
  links = file_2_list(File_locator).flat_map do |locator|
    doc = google_search(locator, keyword)
    doc.nil? ? [] : (extract_links(doc) || [])
  end
  # Drop blanks before sorting: sorting a list that contains nil raises
  (links - ["", nil]).uniq.sort
rescue StandardError => ee
  puts "Exception on the method google_worker for #{keyword}: #{ee}" if @verbose
  nil
end
|
54
|
+
alias_method :worker, :google_worker
|
55
|
+
alias_method :search, :google_worker
|
56
|
+
|
57
|
+
# Main method to collect intelligence from Google's vast data warehouse. It works by hitting the Google engines with the keyword list. This exhaustive method will sweep through the Google engines in 100+ countries and regions one by one, in order to collect all related web service links known to Google, Inc. across the global Internet.
|
58
|
+
# Run google_worker for every keyword and merge the results.
#
# keyword_list - Array of keywords (defaults to the contents of File_keywords)
#
# A keyword whose worker fails (returns nil) contributes nothing; previously
# one failure discarded the results of all keywords.
#
# Returns a sorted Array of unique links, or nil on error.
def google_workers(keyword_list=file_2_list(File_keywords))
  puts "Start the Google worker for: #{keyword_list}" if @verbose
  keyword_list.flat_map { |keyword| google_worker(keyword) || [] }.uniq.sort
rescue StandardError => ee
  puts "Exception on the method google_workers for #{keyword_list}: #{ee}" if @verbose
  nil
end
|
71
|
+
alias_method :workers, :google_workers
|
72
|
+
|
73
|
+
# Perform a Google web interface keyword search and return the search result page as a Nokogiri::HTML::Document object
|
74
|
+
# Fetch one Google result page for a keyword.
#
# locator - base URL of the Google engine, ending in "/"
# keyword - search keyword; it is form-encoded before being appended
#
# URI::encode and open-uri's Kernel#open were removed in Ruby 3.0, so the
# keyword is encoded with URI.encode_www_form_component and the page is
# fetched with URI.open where available. Plain `open` is kept as a fallback
# for Rubies that predate URI.open.
#
# Returns whatever Nokogiri::HTML builds from the response, or nil on error.
def google_search(locator, keyword)
  puts "Perform the keyword search on the Google web engine for: #{keyword}" if @verbose
  link_search = locator + "search?q=" + URI.encode_www_form_component(keyword)
  page = URI.respond_to?(:open) ? URI.open(link_search) : open(link_search)
  Nokogiri::HTML(page)
rescue StandardError => ee
  puts "Exception on method google_search at Google engine location #{link_search} for the keyword #{keyword} : #{ee}" if @verbose
  nil
end
|
84
|
+
|
85
|
+
# Search for nodes by css, and extract the hyper links
|
86
|
+
# Pull the result links out of a Google result page.
#
# doc - object answering css('a') with elements that answer attribute('href')
#
# Only hrefs containing "/url?" are considered. The "/url?q=" prefix is
# removed and the remainder is returned as the link. Each link, and the site
# derived from it by url_2_site, is also recorded in the discovered-URL and
# discovered-site stores.
#
# Returns an Array of links in document order (duplicates kept), or nil on
# error. Only StandardError is rescued so that interrupts and exit requests
# are not swallowed.
def extract_links(doc)
  puts "Extract the meaningful links from the DOC." if @verbose
  links = []
  doc.css('a').each do |link|
    ref = link.attribute('href').to_s
    next unless ref =~ /\/url\?/
    my_key = ref.sub(/\/url\?q\=/, '')
    my_site = url_2_site(my_key)
    links.push(my_key)
    @discovered_urls_from_scraper[my_key] = true unless @discovered_urls_from_scraper.key?(my_key)
    @discovered_sites_from_scraper[my_site] = true unless @discovered_sites_from_scraper.key?(my_site)
  end
  links
rescue StandardError => ee
  puts "Exception on method extract_links: #{ee}" if @verbose
  nil
end
|
106
|
+
|
107
|
+
# Method to print out discovery URL result
|
108
|
+
# Write the discovered URLs to standard output: a header, one URL per line in
# insertion order, a total, and a footer. Errors are reported only in verbose
# mode. Returns nil.
def print_discovered_urls_from_scraper
  puts "Print discovered urls by the scraper. " if @verbose
  begin
    puts "\nSummary Report of Discovered URLs from the Scraper:"
    found = @discovered_urls_from_scraper.keys
    found.each { |entry| puts entry }
    puts "Total: #{found.size} url(s)"
    puts "End of the summary"
  rescue => ee
    puts "Error on method print_discovered_urls_from_scraper: #{ee}" if @verbose
  end
end
|
121
|
+
|
122
|
+
# Method to print out discovery Sites result
|
123
|
+
# Write the discovered sites to standard output: a header, one site per line
# in insertion order, a total, and a footer. Errors are reported only in
# verbose mode. Returns nil.
def print_discovered_sites_from_scraper
  puts "Print discovered sites by the scraper. " if @verbose
  begin
    puts "\nSummary Report of Discovered Sites from the Scraper:"
    found = @discovered_sites_from_scraper.keys
    found.each { |entry| puts entry }
    puts "Total: #{found.size} site(s)"
    puts "End of the summary"
  rescue => ee
    puts "Error on method print_discovered_sites_from_scraper: #{ee}" if @verbose
  end
end
|
136
|
+
|
137
|
+
# 'getter' for the discovered sites from the Google search
|
138
|
+
# Return the discovered sites as a sorted Array, or nil when the store is
# unavailable (the error is reported only in verbose mode).
def get_discovered_sites_from_scraper
  puts "Getter for the discovered sites by the scraper. " if @verbose
  sites = @discovered_sites_from_scraper.keys
  sites.sort
rescue => ee
  puts "Error on method get_discovered_sites_from_scraper: #{ee}" if @verbose
end
|
146
|
+
alias_method :print, :get_discovered_sites_from_scraper
|
147
|
+
|
148
|
+
# 'getter' for the discovered urls from the Google search
|
149
|
+
# Return the discovered URLs as a sorted Array, or nil when the store is
# unavailable (the error is reported only in verbose mode).
def get_discovered_urls_from_scraper
  puts "Getter for the discovered urls by the scraper. " if @verbose
  urls = @discovered_urls_from_scraper.keys
  urls.sort
rescue => ee
  puts "Error on method get_discovered_urls_from_scraper: #{ee}" if @verbose
end
|
157
|
+
|
158
|
+
# Save the discovered sites into a local file
|
159
|
+
# Write the discovered sites to a file: one '#' header line followed by one
# site per line in sorted order.
#
# file - destination path (overwritten)
#
# Returns nil. Errors are reported only in verbose mode.
def save_discovered_sites_from_scraper(file)
  puts "Save the discovery result(sites) into a local file: #{file}" if @verbose
  timestamp = Time.now
  # Block form guarantees the handle is closed even when a write fails
  File.open(file, 'w') do |f|
    f.puts "# Discovery result written by Wmap::GoogleSearchScraper.save_discovered_sites_from_scraper method at #{timestamp}\n"
    @discovered_sites_from_scraper.keys.sort.each { |x| f.puts "#{x}\n" }
  end
  raise "Unknown problem saving the result to file: #{file}" unless File.exist?(file)
  puts "Done saving the discovery result into the local file: #{file}"
rescue => ee
  puts "Error on method save_discovered_sites_from_scraper: #{ee}" if @verbose
end
|
173
|
+
alias_method :save, :save_discovered_sites_from_scraper
|
174
|
+
|
175
|
+
private
|
176
|
+
|
177
|
+
end
|