wmap 2.4.4 → 2.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.rdoc +27 -20
- data/bin/RHPG +85 -0
- data/bin/trust +5 -5
- data/bin/trusts +38 -0
- data/bin/updateAll +5 -9
- data/bin/wadds +1 -1
- data/bin/wmaps +24 -0
- data/dicts/tlds.txt +1537 -0
- data/lib/wmap/cidr_tracker.rb +22 -15
- data/lib/wmap/host_tracker/primary_host.rb +1 -1
- data/lib/wmap/host_tracker.rb +6 -6
- data/lib/wmap/site_tracker.rb +7 -7
- data/lib/wmap/url_crawler.rb +17 -17
- data/lib/wmap/utils/domain_root.rb +28 -24
- data/lib/wmap/wp_tracker.rb +302 -0
- data/logs/wmap.log +1516 -17
- data/version.txt +4 -4
- data/wmap.gemspec +20 -5
- metadata +179 -14
- data/data/cidrs +0 -2
- data/data/deactivated_sites +0 -1
- data/data/domains +0 -2
- data/data/hosts +0 -1
- data/data/prime_hosts +0 -1
- data/data/sites +0 -2
- data/data/sub_domains +0 -2
- data/lib/wmap.rb +0 -227
@@ -0,0 +1,302 @@
|
|
1
|
+
#--
|
2
|
+
# Wmap
|
3
|
+
#
|
4
|
+
# A pure Ruby library for the Internet web application discovery and tracking.
|
5
|
+
#
|
6
|
+
# Copyright (c) 2012-2015 Yang Li <yang.li@owasp.org>
|
7
|
+
#++
|
8
|
+
require "parallel"
|
9
|
+
#require "singleton"
|
10
|
+
require "open-uri"
|
11
|
+
require "open_uri_redirections"
|
12
|
+
require "nokogiri"
|
13
|
+
require "css_parser"
|
14
|
+
|
15
|
+
|
16
|
+
# Main class to automatically track the site inventory
|
17
|
+
class Wmap::WpTracker
|
18
|
+
include Wmap::Utils
|
19
|
+
#include Singleton
|
20
|
+
|
21
|
+
attr_accessor :http_timeout, :max_parallel, :verbose, :sites_wp, :data_dir
|
22
|
+
attr_reader :known_wp_sites
|
23
|
+
# set hard stop limit of http time-out to 8 seconds, in order to avoid severe performance penalty for certain 'weird' site(s)
|
24
|
+
Max_http_timeout=8000
|
25
|
+
|
26
|
+
# WordPress checker instance default variables
|
27
|
+
# Build a WordPress tracker and load the known-sites table from disk.
#
# Recognized keys in +params+ (all optional):
#   :verbose      - print diagnostic messages (default: false)
#   :data_dir     - directory holding the tracker data files
#                   (default: '../../data/' relative to this source file)
#   :sites_wp     - path of the known WordPress sites file
#                   (default: 'wp_sites' inside the data directory)
#   :http_timeout - stored in @http_timeout (default: 5000; presumably
#                   milliseconds, like Max_http_timeout - TODO confirm)
#   :max_parallel - default worker count used by #refresh (default: 40)
def initialize(params = {})
  @verbose = params.fetch(:verbose, false)
  @data_dir = params.fetch(:data_dir, File.dirname(__FILE__) + '/../../data/')
  # File.join tolerates a data_dir given with or without a trailing slash.
  @file_wps = params.fetch(:sites_wp, File.join(@data_dir, 'wp_sites'))
  # Back the public sites_wp accessor, which otherwise always reads nil.
  @sites_wp = @file_wps
  @http_timeout = params.fetch(:http_timeout, 5000)
  @max_parallel = params.fetch(:max_parallel, 40)
  Dir.mkdir(@data_dir) unless Dir.exist?(@data_dir)
  @log_file = File.join(@data_dir, "wp_checker.log")
  # load_from_file returns nil when the file is missing or unreadable (e.g.
  # on first run); start from an empty table so later lookups do not hit nil.
  @known_wp_sites = load_from_file(@file_wps) || {}
end
|
38
|
+
|
39
|
+
|
40
|
+
# 'setter' to load the known wordpress sites into an instance variable
|
41
|
+
# Read a known-WordPress-sites file into a Hash.
#
# Blank lines and lines starting with '#' are ignored. Every other line is
# split on ','; the first field is the site key and the entry is true when the
# second field contains "yes" (case-insensitive), false otherwise. When a site
# appears more than once, the first occurrence wins.
#
# file - path of the file to read (defaults to @file_wps)
# lc   - when truthy, each line is downcased before parsing
#
# Returns the Hash of site => true/false, or nil when the file cannot be read
# (the error is only reported when @verbose is set).
def load_from_file(file = @file_wps, lc = true)
  puts "Loading trusted file: #{file}" if @verbose
  begin
    known_wp_sites = Hash.new
    # File.foreach closes the file handle even if a line raises.
    File.foreach(file) do |line|
      puts "Processing line: #{line}" if @verbose
      line = line.strip
      next if line.empty? || line.start_with?('#')
      line = line.downcase if lc
      site, flag = line.split(',')
      site = site.to_s.strip
      # Skip malformed lines such as "," that carry no site at all.
      next if site.empty? || known_wp_sites.key?(site)
      known_wp_sites[site] = (flag =~ /yes/i) ? true : false
    end
    return known_wp_sites
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end
|
72
|
+
|
73
|
+
# Save the current WordPress site hash table into a file
|
74
|
+
# Write the WordPress site table to a file, one "<site>, yes|no" line per
# entry, sorted by site and preceded by two '#' header lines.
#
# file_wps - destination path (defaults to @file_wps)
# wps      - Hash of site => truthy/falsy flag (defaults to @known_wp_sites)
#
# Returns nil. Errors are swallowed and only reported when @verbose is set.
def save_to_file!(file_wps=@file_wps, wps=@known_wp_sites)
  puts "Saving the current wordpress site table from memory to file: #{file_wps} ..." if @verbose
  begin
    # Render every row before opening the file: opening with 'w' truncates it,
    # so a bad table (e.g. nil) must fail before any existing data is lost.
    rows = wps.keys.sort.map { |key| "#{key}, #{wps[key] ? 'yes' : 'no'}\n" }
    # Block form closes the handle even when a write raises.
    File.open(file_wps, 'w') do |f|
      f.write "# Local wps file created by class #{self.class} method #{__method__} at: #{Time.now}\n"
      f.write "# site, wordpress detected?\n"
      rows.each { |row| f.write row }
    end
    puts "WordPress site table is successfully saved: #{file_wps}"
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
  end
end
|
94
|
+
alias_method :save!, :save_to_file!
|
95
|
+
|
96
|
+
# 'setter' to add wordpress entry to the cache one at a time
|
97
|
+
# Check one URL and record the result in the in-memory site table.
#
# The URL is reduced to its site via url_2_site. A site already present in
# @known_wp_sites is skipped; otherwise it is probed with is_wp? and the
# outcome is merged into @known_wp_sites.
#
# url - URL (or site) to check
#
# Returns the new { site => true/false } record, or nil when the site was
# already known or an error occurred (errors are reported only when @verbose).
def add(url)
  begin
    puts "Add entry to the local cache table: #{url}" if @verbose
    site = url_2_site(url)
    if @known_wp_sites.key?(site)
      puts "Site is already exist. Skipping: #{site}"
      # Return explicitly: there is no record to merge for a known site.
      return nil
    end
    record = { site => (is_wp?(site) ? true : false) }
    puts "Entry loaded: #{record}"
    @known_wp_sites.merge!(record)
    return record
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}: #{url}" if @verbose
    return nil
  end
end
|
118
|
+
|
119
|
+
# logic to determine if it's a WordPress site
|
120
|
+
# Decide whether a site runs WordPress.
#
# The URL is reduced to its site via url_2_site, then the checkpoints are
# tried in order - wp_readme?, wp_css?, wp_meta? - stopping at the first hit.
# Exceptions raised by a checkpoint are not rescued here.
#
# url - URL (or site) to probe
#
# Returns true when any checkpoint matches, false otherwise.
def is_wp?(url)
  site = url_2_site(url)
  (wp_readme?(site) || wp_css?(site) || wp_meta?(site)) ? true : false
end
|
137
|
+
|
138
|
+
# add wordpress site entries (from a sitetracker list)
|
139
|
+
# Probe every site known to Wmap::SiteTracker and record the results.
#
# Each site is passed to #add inside a Parallel worker process; the records
# the workers return (nil and empty ones are ignored) are collected and merged
# into @known_wp_sites in this process.
#
# num - number of worker processes handed to Parallel (defaults to @max_parallel)
#
# Returns a Hash of the newly collected site => true/false records; it is
# empty when the site tracker has no sites.
def refresh(num = @max_parallel)
  puts "Add entries to the local cache table from site tracker: " if @verbose
  results = Hash.new
  wps = Wmap::SiteTracker.new.known_sites.keys
  if wps.empty?
    puts "Error: no entry is loaded. Please check your list and try again."
    return results
  end
  records = Parallel.map(wps, :in_processes => num) { |target| add(target) }
  records.each do |record|
    # add returns nil for skipped or failed sites.
    results.merge!(record) unless record.nil? || record.empty?
  end
  @known_wp_sites.merge!(results)
  puts "Done loading entries."
  results
end
|
167
|
+
|
168
|
+
# Wrapper to use OpenURI method 'read' to return url body contents
|
169
|
+
# Fetch a URL and parse its body as HTML.
#
# The URL is opened with open_url, its body is read in full, and the text is
# parsed with Nokogiri::HTML.
#
# url - URL to fetch
#
# Returns the parsed document, or nil when opening, reading or parsing raises
# (the error is reported only when @verbose is set).
def read_url(url)
  url_object = nil
  begin
    puts "Wrapper to return the OpenURI object for url: #{url}" if @verbose
    url_object = open_url(url)
    html_body = url_object.read
    return Nokogiri::HTML(html_body)
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  ensure
    # Release the handle returned by open_url; the body is already in memory.
    url_object.close if url_object.respond_to?(:close) && !url_object.closed?
  end
end
|
181
|
+
|
182
|
+
# Wrapper for the OpenURI open method - create an open_uri object and return the reference upon success
|
183
|
+
# Open an http(s) URL through OpenURI and return the resulting IO-like object.
#
# Only URLs that START with "http:" or "https:" (case-insensitive) are
# accepted. The URL is parsed with URI.parse and opened via the URI object
# rather than Kernel#open, so a string such as "| cmd #http:" can never reach
# Kernel#open's pipe/file handling.
#
# url - URL string including the protocol prefix
#
# Returns the object produced by OpenURI (the caller should close it).
# Raises RuntimeError when the protocol prefix is missing; errors from URI
# parsing or from the request itself propagate to the caller.
def open_url(url)
  puts "Open url #{url} by creating an open_uri object. Return the reference upon success." if @verbose
  case url
  when /\Ahttp:/i
    options = {}
  when /\Ahttps:/i
    # NOTE(review): 0 is OpenSSL::SSL::VERIFY_NONE - certificate verification
    # is deliberately disabled for https targets.
    options = { :ssl_verify_mode => 0 }
  else
    raise "Invalid URL format - please specify the protocol prefix http(s) in the URL: #{url}"
  end
  # :allow_redirections comes from the open_uri_redirections gem; :safe is
  # understood to permit only http -> https redirects - confirm against the gem.
  options[:allow_redirections] = :safe
  # The read timeout is always the Max_http_timeout hard limit (ms -> s);
  # @http_timeout is not consulted here.
  options[:read_timeout] = Max_http_timeout/1000
  URI.parse(url).open(options)
end
|
196
|
+
|
197
|
+
# Wordpress detection checkpoint - readme.html
|
198
|
+
# WordPress checkpoint: is there a /readme.html whose <title> mentions
# "wordpress" (case-insensitive)?
#
# site - site base URL, with or without a trailing '/'
#
# Returns true on a match; false when the page does not answer 200, cannot be
# fetched/parsed (read_url returned nil), or its title does not match.
def wp_readme?(site)
  # Drop a trailing '/' first so the probe URL never contains '//'.
  readme_url = site.chomp('/') + "/readme.html"
  return false unless Wmap::UrlChecker.new.response_code(readme_url) == 200
  doc = read_url(readme_url)
  # read_url returns nil on any fetch/parse error.
  return false if doc.nil?
  (doc.css('title').to_s =~ /wordpress/i) ? true : false
end
|
215
|
+
|
216
|
+
# Wordpress detection checkpoint - install.css
|
217
|
+
# WordPress checkpoint: does /wp-admin/css/install.css exist with a '#logo a'
# rule whose first declaration block mentions "wordpress" (case-insensitive)?
#
# site - site base URL, with or without a trailing '/'
#
# Returns true on a match; false when the stylesheet does not answer 200,
# cannot be loaded/parsed, has no such rule, or the rule does not match.
def wp_css?(site)
  # Drop a trailing '/' first so the probe URL never contains '//'.
  css_url = site.chomp('/') + "/wp-admin/css/install.css"
  return false unless Wmap::UrlChecker.new.response_code(css_url) == 200
  begin
    parser = CssParser::Parser.new
    parser.load_uri!(css_url)
    rule = parser.find_by_selector('#logo a')
    return (!rule.empty? && rule.first =~ /wordpress/i) ? true : false
  rescue => ee
    # A stylesheet that cannot be fetched or parsed is simply "no evidence";
    # report false so the caller can still try its remaining checkpoints.
    puts "Exception on method #{__method__}: #{ee}: #{css_url}" if @verbose
    return false
  end
end
|
236
|
+
|
237
|
+
# Wordpress detection checkpoint - meta generator
|
238
|
+
# WordPress checkpoint: does any <meta> tag on the site's front page mention
# "wordpress" (case-insensitive)?
#
# url - URL (or site); it is reduced to its site via url_2_site
#
# Returns true on a match; false when the page does not answer 200, cannot be
# fetched/parsed (read_url returned nil), or no meta tag matches.
def wp_meta?(url)
  site = url_2_site(url)
  return false unless Wmap::UrlChecker.new.response_code(site) == 200
  doc = read_url(site)
  # read_url returns nil on any fetch/parse error.
  return false if doc.nil?
  (doc.css('meta').to_s =~ /wordpress/i) ? true : false
end
|
253
|
+
|
254
|
+
# Identify the WordPress version of a site.
#
# Tries wp_ver_readme first and falls back to wp_ver_meta. Each probe is
# invoked at most once, since both perform HTTP requests.
#
# url - URL (or site) to probe
#
# Returns the version string from the first probe that yields one, or nil.
def wp_ver(url)
  wp_ver_readme(url) || wp_ver_meta(url)
end
|
263
|
+
|
264
|
+
# Identify wordpress version through the meta tag
|
265
|
+
# Extract the WordPress version from the front page's <meta> tags.
#
# Meta tags mentioning "wordpress" (case-insensitive) are scanned in document
# order and the first version-like token found in one of them is returned.
# A matching tag without any version token (e.g. a description that merely
# mentions WordPress) no longer ends the search.
#
# url - URL (or site); it is reduced to its site via url_2_site
#
# Returns the version string, or nil when the page does not answer 200,
# cannot be fetched/parsed, or no matching tag carries a version.
def wp_ver_meta(url)
  site = url_2_site(url)
  return nil unless Wmap::UrlChecker.new.response_code(site) == 200
  doc = read_url(site)
  # read_url returns nil on any fetch/parse error.
  return nil if doc.nil?
  doc.css('meta').each do |tag|
    text = tag.to_s
    next unless text =~ /wordpress/i
    version = text.scan(/[\d+\.]+\d+/).first
    return version unless version.nil?
  end
  nil
end
|
284
|
+
|
285
|
+
# Wordpress version detection via - readme.html
|
286
|
+
# Extract the WordPress version from the 'h1#logo' element of /readme.html.
#
# url - URL (or site); it is reduced to its site via url_2_site
#
# Returns the first version-like token found in the element, or nil when the
# page does not answer 200, cannot be fetched/parsed, or has no such token.
def wp_ver_readme(url)
  site = url_2_site(url)
  # Drop a trailing '/' first so the probe URL never contains '//'.
  readme_url = site.chomp('/') + "/readme.html"
  return nil unless Wmap::UrlChecker.new.response_code(readme_url) == 200
  doc = read_url(readme_url)
  # read_url returns nil on any fetch/parse error.
  return nil if doc.nil?
  # A missing element is nil here; nil.to_s is "", which yields no match.
  logo = doc.css('h1#logo')[0]
  logo.to_s.scan(/[\d+\.]+\d+/).first
end
|
300
|
+
|
301
|
+
|
302
|
+
end
|