wmap 2.4.4 → 2.4.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,302 @@
1
+ #--
2
+ # Wmap
3
+ #
4
+ # A pure Ruby library for the Internet web application discovery and tracking.
5
+ #
6
+ # Copyright (c) 2012-2015 Yang Li <yang.li@owasp.org>
7
+ #++
8
+ require "parallel"
9
+ #require "singleton"
10
+ require "open-uri"
11
+ require "open_uri_redirections"
12
+ require "nokogiri"
13
+ require "css_parser"
14
+
15
+
16
+ # Main class to detect WordPress sites and track them in a local cache table
17
+ class Wmap::WpTracker
18
+ include Wmap::Utils
19
+ #include Singleton
20
+
21
+ attr_accessor :http_timeout, :max_parallel, :verbose, :sites_wp, :data_dir
22
+ attr_reader :known_wp_sites
23
+ # set hard stop limit of http time-out to 8 seconds, in order to avoid severe performance penalty for certain 'weird' site(s)
24
+ Max_http_timeout=8000
25
+
26
+ # WordPress checker instance default variables
27
# Build a tracker instance and load the known WordPress site table from disk.
#
# @param params [Hash] optional settings:
#   :verbose      - print progress messages (default false)
#   :data_dir     - directory holding the data files (default "<dir of this file>/../../data/")
#   :sites_wp     - path of the WordPress site table (default "<data_dir>/wp_sites")
#   :http_timeout - stored in @http_timeout (default 5000)
#   :max_parallel - default process count used by #refresh (default 40)
def initialize(params = {})
  @verbose = params.fetch(:verbose, false)
  @data_dir = params.fetch(:data_dir, File.dirname(__FILE__) + '/../../data/')
  # File.join tolerates a :data_dir supplied without a trailing slash.
  @file_wps = params.fetch(:sites_wp, File.join(@data_dir, 'wp_sites'))
  # Keep the sites_wp reader in step with the path actually in use.
  # NOTE(review): assigning sites_wp= later does not change @file_wps.
  @sites_wp = @file_wps
  @http_timeout = params.fetch(:http_timeout, 5000)
  @max_parallel = params.fetch(:max_parallel, 40)
  Dir.mkdir(@data_dir) unless Dir.exist?(@data_dir)
  @log_file = File.join(@data_dir, 'wp_checker.log')
  # load_from_file returns nil when the table cannot be read (e.g. it does not
  # exist yet); start from an empty table so #add and #refresh can populate it.
  @known_wp_sites = load_from_file(@file_wps) || {}
end
38
+
39
+
40
+ # 'setter' to load the known wordpress sites into an instance variable
41
# Read a WordPress site table ("<site>, yes|no" per line) into a hash.
#
# Blank lines and lines starting with '#' are skipped, as are lines whose
# site field is empty. The first entry seen for a site wins.
#
# @param file [String] path of the table to read (defaults to the instance's site table path)
# @param lc [Boolean] downcase each line before parsing when true
# @return [Hash{String=>Boolean}, nil] site => true when the second field contains "yes";
#   nil when the file cannot be read
def load_from_file(file = @file_wps, lc = true)
  puts "Loading trusted file: #{file}" if @verbose
  known_wp_sites = {}
  # File.foreach closes the handle even if parsing raises part-way through.
  File.foreach(file) do |raw_line|
    puts "Processing line: #{raw_line}" if @verbose
    line = raw_line.strip
    next if line.empty? || line.start_with?('#')
    line = line.downcase if lc == true
    site, flag = line.split(',')
    site = site.to_s.strip
    next if site.empty? || known_wp_sites.key?(site)
    known_wp_sites[site] = (flag =~ /yes/i) ? true : false
  end
  known_wp_sites
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  nil
end
72
+
73
+ # Save the current WordPress site hash table into a file
74
# Write a WordPress site table to disk, one "<site>, yes|no" row per site,
# sorted by site, preceded by two '#' header lines.
#
# @param file_wps [String] destination path (defaults to the instance's site table path)
# @param wps [Hash{String=>Boolean}] table to write (defaults to the in-memory table)
# @return [nil] errors are reported only in verbose mode and never raised
def save_to_file!(file_wps = @file_wps, wps = @known_wp_sites)
  puts "Saving the current wordpress site table from memory to file: #{file_wps} ..." if @verbose
  timestamp = Time.now
  # Block form guarantees the handle is closed even when a write fails.
  File.open(file_wps, 'w') do |f|
    f.write "# Local wps file created by class #{self.class} method #{__method__} at: #{timestamp}\n"
    f.write "# site, wordpress detected?\n"
    wps.keys.sort.each do |key|
      f.write "#{key}, #{wps[key] ? 'yes' : 'no'}\n"
    end
  end
  puts "WordPress site cache table is successfully saved: #{file_wps}"
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
end
94
+ alias_method :save!, :save_to_file!
95
+
96
+ # 'setter' to add wordpress entry to the cache one at a time
97
# Probe one URL and record in the in-memory table whether its site runs WordPress.
#
# @param url [String] URL to check; reduced to its site via url_2_site
# @return [Hash{String=>Boolean}, nil] the single-entry record that was added;
#   nil when the site is already in the table or the check raised
def add(url)
  puts "Add entry to the local cache table: #{url}" if @verbose
  site = url_2_site(url)
  if @known_wp_sites.key?(site)
    puts "Site is already exist. Skipping: #{site}"
    # Leave explicitly: there is no record to merge for a known site.
    return
  end
  record = { site => (is_wp?(site) ? true : false) }
  puts "Entry loaded: #{record}"
  @known_wp_sites.merge!(record)
  record
rescue => ee
  puts "Exception on method #{__method__}: #{ee}: #{url}" if @verbose
  nil
end
118
+
119
+ # Logic to determine if it's a WordPress site
120
# Decide whether a URL belongs to a WordPress site.
#
# The checkpoints run in order - readme.html, install.css, then the meta tags -
# and evaluation stops at the first one that matches.
#
# @param url [String] URL to test; reduced to its site via url_2_site first
# @return [Boolean] true when any checkpoint matches, false otherwise
def is_wp?(url)
  site = url_2_site(url)
  checkpoint_hit = wp_readme?(site) || wp_css?(site) || wp_meta?(site)
  checkpoint_hit ? true : false
end
137
+
138
+ # add wordpress site entries (from a sitetracker list)
139
# Run #add over every site known to Wmap::SiteTracker, in parallel processes,
# and merge the records that come back into the in-memory table.
#
# @param num [Integer] number of processes handed to Parallel.map
# @return [Hash{String=>Boolean}] the records collected in this run (empty when
#   the site tracker has no entries)
def refresh(num = @max_parallel)
  puts "Add entries to the local cache table from site tracker: " if @verbose
  results = {}
  targets = Wmap::SiteTracker.new.known_sites.keys
  if targets.empty?
    puts "Error: no entry is loaded. Please check your list and try again."
    return results
  end
  records = Parallel.map(targets, :in_processes => num) { |target| add(target) }
  # #add yields nil for sites it skipped; only real records are merged.
  records.each do |record|
    results.merge!(record) unless record.nil? || record.empty?
  end
  @known_wp_sites.merge!(results)
  puts "Done loading entries."
  results
end
167
+
168
+ # Wrapper to use OpenURI method 'read' to return url body contents
169
# Fetch a URL through #open_url and parse the body with Nokogiri.
#
# @param url [String] URL to fetch
# @return [Object, nil] whatever Nokogiri::HTML returns for the body; nil when
#   the fetch or the parse raises (reported only in verbose mode)
def read_url(url)
  puts "Wrapper to return the OpenURI object for url: #{url}" if @verbose
  url_object = open_url(url)
  Nokogiri::HTML(url_object.read)
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  nil
ensure
  # Release the handle returned by #open_url once the body has been consumed;
  # url_object is still nil here when #open_url itself raised.
  url_object.close if url_object.respond_to?(:close)
end
181
+
182
+ # Wrapper for the OpenURI open method - create an open_uri object and return the reference upon success
183
# Open an http(s) URL through OpenURI and return the resulting IO-like object.
#
# The scheme is matched at the very start of the string, so only strings that
# begin with "http:" or "https:" are ever opened. The URL is opened via
# URI#open rather than Kernel#open, so the string is never interpreted as a
# local path or a "|command" pipe.
#
# @param url [String] absolute URL including the http(s) scheme
# @return [Object] the object returned by OpenURI for the URL
# @raise [RuntimeError] when the URL does not start with http: or https:
def open_url(url)
  puts "Open url #{url} by creating an open_uri object. Return the reference upon success." if @verbose
  options = { :allow_redirections => :safe, :read_timeout => Max_http_timeout / 1000 }
  case url
  when /\Ahttp:/i
    # plain http needs no extra options
  when /\Ahttps:/i
    # NOTE(review): 0 is OpenSSL's VERIFY_NONE - certificate verification is
    # deliberately switched off here, as in the original code.
    options[:ssl_verify_mode] = 0
  else
    raise "Invalid URL format - please specify the protocol prefix http(s) in the URL: #{url}"
  end
  URI.parse(url).open(options)
end
196
+
197
+ # Wordpress detection checkpoint - readme.html
198
# WordPress checkpoint: does <site>/readme.html exist with "wordpress" in its title?
#
# @param site [String] site URL that "/readme.html" is appended to
# @return [Boolean] true when the page answers 200 and its <title> mentions
#   WordPress (case-insensitive); false otherwise, including when the page
#   cannot be read
def wp_readme?(site)
  readme_url = site + "/readme.html"
  return false unless Wmap::UrlChecker.new.response_code(readme_url) == 200
  doc = read_url(readme_url)
  # read_url returns nil when its own fetch or parse fails.
  return false if doc.nil?
  (doc.css('title').to_s =~ /wordpress/i) ? true : false
end
215
+
216
+ # Wordpress detection checkpoint - install.css
217
# WordPress checkpoint: does <site>/wp-admin/css/install.css exist with a
# '#logo a' rule that mentions "wordpress"?
#
# @param site [String] site URL that "/wp-admin/css/install.css" is appended to
# @return [Boolean] true when the stylesheet answers 200 and the first
#   '#logo a' rule mentions WordPress (case-insensitive); false otherwise,
#   including when the stylesheet cannot be loaded or parsed
def wp_css?(site)
  css_url = site + "/wp-admin/css/install.css"
  return false unless Wmap::UrlChecker.new.response_code(css_url) == 200
  begin
    parser = CssParser::Parser.new
    parser.load_uri!(css_url)
    rule = parser.find_by_selector('#logo a')
  rescue => ee
    # A failed download or parse means "not detected here"; returning false
    # lets the caller carry on with its remaining checkpoints.
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return false
  end
  (!rule.empty? && rule[0] =~ /wordpress/i) ? true : false
end
236
+
237
+ # Wordpress detection checkpoint - meta generator
238
# WordPress checkpoint: do the site's <meta> tags mention "wordpress"?
#
# @param url [String] URL to test; reduced to its site via url_2_site
# @return [Boolean] true when the site answers 200 and the markup of its meta
#   tags mentions WordPress (case-insensitive); false otherwise, including
#   when the page cannot be read
def wp_meta?(url)
  site = url_2_site(url)
  return false unless Wmap::UrlChecker.new.response_code(site) == 200
  doc = read_url(site)
  # read_url returns nil when its own fetch or parse fails.
  return false if doc.nil?
  (doc.css('meta').to_s =~ /wordpress/i) ? true : false
end
253
+
254
# Identify the WordPress version of a site, trying readme.html first and the
# meta tags second. Each probe is invoked at most once per call.
#
# @param url [String] URL of the site to inspect
# @return [String, nil] the version reported by the first probe that finds
#   one; nil when neither does
def wp_ver(url)
  version = wp_ver_readme(url)
  version = wp_ver_meta(url) if version.nil?
  version
end
263
+
264
+ # Identify wordpress version through the meta tag
265
# Identify the WordPress version from the site's <meta> tags.
#
# Tags are scanned in document order. The first tag that mentions "wordpress"
# and also contains a version-like token supplies the result; a matching tag
# without such a token is skipped rather than ending the search.
#
# @param url [String] URL to inspect; reduced to its site via url_2_site
# @return [String, nil] the version token, or nil when the site does not
#   answer 200, cannot be read, or no tag yields a version
def wp_ver_meta(url)
  site = url_2_site(url)
  return nil unless Wmap::UrlChecker.new.response_code(site) == 200
  doc = read_url(site)
  # read_url returns nil when its own fetch or parse fails.
  return nil if doc.nil?
  doc.css('meta').each do |tag|
    markup = tag.to_s
    next unless markup =~ /wordpress/i
    version = markup.scan(/[\d+\.]+\d+/).first
    return version unless version.nil?
  end
  nil
end
284
+
285
+ # Wordpress version detection via - readme.html
286
# Identify the WordPress version from the 'h1#logo' element of <site>/readme.html.
#
# @param url [String] URL to inspect; reduced to its site via url_2_site
# @return [String, nil] the first version-like token in the element's markup;
#   nil when the page does not answer 200, cannot be read, or has no such token
def wp_ver_readme(url)
  site = url_2_site(url)
  readme_url = site + "/readme.html"
  return nil unless Wmap::UrlChecker.new.response_code(readme_url) == 200
  doc = read_url(readme_url)
  # read_url returns nil when its own fetch or parse fails.
  return nil if doc.nil?
  # A missing element becomes "" via to_s, which scans to no match.
  doc.css('h1#logo')[0].to_s.scan(/[\d+\.]+\d+/).first
end
300
+
301
+
302
+ end