wmap 2.4.4 → 2.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,302 @@
1
+ #--
2
+ # Wmap
3
+ #
4
+ # A pure Ruby library for the Internet web application discovery and tracking.
5
+ #
6
+ # Copyright (c) 2012-2015 Yang Li <yang.li@owasp.org>
7
+ #++
8
+ require "parallel"
9
+ #require "singleton"
10
+ require "open-uri"
11
+ require "open_uri_redirections"
12
+ require "nokogiri"
13
+ require "css_parser"
14
+
15
+
16
+ # Main class to detect WordPress sites and keep track of them in a local cache file
17
+ class Wmap::WpTracker
18
+ include Wmap::Utils
19
+ #include Singleton
20
+
21
+ attr_accessor :http_timeout, :max_parallel, :verbose, :sites_wp, :data_dir
22
+ attr_reader :known_wp_sites
23
+ # set hard stop limit of http time-out to 8 seconds, in order to avoid severe performance penalty for certain 'weird' site(s)
24
+ Max_http_timeout=8000
25
+
26
# Set up the WordPress tracker instance and load the local site cache.
#
# @param params [Hash] optional settings:
#   :verbose      - print progress / error messages when true (default false)
#   :data_dir     - directory holding the cache and log files
#                   (default: the 'data' directory two levels above this file)
#   :sites_wp     - path of the WordPress site cache file (default: <data_dir>/wp_sites)
#   :http_timeout - HTTP time-out setting, presumably in milliseconds like
#                   Max_http_timeout (default 5000)
#   :max_parallel - default number of worker processes used by #refresh (default 40)
def initialize(params = {})
  @verbose = params.fetch(:verbose, false)
  @data_dir = params.fetch(:data_dir, File.dirname(__FILE__) + '/../../data/')
  # File.join copes with a :data_dir given with or without a trailing slash.
  @file_wps = params.fetch(:sites_wp, File.join(@data_dir, 'wp_sites'))
  # Back the public sites_wp accessor with the cache path actually in use.
  @sites_wp = @file_wps
  @http_timeout = params.fetch(:http_timeout, 5000)
  @max_parallel = params.fetch(:max_parallel, 40)
  Dir.mkdir(@data_dir) unless Dir.exist?(@data_dir)
  @log_file = File.join(@data_dir, 'wp_checker.log')
  # load_from_file returns nil when the cache file is missing or unreadable
  # (e.g. on the first run); fall back to an empty table so lookups still work.
  @known_wp_sites = load_from_file(@file_wps) || {}
end
38
+
39
+
40
# Load the known WordPress sites from a cache file into a hash.
#
# Each data line is expected to look like "site, yes" or "site, no". Blank
# lines and lines starting with '#' are skipped, and the first entry wins
# when a site is listed more than once.
#
# @param file [String] path of the cache file (defaults to the instance cache file)
# @param lc [Boolean] downcase every line before parsing when truthy
# @return [Hash{String=>Boolean}, nil] site => WordPress flag, or nil when
#   the file cannot be read
def load_from_file(file = @file_wps, lc = true)
  puts "Loading trusted file: #{file}" if @verbose
  known_wp_sites = {}
  # The block form closes the handle even if parsing raises.
  File.open(file, 'r') do |f_wp_sites|
    f_wp_sites.each_line do |line|
      puts "Processing line: #{line}" if @verbose
      line = line.strip
      next if line.empty? || line.start_with?('#')
      line = line.downcase if lc
      site, flag = line.split(',')
      site = site.to_s.strip
      # Ignore lines without a site name and keep the first record of a site.
      next if site.empty? || known_wp_sites.key?(site)
      known_wp_sites[site] = !(flag.to_s =~ /yes/i).nil?
    end
  end
  known_wp_sites
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  nil
end
72
+
73
# Write the WordPress site table to a cache file, one "site, yes|no" line
# per entry in sorted key order, preceded by two '#' comment header lines.
#
# @param file_wps [String] destination path (defaults to the instance cache file)
# @param wps [Hash] site => WordPress flag table (defaults to the in-memory table)
# @return [nil] errors are reported (when verbose) rather than raised
def save_to_file!(file_wps = @file_wps, wps = @known_wp_sites)
  puts "Saving the current wordpress site table from memory to file: #{file_wps} ..." if @verbose
  timestamp = Time.now
  # The block form closes (and flushes) the file even if a write fails.
  File.open(file_wps, 'w') do |f|
    f.write "# Local wps file created by class #{self.class} method #{__method__} at: #{timestamp}\n"
    f.write "# site, wordpress detected?\n"
    wps.keys.sort.each do |key|
      f.write "#{key}, #{wps[key] ? 'yes' : 'no'}\n"
    end
  end
  puts "WordPress site cache table is successfully saved: #{file_wps}"
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
end
94
+ alias_method :save!, :save_to_file!
95
+
96
# Check one URL and add its site to the in-memory cache table.
#
# @param url [String] URL to check; it is reduced to its site via url_2_site
# @return [Hash, nil] { site => true|false } for a newly added site, or nil
#   when the site is already cached or an error occurred
def add(url)
  puts "Add entry to the local cache table: #{url}" if @verbose
  site = url_2_site(url)
  # The table may be nil if the cache file could not be loaded.
  @known_wp_sites ||= {}
  if @known_wp_sites.key?(site)
    puts "Site is already exist. Skipping: #{site}"
    # Nothing to merge: return here instead of merging a nil record, which
    # used to raise and be reported as an exception.
    return nil
  end
  record = { site => (is_wp?(site) ? true : false) }
  puts "Entry loaded: #{record}"
  @known_wp_sites.merge!(record)
  record
rescue => ee
  puts "Exception on method #{__method__}: #{ee}: #{url}" if @verbose
  nil
end
118
+
119
# Decide whether a URL belongs to a WordPress site.
#
# Three checkpoints are tried in order - readme.html, install.css, then the
# page meta tags - and the first positive one settles the answer.
#
# @param url [String] URL to check; it is reduced to its site via url_2_site
# @return [Boolean] true when any checkpoint reports WordPress
def is_wp?(url)
  site = url_2_site(url)
  return true if wp_readme?(site)
  return true if wp_css?(site)
  return true if wp_meta?(site)
  false
end
137
+
138
# Check every site known to the site tracker and merge the findings into
# the in-memory cache table.
#
# @param num [Integer] number of worker processes handed to Parallel
#   (defaults to the instance max_parallel setting)
# @return [Hash] site => true|false for the entries added by this run;
#   empty when the site tracker has no sites
def refresh(num = @max_parallel)
  puts "Add entries to the local cache table from site tracker: " if @verbose
  results = {}
  wps = Wmap::SiteTracker.new.known_sites.keys
  if wps.empty?
    puts "Error: no entry is loaded. Please check your list and try again."
    return results
  end
  records = Parallel.map(wps, :in_processes => num) { |target| add(target) }
  records.each do |record|
    # nil and empty records contribute nothing to the result
    results.merge!(record) unless record.nil? || record.empty?
  end
  @known_wp_sites.merge!(results)
  puts "Done loading entries."
  results
end
167
+
168
# Fetch a URL through open_url and parse the response body with Nokogiri.
#
# @param url [String] URL to fetch
# @return [Object, nil] the document returned by Nokogiri::HTML, or nil when
#   the URL cannot be opened, read or parsed
def read_url(url)
  puts "Wrapper to return the OpenURI object for url: #{url}" if @verbose
  url_object = open_url(url)
  Nokogiri::HTML(url_object.read)
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  nil
ensure
  # Release the handle returned by open_url; it is nil when open_url raised.
  url_object.close if url_object.respond_to?(:close)
end
181
+
182
# Open an http(s) URL through open-uri and return the resulting IO-like object.
#
# The scheme check is anchored to the start of the string, so only genuine
# http:// or https:// URLs are handed to the opener. An unanchored match
# would also accept strings such as "| cmd #http:", which Kernel#open
# treats as a command to run.
#
# @param url [String] URL including the http or https protocol prefix
# @return [Object] whatever open-uri returns for the URL
# @raise [RuntimeError] when the URL does not start with http: or https:
def open_url(url)
  puts "Open url #{url} by creating an open_uri object. Return the reference upon success." if @verbose
  target = url.to_s.strip
  # :allow_redirections is an option of the open_uri_redirections gem;
  # Max_http_timeout is in milliseconds while :read_timeout takes seconds.
  options = { :allow_redirections => :safe, :read_timeout => Max_http_timeout / 1000 }
  if target =~ /\Ahttps:/i
    # 0 is the value of OpenSSL::SSL::VERIFY_NONE: certificate checks are off
    options[:ssl_verify_mode] = 0
  elsif target !~ /\Ahttp:/i
    raise "Invalid URL format - please specify the protocol prefix http(s) in the URL: #{url}"
  end
  # Kernel#open stopped handling URLs in Ruby 3.0; prefer URI.open when present.
  if URI.respond_to?(:open)
    URI.open(target, options)
  else
    open(target, options)
  end
end
196
+
197
# WordPress detection checkpoint - readme.html
#
# Reports true when <site>/readme.html answers with HTTP 200 and its
# <title> mentions "wordpress" (case-insensitive).
#
# @param site [String] site URL the "/readme.html" path is appended to
# @return [Boolean]
def wp_readme?(site)
  readme_url = site + "/readme.html"
  return false unless Wmap::UrlChecker.new.response_code(readme_url) == 200
  doc = read_url(readme_url)
  # read_url returns nil when the page cannot be fetched or parsed
  return false if doc.nil?
  (doc.css('title').to_s =~ /wordpress/i) ? true : false
end
215
+
216
# WordPress detection checkpoint - install.css
#
# Reports true when <site>/wp-admin/css/install.css answers with HTTP 200
# and its first '#logo a' rule mentions "wordpress" (case-insensitive).
#
# @param site [String] site URL the stylesheet path is appended to
# @return [Boolean]
def wp_css?(site)
  css_url = site + "/wp-admin/css/install.css"
  return false unless Wmap::UrlChecker.new.response_code(css_url) == 200
  begin
    parser = CssParser::Parser.new
    parser.load_uri!(css_url)
    rule = parser.find_by_selector('#logo a')
  rescue => ee
    # A stylesheet that cannot be downloaded or parsed counts as "not
    # detected", matching how the other checkpoints treat fetch failures.
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return false
  end
  (rule.length > 0 && rule[0] =~ /wordpress/i) ? true : false
end
236
+
237
# WordPress detection checkpoint - meta generator
#
# Reports true when the site answers with HTTP 200 and any of its <meta>
# tags mentions "wordpress" (case-insensitive).
#
# @param url [String] URL to check; it is reduced to its site via url_2_site
# @return [Boolean]
def wp_meta?(url)
  site = url_2_site(url)
  return false unless Wmap::UrlChecker.new.response_code(site) == 200
  doc = read_url(site)
  # read_url returns nil when the page cannot be fetched or parsed
  return false if doc.nil?
  (doc.css('meta').to_s =~ /wordpress/i) ? true : false
end
253
+
254
# Identify the WordPress version of a site, trying readme.html first and
# the meta tags second.
#
# Each detector is called at most once; calling them a second time to fetch
# the value would repeat their HTTP requests.
#
# @param url [String] URL of the site to inspect
# @return [String, nil] the version string, or nil when neither source yields one
def wp_ver(url)
  wp_ver_readme(url) || wp_ver_meta(url)
end
263
+
264
# Identify the WordPress version through the meta tags.
#
# The first <meta> tag mentioning "wordpress" (case-insensitive) is scanned
# for a version-like token.
#
# @param url [String] URL to inspect; it is reduced to its site via url_2_site
# @return [String, nil] the first token found in that tag, or nil when the
#   site does not answer with HTTP 200, cannot be read, or has no such tag
def wp_ver_meta(url)
  site = url_2_site(url)
  return nil unless Wmap::UrlChecker.new.response_code(site) == 200
  doc = read_url(site)
  # read_url returns nil when the page cannot be fetched or parsed
  return nil if doc.nil?
  doc.css('meta').each do |tag|
    markup = tag.to_s
    # NOTE(review): '+' inside the character class is a literal plus sign,
    # so the pattern matches runs of digits, dots and pluses ending in digits.
    return markup.scan(/[\d+\.]+\d+/).first if markup =~ /wordpress/i
  end
  nil
end
284
+
285
# WordPress version detection via readme.html
#
# Scans the first 'h1#logo' element of <site>/readme.html for a
# version-like token.
#
# @param url [String] URL to inspect; it is reduced to its site via url_2_site
# @return [String, nil] the first token found, or nil when readme.html does
#   not answer with HTTP 200, cannot be read, or holds no such token
def wp_ver_readme(url)
  site = url_2_site(url)
  readme_url = site + "/readme.html"
  return nil unless Wmap::UrlChecker.new.response_code(readme_url) == 200
  doc = read_url(readme_url)
  # read_url returns nil when the page cannot be fetched or parsed
  return nil if doc.nil?
  logo = doc.css('h1#logo')[0]
  # logo is nil when the element is missing; nil.to_s is "" so scan finds nothing
  logo.to_s.scan(/[\d+\.]+\d+/).first
end
300
+
301
+
302
+ end