miketracy-wwmd 0.2.11
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +3 -0
- data/README +62 -0
- data/README.txt +62 -0
- data/Rakefile +34 -0
- data/examples/config_example.yaml +24 -0
- data/examples/wwmd_example.rb +73 -0
- data/lib/wwmd.rb +78 -0
- data/lib/wwmd/encoding.rb +40 -0
- data/lib/wwmd/form.rb +110 -0
- data/lib/wwmd/form_array.rb +273 -0
- data/lib/wwmd/guid.rb +155 -0
- data/lib/wwmd/hpricot_html2text.rb +76 -0
- data/lib/wwmd/mixins.rb +318 -0
- data/lib/wwmd/mixins_extends.rb +188 -0
- data/lib/wwmd/mixins_external.rb +18 -0
- data/lib/wwmd/nokogiri_html2text.rb +41 -0
- data/lib/wwmd/page.rb +414 -0
- data/lib/wwmd/page/auth.rb +183 -0
- data/lib/wwmd/page/config.rb +44 -0
- data/lib/wwmd/page/constants.rb +60 -0
- data/lib/wwmd/page/headers.rb +107 -0
- data/lib/wwmd/page/inputs.rb +47 -0
- data/lib/wwmd/page/irb_helpers.rb +90 -0
- data/lib/wwmd/page/scrape.rb +202 -0
- data/lib/wwmd/page/spider.rb +127 -0
- data/lib/wwmd/page/urlparse.rb +79 -0
- data/lib/wwmd/page/utils.rb +30 -0
- data/lib/wwmd/viewstate.rb +118 -0
- data/lib/wwmd/viewstate/viewstate_class_helpers.rb +35 -0
- data/lib/wwmd/viewstate/viewstate_deserializer_methods.rb +213 -0
- data/lib/wwmd/viewstate/viewstate_from_xml.rb +126 -0
- data/lib/wwmd/viewstate/viewstate_types.rb +51 -0
- data/lib/wwmd/viewstate/viewstate_utils.rb +157 -0
- data/lib/wwmd/viewstate/viewstate_yaml.rb +25 -0
- data/lib/wwmd/viewstate/vs_array.rb +36 -0
- data/lib/wwmd/viewstate/vs_binary_serialized.rb +28 -0
- data/lib/wwmd/viewstate/vs_hashtable.rb +40 -0
- data/lib/wwmd/viewstate/vs_hybrid_dict.rb +40 -0
- data/lib/wwmd/viewstate/vs_indexed_string.rb +6 -0
- data/lib/wwmd/viewstate/vs_indexed_string_ref.rb +22 -0
- data/lib/wwmd/viewstate/vs_int_enum.rb +25 -0
- data/lib/wwmd/viewstate/vs_list.rb +32 -0
- data/lib/wwmd/viewstate/vs_pair.rb +27 -0
- data/lib/wwmd/viewstate/vs_read_types.rb +11 -0
- data/lib/wwmd/viewstate/vs_read_value.rb +33 -0
- data/lib/wwmd/viewstate/vs_sparse_array.rb +56 -0
- data/lib/wwmd/viewstate/vs_string.rb +29 -0
- data/lib/wwmd/viewstate/vs_string_array.rb +37 -0
- data/lib/wwmd/viewstate/vs_string_formatted.rb +30 -0
- data/lib/wwmd/viewstate/vs_triplet.rb +29 -0
- data/lib/wwmd/viewstate/vs_type.rb +21 -0
- data/lib/wwmd/viewstate/vs_unit.rb +28 -0
- data/lib/wwmd/viewstate/vs_value.rb +33 -0
- data/spec/README +3 -0
- data/spec/form_array.spec +49 -0
- data/spec/spider_csrf_test.spec +28 -0
- data/spec/urlparse_test.spec +89 -0
- data/tasks/ann.rake +80 -0
- data/tasks/bones.rake +20 -0
- data/tasks/gem.rake +201 -0
- data/tasks/git.rake +40 -0
- data/tasks/notes.rake +27 -0
- data/tasks/post_load.rake +34 -0
- data/tasks/rdoc.rake +51 -0
- data/tasks/rubyforge.rake +55 -0
- data/tasks/setup.rb +292 -0
- data/tasks/spec.rake +54 -0
- data/tasks/test.rake +40 -0
- data/tasks/zentest.rake +36 -0
- metadata +164 -0
@@ -0,0 +1,41 @@
|
|
1
|
+
=begin rdoc
|
2
|
+
html2text that works with Nokogiri
|
3
|
+
=end
|
4
|
+
module WWMD

  # Tag groups consulted by Page#html2text to decide how the text found
  # directly under an element is laid out in the plain-text rendering.
  # Only BLOCKTAGS, LISTTAGS, ITEMTAGS and SPECIALTAGS are read by
  # html2text; text under any other parent is emitted inline.
  INLINETAGS  = ['a','abbr','acronym','address','b','bdo','big','cite',
                 'code','del','dfn','em','font','i','ins','kbd','label',
                 'noframes','noscript','q','s','samp','small','span',
                 'strike','strong','sub','sup','td','th','tt','u',
                 'html','body','table']
  BLOCKTAGS   = ['blockquote','center','dd','div','fieldset','form',
                 'h1','h2','h3','h4','h5','h6','p','pre','tr','var',]
  LISTTAGS    = ['dir','dl','menu','ol','ul']
  ITEMTAGS    = ['li','dt']
  SPECIALTAGS = ['br','hr']

  class Page
    # Render the parsed document (<tt>scrape.hdoc</tt>) as plain text.
    #
    # The document is walked with +traverse+; every text node is recorded
    # together with the name of its parent element, and every +br+ / +hr+
    # element is recorded as a marker. Text is stripped, block and list
    # parents get a trailing newline, list items get a "* " prefix, +br+
    # becomes a newline and +hr+ a 72-dash rule. Runs of newlines are
    # collapsed to a single newline.
    #
    # returns: <tt>String</tt>
    def html2text
      pieces = []
      self.scrape.hdoc.traverse do |node|
        pieces << [node.parent.name, node.text] if node.text?
        pieces << [node.name, ""] if node.elem? && SPECIALTAGS.include?(node.name)
      end

      rendered = pieces.map do |name, str|
        case name
        when "br"
          "\n"
        when "hr"
          "\n" + ("-" * 72) + "\n"
        else
          text = str.strip
          if BLOCKTAGS.include?(name) || LISTTAGS.include?(name)
            text + "\n"
          elsif ITEMTAGS.include?(name)
            "* " + text + "\n"
          else
            text
          end
        end
      end

      # collapse consecutive newlines into one
      rendered.join.squeeze("\n")
    end
  end
end
|
data/lib/wwmd/page.rb
ADDED
@@ -0,0 +1,414 @@
|
|
1
|
+
module WWMD
|
2
|
+
# Accessor declarations for the state a page carries.
#
# NOTE(review): these are declared at WWMD module scope, ahead of
# <tt>class Page</tt>, so they define methods on the module rather than on
# Page itself. Page's methods call them through +self+ (self.body_data,
# self.opts, ...) -- confirm Page picks them up elsewhere (e.g. via an
# include) or move them inside the class.
attr_accessor :curl_object
attr_accessor :body_data
attr_accessor :post_data
attr_accessor :header_data
attr_accessor :use_referer
attr_reader   :forms
attr_reader   :last_error
attr_reader   :links      # array of links (urls)
attr_reader   :jlinks     # array of included javascript files
attr_reader   :spider     # spider object
attr_reader   :scrape     # scrape object
attr_reader   :urlparse   # urlparse object
attr_reader   :comments

attr_accessor :base_url   # needed to properly munge relative urls into fq urls
attr_accessor :logged_in  # are we logged in?

attr_accessor :opts
attr_accessor :inputs
|
21
|
+
|
22
|
+
# WWMD::Page is an extension of a Curl::Easy object which provides methods to
|
23
|
+
# enhance and ease the performance of web application penetration testing.
|
24
|
+
class Page
|
25
|
+
|
26
|
+
# Build a page: merge +opts+ with DEFAULTS, create the helper objects
# (Spider, Scrape, URLParse, Inputs) and configure the Curl::Easy handle.
#
# opts:: option hash. Any key for which the curl handle has a matching
#        writer (<tt>key=</tt>) is passed through to the handle;
#        <tt>:proxy_url</tt> is only applied when <tt>:use_proxy</tt> is set.
def initialize(opts={})
  @opts = opts.clone
  # only fill in options the caller left unset, so an explicit +false+
  # is not replaced by the default
  DEFAULTS.each { |key, default| @opts[key] = default if @opts[key].nil? }
  @spider = Spider.new(opts)
  @scrape = Scrape.new
  @base_url ||= opts[:base_url]
  @scrape.warn = opts[:scrape_warn] unless opts[:scrape_warn].nil?
  if opts.empty?
    putw "Page initialized without opts"
    @scrape.warn = false
  end
  @urlparse = URLParse.new
  @inputs = Inputs.new(self)
  @logged_in = false
  @body_data = ""
  @post_data = ""
  @comments = []
  @header_data = FormArray.new

  @curl_object = Curl::Easy.new
  @opts.each do |key, value|
    next if key == :proxy_url # handled below, gated on :use_proxy
    writer = "#{key}="
    # respond_to? works whether #methods returns Strings (1.8) or Symbols
    @curl_object.send(writer, value) if @curl_object.respond_to?(writer)
  end
  @curl_object.on_body { |data| self._body_cb(data) }
  @curl_object.on_header { |data| self._header_cb(data) }

  # cookies?
  @curl_object.enable_cookies = @opts[:enable_cookies]
  if @curl_object.enable_cookies?
    @curl_object.cookiejar = @opts[:cookiejar] || "./__cookiejar"
  end

  # proxy?
  @curl_object.proxy_url = @opts[:proxy_url] if @opts[:use_proxy]
end
|
63
|
+
|
64
|
+
#:section: Heavy Lifting
|
65
|
+
|
66
|
+
# set reporting data for the page
|
67
|
+
#
|
68
|
+
# Scan for comments, anchors, links and javascript includes and
|
69
|
+
# set page flags. The heavy lifting for parsing is done in the
|
70
|
+
# scrape class.
|
71
|
+
#
|
72
|
+
# returns: <tt>array [ code, page_status, body_data.size ]</tt>
|
73
|
+
# Parse the current body and refresh the page's reporting data:
# comments, links, javascript links and forms; the links are also handed
# to the spider.
#
# returns: <tt>array [ code, page_status, body_data.size ]</tt>
def set_data
  # transparently gunzip; a body that is not gzip data is left untouched
  begin
    gz = Zlib::GzipReader.new(StringIO.new(self.body_data))
    begin
      self.body_data.replace(gz.read)
    ensure
      gz.close
    end
  rescue StandardError
    # best effort: keep the body as received
  end

  # reset scrape and inputs objects
  @scrape.reset(self.body_data)
  @inputs.set

  @comments = @scrape.for_comments
  # remove comments that are css selectors for IE silliness
  @comments.reject! do |c|
    c =~ /\[if IE\]/ || c =~ /\[if IE \d/ || c =~ /\[if lt IE \d/
  end

  @links = @scrape.for_links.map do |url|
    @urlparse.parse(self.last_effective_url, url).to_s
  end
  @jlinks = @scrape.for_javascript_links
  @forms = self.search("//form").map { |f| Form.new(f) }
  @spider.add(self.last_effective_url, @links)
  [self.code, self.page_status, self.body_data.size]
end
|
101
|
+
|
102
|
+
# clear self.body_data and self.header_data
|
103
|
+
# Reset body data, header data and the last error before a new request.
#
# Does nothing and returns +false+ when <tt>opts[:parse]</tt> is +false+.
# (The original used <tt>=</tt> here, which never skipped and overwrote
# <tt>opts[:parse]</tt> with +false+ on every call.)
def clear_data
  return false if self.opts[:parse] == false
  @body_data = ""
  @header_data.clear
  @last_error = nil
end
|
109
|
+
|
110
|
+
# override Curl::Easy.perform to perform page actions,
|
111
|
+
# call <tt>self.set_data</tt>
|
112
|
+
#
|
113
|
+
# returns: <tt>array [ code, page_status, body_data.size ]</tt>
|
114
|
+
#
|
115
|
+
# don't call this directly if we are in console mode
|
116
|
+
# use get and submit respectively for GET and POST
|
117
|
+
# Run the request on the curl handle, then parse the result with
# <tt>self.set_data</tt>.
#
# Any StandardError raised by the transfer is stored in @last_error and
# clears the logged-in flag; a warning is printed when the error class
# name contains "Curl::Err". set_data is called even after a failure.
#
# returns: <tt>array [ code, page_status, body_data.size ]</tt>
def perform
  self.clear_data
  self.headers["Referer"] = self.cur if self.use_referer
  begin
    @curl_object.perform
  rescue => e
    @last_error = e
    # match on the class *name*; a Class cannot be matched against a Regexp
    putw "WARN: #{e.class}" if e.class.to_s =~ /Curl::Err/
    self.logged_in = false
  end
  self.set_data
  [self.code, self.page_status, self.body_data.size]
end
|
130
|
+
|
131
|
+
# replacement for Curl::Easy.http_post
|
132
|
+
#
|
133
|
+
# post the form attempting to remove curl supplied headers (Expect, X-Forwarded-For)
|
134
|
+
# call <tt>self.set_data</tt>
|
135
|
+
#
|
136
|
+
# if passed a regexp, escape values in the form using regexp before submitting
|
137
|
+
# if passed nil for the regexp arg, the form will not be escaped
|
138
|
+
# default: WWMD::ESCAPE[:default]
|
139
|
+
#
|
140
|
+
# returns: <tt>array [ code, body_data.size ]</tt>
|
141
|
+
# POST a form after clearing the Expect, X-Forwarded-For and
# Content-length headers, then parse the response with set_data.
#
# iform:: form to post; when nil the page's own form (<tt>self.form</tt>)
#         is used. A Symbol given here is treated as +reg+, so
#         <tt>submit(:sym)</tt> is shorthand for <tt>submit(nil, :sym)</tt>.
# reg::   passed to <tt>escape_all!</tt> on a clone of the form
#         (default: <tt>WWMD::ESCAPE[:default]</tt>).
#
# returns: <tt>array [ code, body_data.size ]</tt>, or the string
# "no form provided" when there is nothing to post.
def submit(iform=nil,reg=WWMD::ESCAPE[:default])
  if iform.is_a?(Symbol)
    reg = iform
    iform = nil
  end
  self.clear_data
  ["Expect","X-Forwarded-For","Content-length"].each { |s| self.clear_header(s) }
  self.headers["Referer"] = self.cur if self.use_referer

  if iform.nil?
    return "no form provided" if self.form.empty?
    sform = self.form.clone
  else
    sform = iform.clone # clone the form so that we don't change the original
  end
  sform.escape_all!(reg)

  if sform.empty?
    self.http_post('')
  else
    self.http_post(self.post_data = sform.to_post)
  end

  begin
    self.set_data
  rescue StandardError
    STDERR.puts "FATAL: could not parse page"
  end
  [self.code, self.body_data.size]
end
|
174
|
+
|
175
|
+
# submit a form using POST string
|
176
|
+
# POST a raw, already-encoded string and parse the response with set_data.
# Prints two warnings when <tt>self.ntlm?</tt> is true.
#
# returns: <tt>array [ code, body_data.size ]</tt>
def submit_string(post_string)
  self.clear_data
  self.http_post(post_string)
  self.set_data
  if self.ntlm?
    putw "WARN: this page requires NTLM Authentication"
    putw "WARN: use ntlm_get instead of get"
  end
  [self.code, self.body_data.size]
end
|
186
|
+
|
187
|
+
# override for Curl::Easy.perform
|
188
|
+
#
|
189
|
+
# if the passed url string doesn't contain a fully qualified
|
190
|
+
# path, we'll guess and prepend opts[:base_url]
|
191
|
+
#
|
192
|
+
# returns: <tt>array [ code, body_data.size ]</tt>
|
193
|
+
# Set the url (when one is given) and perform the request.
#
# url::   target; when +parse+ is true it is resolved against
#         <tt>opts[:base_url]</tt> with @urlparse, otherwise used verbatim.
#         With no url the handle's current url is requested.
# parse:: resolve +url+ against the base url (default: true)
#
# The response is parsed by +perform+ (which calls set_data), so it is
# not parsed a second time here.
#
# returns: <tt>array [ code, body_data.size ]</tt>
def get(url=nil,parse=true)
  if url
    self.url = parse ? @urlparse.parse(self.opts[:base_url], url).to_s : url
  end
  self.perform
  if self.ntlm?
    putw "WARN: this page requires NTLM Authentication"
    putw "use ntlm_get instead of get"
  end
  [self.code, self.body_data.size]
end
|
214
|
+
|
215
|
+
# GET with params and POST it as a form
|
216
|
+
# Take a GET-style url with parameters and POST it as a form: +clip+ on
# the url gives the endpoint that is resolved against
# <tt>opts[:base_url]</tt>, and <tt>clop.to_form</tt> gives the form
# handed to +submit+.
#
# NOTE(review): the +nil+ default cannot work -- <tt>url.clip</tt> raises
# NoMethodError for nil. +clip+, +clop+ and +to_form+ are defined outside
# this file; confirm what +clop+ returns for a url without parameters.
#
# returns: whatever +submit+ returns
def post(url=nil)
  ep = url.clip
  self.url = @urlparse.parse(self.opts[:base_url], ep).to_s if ep
  form = url.clop.to_form
  self.submit(form)
end
|
222
|
+
|
223
|
+
# Resolve +url+ against <tt>opts[:base_url]</tt> and set it as the
# handle's url without performing a request.
#
# returns: the resolved url string
def furl(url)
  self.url = @urlparse.parse(self.opts[:base_url], url).to_s
end
|
226
|
+
|
227
|
+
#:section: Reporting helper methods
|
228
|
+
# These are methods that generate data for a parsed page
|
229
|
+
|
230
|
+
# return text representation of page code
|
231
|
+
#
|
232
|
+
# override with specific statuses in helper depending on page text
|
233
|
+
# etc to include statuses outside 200 = OK and other = ERR
|
234
|
+
# Text status for the last response: "OK" when response_code is 200,
# "ERR" for anything else.
def page_status
  self.response_code == 200 ? "OK" : "ERR"
end
|
238
|
+
|
239
|
+
alias_method :status, :page_status#:nodoc:
|
240
|
+
|
241
|
+
# return value of @logged_in
|
242
|
+
# Current value of @logged_in.
def logged_in?
  @logged_in
end
|
245
|
+
|
246
|
+
# return a string of flags:
|
247
|
+
# Ll links
|
248
|
+
# Jj javascript includes
|
249
|
+
# Ff forms
|
250
|
+
# Cc comments
|
251
|
+
# Four-character flag string for the page; a letter is upper case when
# the page has the feature and lower case when it does not:
#   L/l links, J/j javascript includes, F/f forms, C/c comments
def report_flags
  [
    self.has_links?    ? "L" : "l",
    self.has_jlinks?   ? "J" : "j",
    self.has_form?     ? "F" : "f",
    self.has_comments? ? "C" : "c"
  ].join
end
|
258
|
+
|
259
|
+
# true when @links holds at least one link; false when it is empty or
# has not been set yet (no page parsed so far)
def has_links?; return !(@links.nil? || @links.empty?); end
|
260
|
+
# true when @jlinks holds at least one javascript link; false when it is
# empty or has not been set yet (no page parsed so far)
def has_jlinks?; return !(@jlinks.nil? || @jlinks.empty?); end
|
261
|
+
# true when @forms holds at least one form; false when it is empty or
# has not been set yet (no page parsed so far)
def has_form?; return !(@forms.nil? || @forms.size < 1); end
|
262
|
+
# true when @comments is not empty
def has_comments?
  !@comments.empty?
end
|
263
|
+
|
264
|
+
# return page size in bytes
|
265
|
+
# Size of the page body, as reported by <tt>body_data.size</tt>.
def size
  self.body_data.size
end
|
268
|
+
|
269
|
+
#:section: Other methods
|
270
|
+
|
271
|
+
# Names of all elements matched by <tt>search("*")</tt>, in the order
# the search returns them.
def all_tags#:nodoc:
  self.search("*").map { |x| x.name }
end
|
274
|
+
|
275
|
+
# return MD5 for DOM fingerprint
|
276
|
+
# take all tag names in page.to_s.md5
|
277
|
+
# MD5 fingerprint of the page's DOM: the list of all tag names converted
# with +to_s+ and hashed with <tt>String#md5</tt> (defined outside this file).
#
# NOTE(review): <tt>Array#to_s</tt> produces different text on Ruby 1.8
# (joined elements) and 1.9+ (inspect form), so fingerprints are not
# comparable across those versions.
def fingerprint
  self.all_tags.to_s.md5
end
|
280
|
+
alias_method :fp, :fingerprint #:nodoc:
|
281
|
+
|
282
|
+
# set link using an integer link from self.report
|
283
|
+
#--
|
284
|
+
# NOTE: I always use page.get(page.l(1)) anyway.
|
285
|
+
#++
|
286
|
+
# Set the handle's url to the link stored at +index+ in @links.
#
# returns: the link that was set
def set_link(index)
  self.url = @links[index]
end
|
289
|
+
|
290
|
+
# return link at index from @links array
|
291
|
+
# Link stored at +index+ in the @links array (nil when there is no such
# entry).
def get_link(index)
  @links[index]
end
|
294
|
+
|
295
|
+
alias_method :link, :get_link #:nodoc:
|
296
|
+
alias_method :l, :get_link #:nodoc:
|
297
|
+
|
298
|
+
# alias_method for body_data
|
299
|
+
# Shorthand for <tt>self.body_data</tt>.
def raw
  self.body_data
end
|
302
|
+
|
303
|
+
# alias_method for last_effective_url
|
304
|
+
# Shorthand for <tt>self.last_effective_url</tt>.
def current_url
  self.last_effective_url
end
|
307
|
+
|
308
|
+
alias_method :current, :current_url
|
309
|
+
alias_method :cur, :current_url
|
310
|
+
|
311
|
+
# the last http response code
|
312
|
+
# The last http response code, as returned by <tt>self.response_code</tt>
# (not converted to a string).
def code
  self.response_code
end
|
315
|
+
|
316
|
+
#:section: Parsing convenience methods
|
317
|
+
# methods that help parse and find information on a page including
|
318
|
+
# access to forms etc.
|
319
|
+
|
320
|
+
# grep for regexp and remove leading whitespace
|
321
|
+
# Lines of the body that match +reg+, with leading whitespace removed.
#
# Iterates with +each_line+ because String is not Enumerable on
# Ruby 1.9 and later, so <tt>String#grep</tt> is unavailable there.
#
# returns: <tt>array</tt> of matching lines (line endings are kept)
def grep(reg)
  self.body_data.each_line.grep(reg).map { |line| line.gsub(/^\s+/, "") }
end
|
324
|
+
|
325
|
+
# return this page's form (at index id) as a FormArray
|
326
|
+
# This page's form at index +id+ (default 0), converted with
# +to_form_array+.
#
# returns: the converted form, or nil when the page has no forms or no
# form exists at +id+
def get_form(id=nil)
  id ||= 0
  return nil if forms.empty?
  found = @forms[id]
  found && found.to_form_array
end
|
331
|
+
|
332
|
+
# return the complete url to the form action on this page
|
333
|
+
# Complete url of the action of the form at index +id+ (default 0).
# A nil or empty action yields <tt>last_effective_url</tt>; otherwise the
# action is resolved against <tt>last_effective_url</tt> with @urlparse.
def action(id=nil)
  id ||= 0
  act = self.forms[id].action
  return self.last_effective_url if act.nil? || act.empty?
  @urlparse.parse(self.last_effective_url, act).to_s
end
|
339
|
+
|
340
|
+
# return an array of Element objects for an xpath search
|
341
|
+
# Run +xpath+ against the parsed document (<tt>scrape.hdoc</tt>) and
# return whatever its +search+ returns.
def search(xpath)
  self.scrape.hdoc.search(xpath)
end
|
344
|
+
|
345
|
+
# return an array of inner_html for each <script> tag encountered
|
346
|
+
# inner_html of every <script> tag that has content. Tags whose
# inner_html is blank are left out (the original returned a nil entry
# for each of them).
#
# returns: <tt>array</tt> of strings
def dump_scripts
  self.get_tags("//script").map { |s| s.inner_html }.reject { |html| html.strip == '' }
end
|
349
|
+
|
350
|
+
alias_method :scripts, :dump_scripts
|
351
|
+
|
352
|
+
#:section: Input and Output Helpers
|
353
|
+
|
354
|
+
# set self.opts[:base_url]
|
355
|
+
# Set both <tt>opts[:base_url]</tt> and <tt>base_url</tt> to +url+.
#
# returns: +url+, or nil (without changing anything) when no url is given
def setbase(url=nil)
  return nil unless url
  self.opts[:base_url] = url
  self.base_url = url
end
|
360
|
+
|
361
|
+
# return md5sum for self.body_data
|
362
|
+
# md5 of <tt>self.body_data</tt>, computed by the body's own +md5+
# method (defined outside this file).
def md5
  self.body_data.md5
end
|
365
|
+
|
366
|
+
# write self.body_data to file
|
367
|
+
# Write <tt>self.body_data</tt> to +filename+.
#
# returns: the string "wrote to <filename>"
def write(filename)
  File.write(filename, self.body_data)
  # interpolation also accepts non-String paths
  "wrote to #{filename}"
end
|
371
|
+
|
372
|
+
# read self.body_data from file
|
373
|
+
# Load <tt>self.body_data</tt> from +filename+ and parse it with set_data.
#
# returns: whatever set_data returns
def read(filename)
  self.body_data = File.read(filename)
  self.set_data
end
|
377
|
+
|
378
|
+
# does this response have SET-COOKIE headers?
|
379
|
+
# Values of all SET-COOKIE response headers (header name compared
# case-insensitively). Despite the +?+ name this returns an array, which
# is empty when the response set no cookies.
def set_cookies?
  cookie_headers = self.header_data.select { |x| x[0].upcase == "SET-COOKIE" }
  cookie_headers.map { |x| x[1] }
end
|
388
|
+
|
389
|
+
# Shorthand for <tt>self.total_time</tt>.
def time
  self.total_time
end
|
392
|
+
|
393
|
+
#:section: Data callbacks and method_missing
|
394
|
+
|
395
|
+
# callback for <tt>self.on_body</tt>
|
396
|
+
# callback for <tt>self.on_body</tt>: append the received chunk to
# @body_data.
#
# returns: the length of the chunk (0 when the chunk is nil, which
# previously raised NoMethodError)
def _body_cb(data)
  return 0 if data.nil?
  @body_data << data
  data.length.to_i
end
|
400
|
+
|
401
|
+
# callback for <tt>self.on_header</tt>
|
402
|
+
# callback for <tt>self.on_header</tt>: split the header line at the
# first ":" into name and value, strip both, and add the pair to
# @header_data with <tt>extend!</tt>. A line without ":" is stored with an
# empty value.
#
# returns: the length of the header line
def _header_cb(data)
  name, value = data.split(":", 2)
  @header_data.extend! name.to_s.strip, value.to_s.strip
  data.length.to_i
end
|
407
|
+
|
408
|
+
# send methods not defined here to <tt>@curl_object</tt>
|
409
|
+
# send methods not defined here to <tt>@curl_object</tt>, forwarding
# arguments and any block. Falls back to the default behaviour
# (NoMethodError) while @curl_object is not set.
def method_missing(methodname, *args, &block)
  return super if @curl_object.nil?
  @curl_object.send(methodname, *args, &block)
end

# keep <tt>respond_to?</tt> in step with method_missing: report the
# methods @curl_object responds to as well
def respond_to_missing?(methodname, include_private = false)
  (!@curl_object.nil? && @curl_object.respond_to?(methodname, include_private)) || super
end
|
412
|
+
|
413
|
+
end
|
414
|
+
end
|