miketracy-wwmd 0.2.11

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. data/History.txt +3 -0
  2. data/README +62 -0
  3. data/README.txt +62 -0
  4. data/Rakefile +34 -0
  5. data/examples/config_example.yaml +24 -0
  6. data/examples/wwmd_example.rb +73 -0
  7. data/lib/wwmd.rb +78 -0
  8. data/lib/wwmd/encoding.rb +40 -0
  9. data/lib/wwmd/form.rb +110 -0
  10. data/lib/wwmd/form_array.rb +273 -0
  11. data/lib/wwmd/guid.rb +155 -0
  12. data/lib/wwmd/hpricot_html2text.rb +76 -0
  13. data/lib/wwmd/mixins.rb +318 -0
  14. data/lib/wwmd/mixins_extends.rb +188 -0
  15. data/lib/wwmd/mixins_external.rb +18 -0
  16. data/lib/wwmd/nokogiri_html2text.rb +41 -0
  17. data/lib/wwmd/page.rb +414 -0
  18. data/lib/wwmd/page/auth.rb +183 -0
  19. data/lib/wwmd/page/config.rb +44 -0
  20. data/lib/wwmd/page/constants.rb +60 -0
  21. data/lib/wwmd/page/headers.rb +107 -0
  22. data/lib/wwmd/page/inputs.rb +47 -0
  23. data/lib/wwmd/page/irb_helpers.rb +90 -0
  24. data/lib/wwmd/page/scrape.rb +202 -0
  25. data/lib/wwmd/page/spider.rb +127 -0
  26. data/lib/wwmd/page/urlparse.rb +79 -0
  27. data/lib/wwmd/page/utils.rb +30 -0
  28. data/lib/wwmd/viewstate.rb +118 -0
  29. data/lib/wwmd/viewstate/viewstate_class_helpers.rb +35 -0
  30. data/lib/wwmd/viewstate/viewstate_deserializer_methods.rb +213 -0
  31. data/lib/wwmd/viewstate/viewstate_from_xml.rb +126 -0
  32. data/lib/wwmd/viewstate/viewstate_types.rb +51 -0
  33. data/lib/wwmd/viewstate/viewstate_utils.rb +157 -0
  34. data/lib/wwmd/viewstate/viewstate_yaml.rb +25 -0
  35. data/lib/wwmd/viewstate/vs_array.rb +36 -0
  36. data/lib/wwmd/viewstate/vs_binary_serialized.rb +28 -0
  37. data/lib/wwmd/viewstate/vs_hashtable.rb +40 -0
  38. data/lib/wwmd/viewstate/vs_hybrid_dict.rb +40 -0
  39. data/lib/wwmd/viewstate/vs_indexed_string.rb +6 -0
  40. data/lib/wwmd/viewstate/vs_indexed_string_ref.rb +22 -0
  41. data/lib/wwmd/viewstate/vs_int_enum.rb +25 -0
  42. data/lib/wwmd/viewstate/vs_list.rb +32 -0
  43. data/lib/wwmd/viewstate/vs_pair.rb +27 -0
  44. data/lib/wwmd/viewstate/vs_read_types.rb +11 -0
  45. data/lib/wwmd/viewstate/vs_read_value.rb +33 -0
  46. data/lib/wwmd/viewstate/vs_sparse_array.rb +56 -0
  47. data/lib/wwmd/viewstate/vs_string.rb +29 -0
  48. data/lib/wwmd/viewstate/vs_string_array.rb +37 -0
  49. data/lib/wwmd/viewstate/vs_string_formatted.rb +30 -0
  50. data/lib/wwmd/viewstate/vs_triplet.rb +29 -0
  51. data/lib/wwmd/viewstate/vs_type.rb +21 -0
  52. data/lib/wwmd/viewstate/vs_unit.rb +28 -0
  53. data/lib/wwmd/viewstate/vs_value.rb +33 -0
  54. data/spec/README +3 -0
  55. data/spec/form_array.spec +49 -0
  56. data/spec/spider_csrf_test.spec +28 -0
  57. data/spec/urlparse_test.spec +89 -0
  58. data/tasks/ann.rake +80 -0
  59. data/tasks/bones.rake +20 -0
  60. data/tasks/gem.rake +201 -0
  61. data/tasks/git.rake +40 -0
  62. data/tasks/notes.rake +27 -0
  63. data/tasks/post_load.rake +34 -0
  64. data/tasks/rdoc.rake +51 -0
  65. data/tasks/rubyforge.rake +55 -0
  66. data/tasks/setup.rb +292 -0
  67. data/tasks/spec.rake +54 -0
  68. data/tasks/test.rake +40 -0
  69. data/tasks/zentest.rake +36 -0
  70. metadata +164 -0
@@ -0,0 +1,18 @@
1
module REXML
  class Element

    # Pretty print this element (indent=0) into a string.
    #
    # fn:: optional filename. When given, the rendered XML is written to that
    #      file and the filename is returned.
    #
    # returns: the rendered XML string, or +fn+ when a filename was passed
    #
    # NOTE: this method shadows Kernel#pp for REXML::Element instances.
    def pp(fn=nil)
      tmp = ""
      # Element#write appends to anything that responds to <<, so a plain
      # String works as the output buffer
      self.write(tmp,0)
      return tmp unless fn
      # write with File directly instead of relying on a String#write
      # extension, which core Ruby does not provide
      File.open(fn,"w") { |f| f.write(tmp) }
      fn
    end

  end
end
@@ -0,0 +1,41 @@
1
=begin rdoc
html2text that works with Nokogiri
=end
module WWMD

  # tag groups consulted by Page#html2text when laying out text
  INLINETAGS  = %w[a abbr acronym address b bdo big cite
                   code del dfn em font i ins kbd label
                   noframes noscript q s samp small span
                   strike strong sub sup td th tt u
                   html body table]
  BLOCKTAGS   = %w[blockquote center dd div fieldset form
                   h1 h2 h3 h4 h5 h6 p pre tr var]
  LISTTAGS    = %w[dir dl menu ol ul]
  ITEMTAGS    = %w[li dt]
  SPECIALTAGS = %w[br hr]

  class Page
    # Render the parsed document (self.scrape.hdoc) as plain text.
    #
    # Every text node is emitted stripped; text whose parent is a block or
    # list tag gets a trailing newline, text inside an item tag is rendered
    # as a "* " bullet line, <br> becomes a newline and <hr> a 72 character
    # rule. Runs of newlines are collapsed to a single newline at the end.
    #
    # returns: the text as a String
    def html2text
      # first pass: collect [tag name, text] pairs in traversal order
      chunks = []
      self.scrape.hdoc.traverse do |node|
        chunks << [node.parent.name, node.text] if node.text?
        chunks << [node.name, ""] if node.elem? && SPECIALTAGS.include?(node.name)
      end

      # second pass: decorate each chunk according to its tag group
      text = ""
      chunks.each do |tag, raw|
        piece =
          case tag
          when "br" then "\n"
          when "hr" then "\n" + ("-" * 72) + "\n"
          else
            stripped = raw.strip
            if BLOCKTAGS.include?(tag) || LISTTAGS.include?(tag)
              stripped + "\n"
            elsif ITEMTAGS.include?(tag)
              "* " + stripped + "\n"
            else
              stripped
            end
          end
        text << piece
      end
      text.gsub(/\n+/, "\n")
    end
  end
end
data/lib/wwmd/page.rb ADDED
@@ -0,0 +1,414 @@
1
module WWMD
  # NOTE(review): these accessors are declared on the WWMD module itself, not
  # inside WWMD::Page, so they only become instance methods of whatever
  # includes WWMD. Presumably Page picks them up through an include that is
  # not visible in this file -- confirm before moving them.
  attr_accessor :curl_object
  attr_accessor :body_data
  attr_accessor :post_data
  attr_accessor :header_data
  attr_accessor :use_referer
  attr_reader :forms
  attr_reader :last_error
  attr_reader :links      # array of links (urls)
  attr_reader :jlinks     # array of included javascript files
  attr_reader :spider     # spider object
  attr_reader :scrape     # scrape object
  attr_reader :urlparse   # urlparse object
  attr_reader :comments

  attr_accessor :base_url   # needed to properly munge relative urls into fq urls
  attr_accessor :logged_in  # are we logged in?

  attr_accessor :opts
  attr_accessor :inputs

  # WWMD::Page wraps a Curl::Easy object (methods not defined here are
  # forwarded to it, see method_missing) and provides methods to enhance and
  # ease the performance of web application penetration testing.
  class Page

    # Create a page and its underlying Curl::Easy object.
    #
    # opts:: option hash. Entries that are missing or falsy are filled in
    #        from DEFAULTS. Every option for which the curl object has a
    #        matching setter is applied to it; :proxy_url is only applied
    #        when opts[:use_proxy] is set.
    def initialize(opts={})
      @opts = opts.clone
      # NOTE(review): "not opts[k]" also replaces an explicit false with the
      # default value -- confirm that this is intended.
      DEFAULTS.each { |k,v| @opts[k] = v if not opts[k] }
      @spider = Spider.new(opts)
      @scrape = Scrape.new
      @base_url ||= opts[:base_url]
      @scrape.warn = opts[:scrape_warn] if !opts[:scrape_warn].nil?
      if opts.empty?
        putw "Page initialized without opts"
        @scrape.warn = false
      end
      @urlparse = URLParse.new()
      @inputs = Inputs.new(self)
      @logged_in = false
      @body_data = ""
      @post_data = ""
      @comments = []
      @header_data = FormArray.new

      @curl_object = Curl::Easy.new
      @opts.each do |k,v|
        next if k == :proxy_url
        setter = "#{k}="
        # respond_to? works whether Object#methods returns strings (ruby 1.8)
        # or symbols (1.9+); methods.include?("name=") never matches symbols
        next unless @curl_object.respond_to?(setter)
        @curl_object.send(setter,v)
      end
      @curl_object.on_body { |data| self._body_cb(data) }
      @curl_object.on_header { |data| self._header_cb(data) }

      # cookies?
      @curl_object.enable_cookies = @opts[:enable_cookies]
      if @curl_object.enable_cookies?
        @curl_object.cookiejar = @opts[:cookiejar] || "./__cookiejar"
      end

      #proxy?
      @curl_object.proxy_url = @opts[:proxy_url] if @opts[:use_proxy]
    end

    #:section: Heavy Lifting

    # set reporting data for the page
    #
    # Scan for comments, anchors, links and javascript includes and
    # set page flags. The heavy lifting for parsing is done in the
    # scrape class.
    #
    # returns: <tt>array [ code, page_status, body_data.size ]</tt>
    def set_data
      # transparently gunzip: if the body is not gzip data GzipReader raises
      # and the body is kept as received
      begin
        gz = Zlib::GzipReader.new(StringIO.new(self.body_data))
        self.body_data.replace(gz.read)
      rescue StandardError
        # best effort only -- keep the raw body
      end

      # reset scrape and inputs object
      @scrape.reset(self.body_data)
      @inputs.set

      @comments = @scrape.for_comments
      # remove comments that are css selectors for IE silliness
      @comments.reject! do |c|
        c =~ /\[if IE\]/ ||
        c =~ /\[if IE \d/ ||
        c =~ /\[if lt IE \d/
      end
      # resolve every scraped link against the url we actually ended up on
      @links = @scrape.for_links.map do |url|
        @urlparse.parse(self.last_effective_url,url).to_s
      end
      @jlinks = @scrape.for_javascript_links
      @forms = self.search("//form").map { |f| Form.new(f) }
      @spider.add(self.last_effective_url,@links)
      return [self.code,self.page_status,self.body_data.size]
    end

    # clear self.body_data and self.header_data (and the last error)
    #
    # returns false without clearing anything when opts[:parse] is false
    def clear_data
      # compare, do not assign: "= false" overwrote the option on every call
      # and could never trigger the early return
      return false if self.opts[:parse] == false
      @body_data = ""
      @header_data.clear
      @last_error = nil
    end

    # override Curl::Easy.perform to perform page actions,
    # call <tt>self.set_data</tt>
    #
    # Errors raised by curl are stored in @last_error and logged_in is
    # reset to false; they are not re-raised.
    #
    # returns: <tt>array [ code, page_status, body_data.size ]</tt>
    #
    # don't call this directly if we are in console mode
    # use get and submit respectively for GET and POST
    def perform
      self.clear_data
      self.headers["Referer"] = self.cur if self.use_referer
      begin
        @curl_object.perform
      rescue => e
        @last_error = e
        # match on the class *name*; a Class object cannot be matched
        # against a Regexp
        putw "WARN: #{e.class}" if e.class.to_s =~ /Curl::Err/
        self.logged_in = false
      end
      self.set_data
      return [self.code,self.page_status,self.body_data.size]
    end

    # replacement for Curl::Easy.http_post
    #
    # post the form attempting to remove curl supplied headers
    # (Expect, X-Forwarded-For, Content-length), then call
    # <tt>self.set_data</tt>
    #
    # iform:: form to post (it is cloned, the original is not changed).
    #         When nil, self.form is used. When a Symbol is passed as the
    #         first argument it is taken as +reg+ and self.form is posted.
    # reg::   passed to escape_all! on the form before submitting
    #         default: WWMD::ESCAPE[:default]
    #
    # returns: <tt>array [ code, body_data.size ]</tt>, or the string
    # "no form provided" when there is no form to post
    def submit(iform=nil,reg=WWMD::ESCAPE[:default])
      # submit(:sym) is shorthand for submit(nil,:sym)
      if iform.is_a?(Symbol)
        reg = iform
        iform = nil
      end
      self.clear_data
      ["Expect","X-Forwarded-For","Content-length"].each { |s| self.clear_header(s) }
      self.headers["Referer"] = self.cur if self.use_referer
      if iform == nil
        return "no form provided" if self.form.empty?
        sform = self.form.clone
      else
        sform = iform.clone # clone the form so that we don't change the original
      end
      sform.escape_all!(reg)
      if sform.empty?
        self.http_post('')
      else
        self.http_post(self.post_data = sform.to_post)
      end
      begin
        self.set_data
      rescue
        STDERR.puts "FATAL: could not parse page"
      end
      return [self.code, self.body_data.size]
    end

    # submit a form using POST string
    #
    # returns: <tt>array [ code, body_data.size ]</tt>
    def submit_string(post_string)
      self.clear_data
      self.http_post(post_string)
      self.set_data
      if self.ntlm?
        putw "WARN: this page requires NTLM Authentication"
        putw "WARN: use ntlm_get instead of get"
      end
      return [self.code, self.body_data.size]
    end

    # GET a url
    #
    # url::   url to fetch; when nil the url already set on the curl object
    #         is fetched again
    # parse:: when true (default) the url is run through urlparse against
    #         opts[:base_url], so a url that is not fully qualified gets
    #         the base prepended; when false the url is used as passed
    #
    # returns: <tt>array [ code, body_data.size ]</tt>
    def get(url=nil,parse=true)
      if url
        self.url = parse ? @urlparse.parse(self.opts[:base_url],url).to_s : url
      end
      # perform already calls set_data, so the page is parsed exactly once
      self.perform
      if self.ntlm?
        putw "WARN: this page requires NTLM Authentication"
        putw "use ntlm_get instead of get"
      end
      return [self.code, self.body_data.size]
    end

    # GET with params and POST it as a form
    #
    # url.clip is used as the endpoint and url.clop.to_form as the form to
    # submit; a url is required despite the nil default.
    def post(url=nil)
      ep = url.clip
      self.url = @urlparse.parse(self.opts[:base_url],ep).to_s if ep
      form = url.clop.to_form
      self.submit(form)
    end

    # set self.url to +url+ resolved against opts[:base_url]
    def furl(url)
      self.url = @urlparse.parse(self.opts[:base_url],url).to_s
    end

    #:section: Reporting helper methods
    # These are methods that generate data for a parsed page

    # return text representation of page code
    #
    # override with specific statuses in helper depending on page text
    # etc to include statuses outside 200 = OK and other = ERR
    def page_status
      return "ERR" if self.response_code != 200
      return "OK"
    end

    alias_method :status, :page_status#:nodoc:

    # return value of @logged_in
    def logged_in?
      return @logged_in
    end

    # return a string of flags (upper case = present, lower case = absent):
    # Ll links
    # Jj javascript includes
    # Ff forms
    # Cc comments
    def report_flags
      ret  = self.has_links?    ? "L" : "l"
      ret += self.has_jlinks?   ? "J" : "j"
      ret += self.has_form?     ? "F" : "f"
      ret += self.has_comments? ? "C" : "c"
      return ret
    end

    # predicates over the data collected by set_data
    def has_links?;    return !@links.empty?;    end
    def has_jlinks?;   return !@jlinks.empty?;   end
    def has_form?;     return @forms.size > 0;   end
    def has_comments?; return !@comments.empty?; end

    # return page size in bytes
    def size
      return self.body_data.size
    end

    #:section: Other methods

    def all_tags#:nodoc:
      return self.search("*").map { |x| x.name }
    end

    # return MD5 for DOM fingerprint
    # take all tag names in page.to_s.md5
    def fingerprint
      self.all_tags.to_s.md5
    end
    alias_method :fp, :fingerprint #:nodoc:

    # set link using an integer link from self.report
    #--
    # NOTE: I always use page.get(page.l(1)) anyway.
    #++
    def set_link(index)
      self.url = @links[index]
    end

    # return link at index from @links array
    def get_link(index)
      @links[index]
    end

    alias_method :link, :get_link #:nodoc:
    alias_method :l, :get_link #:nodoc:

    # alias_method for body_data
    def raw
      self.body_data
    end

    # alias_method for last_effective_url
    def current_url
      self.last_effective_url
    end

    alias_method :current, :current_url
    alias_method :cur, :current_url

    # the last http response code
    def code
      self.response_code # .to_s
    end

    #:section: Parsing convenience methods
    # methods that help parse and find information on a page including
    # access to forms etc.

    # grep the body line by line for regexp and remove leading whitespace
    # from each matching line
    def grep(reg)
      # each_line.grep instead of String#grep, which only exists on ruby 1.8
      self.body_data.each_line.grep(reg).map { |i| i.gsub(/^\s+/, "") }
    end

    # return this page's form (at index id, default 0) as a FormArray
    #
    # returns nil when the page has no forms or there is no form at id
    def get_form(id=nil)
      id = 0 if not id
      return nil if forms.empty?
      form = @forms[id]
      form && form.to_form_array
    end

    # return the complete url to the form action on this page
    #
    # a form without an action posts back to the current url
    def action(id=nil)
      id = 0 if not id
      act = self.forms[id].action
      return self.last_effective_url if (act.nil? || act.empty?)
      return @urlparse.parse(self.last_effective_url,act).to_s
    end

    # return an array of Element objects for an xpath search
    def search(xpath)
      self.scrape.hdoc.search(xpath)
    end

    # return an array with the inner_html of each <script> tag encountered;
    # script tags whose inner_html is blank contribute nil
    def dump_scripts
      self.get_tags("//script").map { |s| s.inner_html if s.inner_html.strip != '' }
    end

    alias_method :scripts, :dump_scripts

    #:section: Input and Output Helpers

    # set self.opts[:base_url] and self.base_url
    #
    # returns nil (and changes nothing) when no url is given
    def setbase(url=nil)
      return nil if not url
      self.opts[:base_url] = url
      self.base_url = url
    end

    # return md5sum for self.body_data
    def md5
      return self.body_data.md5
    end

    # write self.body_data to file
    def write(filename)
      File.write(filename,self.body_data)
      return "wrote to " + filename
    end

    # read self.body_data from file and parse it with set_data
    def read(filename)
      self.body_data = File.read(filename)
      self.set_data
    end

    # does this response have SET-COOKIE headers?
    #
    # returns: array with the value of every Set-Cookie header (empty when
    # there are none -- note that an empty array is still truthy)
    def set_cookies?
      ret = []
      self.header_data.each do |name,value|
        ret << value if name.upcase == "SET-COOKIE"
      end
      return ret
    end

    # total time of the last transfer as reported by curl
    def time
      self.total_time
    end

    #:section: Data callbacks and method_missing

    # callback for <tt>self.on_body</tt>
    #
    # appends the received chunk to @body_data and returns the number of
    # bytes handled
    def _body_cb(data)
      return 0 if data.nil?
      @body_data << data
      return data.length
    end

    # callback for <tt>self.on_header</tt>
    #
    # splits the header line on the first ":" and stores the stripped
    # name/value pair in @header_data; returns the number of bytes handled
    def _header_cb(data)
      name, value = data.split(":",2)
      @header_data.extend! name.to_s.strip,value.to_s.strip
      return data.length
    end

    # send methods not defined here to <tt>@curl_object</tt>
    # (a block, if given, is forwarded as well)
    def method_missing(methodname, *args, &block)
      @curl_object.send(methodname, *args, &block)
    end

    # keep respond_to? in line with what method_missing forwards
    def respond_to_missing?(methodname, include_private=false)
      @curl_object.respond_to?(methodname, include_private) || super
    end

  end
end