miketracy-wwmd 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. data/History.txt +3 -0
  2. data/README +62 -0
  3. data/README.txt +62 -0
  4. data/Rakefile +34 -0
  5. data/examples/config_example.yaml +24 -0
  6. data/examples/wwmd_example.rb +73 -0
  7. data/lib/wwmd.rb +78 -0
  8. data/lib/wwmd/encoding.rb +40 -0
  9. data/lib/wwmd/form.rb +110 -0
  10. data/lib/wwmd/form_array.rb +273 -0
  11. data/lib/wwmd/guid.rb +155 -0
  12. data/lib/wwmd/hpricot_html2text.rb +76 -0
  13. data/lib/wwmd/mixins.rb +318 -0
  14. data/lib/wwmd/mixins_extends.rb +188 -0
  15. data/lib/wwmd/mixins_external.rb +18 -0
  16. data/lib/wwmd/nokogiri_html2text.rb +41 -0
  17. data/lib/wwmd/page.rb +414 -0
  18. data/lib/wwmd/page/auth.rb +183 -0
  19. data/lib/wwmd/page/config.rb +44 -0
  20. data/lib/wwmd/page/constants.rb +60 -0
  21. data/lib/wwmd/page/headers.rb +107 -0
  22. data/lib/wwmd/page/inputs.rb +47 -0
  23. data/lib/wwmd/page/irb_helpers.rb +90 -0
  24. data/lib/wwmd/page/scrape.rb +202 -0
  25. data/lib/wwmd/page/spider.rb +127 -0
  26. data/lib/wwmd/page/urlparse.rb +79 -0
  27. data/lib/wwmd/page/utils.rb +30 -0
  28. data/lib/wwmd/viewstate.rb +118 -0
  29. data/lib/wwmd/viewstate/viewstate_class_helpers.rb +35 -0
  30. data/lib/wwmd/viewstate/viewstate_deserializer_methods.rb +213 -0
  31. data/lib/wwmd/viewstate/viewstate_from_xml.rb +126 -0
  32. data/lib/wwmd/viewstate/viewstate_types.rb +51 -0
  33. data/lib/wwmd/viewstate/viewstate_utils.rb +157 -0
  34. data/lib/wwmd/viewstate/viewstate_yaml.rb +25 -0
  35. data/lib/wwmd/viewstate/vs_array.rb +36 -0
  36. data/lib/wwmd/viewstate/vs_binary_serialized.rb +28 -0
  37. data/lib/wwmd/viewstate/vs_hashtable.rb +40 -0
  38. data/lib/wwmd/viewstate/vs_hybrid_dict.rb +40 -0
  39. data/lib/wwmd/viewstate/vs_indexed_string.rb +6 -0
  40. data/lib/wwmd/viewstate/vs_indexed_string_ref.rb +22 -0
  41. data/lib/wwmd/viewstate/vs_int_enum.rb +25 -0
  42. data/lib/wwmd/viewstate/vs_list.rb +32 -0
  43. data/lib/wwmd/viewstate/vs_pair.rb +27 -0
  44. data/lib/wwmd/viewstate/vs_read_types.rb +11 -0
  45. data/lib/wwmd/viewstate/vs_read_value.rb +33 -0
  46. data/lib/wwmd/viewstate/vs_sparse_array.rb +56 -0
  47. data/lib/wwmd/viewstate/vs_string.rb +29 -0
  48. data/lib/wwmd/viewstate/vs_string_array.rb +37 -0
  49. data/lib/wwmd/viewstate/vs_string_formatted.rb +30 -0
  50. data/lib/wwmd/viewstate/vs_triplet.rb +29 -0
  51. data/lib/wwmd/viewstate/vs_type.rb +21 -0
  52. data/lib/wwmd/viewstate/vs_unit.rb +28 -0
  53. data/lib/wwmd/viewstate/vs_value.rb +33 -0
  54. data/spec/README +3 -0
  55. data/spec/form_array.spec +49 -0
  56. data/spec/spider_csrf_test.spec +28 -0
  57. data/spec/urlparse_test.spec +89 -0
  58. data/tasks/ann.rake +80 -0
  59. data/tasks/bones.rake +20 -0
  60. data/tasks/gem.rake +201 -0
  61. data/tasks/git.rake +40 -0
  62. data/tasks/notes.rake +27 -0
  63. data/tasks/post_load.rake +34 -0
  64. data/tasks/rdoc.rake +51 -0
  65. data/tasks/rubyforge.rake +55 -0
  66. data/tasks/setup.rb +292 -0
  67. data/tasks/spec.rake +54 -0
  68. data/tasks/test.rake +40 -0
  69. data/tasks/zentest.rake +36 -0
  70. metadata +164 -0
@@ -0,0 +1,18 @@
1
module REXML
  class Element

    # Pretty print this element (indent = 0).
    #
    # fn:: optional file name; when given, the rendered XML is written there
    #
    # returns: the rendered XML string, or +fn+ when output went to a file
    def pp(fn=nil)
      rendered = String.new
      self.write(rendered, 0)
      return rendered unless fn
      # core String has no #write, so hand the text to File.write instead
      File.write(fn, rendered)
      fn
    end

  end
end
@@ -0,0 +1,41 @@
1
+ =begin rdoc
2
+ html2text that works with Nokogiri
3
+ =end
4
module WWMD

  INLINETAGS  = %w[a abbr acronym address b bdo big cite
                   code del dfn em font i ins kbd label
                   noframes noscript q s samp small span
                   strike strong sub sup td th tt u
                   html body table]
  BLOCKTAGS   = %w[blockquote center dd div fieldset form
                   h1 h2 h3 h4 h5 h6 p pre tr var]
  LISTTAGS    = %w[dir dl menu ol ul]
  ITEMTAGS    = %w[li dt]
  SPECIALTAGS = %w[br hr]

  class Page
    # Render the parsed document (self.scrape.hdoc) as plain text.
    #
    # Text nodes are emitted according to the name of their parent tag:
    # block and list tags get a trailing newline, item tags become
    # "* item" lines, <br> is a newline and <hr> a 72 character rule.
    # Runs of newlines are collapsed to a single newline.
    #
    # returns: the text as a String
    def html2text
      # first pass: collect [tag name, text] pairs in document order
      pieces = []
      self.scrape.hdoc.traverse do |node|
        pieces << [node.parent.name, node.text] if node.text?
        pieces << [node.name, ""] if node.elem? && SPECIALTAGS.include?(node.name)
      end

      # second pass: turn each pair into its text representation
      rendered = pieces.map do |tag, raw|
        case tag
        when "br" then "\n"
        when "hr" then "\n" + ("-" * 72) + "\n"
        else
          chunk = raw.strip
          if BLOCKTAGS.include?(tag) || LISTTAGS.include?(tag)
            chunk + "\n"
          elsif ITEMTAGS.include?(tag)
            "* " + chunk + "\n"
          else
            chunk
          end
        end
      end

      rendered.join.gsub(/\n+/) { "\n" }
    end
  end
end
data/lib/wwmd/page.rb ADDED
@@ -0,0 +1,414 @@
1
+ module WWMD
2
+ attr_accessor :curl_object
3
+ attr_accessor :body_data
4
+ attr_accessor :post_data
5
+ attr_accessor :header_data
6
+ attr_accessor :use_referer
7
+ attr_reader :forms
8
+ attr_reader :last_error
9
+ attr_reader :links # array of links (urls)
10
+ attr_reader :jlinks # array of included javascript files
11
+ attr_reader :spider # spider object
12
+ attr_reader :scrape # scrape object
13
+ attr_reader :urlparse # urlparse object
14
+ attr_reader :comments
15
+
16
+ attr_accessor :base_url # needed to properly munge relative urls into fq urls
17
+ attr_accessor :logged_in # are we logged in?
18
+
19
+ attr_accessor :opts
20
+ attr_accessor :inputs
21
+
22
+ # WWMD::Page is an extension of a Curl::Easy object which provides methods to
23
+ # enhance and ease the performance of web application penetration testing.
24
+ class Page
25
+
26
# Create a page object and configure its Curl::Easy handle.
#
# opts:: option hash; options the caller leaves unset are taken from
#        DEFAULTS. Every option that has a matching setter on the curl
#        handle is applied to it, except :proxy_url which is only
#        applied when opts[:use_proxy] is set.
def initialize(opts={})
  @opts = opts.clone
  # only fill in options the caller left unset; a truthiness test here
  # used to replace an explicit false with the default
  DEFAULTS.each { |k,v| @opts[k] = v if opts[k].nil? }
  @spider = Spider.new(opts)
  @scrape = Scrape.new
  @base_url ||= opts[:base_url]
  @scrape.warn = opts[:scrape_warn] if !opts[:scrape_warn].nil?
  if opts.empty?
    putw "Page initialized without opts"
    @scrape.warn = false
  end
  @urlparse = URLParse.new()
  @inputs = Inputs.new(self)
  @logged_in = false
  @body_data = ""
  @post_data = ""
  @comments = []
  @header_data = FormArray.new

  @curl_object = Curl::Easy.new
  @opts.each do |k,v|
    next if k == :proxy_url
    setter = "#{k}="
    # Object#methods returns Symbols on Ruby >= 1.9, so methods.include?
    # with a String never matched; respond_to? works on every version
    next unless @curl_object.respond_to?(setter)
    @curl_object.send(setter, v)
  end
  @curl_object.on_body { |data| self._body_cb(data) }
  @curl_object.on_header { |data| self._header_cb(data) }

  # cookies?
  @curl_object.enable_cookies = @opts[:enable_cookies]
  if @curl_object.enable_cookies?
    @curl_object.cookiejar = @opts[:cookiejar] || "./__cookiejar"
  end

  # proxy?
  @curl_object.proxy_url = @opts[:proxy_url] if @opts[:use_proxy]
end
63
+
64
+ #:section: Heavy Lifting
65
+
66
+ # set reporting data for the page
67
+ #
68
+ # Scan for comments, anchors, links and javascript includes and
69
+ # set page flags. The heavy lifting for parsing is done in the
70
+ # scrape class.
71
+ #
72
+ # returns: <tt>array [ code, page_status, body_data.size ]</tt>
73
# Parse self.body_data and refresh the reporting data for the page:
# comments, links, javascript includes and forms. The links found are
# also handed to the spider.
#
# returns: <tt>array [ code, page_status, body_data.size ]</tt>
def set_data
  # transparently gunzip; if the body cannot be read as gzip it is
  # left as it is
  begin
    inflated = Zlib::GzipReader.new(StringIO.new(self.body_data)).read
    self.body_data.replace(inflated)
  rescue StandardError
  end

  # reset scrape and inputs object
  @scrape.reset(self.body_data)
  @inputs.set

  # collect comments, dropping the IE conditional-comment markers
  @comments = @scrape.for_comments
  @comments.reject! { |c| c =~ /\[if IE\]/ || c =~ /\[if IE \d/ || c =~ /\[if lt IE \d/ }

  # resolve every scraped link against the url we actually ended up on
  @links = @scrape.for_links.map { |url| @urlparse.parse(self.last_effective_url,url).to_s }
  @jlinks = @scrape.for_javascript_links

  @forms = []
  self.search("//form").each { |node| @forms.push(Form.new(node)) }

  @spider.add(self.last_effective_url,@links)
  [self.code,self.page_status,self.body_data.size]
end
101
+
102
+ # clear self.body_data and self.header_data
103
# Reset the per-request state: body data, header data and last error.
#
# returns: false (leaving everything untouched) when opts[:parse] is
# false, otherwise nil
def clear_data
  # compare, do not assign: "= false" overwrote opts[:parse] on every
  # call and could never trigger the early return
  return false if self.opts[:parse] == false
  @body_data = ""
  @header_data.clear
  @last_error = nil
end
109
+
110
+ # override Curl::Easy.perform to perform page actions,
111
+ # call <tt>self.set_data</tt>
112
+ #
113
+ # returns: <tt>array [ code, page_status, body_data.size ]</tt>
114
+ #
115
+ # don't call this directly if we are in console mode
116
+ # use get and submit respectively for GET and POST
117
# Run the request on the curl handle and parse the result.
#
# Clears the previous page data, sets the Referer header when
# use_referer is on, performs the request and calls set_data. An error
# raised by curl is stored in @last_error and marks the page as logged
# out instead of propagating.
#
# returns: <tt>array [ code, page_status, body_data.size ]</tt>
def perform
  self.clear_data
  self.headers["Referer"] = self.cur if self.use_referer
  begin
    @curl_object.perform
  rescue => e
    @last_error = e
    # match on the class *name*: a Class has no usable =~ (Object#=~
    # always returned nil and was removed in Ruby 3.2)
    putw "WARN: #{e.class}" if e.class.to_s =~ /Curl::Err/
    self.logged_in = false
  end
  self.set_data
  return [self.code,self.page_status,self.body_data.size]
end
130
+
131
+ # replacement for Curl::Easy.http_post
132
+ #
133
+ # post the form attempting to remove curl supplied headers (Expect, X-Forwarded-For)
134
+ # call <tt>self.set_data</tt>
135
+ #
136
+ # if passed a regexp, escape values in the form using regexp before submitting
137
+ # if passed nil for the regexp arg, the form will not be escaped
138
+ # default: WWMD::ESCAPE[:default]
139
+ #
140
+ # returns: <tt>array [ code, body_data.size ]</tt>
141
# POST a form.
#
# iform:: the form to post; when nil the page's own form (self.form) is
#         used. A Symbol passed here is taken as +reg+ instead.
# reg::   escape selector handed to the form's escape_all!
#
# The form is cloned before escaping so the caller's copy is unchanged.
#
# returns: <tt>array [ code, body_data.size ]</tt>, or the string
# "no form provided" when there is no form to post
def submit(iform=nil,reg=WWMD::ESCAPE[:default])
  # a lone Symbol is the escape selector, not a form; comparing the
  # class against the String "Symbol" could never be true
  if iform.is_a?(Symbol)
    reg = iform
    iform = nil
  end
  self.clear_data
  ["Expect","X-Forwarded-For","Content-length"].each { |s| self.clear_header(s) }
  self.headers["Referer"] = self.cur if self.use_referer
  if iform.nil?
    return "no form provided" if self.form.empty?
    sform = self.form.clone
  else
    sform = iform.clone # clone the form so that we don't change the original
  end
  sform.escape_all!(reg)
  if sform.empty?
    self.http_post('')
  else
    self.http_post(self.post_data = sform.to_post)
  end
  begin
    self.set_data
  rescue StandardError
    STDERR.puts "FATAL: could not parse page"
  end
  return [self.code, self.body_data.size]
end
174
+
175
+ # submit a form using POST string
176
# POST a ready-made post string and parse the response. Warns when the
# response indicates that NTLM authentication is required.
#
# returns: <tt>array [ code, body_data.size ]</tt>
def submit_string(post_string)
  self.clear_data
  self.http_post(post_string)
  self.set_data
  if self.ntlm?
    ["WARN: this page requires NTLM Authentication",
     "WARN: use ntlm_get instead of get"].each { |msg| putw msg }
  end
  [self.code, self.body_data.size]
end
186
+
187
+ # override for Curl::Easy.perform
188
+ #
189
+ # if the passed url string doesn't contain a fully qualified
190
+ # path, we'll guess and prepend opts[:base_url]
191
+ #
192
+ # returns: <tt>array [ code, body_data.size ]</tt>
193
# GET a url.
#
# url::   target; when nil the url already set on the page is requested
# parse:: when true (the default) +url+ is resolved against
#         opts[:base_url]; when false it is used verbatim
#
# returns: <tt>array [ code, body_data.size ]</tt>
def get(url=nil,parse=true)
  if url
    self.url = parse ? @urlparse.parse(self.opts[:base_url],url).to_s : url
  end
  self.perform
  if self.ntlm?
    putw "WARN: this page requires NTLM Authentication"
    putw "use ntlm_get instead of get"
  end
  # set_data is called here in addition to the call made inside perform
  self.set_data
  [self.code, self.body_data.size]
end
214
+
215
+ # GET with params and POST it as a form
216
# Take a GET style url and POST it instead: url.clip is resolved against
# opts[:base_url] and becomes the target, url.clop.to_form is submitted
# as the form.
#
# returns: whatever submit returns
def post(url=nil)
  endpoint = url.clip
  self.url = @urlparse.parse(self.opts[:base_url],endpoint).to_s if endpoint
  self.submit(url.clop.to_form)
end
222
+
223
# Resolve +url+ against opts[:base_url] and make it the page's url.
#
# returns: the resolved url string
def furl(url)
  resolved = @urlparse.parse(self.opts[:base_url],url)
  self.url = resolved.to_s
end
226
+
227
+ #:section: Reporting helper methods
228
+ # These are methods that generate data for a parsed page
229
+
230
+ # return text representation of page code
231
+ #
232
+ # override with specific statuses in helper depending on page text
233
+ # etc to include statuses outside 200 = OK and other = ERR
234
# Text status for the last response: "OK" for a 200, "ERR" for
# everything else.
def page_status
  self.response_code == 200 ? "OK" : "ERR"
end
238
+
239
+ alias_method :status, :page_status#:nodoc:
240
+
241
+ # return value of @logged_in
242
# Current value of the @logged_in flag.
def logged_in?
  @logged_in
end
245
+
246
+ # return a string of flags:
247
+ # Ll links
248
+ # Jj javascript includes
249
+ # Ff forms
250
+ # Cc comments
251
# Build the four character flag string for the page. Each position is
# upper case when the page has the item and lower case when it does
# not: links, javascript includes, forms, comments.
def report_flags
  probes = [[:has_links?, "L"], [:has_jlinks?, "J"], [:has_form?, "F"], [:has_comments?, "C"]]
  probes.map { |probe, flag| self.send(probe) ? flag : flag.downcase }.join
end
258
+
259
# true when at least one link was found on the page
def has_links?
  !@links.empty?
end

# true when at least one javascript include was found on the page
def has_jlinks?
  !@jlinks.empty?
end

# true when the page contains at least one form
def has_form?
  @forms.size >= 1
end

# true when at least one comment was found on the page
def has_comments?
  !@comments.empty?
end
263
+
264
+ # return page size in bytes
265
# Size of the page body, as reported by body_data.size.
def size
  body_data.size
end
268
+
269
+ #:section: Other methods
270
+
271
# Names of every element matched by search("*"), in the order returned.
def all_tags#:nodoc:
  self.search("*").map(&:name)
end
274
+
275
+ # return MD5 for DOM fingerprint
276
+ # take all tag names in page.to_s.md5
277
# DOM fingerprint: the md5 of the string form of all_tags.
def fingerprint
  tag_names = self.all_tags
  tag_names.to_s.md5
end
280
+ alias_method :fp, :fingerprint #:nodoc:
281
+
282
+ # set link using an integer link from self.report
283
+ #--
284
+ # NOTE: I always use page.get(page.l(1)) anyway.
285
+ #++
286
# Make the link stored at +index+ in @links the page's url.
#
# returns: the link that was set
def set_link(index)
  target = @links[index]
  self.url = target
end
289
+
290
+ # return link at index from @links array
291
# Look up the entry stored at +index+ in @links.
def get_link(index)
  return @links[index]
end
294
+
295
+ alias_method :link, :get_link #:nodoc:
296
+ alias_method :l, :get_link #:nodoc:
297
+
298
+ # alias_method for body_data
299
# The page body exactly as held in body_data.
def raw
  body_data
end
302
+
303
+ # alias_method for last_effective_url
304
# The url of the current page, as reported by last_effective_url.
def current_url
  last_effective_url
end
307
+
308
+ alias_method :current, :current_url
309
+ alias_method :cur, :current_url
310
+
311
+ # the last http response code
312
# The last http response code, as reported by response_code.
def code
  response_code
end
315
+
316
+ #:section: Parsing convenience methods
317
+ # methods that help parse and find information on a page including
318
+ # access to forms etc.
319
+
320
+ # grep for regexp and remove leading whitespace
321
# Grep the page body line by line.
#
# reg:: pattern each line is matched against
#
# returns: array of the matching lines with leading whitespace removed
def grep(reg)
  # String stopped being Enumerable in Ruby 1.9, so it has no #grep of
  # its own; split the body into lines first
  self.body_data.lines.grep(reg).map { |line| line.gsub(/^\s+/, "") }
end
324
+
325
+ # return this page's form (at index id) as a FormArray
326
# Return this page's form at index +id+ (default 0) as a form array.
#
# returns: the result of to_form_array for that form, or nil when the
# page has no forms or +id+ does not address one
def get_form(id=nil)
  id = 0 if not id
  return nil if forms.empty?
  form = @forms[id]
  # an index past the end yields nil instead of a NoMethodError on nil
  form && form.to_form_array
end
331
+
332
+ # return the complete url to the form action on this page
333
# Complete url of the action of the form at index +id+ (default 0).
# A form without an action (nil or empty) posts back to the current
# page, so last_effective_url is returned for it.
def action(id=nil)
  id ||= 0
  target = self.forms[id].action
  if target.nil? || target.empty?
    self.last_effective_url
  else
    @urlparse.parse(self.last_effective_url,target).to_s
  end
end
339
+
340
+ # return an array of Element objects for an xpath search
341
# Run an xpath search against the parsed document (scrape.hdoc) and
# return whatever the document's search returns.
def search(xpath)
  document = self.scrape.hdoc
  document.search(xpath)
end
344
+
345
+ # return an array of inner_html for each <script> tag encountered
346
# Collect the inner_html of every <script> tag. A tag whose content is
# blank contributes nil, so the result has one entry per tag.
def dump_scripts
  self.get_tags("//script").map do |tag|
    tag.inner_html unless tag.inner_html.strip == ''
  end
end
349
+
350
+ alias_method :scripts, :dump_scripts
351
+
352
+ #:section: Input and Output Helpers
353
+
354
+ # set self.opts[:base_url]
355
# Set the base url in both opts[:base_url] and base_url.
#
# returns: +url+, or nil (changing nothing) when no url is given
def setbase(url=nil)
  return nil unless url
  self.opts[:base_url] = url
  self.base_url = url
end
360
+
361
+ # return md5sum for self.body_data
362
# md5 of the page body, computed by calling md5 on body_data.
def md5
  body_data.md5
end
365
+
366
+ # write self.body_data to file
367
# Write the page body (body_data) to +filename+.
#
# returns: the string "wrote to " followed by the file name
def write(filename)
  File.write(filename,self.body_data)
  "wrote to " + filename
end
371
+
372
+ # read self.body_data from file
373
# Load the page body from +filename+ and parse it as if it had been
# fetched.
#
# returns: whatever set_data returns
def read(filename)
  contents = File.read(filename)
  self.body_data = contents
  self.set_data
end
377
+
378
+ # does this response have SET-COOKIE headers?
379
# Collect the values of all SET-COOKIE headers (name compared case
# insensitively) in this response.
#
# returns: array of header values; empty when there are none
def set_cookies?
  self.header_data.each_with_object([]) do |pair, found|
    found << pair[1] if pair[0].upcase == "SET-COOKIE"
  end
end
388
+
389
# Shorthand for total_time.
def time
  total_time
end
392
+
393
+ #:section: Data callbacks and method_missing
394
+
395
+ # callback for <tt>self.on_body</tt>
396
# callback for <tt>self.on_body</tt>: append the received chunk to
# @body_data.
#
# returns: the number of bytes handled (0 when +data+ is nil)
def _body_cb(data)
  # the nil guard used to cover the append only, so a nil chunk blew up
  # on data.length
  return 0 if data.nil?
  @body_data << data
  # report bytes, not characters
  data.bytesize
end
400
+
401
+ # callback for <tt>self.on_header</tt>
402
# callback for <tt>self.on_header</tt>: split the header line at the
# first ":" and add the stripped name and value to @header_data. A line
# without ":" is stored with an empty value.
#
# returns: data.length
def _header_cb(data)
  name, value = data.split(":",2)
  @header_data.extend! name.to_s.strip, value.to_s.strip
  data.length.to_i
end
407
+
408
+ # send methods not defined here to <tt>@curl_object</tt>
409
# send methods not defined here to <tt>@curl_object</tt>, including any
# block given (it used to be dropped, so handlers passed as blocks
# never reached the curl object)
def method_missing(methodname, *args, &block)
  # without a curl object there is nothing to delegate to; fall back to
  # the normal NoMethodError for the method that was actually called
  return super if @curl_object.nil?
  @curl_object.send(methodname, *args, &block)
end

# keep respond_to? in step with the delegation done by method_missing
def respond_to_missing?(methodname, include_private = false)
  (!@curl_object.nil? && @curl_object.respond_to?(methodname, include_private)) || super
end
412
+
413
+ end
414
+ end