miketracy-wwmd 0.2.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +3 -0
- data/README +62 -0
- data/README.txt +62 -0
- data/Rakefile +34 -0
- data/examples/config_example.yaml +24 -0
- data/examples/wwmd_example.rb +73 -0
- data/lib/wwmd.rb +78 -0
- data/lib/wwmd/encoding.rb +40 -0
- data/lib/wwmd/form.rb +110 -0
- data/lib/wwmd/form_array.rb +273 -0
- data/lib/wwmd/guid.rb +155 -0
- data/lib/wwmd/hpricot_html2text.rb +76 -0
- data/lib/wwmd/mixins.rb +318 -0
- data/lib/wwmd/mixins_extends.rb +188 -0
- data/lib/wwmd/mixins_external.rb +18 -0
- data/lib/wwmd/nokogiri_html2text.rb +41 -0
- data/lib/wwmd/page.rb +414 -0
- data/lib/wwmd/page/auth.rb +183 -0
- data/lib/wwmd/page/config.rb +44 -0
- data/lib/wwmd/page/constants.rb +60 -0
- data/lib/wwmd/page/headers.rb +107 -0
- data/lib/wwmd/page/inputs.rb +47 -0
- data/lib/wwmd/page/irb_helpers.rb +90 -0
- data/lib/wwmd/page/scrape.rb +202 -0
- data/lib/wwmd/page/spider.rb +127 -0
- data/lib/wwmd/page/urlparse.rb +79 -0
- data/lib/wwmd/page/utils.rb +30 -0
- data/lib/wwmd/viewstate.rb +118 -0
- data/lib/wwmd/viewstate/viewstate_class_helpers.rb +35 -0
- data/lib/wwmd/viewstate/viewstate_deserializer_methods.rb +213 -0
- data/lib/wwmd/viewstate/viewstate_from_xml.rb +126 -0
- data/lib/wwmd/viewstate/viewstate_types.rb +51 -0
- data/lib/wwmd/viewstate/viewstate_utils.rb +157 -0
- data/lib/wwmd/viewstate/viewstate_yaml.rb +25 -0
- data/lib/wwmd/viewstate/vs_array.rb +36 -0
- data/lib/wwmd/viewstate/vs_binary_serialized.rb +28 -0
- data/lib/wwmd/viewstate/vs_hashtable.rb +40 -0
- data/lib/wwmd/viewstate/vs_hybrid_dict.rb +40 -0
- data/lib/wwmd/viewstate/vs_indexed_string.rb +6 -0
- data/lib/wwmd/viewstate/vs_indexed_string_ref.rb +22 -0
- data/lib/wwmd/viewstate/vs_int_enum.rb +25 -0
- data/lib/wwmd/viewstate/vs_list.rb +32 -0
- data/lib/wwmd/viewstate/vs_pair.rb +27 -0
- data/lib/wwmd/viewstate/vs_read_types.rb +11 -0
- data/lib/wwmd/viewstate/vs_read_value.rb +33 -0
- data/lib/wwmd/viewstate/vs_sparse_array.rb +56 -0
- data/lib/wwmd/viewstate/vs_string.rb +29 -0
- data/lib/wwmd/viewstate/vs_string_array.rb +37 -0
- data/lib/wwmd/viewstate/vs_string_formatted.rb +30 -0
- data/lib/wwmd/viewstate/vs_triplet.rb +29 -0
- data/lib/wwmd/viewstate/vs_type.rb +21 -0
- data/lib/wwmd/viewstate/vs_unit.rb +28 -0
- data/lib/wwmd/viewstate/vs_value.rb +33 -0
- data/spec/README +3 -0
- data/spec/form_array.spec +49 -0
- data/spec/spider_csrf_test.spec +28 -0
- data/spec/urlparse_test.spec +89 -0
- data/tasks/ann.rake +80 -0
- data/tasks/bones.rake +20 -0
- data/tasks/gem.rake +201 -0
- data/tasks/git.rake +40 -0
- data/tasks/notes.rake +27 -0
- data/tasks/post_load.rake +34 -0
- data/tasks/rdoc.rake +51 -0
- data/tasks/rubyforge.rake +55 -0
- data/tasks/setup.rb +292 -0
- data/tasks/spec.rake +54 -0
- data/tasks/test.rake +40 -0
- data/tasks/zentest.rake +36 -0
- metadata +164 -0
@@ -0,0 +1,41 @@
|
|
1
|
+
=begin rdoc
|
2
|
+
html2text that works with Nokogiri
|
3
|
+
=end
|
4
|
+
module WWMD
|
5
|
+
|
6
|
+
INLINETAGS = ['a','abbr','acronym','address','b','bdo','big','cite',
|
7
|
+
'code','del','dfn','em','font','i','ins','kbd','label',
|
8
|
+
'noframes','noscript','q','s','samp','small','span',
|
9
|
+
'strike','strong','sub','sup','td','th','tt','u',
|
10
|
+
'html','body','table']
|
11
|
+
BLOCKTAGS = ['blockquote','center','dd','div','fieldset','form',
|
12
|
+
'h1','h2','h3','h4','h5','h6','p','pre','tr','var',]
|
13
|
+
LISTTAGS = ['dir','dl','menu','ol','ul']
|
14
|
+
ITEMTAGS = ['li','dt']
|
15
|
+
SPECIALTAGS = ['br','hr']
|
16
|
+
|
17
|
+
class Page
|
18
|
+
def html2text
|
19
|
+
arr = []
|
20
|
+
self.scrape.hdoc.traverse do |x|
|
21
|
+
arr << [x.parent.name,x.text] if x.text?
|
22
|
+
if x.elem?
|
23
|
+
arr << [x.name,""] if SPECIALTAGS.include?(x.name)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
ret = ""
|
27
|
+
arr.each do |name,str|
|
28
|
+
(ret += "\n"; next ) if name == "br"
|
29
|
+
(ret += "\n" + ("-" * 72) + "\n"; next) if name == "hr"
|
30
|
+
s = str.strip
|
31
|
+
if BLOCKTAGS.include?(name) or LISTTAGS.include?(name)
|
32
|
+
s += "\n"
|
33
|
+
elsif ITEMTAGS.include?(name)
|
34
|
+
s = "* " + s + "\n"
|
35
|
+
end
|
36
|
+
ret += s
|
37
|
+
end
|
38
|
+
ret.gsub(/\n+/) { "\n" }
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
data/lib/wwmd/page.rb
ADDED
@@ -0,0 +1,414 @@
|
|
1
|
+
module WWMD
|
2
|
+
attr_accessor :curl_object
|
3
|
+
attr_accessor :body_data
|
4
|
+
attr_accessor :post_data
|
5
|
+
attr_accessor :header_data
|
6
|
+
attr_accessor :use_referer
|
7
|
+
attr_reader :forms
|
8
|
+
attr_reader :last_error
|
9
|
+
attr_reader :links # array of links (urls)
|
10
|
+
attr_reader :jlinks # array of included javascript files
|
11
|
+
attr_reader :spider # spider object
|
12
|
+
attr_reader :scrape # scrape object
|
13
|
+
attr_reader :urlparse # urlparse object
|
14
|
+
attr_reader :comments
|
15
|
+
|
16
|
+
attr_accessor :base_url # needed to properly munge relative urls into fq urls
|
17
|
+
attr_accessor :logged_in # are we logged in?
|
18
|
+
|
19
|
+
attr_accessor :opts
|
20
|
+
attr_accessor :inputs
|
21
|
+
|
22
|
+
# WWMD::Page is an extension of a Curl::Easy object which provides methods to
|
23
|
+
# enhance and ease the performance of web application penetration testing.
|
24
|
+
class Page
|
25
|
+
|
26
|
+
# Build a Page: merge +opts+ over DEFAULTS, create the helper objects
# (Spider, Scrape, URLParse, Inputs) and configure the Curl::Easy handle.
#
# opts:: option hash. Every key for which the curl handle has a matching
#        <tt>key=</tt> writer is forwarded to it, except :proxy_url which
#        is only applied when opts[:use_proxy] is set.
def initialize(opts={})
  @opts = opts.clone
  # Fill in a default only when nothing was supplied for the key, so an
  # explicit +false+ from the caller is not replaced by the default.
  DEFAULTS.each { |k,v| @opts[k] = v if @opts[k].nil? }
  @spider = Spider.new(opts)
  @scrape = Scrape.new
  @base_url ||= opts[:base_url]
  @scrape.warn = opts[:scrape_warn] if !opts[:scrape_warn].nil?
  if opts.empty?
    putw "Page initialized without opts"
    @scrape.warn = false
  end
  @urlparse = URLParse.new()
  @inputs = Inputs.new(self)
  @logged_in = false
  @body_data = ""
  @post_data = ""
  @comments = []
  # start with empty collections so the has_*? predicates work before the
  # first request has been parsed
  @links = []
  @jlinks = []
  @forms = []
  @header_data = FormArray.new

  @curl_object = Curl::Easy.new
  @opts.each do |k,v|
    next if k == :proxy_url
    setter = "#{k}="
    # respond_to? works whether #methods returns Strings (1.8) or Symbols
    # (1.9+); the old methods.include?(String) test never matched on 1.9+.
    next unless @curl_object.respond_to?(setter)
    @curl_object.send(setter,v)
  end
  @curl_object.on_body { |data| self._body_cb(data) }
  @curl_object.on_header { |data| self._header_cb(data) }

  # cookies?
  @curl_object.enable_cookies = @opts[:enable_cookies]
  if @curl_object.enable_cookies?
    @curl_object.cookiejar = @opts[:cookiejar] || "./__cookiejar"
  end

  #proxy?
  @curl_object.proxy_url = @opts[:proxy_url] if @opts[:use_proxy]
end
|
63
|
+
|
64
|
+
#:section: Heavy Lifting
|
65
|
+
|
66
|
+
# set reporting data for the page
#
# Transparently gunzips the body, then scans for comments, links,
# javascript includes and forms and feeds the links to the spider.
# The heavy lifting for parsing is done in the scrape class.
#
# returns: <tt>array [ code, page_status, body_data.size ]</tt>
def set_data
  # transparently gunzip, but only when the gzip magic bytes are present so
  # that ordinary bodies do not go through an exception on every request
  if self.body_data.to_s.unpack("C2") == [0x1f, 0x8b]
    gz = nil
    begin
      gz = Zlib::GzipReader.new(StringIO.new(self.body_data))
      self.body_data.replace(gz.read)
    rescue Zlib::Error
      # corrupt or truncated gzip stream: keep the raw body as received
    ensure
      gz.close if gz && !gz.closed?
    end
  end

  # reset scrape and inputs object
  @scrape.reset(self.body_data)
  @inputs.set

  @comments = @scrape.for_comments
  # remove IE conditional comments: "[if IE]", "[if IE 6]", "[if lt IE 7]",
  # "[if gte IE 8]", "[if !IE]" ...
  @comments.reject! { |c| c =~ /\[if\s+(?:(?:lt|lte|gt|gte)\s+)?!?IE\b/ }

  @links = @scrape.for_links.map do |url|
    @urlparse.parse(self.last_effective_url,url).to_s
  end
  @jlinks = @scrape.for_javascript_links
  @forms = []
  self.search("//form").each { |f| @forms << Form.new(f) }
  @spider.add(self.last_effective_url,@links)
  return [self.code,self.page_status,self.body_data.size]
end
|
101
|
+
|
102
|
+
# clear self.body_data and self.header_data (and the last recorded error)
#
# Does nothing and returns +false+ when <tt>opts[:parse]</tt> is +false+.
def clear_data
  # Compare, don't assign: the former "=" overwrote opts[:parse] with false
  # on every call and, being an assignment of false, never triggered the
  # early return.
  return false if self.opts[:parse] == false
  @body_data = ""
  @header_data.clear
  @last_error = nil
end
|
109
|
+
|
110
|
+
# override Curl::Easy.perform to perform page actions,
# call <tt>self.set_data</tt>
#
# Any StandardError raised by the curl handle is stored in @last_error
# and marks the page as logged out; Curl::Err errors are also reported
# through putw.
#
# returns: <tt>array [ code, page_status, body_data.size ]</tt>
#
# don't call this directly if we are in console mode
# use get and submit respectively for GET and POST
def perform
  self.clear_data
  self.headers["Referer"] = self.cur if self.use_referer
  begin
    @curl_object.perform
  rescue => e
    @last_error = e
    # match against the class *name*: a Class object itself never matches a
    # Regexp, so the warning used to be unreachable
    putw "WARN: #{e.class}" if e.class.to_s =~ /Curl::Err/
    self.logged_in = false
  end
  self.set_data
  return [self.code,self.page_status,self.body_data.size]
end
|
130
|
+
|
131
|
+
# replacement for Curl::Easy.http_post
#
# post the form after clearing the Expect, X-Forwarded-For and
# Content-length headers, then call <tt>self.set_data</tt>
#
# iform:: form to post; when nil, <tt>self.form</tt> is used. A lone
#         Symbol is taken as the +reg+ argument (so <tt>submit(:key)</tt>
#         posts <tt>self.form</tt> escaped with +:key+).
# reg::   passed to <tt>escape_all!</tt> on a clone of the form
#         (default: WWMD::ESCAPE[:default])
#
# returns: <tt>array [ code, body_data.size ]</tt>, or the String
# "no form provided" when there is nothing to post
def submit(iform=nil,reg=WWMD::ESCAPE[:default])
  # iform.class == "Symbol" compared a Class to a String and was never true
  if iform.is_a?(Symbol)
    reg = iform
    iform = nil
  end
  self.clear_data
  ["Expect","X-Forwarded-For","Content-length"].each { |s| self.clear_header(s) }
  self.headers["Referer"] = self.cur if self.use_referer
  if iform.nil?
    return "no form provided" if self.form.empty?
    sform = self.form.clone
  else
    sform = iform.clone # clone the form so that we don't change the original
  end
  sform.escape_all!(reg)
  if sform.empty?
    self.http_post('')
  else
    self.http_post(self.post_data = sform.to_post)
  end
  begin
    self.set_data
  rescue
    # parsing is best effort here: report and still return the response code
    STDERR.puts "FATAL: could not parse page"
  end
  return [self.code, self.body_data.size]
end
|
174
|
+
|
175
|
+
# POST a ready-made body string and parse the response.
#
# post_string:: the raw POST body handed to <tt>http_post</tt>
#
# returns: <tt>array [ code, body_data.size ]</tt>
def submit_string(post_string)
  clear_data
  http_post(post_string)
  set_data
  if ntlm?
    ["WARN: this page requires NTLM Authentication",
     "WARN: use ntlm_get instead of get"].each { |msg| putw msg }
  end
  [code, body_data.size]
end
|
186
|
+
|
187
|
+
# GET a url through <tt>self.perform</tt>
#
# url::   when given and +parse+ is true, the url is resolved against
#         opts[:base_url] with the urlparse object; when +parse+ is false
#         it is used verbatim. When nil, the currently set url is fetched.
# parse:: resolve +url+ against opts[:base_url] (default true)
#
# returns: <tt>array [ code, body_data.size ]</tt>
def get(url=nil,parse=true)
  if url
    self.url = parse ? @urlparse.parse(self.opts[:base_url],url).to_s : url
  end
  # perform already runs set_data, so the page is not parsed again here
  self.perform
  if self.ntlm?
    putw "WARN: this page requires NTLM Authentication"
    putw "use ntlm_get instead of get"
  end
  return [self.code, self.body_data.size]
end
|
214
|
+
|
215
|
+
# Take a GET-style url with parameters and POST the parameters as a form.
#
# The url is split with String#clip / String#clop (defined outside this
# file; presumably the parts before and after "?" -- confirm in mixins).
# The clip part, resolved against opts[:base_url], becomes the target url;
# the clop part is converted with +to_form+ and handed to submit.
#
# returns: whatever <tt>submit</tt> returns
def post(url=nil)
  endpoint = url.clip
  if endpoint
    self.url = @urlparse.parse(self.opts[:base_url], endpoint).to_s
  end
  self.submit(url.clop.to_form)
end
|
222
|
+
|
223
|
+
# Resolve +url+ against opts[:base_url] and make it the current url
# without performing a request.
#
# returns: the resolved url string
def furl(url)
  resolved = @urlparse.parse(self.opts[:base_url], url).to_s
  self.url = resolved
end
|
226
|
+
|
227
|
+
#:section: Reporting helper methods
|
228
|
+
# These are methods that generate data for a parsed page
|
229
|
+
|
230
|
+
# Text form of the response code: "OK" for 200, "ERR" for anything else.
#
# override with specific statuses in helper depending on page text
# etc to include statuses outside 200 = OK and other = ERR
def page_status
  self.response_code == 200 ? "OK" : "ERR"
end
|
238
|
+
|
239
|
+
# +status+ is a second name for page_status
alias_method :status, :page_status#:nodoc:
|
240
|
+
|
241
|
+
# Current value of the @logged_in flag.
def logged_in?
  @logged_in
end
|
245
|
+
|
246
|
+
# Four-character flag string; each letter is upper case when the page
# has that feature and lower case when it does not:
#   Ll links
#   Jj javascript includes
#   Ff forms
#   Cc comments
def report_flags
  [
    [self.has_links?,    "L"],
    [self.has_jlinks?,   "J"],
    [self.has_form?,     "F"],
    [self.has_comments?, "C"]
  ].map { |present, flag| present ? flag : flag.downcase }.join
end
|
258
|
+
|
259
|
+
# true when @links holds at least one entry; false when it is empty or not yet set
def has_links?; return !(@links.nil? || @links.empty?); end
|
260
|
+
# true when @jlinks holds at least one entry; false when it is empty or not yet set
def has_jlinks?; return !(@jlinks.nil? || @jlinks.empty?); end
|
261
|
+
# true when @forms holds at least one form; false when it is empty or not yet set
def has_form?; return !(@forms.nil? || @forms.size < 1); end
|
262
|
+
# true when @comments holds at least one entry; false when it is empty or not yet set
def has_comments?; return !(@comments.nil? || @comments.empty?); end
|
263
|
+
|
264
|
+
# Size of the page: <tt>body_data.size</tt>.
def size
  body = self.body_data
  body.size
end
|
268
|
+
|
269
|
+
#:section: Other methods
|
270
|
+
|
271
|
+
# Names of all nodes matched by <tt>search("*")</tt>, in result order.
def all_tags#:nodoc:
  nodes = self.search("*")
  nodes.map { |node| node.name }
end
|
274
|
+
|
275
|
+
# DOM fingerprint: the md5 (String#md5, defined outside this file) of the
# string form of the all_tags array.
def fingerprint
  tag_names = self.all_tags
  tag_names.to_s.md5
end
|
280
|
+
# +fp+ is a short name for fingerprint
alias_method :fp, :fingerprint #:nodoc:
|
281
|
+
|
282
|
+
# Make the link stored at +index+ in @links the current url.
#--
# NOTE: I always use page.get(page.l(1)) anyway.
#++
# returns: the selected link
def set_link(index)
  chosen = @links[index]
  self.url = chosen
end
|
289
|
+
|
290
|
+
# Link stored at +index+ in the @links array.
def get_link(index)
  return @links[index]
end
|
294
|
+
|
295
|
+
# +link+ is a second name for get_link
alias_method :link, :get_link #:nodoc:
|
296
|
+
# +l+ is a short name for get_link
alias_method :l, :get_link #:nodoc:
|
297
|
+
|
298
|
+
# Second name for body_data: the response body as stored on the page.
def raw
  return self.body_data
end
|
302
|
+
|
303
|
+
# Second name for last_effective_url.
def current_url
  return self.last_effective_url
end
|
307
|
+
|
308
|
+
# +current+ is a second name for current_url
alias_method :current, :current_url
|
309
|
+
# +cur+ is a short name for current_url
alias_method :cur, :current_url
|
310
|
+
|
311
|
+
# The last http response code, exactly as <tt>response_code</tt> reports it
# (not converted to a String).
def code
  return self.response_code
end
|
315
|
+
|
316
|
+
#:section: Parsing convenience methods
|
317
|
+
# methods that help parse and find information on a page including
|
318
|
+
# access to forms etc.
|
319
|
+
|
320
|
+
# grep the body line by line for +reg+ and remove leading whitespace
#
# reg:: pattern each line of body_data is matched against
#
# returns: array of the matching lines (line endings kept), each with its
# leading whitespace removed; empty when nothing matches
def grep(reg)
  # String#grep only exists on Ruby 1.8 (where String was Enumerable);
  # String#lines yields the same per-line pieces on every version.
  self.body_data.to_s.lines.grep(reg).map { |line| line.gsub(/^\s+/, "") }
end
|
324
|
+
|
325
|
+
# return this page's form (at index id) as a FormArray
#
# id:: index into @forms (default 0)
#
# returns: the result of <tt>to_form_array</tt> on the selected form, or
# nil when the page has no forms or there is no form at that index
def get_form(id=nil)
  id = 0 if not id
  return nil if @forms.nil? || @forms.empty?
  form = @forms[id]
  # an out-of-range index yields nil instead of a NoMethodError on nil
  form && form.to_form_array
end
|
331
|
+
|
332
|
+
# Complete url of the action of the form at index +id+ (default 0).
#
# A missing or empty action means the form posts back to the page itself,
# so last_effective_url is returned; otherwise the action is resolved
# against last_effective_url with the urlparse object.
def action(id=nil)
  index = id || 0
  target = self.forms[index].action
  if target.nil? || target.empty?
    self.last_effective_url
  else
    @urlparse.parse(self.last_effective_url, target).to_s
  end
end
|
339
|
+
|
340
|
+
# Run an xpath search on the parsed document and return what
# <tt>scrape.hdoc.search</tt> returns.
def search(xpath)
  document = self.scrape.hdoc
  document.search(xpath)
end
|
344
|
+
|
345
|
+
# One entry per <script> tag returned by <tt>get_tags("//script")</tt>:
# the tag's inner_html, or nil when the inner_html is blank.
def dump_scripts
  self.get_tags("//script").map do |tag|
    tag.inner_html.strip != '' ? tag.inner_html : nil
  end
end
|
349
|
+
|
350
|
+
# +scripts+ is a second name for dump_scripts
alias_method :scripts, :dump_scripts
|
351
|
+
|
352
|
+
#:section: Input and Output Helpers
|
353
|
+
|
354
|
+
# Store +url+ as both opts[:base_url] and base_url.
#
# returns: +url+, or nil (changing nothing) when no url is given
def setbase(url=nil)
  if url
    self.opts[:base_url] = url
    self.base_url = url
  end
end
|
360
|
+
|
361
|
+
# md5 of the page body, computed with String#md5 (defined outside this file).
def md5
  body = self.body_data
  body.md5
end
|
365
|
+
|
366
|
+
# Write the page body to +filename+.
#
# returns: the String "wrote to " followed by the filename
def write(filename)
  File.write(filename, self.body_data)
  "wrote to " + filename
end
|
371
|
+
|
372
|
+
# Load the page body from +filename+ and parse it with set_data.
#
# returns: whatever set_data returns
def read(filename)
  contents = File.read(filename)
  self.body_data = contents
  self.set_data
end
|
377
|
+
|
378
|
+
# Collect the values of all SET-COOKIE response headers (the header name
# is compared case-insensitively).
#
# returns: array of header values; empty when the response set no cookies
def set_cookies?
  cookies = []
  self.header_data.each do |header|
    next unless header[0].upcase == "SET-COOKIE"
    cookies << header[1]
  end
  cookies
end
|
388
|
+
|
389
|
+
# Second name for total_time.
def time
  return self.total_time
end
|
392
|
+
|
393
|
+
#:section: Data callbacks and method_missing
|
394
|
+
|
395
|
+
# callback for <tt>self.on_body</tt>
#
# Appends the received chunk to @body_data.
#
# data:: chunk handed over by the curl handle
#
# returns: the length of the chunk (0 when no chunk was passed)
def _body_cb(data)
  # the append was already guarded against nil; make the return value
  # agree with that guard instead of calling nil.length
  return 0 unless data
  @body_data << data
  return data.length.to_i
end
|
400
|
+
|
401
|
+
# callback for <tt>self.on_header</tt>
#
# Splits the raw header line at the first ":" and records the stripped
# name and value in @header_data via <tt>extend!</tt>; a line without ":"
# is recorded with an empty value.
#
# returns: the length of the raw header line
def _header_cb(data)
  name, value = data.split(":", 2)
  @header_data.extend!(name.to_s.strip, value.to_s.strip)
  data.length.to_i
end
|
407
|
+
|
408
|
+
# send methods not defined here to <tt>@curl_object</tt>
#
# Arguments and a block, if given, are forwarded unchanged. When there is
# no curl object, or it does not know the method either, the call falls
# through to the default handler and raises the usual NoMethodError.
def method_missing(methodname, *args, &block)
  # include_private=true keeps the reach of the previous plain #send
  if @curl_object && @curl_object.respond_to?(methodname, true)
    @curl_object.send(methodname, *args, &block)
  else
    super
  end
end

# keep respond_to? truthful about the methods forwarded by method_missing
def respond_to_missing?(methodname, include_private = false)
  (@curl_object && @curl_object.respond_to?(methodname, include_private)) ? true : super
end
|
412
|
+
|
413
|
+
end
|
414
|
+
end
|