iZsh-wwmd 0.2.19

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79) hide show
  1. data/History.txt +24 -0
  2. data/README.rdoc +87 -0
  3. data/Rakefile +34 -0
  4. data/examples/config_example.yaml +24 -0
  5. data/examples/wwmd_example.rb +73 -0
  6. data/lib/wwmd.rb +78 -0
  7. data/lib/wwmd/class_extensions.rb +2 -0
  8. data/lib/wwmd/class_extensions/extensions_base.rb +235 -0
  9. data/lib/wwmd/class_extensions/extensions_encoding.rb +79 -0
  10. data/lib/wwmd/class_extensions/extensions_external.rb +18 -0
  11. data/lib/wwmd/class_extensions/extensions_nilclass.rb +11 -0
  12. data/lib/wwmd/class_extensions/extensions_rbkb.rb +188 -0
  13. data/lib/wwmd/class_extensions/mixins_string_encoding.rb +40 -0
  14. data/lib/wwmd/guid.rb +155 -0
  15. data/lib/wwmd/page.rb +3 -0
  16. data/lib/wwmd/page/auth.rb +17 -0
  17. data/lib/wwmd/page/constants.rb +64 -0
  18. data/lib/wwmd/page/form.rb +99 -0
  19. data/lib/wwmd/page/form_array.rb +296 -0
  20. data/lib/wwmd/page/headers.rb +111 -0
  21. data/lib/wwmd/page/helpers.rb +30 -0
  22. data/lib/wwmd/page/html2text_hpricot.rb +76 -0
  23. data/lib/wwmd/page/html2text_nokogiri.rb +42 -0
  24. data/lib/wwmd/page/inputs.rb +47 -0
  25. data/lib/wwmd/page/irb_helpers.rb +114 -0
  26. data/lib/wwmd/page/page.rb +241 -0
  27. data/lib/wwmd/page/parsing_convenience.rb +94 -0
  28. data/lib/wwmd/page/reporting_helpers.rb +87 -0
  29. data/lib/wwmd/page/scrape.rb +198 -0
  30. data/lib/wwmd/page/spider.rb +127 -0
  31. data/lib/wwmd/urlparse.rb +104 -0
  32. data/lib/wwmd/viewstate.rb +17 -0
  33. data/lib/wwmd/viewstate/viewstate.rb +101 -0
  34. data/lib/wwmd/viewstate/viewstate_deserializer_methods.rb +217 -0
  35. data/lib/wwmd/viewstate/viewstate_from_xml.rb +128 -0
  36. data/lib/wwmd/viewstate/viewstate_types.rb +51 -0
  37. data/lib/wwmd/viewstate/viewstate_utils.rb +162 -0
  38. data/lib/wwmd/viewstate/viewstate_yaml.rb +25 -0
  39. data/lib/wwmd/viewstate/vs_stubs.rb +22 -0
  40. data/lib/wwmd/viewstate/vs_stubs/vs_array.rb +38 -0
  41. data/lib/wwmd/viewstate/vs_stubs/vs_binary_serialized.rb +30 -0
  42. data/lib/wwmd/viewstate/vs_stubs/vs_hashtable.rb +42 -0
  43. data/lib/wwmd/viewstate/vs_stubs/vs_hybrid_dict.rb +42 -0
  44. data/lib/wwmd/viewstate/vs_stubs/vs_indexed_string.rb +6 -0
  45. data/lib/wwmd/viewstate/vs_stubs/vs_indexed_string_ref.rb +24 -0
  46. data/lib/wwmd/viewstate/vs_stubs/vs_int_enum.rb +27 -0
  47. data/lib/wwmd/viewstate/vs_stubs/vs_list.rb +34 -0
  48. data/lib/wwmd/viewstate/vs_stubs/vs_pair.rb +29 -0
  49. data/lib/wwmd/viewstate/vs_stubs/vs_read_types.rb +11 -0
  50. data/lib/wwmd/viewstate/vs_stubs/vs_read_value.rb +35 -0
  51. data/lib/wwmd/viewstate/vs_stubs/vs_sparse_array.rb +58 -0
  52. data/lib/wwmd/viewstate/vs_stubs/vs_string.rb +33 -0
  53. data/lib/wwmd/viewstate/vs_stubs/vs_string_array.rb +39 -0
  54. data/lib/wwmd/viewstate/vs_stubs/vs_string_formatted.rb +32 -0
  55. data/lib/wwmd/viewstate/vs_stubs/vs_stub_helpers.rb +37 -0
  56. data/lib/wwmd/viewstate/vs_stubs/vs_triplet.rb +31 -0
  57. data/lib/wwmd/viewstate/vs_stubs/vs_type.rb +23 -0
  58. data/lib/wwmd/viewstate/vs_stubs/vs_unit.rb +30 -0
  59. data/lib/wwmd/viewstate/vs_stubs/vs_value.rb +35 -0
  60. data/lib/wwmd/wwmd_config.rb +52 -0
  61. data/lib/wwmd/wwmd_puts.rb +9 -0
  62. data/lib/wwmd/wwmd_utils.rb +28 -0
  63. data/spec/README +3 -0
  64. data/spec/form_array.spec +49 -0
  65. data/spec/spider_csrf_test.spec +28 -0
  66. data/spec/urlparse_test.spec +101 -0
  67. data/tasks/ann.rake +80 -0
  68. data/tasks/bones.rake +20 -0
  69. data/tasks/gem.rake +201 -0
  70. data/tasks/git.rake +40 -0
  71. data/tasks/notes.rake +27 -0
  72. data/tasks/post_load.rake +34 -0
  73. data/tasks/rdoc.rake +51 -0
  74. data/tasks/rubyforge.rake +55 -0
  75. data/tasks/setup.rb +292 -0
  76. data/tasks/spec.rake +54 -0
  77. data/tasks/test.rake +40 -0
  78. data/tasks/zentest.rake +36 -0
  79. metadata +174 -0
@@ -0,0 +1,111 @@
1
+ module WWMD
2
+ class Page
3
+
4
+ #:section: Header helper methods
5
+
6
+ # clear header at <key>
7
+ def clear_header(key)
8
+ self.headers.delete_if { |k,v| k.upcase == key.upcase }
9
+ return nil
10
+ end
11
+
12
+ alias_method :delete_header, :clear_header#:nodoc:
13
+
14
+ # clear all headers
15
+ def clear_headers
16
+ self.headers.delete_if { |k,v| true }
17
+ "headers cleared"
18
+ end
19
+
20
+ # set headers from passed argument
21
+ # Nil: set headers from WWMD::DEFAULT_HEADERS
22
+ # Symbol: entry in WWMD::HEADERS to set from
23
+ # Hash: hash to set headers from
24
+ # String: filename (NOT IMPLEMENTED)
25
+ #
26
+ # if clear == true then headers will be cleared before setting
27
+ def set_headers(arg=nil,clear=false)
28
+ clear_headers if clear
29
+ if arg.nil?
30
+ begin
31
+ clear_headers
32
+ WWMD::DEFAULT_HEADERS.each { |k,v| self.headers[k] = v }
33
+ return "headers set from default"
34
+ rescue => e
35
+ putw "WARN: #{e}" # interpolate the exception: "WARN: " + e raised TypeError (String + Exception)
36
+ return false
37
+ end
38
+ elsif arg.class == Symbol
39
+ set_headers(WWMD::HEADERS[arg])
40
+ putw "headers set from #{arg}"
41
+ return true
42
+ elsif arg.class == Hash
43
+ arg.each { |k,v| self.headers[k] = v }
44
+ putw "headers set from hash"
45
+ return true
46
+ end
47
+ putw "error setting headers"
48
+ return false
49
+ end
50
+
51
+ # set headers back to default headers
52
+ def default_headers(arg=nil)
53
+ set_headers
54
+ end
55
+
56
+ alias_method :set_default, :default_headers
57
+
58
+ # set headers from text
59
+ def headers_from_array(arr)
60
+ clear_headers
61
+ arr.each do |line|
62
+ next if (line.empty? || line =~ /^(GET|POST)/)
63
+ k,v = line.split(":",2)
64
+ self.headers[k.strip] = v.strip
65
+ end
66
+ nil
67
+ end
68
+
69
+ # set headers from paste
70
+ def headers_from_paste
71
+ headers_from_array(%x[pbpaste])
72
+ end
73
+
74
+ # set headers from file
75
+ def headers_from_file(fn)
76
+ headers_from_array(File.read(fn).split("\n"))
77
+ return "headers set from #{fn}"
78
+ end
79
+
80
+ # set headers to utf7 encoding post
81
+ def set_utf7_headers
82
+ self.headers["Content-Type"] = "application/x-www-form-urlencoded;charset=UTF-7"
83
+ return "headers set to utf7"
84
+ end
85
+
86
+ # set headers to ajax
87
+ def set_ajax_headers
88
+ self.headers["X-Requested-With"] = "XMLHttpRequest"
89
+ self.headers["X-Prototype-Version"] = "1.5.0"
90
+ return "headers set to ajax"
91
+ end
92
+
93
+ # set headers to SOAP request headers
94
+ def set_soap_headers
95
+ self.headers['Content-Type'] = "text/xml;charset=utf-8"
96
+ self.headers['SOAPAction'] = "\"\""
97
+ return "headers set to soap"
98
+ end
99
+
100
+ # get the current Cookie header
101
+ def get_cookie
102
+ self.headers["Cookie"]
103
+ end
104
+
105
+ # set the Cookie header
106
+ def set_cookie(cookie=nil)
107
+ self.headers["Cookie"] = cookie
108
+ end
109
+
110
+ end
111
+ end
@@ -0,0 +1,30 @@
1
+ module WWMD
2
+ class Page
3
+ # copy and paste from burp request windows
4
+ # page object gets set with headers and url (not correct)
5
+ # returns [headers,form]
6
+ # form = page.from_paste
7
+ def from_paste
8
+ self.enable_cookies = false
9
+ req = %x[pbpaste]
10
+ return false if not req
11
+ h,b = req.chomp.split("\r\n\r\n",2)
12
+ oh = h
13
+ h = h.split("\r\n")
14
+ m,u,p = h.shift.split(" ")
15
+ return nil unless m =~ (/^(POST|GET)/)
16
+ self.url = self.base_url + u
17
+ self.headers_from_array(h)
18
+ self.body_data = b
19
+ self.set_data
20
+ form = b.to_form
21
+ form.action = @urlparse.parse(self.base_url, u).to_s
22
+ [oh,form]
23
+ end
24
+
25
+ def resp_paste
26
+ self.body_data = %x[pbpaste].split("\r\n\r\n",2)[1]
27
+ self.set_data
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,76 @@
1
+ # Geoff Davis geoff at geoffdavis.net
2
+ # Wed May 2 20:08:44 EDT 2007
3
+ # http://rubyforge.org/pipermail/raleigh-rb-members/2007-May/000789.html
4
+ # modified by mtracy at matasano.com for WWMD
5
+
6
+ module WWMD
7
+ InlineTags = ['a','abbr','acronym','address','b','bdo','big','cite','code','del','dfn','em','font','i','ins','kbd','label','noframes','noscript','q','s','samp','small','span','strike','strong','sub','sup','td','th','tt','u','html','body','table']
8
+ BlockTags = ['blockquote','br','center','dd','div','fieldset','form','h1','h2','h3', 'h4','h5','h6','hr','p','pre','tr','var',]
9
+ ListTags = ['dir','dl','menu','ol','ul']
10
+ ItemTags = ['li','dt']
11
+ # AsciiEquivalents = {"amp"=>"&","bull"=>"*","copy"=>"(c)","laquo"=>"<<","raquo"=>">>","ge"=> ">=","le"=>"<=","mdash"=>"-","ndash"=>"-","plusmn"=>"+/-","times"=>"x"}
12
+
13
+ # NamedCharRegex = Regexp.new("(&("+Hpricot::NamedCharacters.keys.join("|")+");)")
14
+
15
+ class Page
16
+ def element_to_text(n)
17
+ tag = n.etag || n.stag
18
+ name = tag.name.downcase
19
+ s = ""
20
+ is_block = BlockTags.include?(name)
21
+ is_list = ListTags.include?(name)
22
+ is_item = ItemTags.include?(name)
23
+ is_inline = InlineTags.include?(name)
24
+ if is_block or is_list or is_item or is_inline
25
+ n.each_child do |c|
26
+ s += node_to_text(c)
27
+ end
28
+ if is_block or is_list
29
+ s += "\n"
30
+ elsif is_item
31
+ s = "* " + s + "\n"
32
+ end
33
+ end
34
+ s
35
+ end
36
+
37
+ def node_to_text(n)
38
+ return "" if n.comment?
39
+ return element_to_text(n) if n.elem?
40
+ return n.inner_text if n.text?
41
+
42
+ s = ""
43
+ begin
44
+ n.each_child do |c|
45
+ s += node_to_text(c)
46
+ end
47
+ rescue => e
48
+ putw "WARN: #{e.inspect}"
49
+ end
50
+ return s
51
+ end
52
+
53
+ # def lookup_named_char(s)
54
+ # c = Hpricot::NamedCharacters[s[1...-1]]
55
+ # c.chr if c
56
+ # end
57
+
58
+ def html2text
59
+ doc = self.scrape.hdoc
60
+ text = node_to_text(doc)
61
+ # text.gsub!(NamedCharRegex){|s| "#{lookup_named_char(s)}"}
62
+ # clean up white space
63
+ text.gsub!("\r"," ")
64
+ text.squeeze!(" ")
65
+ text.strip!
66
+ ret = ''
67
+ text.split(/\n/).each do |l|
68
+ l.strip!
69
+ next if l == ''
70
+ next if l =~ /^\?+$/
71
+ ret += "#{l}\n"
72
+ end
73
+ return ret
74
+ end
75
+ end
76
+ end
@@ -0,0 +1,42 @@
1
+ =begin rdoc
2
+ html2text that works with Nokogiri
3
+ =end
4
+ module WWMD
5
+
6
+ INLINETAGS = ['a','abbr','acronym','address','b','bdo','big','cite',
7
+ 'code','del','dfn','em','font','i','ins','kbd','label',
8
+ 'noframes','noscript','q','s','samp','small','span',
9
+ 'strike','strong','sub','sup','td','th','tt','u',
10
+ 'html','body','table']
11
+ BLOCKTAGS = ['blockquote','center','dd','div','fieldset','form',
12
+ 'h1','h2','h3','h4','h5','h6','p','pre','tr','var',]
13
+ LISTTAGS = ['dir','dl','menu','ol','ul']
14
+ ITEMTAGS = ['li','dt']
15
+ SPECIALTAGS = ['br','hr']
16
+
17
+ class Page
18
+ def html2text
19
+ arr = []
20
+ self.scrape.hdoc.traverse do |x|
21
+ arr << [x.parent.name,x.text] if x.text?
22
+ if x.elem?
23
+ arr << [x.name,""] if SPECIALTAGS.include?(x.name)
24
+ end
25
+ end
26
+ ret = ""
27
+ arr.each do |name,str|
28
+ (ret += "\n"; next ) if name == "br"
29
+ (ret += "\n" + ("-" * 72) + "\n"; next) if name == "hr"
30
+ s = str.strip
31
+ if BLOCKTAGS.include?(name) or LISTTAGS.include?(name)
32
+ s += "\n"
33
+ elsif ITEMTAGS.include?(name)
34
+ s = "* " + s + "\n"
35
+ end
36
+ ret += s
37
+ end
38
+ ret.gsub(/\n+/) { "\n" }
39
+ ret.gsub(/[^\x20-\x7e,\n]/,"").gsub(/^\n/,"")
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,47 @@
1
+ module WWMD
2
+ class Inputs
3
+ attr_accessor :elems
4
+
5
+ @cobj = '' # wwmd object
6
+ @elems = '' # array of elems parse out by self.new()
7
+
8
+ def initialize(*args)
9
+ @cobj = args.shift
10
+ end
11
+
12
+ def show
13
+ putx @elems
14
+ end
15
+
16
+ # call me from Page.set_data
17
+ def set
18
+ @elems = [@cobj.search("//input").map,@cobj.search("//select").map].flatten
19
+ end
20
+
21
+ def get(attr=nil)
22
+ @elems.map { |x| x[attr] }.reject { |y| y.nil? }
23
+ end
24
+
25
+ #
26
+ # return: FormArray containing all page inputs
27
+ def form
28
+ ret = {}
29
+ @elems.map do |x|
30
+ name = x['name']
31
+ id = x['id']
32
+ next if (name.nil? && id.nil?)
33
+ value = x['value']
34
+ type = x['type']
35
+ ret[name] = value
36
+ ret[id] = value if ((id || name) != name)
37
+ end
38
+ return FormArray.new(ret)
39
+ end
40
+
41
+ #
42
+ # return: FormArray containing get params
43
+ def params
44
+ return FormArray.new(@cobj.cur.clop.to_form)
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,114 @@
1
+ =begin rdoc
2
+ this file contains methods to help operations in irb (display methods etc.).
3
+ =end
4
+ module WWMD
5
+ class Page
6
+
7
+ #:section: IRB helper methods
8
+
9
+ def head(i=1)
10
+ if i.kind_of?(Range)
11
+ puts self.body_data.split("\n")[i].join("\n")
12
+ return nil
13
+ end
14
+ puts self.body_data.head(i)
15
+ end
16
+
17
+ # IRB: text report what has been parsed from this page
18
+ def report(short=nil)
19
+ puts "-------------------------------------------------"
20
+ self.summary
21
+ puts "---- links found [#{self.has_links?.to_s} | #{self.links.size}]"
22
+ self.links.each_index { |i| puts "#{i.to_s} :: #{@links[i]}" } if short.nil?
23
+ puts "---- javascript found [#{self.has_jlinks?.to_s} | #{self.jlinks.size}]"
24
+ self.jlinks.each { |url| puts url } if short.nil?
25
+ puts "---- forms found [#{self.has_form?.to_s} | #{self.forms.size}]"
26
+ puts "---- comments found [#{self.has_comments?.to_s}]"
27
+ return nil
28
+ end
29
+
30
+ alias_method :show, :report#:nodoc:
31
+
32
+ # IRB: display summary of what has been parsed from this page
33
+ def summary
34
+ status = self.page_status
35
+ puts "XXXX[#{self.report_flags}] | #{self.response_code.to_s} | #{status} | #{self.url} | #{self.size}"
36
+ return nil
37
+ end
38
+
39
+ # IRB: display current headers
40
+ def request_headers
41
+ self.headers.each_pair { |k,v| puts "#{k}: #{v}" }
42
+ return nil
43
+ end
44
+
45
+ alias_method :show_headers, :request_headers#:nodoc:
46
+ alias_method :req_headers, :request_headers#:nodoc:
47
+
48
+ # IRB: display response headers
49
+ def response_headers
50
+ self.header_data.each { |x| puts "#{x[0]} :: #{x[1]}" }
51
+ return nil
52
+ end
53
+
54
+ alias_method :resp_headers, :response_headers#:nodoc:
55
+
56
+ # display self.body_data
57
+ def dump_body
58
+ puts self.body_data
59
+ end
60
+
61
+ alias_method :dump, :dump_body#:nodoc:
62
+
63
+ # IRB: puts the page filtered through html2text
64
+ def to_text; puts self.html2text; end
65
+ def text; self.html2text; end
66
+
67
+ # IRB: display a human readable report of all forms contained in page.body_data
68
+ def all_forms
69
+ self.forms.each_index { |x| puts "[#{x.to_s}]-------"; self.forms[x].report }
70
+ nil
71
+ end
72
+
73
+ def onclicks
74
+ self.search("//*[@onclick]").each { |x| puts x[:onclick] }
75
+ nil
76
+ end
77
+
78
+ # hexdump self.body_data
79
+ def hexdump
80
+ puts self.body_data.hexdump
81
+ end
82
+
83
+ # this only works on a mac so get a mac
84
+ def open #:nodoc:
85
+ fn = "wwmdtmp_#{Guid.new}.html"
86
+ self.write(fn)
87
+ %x[open #{fn}]
88
+ end
89
+ end
90
+
91
+ class Form
92
+ def report
93
+ return nil if not WWMD::console
94
+ puts "action = #{self.action}"
95
+ self.fields.each { |field| puts field.to_text }
96
+ return nil
97
+ end
98
+ alias_method :show, :report
99
+ end
100
+
101
+ class FormArray
102
+ # IRB: puts the form in human readable format
103
+ # if you <tt>form.show(true)</tt> it will show unescaped values
104
+ def show(unescape=false)
105
+ if unescape
106
+ self.each_index { |i| puts i.to_s + " :: " + self[i][0].to_s + " = " + self[i][1].to_s.unescape }
107
+ else
108
+ self.each_index { |i| puts i.to_s + " :: " + self[i][0].to_s + " = " + self[i][1].to_s }
109
+ end
110
+ return nil
111
+ end
112
+
113
+ end
114
+ end
@@ -0,0 +1,241 @@
1
+ module WWMD
2
+ # WWMD::Page is an extension of a Curl::Easy object which provides methods to
3
+ # enhance and ease the performance of web application penetration testing.
4
+ class Page
5
+
6
+ attr_accessor :curl_object
7
+ attr_accessor :body_data
8
+ attr_accessor :post_data
9
+ attr_accessor :header_data
10
+ attr_accessor :use_referer
11
+ attr_reader :forms
12
+ attr_reader :last_error
13
+ attr_reader :links # array of links (urls)
14
+ attr_reader :jlinks # array of included javascript files
15
+ attr_reader :spider # spider object
16
+ attr_reader :scrape # scrape object
17
+ attr_reader :urlparse # urlparse object
18
+ attr_reader :comments
19
+
20
+ attr_accessor :base_url # needed to properly munge relative urls into fq urls
21
+ attr_accessor :logged_in # are we logged in?
22
+
23
+ attr_accessor :opts
24
+ attr_accessor :inputs
25
+
26
+ include WWMDUtils
27
+
28
+ def initialize(opts={}, &block)
29
+ @opts = opts.clone
30
+ DEFAULTS.each { |k,v| @opts[k] = v unless opts[k] }
31
+ @spider = Spider.new(opts)
32
+ @scrape = Scrape.new
33
+ @base_url ||= opts[:base_url]
34
+ @scrape.warn = opts[:scrape_warn] if !opts[:scrape_warn].nil? # yeah yeah... bool false
35
+ @urlparse = URLParse.new()
36
+ @inputs = Inputs.new(self)
37
+ @logged_in = false
38
+ @body_data = ""
39
+ @post_data = ""
40
+ @comments = []
41
+ @header_data = FormArray.new
42
+
43
+ @curl_object = Curl::Easy.new
44
+ @opts.each do |k,v|
45
+ next if k == :proxy_url
46
+ self.instance_variable_set("@#{k.to_s}".intern,v)
47
+ if (@curl_object.methods.include?("#{k}="))
48
+ @curl_object.send("#{k}=",v)
49
+ end
50
+ end
51
+ @curl_object.on_body { |data| self._body_cb(data) }
52
+ @curl_object.on_header { |data| self._header_cb(data) }
53
+
54
+ # cookies?
55
+ @curl_object.enable_cookies = @opts[:enable_cookies]
56
+ if @curl_object.enable_cookies?
57
+ @curl_object.cookiejar = @opts[:cookiejar] || "./__cookiejar"
58
+ end
59
+
60
+ #proxy?
61
+ @curl_object.proxy_url = @opts[:proxy_url] if @opts[:use_proxy]
62
+ instance_eval(&block) if block_given?
63
+ if opts.empty? && @scrape.warn
64
+ putw "Page initialized without opts"
65
+ @scrape.warn = false
66
+ end
67
+ end
68
+
69
+ #:section: Heavy Lifting
70
+
71
+ # set reporting data for the page
72
+ #
73
+ # Scan for comments, anchors, links and javascript includes and
74
+ # set page flags. The heavy lifting for parsing is done in the
75
+ # scrape class.
76
+ #
77
+ # returns: <tt>array [ code, body_data.size ]</tt>
78
+ def set_data
79
+ # reset scrape and inputs object
80
+ # transparently gunzip
81
+ begin
82
+ io = StringIO.new(self.body_data)
83
+ gz = Zlib::GzipReader.new(io)
84
+ self.body_data.replace(gz.read)
85
+ rescue => e
86
+ end
87
+ @scrape.reset(self.body_data)
88
+ @inputs.set
89
+
90
+ # remove comments that are css selectors for IE silliness
91
+ @comments = @scrape.for_comments.reject do |c|
92
+ c =~ /\[if IE\]/ ||
93
+ c =~ /\[if IE \d/ ||
94
+ c =~ /\[if lt IE \d/
95
+ end
96
+ @links = @scrape.for_links.map do |url|
97
+ @urlparse.parse(self.last_effective_url,url).to_s
98
+ end
99
+ @jlinks = @scrape.for_javascript_links
100
+ @forms = @scrape.for_forms
101
+ @spider.add(self.last_effective_url,@links)
102
+ return [self.code,self.body_data.size]
103
+ end
104
+
105
+ # clear self.body_data and self.header_data
106
+ def clear_data
107
+ return false if self.opts[:parse] = false
108
+ @body_data = ""
109
+ @header_data.clear
110
+ @post_data = ""
111
+ @last_error = nil
112
+ end
113
+
114
+ # override Curl::Easy.perform to perform page actions,
115
+ # call <tt>self.set_data</tt>
116
+ #
117
+ # returns: <tt>array [ code, body_data.size ]</tt>
118
+ #
119
+ # don't call this directly if we are in console mode
120
+ # use get and submit respectively for GET and POST
121
+ def perform
122
+ self.clear_data
123
+ self.headers["Referer"] = self.cur if self.use_referer
124
+ begin
125
+ @curl_object.perform
126
+ rescue => e
127
+ @last_error = e
128
+ putw "WARN: #{e.class}" if e.class =~ /Curl::Err/
129
+ end
130
+ self.set_data
131
+ end
132
+
133
+ # replacement for Curl::Easy.http_post
134
+ #
135
+ # post the form attempting to remove curl supplied headers (Expect, X-Forwarded-For, Content-length)
136
+ # call <tt>self.set_data</tt>
137
+ #
138
+ # if passed a regexp, escape values in the form using regexp before submitting
139
+ # if passed nil for the regexp arg, the form will not be escaped
140
+ # default: WWMD::ESCAPE[:default]
141
+ #
142
+ # returns: <tt>array [ code, body_data.size ]</tt>
143
+ def submit(iform=nil,reg=WWMD::ESCAPE[:default])
144
+ ## this is just getting worse and worse
145
+ if iform.class == "Symbol"
146
+ reg = iform
147
+ iform = nil
148
+ end
149
+ self.clear_data
150
+ ["Expect","X-Forwarded-For","Content-length"].each { |s| self.clear_header(s) }
151
+ self.headers["Referer"] = self.cur if self.use_referer
152
+ unless iform
153
+ unless self.form.empty?
154
+ sform = self.form.clone
155
+ else
156
+ return "no form provided"
157
+ end
158
+ else
159
+ sform = iform.clone # clone the form so that we don't change the original
160
+ end
161
+ sform.escape_all!(reg)
162
+ self.url = sform.action if sform.action
163
+ if sform.empty?
164
+ self.http_post('')
165
+ else
166
+ self.http_post(self.post_data = sform.to_post)
167
+ end
168
+ self.set_data
169
+ end
170
+
171
+ # submit a form using POST string
172
+ def submit_string(post_string)
173
+ self.clear_data
174
+ self.http_post(post_string)
175
+ putw "WARN: authentication headers in response" if self.auth?
176
+ self.set_data
177
+ end
178
+
179
+ # override for Curl::Easy.perform
180
+ #
181
+ # if the passed url string doesn't contain a fully qualified
182
+ # path, we'll guess and prepend opts[:base_url]
183
+ #
184
+ # returns: <tt>array [ code, body_data.size ]</tt>
185
+ def get(url=nil,parse=true)
186
+ self.clear_data
187
+ self.headers["Referer"] = self.cur if self.use_referer
188
+ if !(url =~ /[a-z]+:\/\//) && parse
189
+ self.url = @urlparse.parse(self.opts[:base_url],url).to_s if url
190
+ elsif url
191
+ self.url = url
192
+ end
193
+ self.http_get
194
+ putw "WARN: authentication headers in response" if self.auth?
195
+ self.set_data
196
+ end
197
+
198
+ # GET with params and POST it as a form
199
+ def post(url=nil)
200
+ ep = url.clip
201
+ self.url = @urlparse.parse(self.opts[:base_url],ep).to_s if ep
202
+ form = url.clop.to_form
203
+ self.submit(form)
204
+ end
205
+
206
+ # send arbitrary verb (only works with patch to taf2-curb)
207
+ def verb(verb,url=nil)
208
+ return false if !@curl_object.respond_to?(:http_verb)
209
+ self.url = url if url
210
+ self.clear_data
211
+ self.headers["Referer"] = self.cur if self.use_referer
212
+ self.http_verb(verb)
213
+ self.set_data
214
+ end
215
+
216
+ #:section: Data callbacks and method_missing
217
+
218
+ # callback for <tt>self.on_body</tt>
219
+ def _body_cb(data)
220
+ @body_data << data if data
221
+ return data.length.to_i
222
+ end
223
+
224
+ # callback for <tt>self.on_header</tt>
225
+ def _header_cb(data)
226
+ myArr = Array.new(data.split(":",2))
227
+ @header_data.extend! myArr[0].to_s.strip,myArr[1].to_s.strip
228
+ return data.length.to_i
229
+ end
230
+
231
+ # send methods not defined here to <tt>@curl_object</tt>
232
+ def method_missing(methodname, *args)
233
+ if self.respond_to?(methodname)
234
+ self.send(methodname, *args)
235
+ else
236
+ @curl_object.send(methodname, *args)
237
+ end
238
+ end
239
+
240
+ end
241
+ end