arachni 0.2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (262) hide show
  1. data/ACKNOWLEDGMENTS.md +14 -0
  2. data/AUTHORS.md +6 -0
  3. data/CHANGELOG.md +162 -0
  4. data/CONTRIBUTORS.md +10 -0
  5. data/EXPLOITATION.md +429 -0
  6. data/HACKING.md +101 -0
  7. data/LICENSE.md +341 -0
  8. data/README.md +350 -0
  9. data/Rakefile +86 -0
  10. data/bin/arachni +22 -0
  11. data/bin/arachni_web +77 -0
  12. data/bin/arachni_xmlrpc +21 -0
  13. data/bin/arachni_xmlrpcd +82 -0
  14. data/bin/arachni_xmlrpcd_monitor +74 -0
  15. data/conf/README.webui.yaml.txt +44 -0
  16. data/conf/webui.yaml +11 -0
  17. data/external/metasploit/LICENSE +24 -0
  18. data/external/metasploit/modules/exploits/unix/webapp/arachni_exec.rb +142 -0
  19. data/external/metasploit/modules/exploits/unix/webapp/arachni_path_traversal.rb +113 -0
  20. data/external/metasploit/modules/exploits/unix/webapp/arachni_php_eval.rb +150 -0
  21. data/external/metasploit/modules/exploits/unix/webapp/arachni_php_include.rb +141 -0
  22. data/external/metasploit/modules/exploits/unix/webapp/arachni_sqlmap.rb +92 -0
  23. data/external/metasploit/plugins/arachni.rb +536 -0
  24. data/getoptslong.rb +241 -0
  25. data/lib/anemone.rb +2 -0
  26. data/lib/anemone/cookie_store.rb +35 -0
  27. data/lib/anemone/core.rb +371 -0
  28. data/lib/anemone/exceptions.rb +5 -0
  29. data/lib/anemone/http.rb +144 -0
  30. data/lib/anemone/page.rb +337 -0
  31. data/lib/anemone/page_store.rb +160 -0
  32. data/lib/anemone/storage.rb +34 -0
  33. data/lib/anemone/storage/base.rb +75 -0
  34. data/lib/anemone/storage/exceptions.rb +15 -0
  35. data/lib/anemone/storage/mongodb.rb +89 -0
  36. data/lib/anemone/storage/pstore.rb +50 -0
  37. data/lib/anemone/storage/redis.rb +90 -0
  38. data/lib/anemone/storage/tokyo_cabinet.rb +57 -0
  39. data/lib/anemone/tentacle.rb +40 -0
  40. data/lib/arachni.rb +16 -0
  41. data/lib/audit_store.rb +346 -0
  42. data/lib/component_manager.rb +293 -0
  43. data/lib/component_options.rb +395 -0
  44. data/lib/exceptions.rb +76 -0
  45. data/lib/framework.rb +637 -0
  46. data/lib/http.rb +809 -0
  47. data/lib/issue.rb +302 -0
  48. data/lib/module.rb +4 -0
  49. data/lib/module/auditor.rb +455 -0
  50. data/lib/module/base.rb +188 -0
  51. data/lib/module/element_db.rb +158 -0
  52. data/lib/module/key_filler.rb +87 -0
  53. data/lib/module/manager.rb +87 -0
  54. data/lib/module/output.rb +68 -0
  55. data/lib/module/trainer.rb +240 -0
  56. data/lib/module/utilities.rb +110 -0
  57. data/lib/options.rb +547 -0
  58. data/lib/parser.rb +2 -0
  59. data/lib/parser/auditable.rb +522 -0
  60. data/lib/parser/elements.rb +296 -0
  61. data/lib/parser/page.rb +149 -0
  62. data/lib/parser/parser.rb +717 -0
  63. data/lib/plugin.rb +4 -0
  64. data/lib/plugin/base.rb +110 -0
  65. data/lib/plugin/manager.rb +162 -0
  66. data/lib/report.rb +4 -0
  67. data/lib/report/base.rb +119 -0
  68. data/lib/report/manager.rb +92 -0
  69. data/lib/rpc/xml/client/base.rb +71 -0
  70. data/lib/rpc/xml/client/dispatcher.rb +49 -0
  71. data/lib/rpc/xml/client/instance.rb +88 -0
  72. data/lib/rpc/xml/server/base.rb +90 -0
  73. data/lib/rpc/xml/server/dispatcher.rb +357 -0
  74. data/lib/rpc/xml/server/framework.rb +206 -0
  75. data/lib/rpc/xml/server/instance.rb +191 -0
  76. data/lib/rpc/xml/server/module/manager.rb +46 -0
  77. data/lib/rpc/xml/server/options.rb +124 -0
  78. data/lib/rpc/xml/server/output.rb +299 -0
  79. data/lib/rpc/xml/server/plugin/manager.rb +58 -0
  80. data/lib/ruby.rb +5 -0
  81. data/lib/ruby/object.rb +32 -0
  82. data/lib/ruby/string.rb +74 -0
  83. data/lib/ruby/xmlrpc/server.rb +27 -0
  84. data/lib/spider.rb +200 -0
  85. data/lib/typhoeus/request.rb +91 -0
  86. data/lib/typhoeus/response.rb +34 -0
  87. data/lib/ui/cli/cli.rb +744 -0
  88. data/lib/ui/cli/output.rb +279 -0
  89. data/lib/ui/web/log.rb +82 -0
  90. data/lib/ui/web/output_stream.rb +94 -0
  91. data/lib/ui/web/report_manager.rb +222 -0
  92. data/lib/ui/web/server.rb +903 -0
  93. data/lib/ui/web/server/db/placeholder +0 -0
  94. data/lib/ui/web/server/public/banner.png +0 -0
  95. data/lib/ui/web/server/public/bodybg-small.png +0 -0
  96. data/lib/ui/web/server/public/bodybg.png +0 -0
  97. data/lib/ui/web/server/public/css/smoothness/images/pbar-ani.gif +0 -0
  98. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_flat_0_aaaaaa_40x100.png +0 -0
  99. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_flat_75_ffffff_40x100.png +0 -0
  100. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_55_fbf9ee_1x400.png +0 -0
  101. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_65_ffffff_1x400.png +0 -0
  102. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_75_dadada_1x400.png +0 -0
  103. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_75_e6e6e6_1x400.png +0 -0
  104. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_95_fef1ec_1x400.png +0 -0
  105. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_highlight-soft_75_cccccc_1x100.png +0 -0
  106. data/lib/ui/web/server/public/css/smoothness/images/ui-icons_222222_256x240.png +0 -0
  107. data/lib/ui/web/server/public/css/smoothness/images/ui-icons_2e83ff_256x240.png +0 -0
  108. data/lib/ui/web/server/public/css/smoothness/images/ui-icons_454545_256x240.png +0 -0
  109. data/lib/ui/web/server/public/css/smoothness/images/ui-icons_888888_256x240.png +0 -0
  110. data/lib/ui/web/server/public/css/smoothness/images/ui-icons_cd0a0a_256x240.png +0 -0
  111. data/lib/ui/web/server/public/css/smoothness/jquery-ui-1.8.9.custom.css +573 -0
  112. data/lib/ui/web/server/public/favicon.ico +0 -0
  113. data/lib/ui/web/server/public/footer.jpg +0 -0
  114. data/lib/ui/web/server/public/icons/error.png +0 -0
  115. data/lib/ui/web/server/public/icons/info.png +0 -0
  116. data/lib/ui/web/server/public/icons/ok.png +0 -0
  117. data/lib/ui/web/server/public/icons/status.png +0 -0
  118. data/lib/ui/web/server/public/js/jquery-1.4.4.min.js +167 -0
  119. data/lib/ui/web/server/public/js/jquery-ui-1.8.9.custom.min.js +781 -0
  120. data/lib/ui/web/server/public/logo.png +0 -0
  121. data/lib/ui/web/server/public/nav-left.jpg +0 -0
  122. data/lib/ui/web/server/public/nav-right.jpg +0 -0
  123. data/lib/ui/web/server/public/nav-selected-left.jpg +0 -0
  124. data/lib/ui/web/server/public/nav-selected-right.jpg +0 -0
  125. data/lib/ui/web/server/public/reports/placeholder +1 -0
  126. data/lib/ui/web/server/public/sidebar-bottom.jpg +0 -0
  127. data/lib/ui/web/server/public/sidebar-h4.jpg +0 -0
  128. data/lib/ui/web/server/public/sidebar-top.jpg +0 -0
  129. data/lib/ui/web/server/public/spider.png +0 -0
  130. data/lib/ui/web/server/public/style.css +604 -0
  131. data/lib/ui/web/server/tmp/placeholder +0 -0
  132. data/lib/ui/web/server/views/dispatcher.erb +85 -0
  133. data/lib/ui/web/server/views/dispatcher_error.erb +14 -0
  134. data/lib/ui/web/server/views/error.erb +1 -0
  135. data/lib/ui/web/server/views/flash.erb +18 -0
  136. data/lib/ui/web/server/views/home.erb +14 -0
  137. data/lib/ui/web/server/views/instance.erb +213 -0
  138. data/lib/ui/web/server/views/layout.erb +95 -0
  139. data/lib/ui/web/server/views/log.erb +40 -0
  140. data/lib/ui/web/server/views/modules.erb +71 -0
  141. data/lib/ui/web/server/views/options.erb +23 -0
  142. data/lib/ui/web/server/views/output_results.erb +51 -0
  143. data/lib/ui/web/server/views/plugins.erb +42 -0
  144. data/lib/ui/web/server/views/report_formats.erb +30 -0
  145. data/lib/ui/web/server/views/reports.erb +55 -0
  146. data/lib/ui/web/server/views/settings.erb +120 -0
  147. data/lib/ui/web/server/views/welcome.erb +38 -0
  148. data/lib/ui/xmlrpc/dispatcher_monitor.rb +204 -0
  149. data/lib/ui/xmlrpc/xmlrpc.rb +843 -0
  150. data/logs/placeholder +0 -0
  151. data/metamodules/autothrottle.rb +74 -0
  152. data/metamodules/timeout_notice.rb +118 -0
  153. data/metamodules/uniformity.rb +98 -0
  154. data/modules/audit/code_injection.rb +136 -0
  155. data/modules/audit/code_injection_timing.rb +115 -0
  156. data/modules/audit/code_injection_timing/payloads.txt +4 -0
  157. data/modules/audit/csrf.rb +301 -0
  158. data/modules/audit/ldapi.rb +103 -0
  159. data/modules/audit/ldapi/errors.txt +26 -0
  160. data/modules/audit/os_cmd_injection.rb +103 -0
  161. data/modules/audit/os_cmd_injection/payloads.txt +2 -0
  162. data/modules/audit/os_cmd_injection_timing.rb +104 -0
  163. data/modules/audit/os_cmd_injection_timing/payloads.txt +3 -0
  164. data/modules/audit/path_traversal.rb +141 -0
  165. data/modules/audit/response_splitting.rb +105 -0
  166. data/modules/audit/rfi.rb +193 -0
  167. data/modules/audit/sqli.rb +120 -0
  168. data/modules/audit/sqli/regexp_ids.txt +90 -0
  169. data/modules/audit/sqli_blind_rdiff.rb +321 -0
  170. data/modules/audit/sqli_blind_timing.rb +103 -0
  171. data/modules/audit/sqli_blind_timing/payloads.txt +51 -0
  172. data/modules/audit/trainer.rb +89 -0
  173. data/modules/audit/unvalidated_redirect.rb +90 -0
  174. data/modules/audit/xpath.rb +104 -0
  175. data/modules/audit/xpath/errors.txt +26 -0
  176. data/modules/audit/xss.rb +99 -0
  177. data/modules/audit/xss_event.rb +134 -0
  178. data/modules/audit/xss_path.rb +125 -0
  179. data/modules/audit/xss_script_tag.rb +112 -0
  180. data/modules/audit/xss_tag.rb +112 -0
  181. data/modules/audit/xss_uri.rb +125 -0
  182. data/modules/recon/allowed_methods.rb +104 -0
  183. data/modules/recon/backdoors.rb +131 -0
  184. data/modules/recon/backdoors/filenames.txt +16 -0
  185. data/modules/recon/backup_files.rb +177 -0
  186. data/modules/recon/backup_files/extensions.txt +28 -0
  187. data/modules/recon/common_directories.rb +138 -0
  188. data/modules/recon/common_directories/directories.txt +265 -0
  189. data/modules/recon/common_files.rb +138 -0
  190. data/modules/recon/common_files/filenames.txt +17 -0
  191. data/modules/recon/directory_listing.rb +171 -0
  192. data/modules/recon/grep/captcha.rb +62 -0
  193. data/modules/recon/grep/credit_card.rb +85 -0
  194. data/modules/recon/grep/cvs_svn_users.rb +73 -0
  195. data/modules/recon/grep/emails.rb +59 -0
  196. data/modules/recon/grep/html_objects.rb +53 -0
  197. data/modules/recon/grep/private_ip.rb +54 -0
  198. data/modules/recon/grep/ssn.rb +53 -0
  199. data/modules/recon/htaccess_limit.rb +82 -0
  200. data/modules/recon/http_put.rb +95 -0
  201. data/modules/recon/interesting_responses.rb +118 -0
  202. data/modules/recon/unencrypted_password_forms.rb +119 -0
  203. data/modules/recon/webdav.rb +126 -0
  204. data/modules/recon/xst.rb +107 -0
  205. data/path_extractors/anchors.rb +35 -0
  206. data/path_extractors/forms.rb +35 -0
  207. data/path_extractors/frames.rb +38 -0
  208. data/path_extractors/generic.rb +39 -0
  209. data/path_extractors/links.rb +35 -0
  210. data/path_extractors/meta_refresh.rb +39 -0
  211. data/path_extractors/scripts.rb +37 -0
  212. data/path_extractors/sitemap.rb +31 -0
  213. data/plugins/autologin.rb +137 -0
  214. data/plugins/content_types.rb +90 -0
  215. data/plugins/cookie_collector.rb +99 -0
  216. data/plugins/form_dicattack.rb +185 -0
  217. data/plugins/healthmap.rb +94 -0
  218. data/plugins/http_dicattack.rb +133 -0
  219. data/plugins/metamodules.rb +118 -0
  220. data/plugins/proxy.rb +248 -0
  221. data/plugins/proxy/server.rb +66 -0
  222. data/plugins/waf_detector.rb +184 -0
  223. data/profiles/comprehensive.afp +74 -0
  224. data/profiles/full.afp +75 -0
  225. data/reports/afr.rb +59 -0
  226. data/reports/ap.rb +55 -0
  227. data/reports/html.rb +179 -0
  228. data/reports/html/default.erb +967 -0
  229. data/reports/metareport.rb +139 -0
  230. data/reports/metareport/arachni_metareport.rb +174 -0
  231. data/reports/plugin_formatters/html/content_types.rb +82 -0
  232. data/reports/plugin_formatters/html/cookie_collector.rb +66 -0
  233. data/reports/plugin_formatters/html/form_dicattack.rb +54 -0
  234. data/reports/plugin_formatters/html/healthmap.rb +76 -0
  235. data/reports/plugin_formatters/html/http_dicattack.rb +54 -0
  236. data/reports/plugin_formatters/html/metaformatters/timeout_notice.rb +65 -0
  237. data/reports/plugin_formatters/html/metaformatters/uniformity.rb +71 -0
  238. data/reports/plugin_formatters/html/metamodules.rb +93 -0
  239. data/reports/plugin_formatters/html/waf_detector.rb +54 -0
  240. data/reports/plugin_formatters/stdout/content_types.rb +73 -0
  241. data/reports/plugin_formatters/stdout/cookie_collector.rb +61 -0
  242. data/reports/plugin_formatters/stdout/form_dicattack.rb +52 -0
  243. data/reports/plugin_formatters/stdout/healthmap.rb +72 -0
  244. data/reports/plugin_formatters/stdout/http_dicattack.rb +53 -0
  245. data/reports/plugin_formatters/stdout/metaformatters/timeout_notice.rb +55 -0
  246. data/reports/plugin_formatters/stdout/metaformatters/uniformity.rb +68 -0
  247. data/reports/plugin_formatters/stdout/metamodules.rb +89 -0
  248. data/reports/plugin_formatters/stdout/waf_detector.rb +48 -0
  249. data/reports/plugin_formatters/xml/content_types.rb +91 -0
  250. data/reports/plugin_formatters/xml/cookie_collector.rb +70 -0
  251. data/reports/plugin_formatters/xml/form_dicattack.rb +57 -0
  252. data/reports/plugin_formatters/xml/healthmap.rb +82 -0
  253. data/reports/plugin_formatters/xml/http_dicattack.rb +57 -0
  254. data/reports/plugin_formatters/xml/metaformatters/timeout_notice.rb +67 -0
  255. data/reports/plugin_formatters/xml/metaformatters/uniformity.rb +82 -0
  256. data/reports/plugin_formatters/xml/metamodules.rb +91 -0
  257. data/reports/plugin_formatters/xml/waf_detector.rb +58 -0
  258. data/reports/stdout.rb +182 -0
  259. data/reports/txt.rb +77 -0
  260. data/reports/xml.rb +231 -0
  261. data/reports/xml/buffer.rb +98 -0
  262. metadata +516 -0
@@ -0,0 +1,296 @@
1
+ =begin
2
+ Arachni
3
+ Copyright (c) 2010-2011 Tasos "Zapotek" Laskos <tasos.laskos@gmail.com>
4
+
5
+ This is free software; you can copy and distribute and modify
6
+ this program under the term of the GPL v2.0 License
7
+ (See LICENSE file for details)
8
+ =end
9
+
10
+ module Arachni
11
+
12
+ opts = Arachni::Options.instance
13
+ require opts.dir['lib'] + 'parser/auditable'
14
+
15
+ class Parser
16
+
17
+ module Element
18
+
19
+ #
20
+ # Base element class.
21
+ #
22
+ # Should be extended/implemented by all HTML/HTTP modules.
23
+ #
24
+ # @author: Tasos "Zapotek" Laskos
25
+ # <tasos.laskos@gmail.com>
26
+ # <zapotek@segfault.gr>
27
+ # @version: 0.1
28
+ #
29
+ # @abstract
30
+ #
31
class Base < Arachni::Element::Auditable

    # @return [String] URL of the page that owns the element
    attr_accessor :url

    # @return [String] URL the element points to and should be audited against,
    #                  e.g. 'href' for links, 'action' for forms
    attr_accessor :action

    # Inputs of the element that are to be audited; assigned by the subclasses.
    attr_accessor :auditable

    # @return [Hash] relatively 'raw' hash holding the element's attributes, values, etc.
    attr_accessor :raw

    # @return [String] method of the element, e.g. get, post, cookie, header
    #
    # @see Arachni::Module::HTTP
    attr_accessor :method

    #
    # Stores private copies of both arguments.
    #
    # @param    [String]  url   {#url}
    # @param    [Hash]    raw   {#raw}
    #
    def initialize( url, raw = {} )
        @raw, @url = raw.dup, url.dup
    end

    #
    # @return   [String]  string identifying self, the stringified {#raw} hash
    #
    def id
        @raw.to_s
    end

    #
    # Placeholder, subclasses provide a simple hash representation of self.
    #
    # @return   [nil]
    #
    def simple
    end

    #
    # Placeholder, subclasses provide one of {Arachni::Module::Auditor::Element}.
    #
    # @return   [nil]
    #
    def type
    end

end
106
+
107
class Link < Base

    #
    # Audits against the link's 'href' via GET, using its 'vars' as inputs.
    #
    # @param    [String]  url   URL of the owning page
    # @param    [Hash]    raw   hash holding (at least) 'href' and 'vars'
    #
    def initialize( url, raw = {} )
        super( url, raw )

        @method    = 'get'
        @action    = @raw['href']
        @auditable = @raw['vars']
    end

    # Submits the request via the auditor's HTTP interface using GET.
    def http_request( url, opts )
        @auditor.http.get( url, opts )
    end

    # @return   [Hash]    the action mapped to the auditable inputs
    def simple
        { @action => @auditable }
    end

    def type
        Arachni::Module::Auditor::Element::LINK
    end

    #
    # Builds the string identifying an audit of this link.
    #
    # It is made up of the auditor's name, the action's path resolved against
    # the auditor's page URL, the element type, the sorted input names,
    # the injected string and the timeout (blank when none was given).
    #
    # @param    [#to_s]   injection_str
    # @param    [Hash]    opts    only :timeout is read
    #
    # @return   [String]
    #
    def audit_id( injection_str, opts = {} )
        input_names = auditable.keys.sort.to_s
        target      = URI( @auditor.page.url ).merge( URI( @action ).path ).to_s
        timeout     = opts[:timeout] || ''

        prefix = "#{@auditor.class.info[:name]}:#{target}:#{self.type}:"
        prefix + "#{input_names}=#{injection_str.to_s}:timeout=#{timeout}"
    end

end
142
+
143
+
144
class Form < Base

    include Arachni::Module::Utilities

    # Values of opts[:altered] that mark a submission of the form with its
    # original, respectively sample, values.
    FORM_VALUES_ORIGINAL = '__original_values__'
    FORM_VALUES_SAMPLE   = '__sample_values__'

    #
    # @param    [String]  url   URL of the owning page
    # @param    [Hash]    raw   hash holding the form's 'attrs' and 'auditable' inputs
    #
    def initialize( url, raw = {} )
        super( url, raw )

        # 'attrs' is absent when the element is built with the default (empty)
        # raw hash, don't blow up in that case
        attrs   = @raw['attrs'] || {}
        @action = attrs['action']
        @method = attrs['method']

        @auditable = simple['auditable'] || {}
    end

    #
    # Submits the form via the auditor's HTTP interface.
    #
    # Submissions flagged (via opts[:altered]) as carrying the original or the
    # sample values are only performed once, unless opts[:redundant] is set,
    # and force opts[:train] to true (the caller's hash is updated).
    #
    # @param    [String]  url
    # @param    [Hash]    opts
    #
    # @return   [Object, nil]   whatever the HTTP interface returns,
    #                           nil if the submission was skipped
    #
    def http_request( url, opts )
        kind = training_submission_kind( opts[:altered] )

        if kind
            submission_id = audit_id( opts[:altered] )

            return if !opts[:redundant] && audited?( submission_id )
            audited( submission_id )

            print_debug( "Submitting form with #{kind} values;" +
                ' overriding trainer option.' )
            opts[:train] = true
            print_debug_trainer( opts )
        end

        # anything other than an explicit 'get' (a missing method included)
        # is sent as a POST
        if @method.to_s.downcase != 'get'
            @auditor.http.post( url, opts )
        else
            @auditor.http.get( url, opts )
        end
    end

    #
    # @return   [String]  the form's attributes followed by the names of its
    #                     inputs, skipping names that contain the seed
    #
    def id
        id = simple['attrs'].to_s

        auditable.each {
            |name, _value|
            next if name.substring?( seed )
            id += name
        }

        id
    end

    #
    # @return   [Hash]    'attrs' and a name=>value hash of the 'auditable'
    #                     inputs; empty when there are no auditable inputs
    #
    def simple
        form = {}

        return form if !@raw || !@raw['auditable'] || @raw['auditable'].empty?

        form['attrs']     = @raw['attrs']
        form['auditable'] = {}
        @raw['auditable'].each {
            |item|
            next if !item['name']
            form['auditable'][item['name']] = item['value']
        }

        form
    end

    def type
        Arachni::Module::Auditor::Element::FORM
    end

    private

    #
    # Maps a training marker to the word used in the debug message.
    #
    # @return   [String, nil]   nil for regular (non-training) submissions
    #
    def training_submission_kind( altered )
        case altered
        when FORM_VALUES_ORIGINAL then 'original'
        when FORM_VALUES_SAMPLE   then 'sample'
        end
    end

end
235
+
236
class Cookie < Base

    #
    # @param    [String]  url   URL of the owning page, doubles as the action
    # @param    [Hash]    raw   hash holding the cookie's 'name' and 'value'
    #
    def initialize( url, raw = {} )
        super( url, raw )

        @method = 'cookie'
        @action = @url

        @auditable = { @raw['name'] => @raw['value'] }

        # the simple representation is captured before exclusions are applied
        @simple = @auditable.dup

        # cookies the user chose to exclude aren't to be audited
        @auditable.reject! {
            |name, _value|
            Options.instance.exclude_cookies.include?( name )
        }
    end

    # Submits the request via the auditor's HTTP interface as a cookie audit.
    def http_request( url, opts )
        @auditor.http.cookie( url, opts )
    end

    # @return   [Hash]    name=>value pair of the cookie
    def simple
        @simple
    end

    def type
        Arachni::Module::Auditor::Element::COOKIE
    end

end
266
+
267
class Header < Base

    #
    # @param    [String]  url   URL of the owning page, doubles as the action
    # @param    [Hash]    raw   the header(s), used as-is as the auditable inputs
    #
    def initialize( url, raw = {} )
        super( url, raw )

        @method    = 'header'
        @action    = @url
        @auditable = @raw
    end

    # Submits the request via the auditor's HTTP interface as a header audit.
    def http_request( url, opts )
        @auditor.http.header( url, opts )
    end

    # @return   [Hash]    copy of the auditable inputs
    def simple
        @auditable.dup
    end

    def type
        Arachni::Module::Auditor::Element::HEADER
    end

end
292
+
293
+
294
+ end
295
+ end
296
+ end
@@ -0,0 +1,149 @@
1
+ =begin
2
+ Arachni
3
+ Copyright (c) 2010-2011 Tasos "Zapotek" Laskos <tasos.laskos@gmail.com>
4
+
5
+ This is free software; you can copy and distribute and modify
6
+ this program under the term of the GPL v2.0 License
7
+ (See LICENSE file for details)
8
+
9
+ =end
10
+
11
+ module Arachni
12
+
13
+ class Parser
14
+ #
15
+ # Arachni::Page class
16
+ #
17
+ # It holds page data like elements, cookies, headers, etc...
18
+ #
19
+ # @author: Tasos "Zapotek" Laskos
20
+ # <tasos.laskos@gmail.com>
21
+ # <zapotek@segfault.gr>
22
+ # @version: 0.2
23
+ #
24
class Page

    # @return    [String]    url of the page
    attr_accessor :url

    # @return    [Fixnum]    the HTTP response code
    attr_accessor :code

    # @return    [String]    the request method that returned the page
    attr_accessor :method

    # @return    [Hash]    url variables
    attr_accessor :query_vars

    # @return    [String]    the HTML response
    attr_accessor :html

    #
    # Request headers
    #
    # @return    [Array<Arachni::Parser::Element::Header>]
    #
    attr_accessor :headers

    # @return    [Hash]
    attr_accessor :response_headers

    #
    # @see Parser#links
    #
    # @return    [Array<Arachni::Parser::Element::Link>]
    #
    attr_accessor :links

    #
    # @see Parser#forms
    #
    # @return    [Array<Arachni::Parser::Element::Form>]
    #
    attr_accessor :forms

    #
    # @see Parser#cookies
    #
    # @return    [Array<Arachni::Parser::Element::Cookie>]
    #
    attr_accessor :cookies

    #
    # Cookies extracted from the supplied cookiejar
    #
    # @return    [Hash]
    #
    attr_accessor :cookiejar

    #
    # Assigns every key of opts through the writer of the same name.
    #
    # @param    [Hash]    opts    attribute name => value
    #
    def initialize( opts = {} )
        opts.each {
            |k, v|
            send( "#{k}=", v )
        }
    end

    # @return    [String]    alias of {#html}
    def body
        @html
    end

    #
    # Returns the simple representation of each form in {#forms}.
    #
    # @return    [Array]     empty if no forms have been set
    #
    def forms_simple( )
        ( @forms || [] ).map { |form| form.simple }
    end

    #
    # Returns the simple representation of each link in {#links}.
    #
    # @return    [Array]     empty if no links have been set
    #
    def links_simple
        ( @links || [] ).map { |link| link.simple }
    end

    #
    # Returns cookies from {#cookies} merged into a single hash;
    # on duplicate keys the cookie that comes last wins.
    #
    # @return    [Hash]    empty if no cookies have been set
    #
    def cookies_simple
        ( @cookies || [] ).inject( {} ) {
            |merged, cookie|
            merged.merge!( cookie.simple )
        }
    end

end
148
+ end
149
+ end
@@ -0,0 +1,717 @@
1
+ =begin
2
+ Arachni
3
+ Copyright (c) 2010-2011 Tasos "Zapotek" Laskos <tasos.laskos@gmail.com>
4
+
5
+ This is free software; you can copy and distribute and modify
6
+ this program under the term of the GPL v2.0 License
7
+ (See LICENSE file for details)
8
+ =end
9
+
10
+ module Arachni
11
+
12
+ opts = Arachni::Options.instance
13
+ require opts.dir['lib'] + 'parser/elements'
14
+ require opts.dir['lib'] + 'parser/page'
15
+ require opts.dir['lib'] + 'module/utilities'
16
+
17
+ #
18
+ # Analyzer class
19
+ #
20
+ # Analyzes HTML code extracting forms, links and cookies
21
+ # depending on user opts.<br/>
22
+ #
23
+ # It grabs <b>all</b> element attributes not just URLs and variables.<br/>
24
+ # All URLs are converted to absolute and URLs outside the domain are ignored.<br/>
25
+ #
26
+ # === Forms
27
+ # Form analysis uses both regular expressions and the Nokogiri parser<br/>
28
+ # in order to be able to handle badly written HTML code, such as not closed<br/>
29
+ # tags and tag overlaps.
30
+ #
31
+ # In order to ease audits, in addition to parsing forms into data structures<br/>
32
+ # like "select" and "option", all auditable inputs are put under the<br/>
33
+ # "auditable" key.
34
+ #
35
+ # === Links
36
+ # Links are extracted using the Nokogiri parser.
37
+ #
38
+ # === Cookies
39
+ # Cookies are extracted from the HTTP headers and parsed by WEBrick::Cookie
40
+ #
41
+ # @author: Tasos "Zapotek" Laskos
42
+ # <tasos.laskos@gmail.com>
43
+ # <zapotek@segfault.gr>
44
+ # @version: 0.2
45
+ #
46
+ class Parser
47
+
48
+ include Arachni::Module::Utilities
49
+
50
+ #
51
+ # @return [String] the url of the page
52
+ #
53
+ attr_accessor :url
54
+
55
+ #
56
+ # Options instance
57
+ #
58
+ # @return [Options]
59
+ #
60
+ attr_reader :opts
61
+
62
+ #
63
+ # Constructor <br/>
64
+ # Instantiates Analyzer class with user options.
65
+ #
66
+ # @param [Options] opts
67
+ #
68
def initialize( opts, res )
    # opts is the Options instance, res the HTTP response being parsed; it is
    # asked for its #effective_url, #body and #headers_hash.
    @opts             = opts
    @response_headers = res.headers_hash
    @html             = res.body
    @url              = res.effective_url
end
75
+
76
+ #
77
+ # Runs the Analyzer and extracts forms, links and cookies
78
+ #
79
+ # @return [Page]
80
+ #
81
def run
    # Builds the Page for the response handed to the constructor.
    # Non-text responses get a Page with no auditable elements at all.

    # non text files won't contain any auditable elements
    type = Arachni::HTTP.content_type( @response_headers )
    if type.is_a?( String ) && !type.substring?( 'text' )
        return Page.new( {
            :url              => @url,
            :query_vars       => link_vars( @url ),
            :html             => @html,
            :headers          => [],
            :response_headers => @response_headers,
            :forms            => [],
            :links            => [],
            :cookies          => [],
            # a Hash, like in the regular case below
            :cookiejar        => {}
        } )
    end

    cookies_arr = cookies
    cookies_arr = merge_with_cookiejar( cookies_arr.flatten.uniq )

    # cookies from the user supplied cookie-jar, they also replace @opts.cookies
    jar = {}
    jar = @opts.cookies = Arachni::HTTP.parse_cookiejar( @opts.cookie_jar ) if @opts.cookie_jar

    # name=>value pairs of the cookies gathered so far...
    preped = {}
    cookies_arr.each{ |cookie| preped.merge!( cookie.simple ) }

    # ...with the values found in the jar taking precedence
    jar = preped.merge( jar )

    # NOTE(review): merge_with_cookiejar has already been applied to cookies_arr
    # above, calling it again appends the @opts.cookies entries a second time;
    # left as is, confirm whether that is intended.
    return Page.new( {
        :url              => @url,
        :query_vars       => link_vars( @url ),
        :html             => @html,
        :headers          => headers(),
        :response_headers => @response_headers,
        :forms            => @opts.audit_forms ? forms() : [],
        :links            => @opts.audit_links ? links() : [],
        :cookies          => merge_with_cookiestore( merge_with_cookiejar( cookies_arr ) ),
        :cookiejar        => jar
    } )
end
124
+
125
def doc
    # Lazily parses @html with Nokogiri and caches the document;
    # yields nil when there is no HTML or when parsing raises.
    return @doc if @doc
    return nil if !@html

    begin
        @doc = Nokogiri::HTML( @html )
    rescue
        nil
    end
end
129
+
130
def merge_with_cookiestore( cookies )
    # Folds 'cookies' into the cookies remembered from previous calls
    # (@cookiestore) and returns the updated store; on name clashes the
    # newly passed cookie wins.

    @cookiestore ||= []

    # nothing stored yet, adopt the given cookies as they are
    if @cookiestore.empty?
        @cookiestore = cookies
        return @cookiestore
    end

    # stored cookies first so that the new ones override them
    by_name = {}
    [ @cookiestore, cookies ].each {
        |set|
        set.each { |cookie| by_name.merge!( cookie.simple ) }
    }

    @cookiestore = by_name.map {
        |name, value|
        Element::Cookie.new( @url, { 'name' => name, 'value' => value } )
    }

    @cookiestore
end
160
+
161
+ #
162
+ # Merges 'cookies' with the cookiejar and returns it as an array
163
+ #
164
+ # @param [Array<Hash>] cookies
165
+ #
166
+ # @return [Array<Element::Cookie>] the merged cookies
167
+ #
168
def merge_with_cookiejar( cookies )
    # Appends one Element::Cookie per @opts.cookies entry to the given array
    # (which is modified in place) and returns that same array.
    jar = @opts.cookies
    return cookies if !jar

    jar.each_pair {
        |name, value|
        cookies << Element::Cookie.new( @url, { 'name' => name, 'value' => value } )
    }

    cookies
end
182
+
183
+
184
+ #
185
+ # Returns a list of valid auditable HTTP header fields.
186
+ #
187
+ # It's more of a placeholder method, it doesn't actually analyze anything.<br/>
188
+ # It's a long shot that any of these will be vulnerable but better
189
+ # be safe than sorry.
190
+ #
191
+ # @return [Array<Element::Header>] auditable HTTP header fields
192
+ #
193
def headers( )
    # Wraps each of the header fields below in its own Element::Header and
    # returns them as an array; 'from' and 'user-agent' come from the options
    # (blank when unset), 'referer' is the page's own URL.
    fields = {
        'accept'          => 'text/html,application/xhtml+xml,application' +
            '/xml;q=0.9,*/*;q=0.8',
        'accept-charset'  => 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'accept-language' => 'en-gb,en;q=0.5',
        'accept-encoding' => 'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
        'from'            => @opts.authed_by  || '',
        'user-agent'      => @opts.user_agent || '',
        'referer'         => @url,
        'pragma'          => 'no-cache'
    }

    fields.map {
        |name, value|
        Element::Header.new( @url, { name => value } )
    }
end
212
+
213
+ # TODO: Add support for radio buttons.
214
+ #
215
+ # Extracts forms from HTML document
216
+ #
217
+ # @see #form_attrs
218
+ # @see #form_textareas
219
+ # @see #form_selects
220
+ # @see #form_inputs
221
+ # @see #merge_select_with_input
222
+ #
223
+ # @param [String] html
224
+ #
225
+ # @return [Array<Element::Form>] array of forms
226
+ #
227
def forms( html = nil )
    # Extracts the forms of the given HTML (or of the page when none is given)
    # as Element::Form objects. Forms submitting to another domain and forms
    # without auditable inputs are left out.

    begin
        # work on a private copy: the closed forms get cut out of it below and
        # a string handed in by the caller must not be modified
        html = ( html || @html ).dup

        #
        # This imitates Firefox's behavior when it comes to
        # broken/unclosed form tags
        #

        # get properly closed forms
        forms = html.scan( /<form(.*?)<\/form>/ixm ).flatten

        # now remove them from html...
        forms.each { |form| html.gsub!( form, '' ) }

        # and get unclosed forms.
        forms |= html.scan( /<form (.*)(?!<\/form>)/ixm ).flatten

    rescue StandardError
        # best effort: HTML that can't be scanned simply yields no forms
        return []
    end

    elements = []
    forms.each {
        |form|

        # the nil check of the original code suggests form_attrs may come back
        # empty-handed; fall back to an empty hash so that defaults can be set
        attrs = form_attrs( form ) || {}

        # forms without an action submit to the page itself
        # NOTE(review): URI.escape has been removed in Ruby 3.0
        action = attrs['action'] || @url.to_s
        action = URI.escape( action ).to_s

        attrs['action'] = to_absolute( action.clone ).to_s
        attrs['method'] = attrs['method'] ? attrs['method'].downcase : 'post'

        # skip forms pointing outside the domain
        next if !in_domain?( URI.parse( URI.escape( attrs['action'] ) ) )

        raw = { 'attrs' => attrs }
        raw['textarea'] = form_textareas( form )
        raw['select']   = form_selects( form )
        raw['input']    = form_inputs( form )

        # merge the form elements to make auditing easier
        raw['auditable'] = merge_select_with_input(
            raw['input'] | raw['textarea'], raw['select']
        )

        element = Element::Form.new( @url, raw )
        elements << element if !element.auditable.empty?
    }

    elements
end
302
+
303
+ #
304
+ # Extracts links from HTML document
305
+ #
306
+ # @see #link_vars
307
+ #
308
+ # @param [String] html
309
+ #
310
+ # @return [Array<Element::Link>] of links
311
+ #
312
def links
    # Collects the page's anchors as Element::Link objects. The method takes
    # no arguments, anchors come from elements_by_name( 'a' ).
    # Anchors are skipped when their href can't be made absolute, matches the
    # exclude rules, fails the include rules or points outside the domain.
    found = []

    elements_by_name( 'a' ).each {
        |link|

        link['href'] = to_absolute( link['href'] )
        href = link['href']

        next if !href
        next if exclude?( href )
        next if !include?( href )
        next if !in_domain?( URI.parse( href ) )

        link['vars'] = link_vars( href )

        found << Element::Link.new( @url, link )
    }

    found
end
334
+
335
+ #
336
+ # Extracts cookies from the HTTP response headers and from <meta http-equiv="Set-Cookie"> tags
337
+ #
338
+ # @param [String] headers HTTP headers
339
+ # @param [String] html the HTML code of the page
340
+ #
341
+ # @return [Array<Element::Cookie>] of cookies
342
+ #
343
def cookies
    parsed = []

    # Cookies set from markup: <meta http-equiv="Set-Cookie" content="k=v; ...">
    begin
        doc.search("//meta[@http-equiv]").each do |meta|
            next if meta['http-equiv'].downcase != 'set-cookie'
            name, value = meta['content'].split(';')[0].split('=', 2)
            parsed << Element::Cookie.new(@url, { 'name' => name, 'value' => value })
        end
    rescue
        # Best effort: a malformed meta tag must not stop header cookies
        # from being read; whatever was collected so far is kept.
    end

    header_cookies = []

    # NOTE(review): only the lowercase spelling is looked for in the header
    # dump, and both spellings are then read -- presumably to cope with
    # differing header-key casing; confirm against the HTTP client in use.
    if @response_headers.to_s.substring?('set-cookie')
        begin
            header_cookies << WEBrick::Cookie.parse_set_cookies(@response_headers['Set-Cookie'].to_s)
            header_cookies << WEBrick::Cookie.parse_set_cookies(@response_headers['set-cookie'].to_s)
        rescue
            return parsed
        end
    end

    # Turn every parsed cookie object into a name => value attribute hash.
    # Results are appended so that the meta-tag cookies gathered above are
    # kept instead of being overwritten by index.
    header_cookies.flatten.uniq.each do |cookie|
        attrs = {}

        cookie.instance_variables.each do |ivar|
            val = cookie.instance_variable_get(ivar).to_s.strip.gsub(/[\"\\\[\]]/, '')

            next if val == seed
            attrs[normalize_name(ivar)] = val
        end

        parsed << Element::Cookie.new(@url, attrs)
    end

    parsed
end
392
+
393
+ #
394
+ # Extracts variables and their values from a link
395
+ #
396
+ # @see #links
397
+ #
398
+ # @param [String] link
399
+ #
400
+ # @return [Hash] name=>value pairs
401
+ #
402
def link_vars( link )
    return {} unless link

    # Split on the first '?' only, so a literal '?' inside the query survives.
    query = link.split('?', 2)[1]
    return {} unless query

    query.split('&').each_with_object({}) do |pair, vars|
        # A limit of 2 keeps '=' characters that belong to the value
        # (e.g. base64 padding) instead of truncating at the second '='.
        name, value = pair.split('=', 2)

        # Empty pair such as the middle of "a=1&&b=2": nothing to record.
        next if name.nil?

        # Empty values keep being reported as nil, as they always were.
        value = nil if value && value.empty?

        next if value == seed
        vars[name] = value
    end
end
420
+
421
+ #
422
+ # Converts relative URL *link* into an absolute URL based on the
423
+ # location of the page
424
+ #
425
+ # @param [String] link
426
+ #
427
+ # @return [String]
428
+ #
429
def to_absolute( link )
    begin
        # Already absolute (has a host): hand it back untouched.
        return link if URI.parse(link).host
    rescue StandardError
        return nil if link.nil?
        # Not parseable as-is; fall through and retry once it has been escaped.
    end

    # URI.encode/URI.escape were removed in Ruby 3.0; the RFC 2396 parser
    # still provides the same escaping on old and new rubies alike.
    escaper = defined?(URI::RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER

    # remove anchor
    link = escaper.escape(link.to_s.gsub(/#[a-zA-Z0-9_-]*$/, ''))

    begin
        absolute = URI.parse(@url).merge(URI(link))

        # A bare "http://host" is normalized to "http://host/".
        absolute.path = '/' if absolute.path.empty?
    rescue StandardError
        # Still unparseable (or the page URL itself is invalid): no absolute form.
        return
    end

    absolute.to_s
end
456
+
457
+ #
458
+ # Returns +true+ if *uri* is in the same domain as the page, returns
459
+ # +false+ otherwise
460
+ #
461
def in_domain?( uri )
    candidate = URI.parse(normalize_url(uri.to_s))

    # Strict mode: the hosts must match exactly.
    unless @opts.follow_subdomains
        own_host = URI.parse(normalize_url(@url.to_s)).host
        return candidate.host == own_host
    end

    # Subdomains allowed: compare the extracted domains instead.
    extract_domain(candidate) == extract_domain(URI(@url.to_s))
end
470
+
471
+ #
472
+ # Extracts the domain from a URI object
473
+ #
474
+ # @param [URI] url
475
+ #
476
+ # @return [String]
477
+ #
478
def extract_domain( url )
    host = url.host

    # No host at all (e.g. a relative URI): there is no domain to report.
    return false unless host

    labels = host.split('.')

    # A single-label host such as "localhost" is its own domain. Returning
    # the host (rather than a bare +true+) keeps two different single-label
    # hosts from comparing as equal.
    return host if labels.length == 1

    # Last two labels, e.g. "www.example.com" => "example.com".
    # NOTE: multi-part public suffixes ("example.co.uk") are not recognized.
    labels.last(2).join('.')
end
488
+
489
def exclude?( url )
    # Excluded as soon as any configured exclusion pattern matches the URL.
    @opts.exclude.any? { |pattern| url.to_s =~ pattern }
end
497
+
498
def include?( url )
    patterns = @opts.include

    # With no inclusion patterns everything is included; otherwise at least
    # one pattern has to match the URL.
    patterns.empty? || patterns.any? { |pattern| url.to_s =~ pattern }
end
507
+
508
+
509
+ private
510
+
511
+ #
512
+ # Merges an array of form inputs with an array of form selects
513
+ #
514
+ # @see #forms
515
+ #
516
+ # @param [Array] form inputs
517
+ # @param [Array] form selects
518
+ #
519
+ # @return [Array] merged array
520
+ #
521
def merge_select_with_input( inputs, selects )
    # Start from a copy so the caller's inputs array is left untouched.
    merged = Array(inputs).dup

    Array(selects).each do |select|
        attrs = select['attrs']

        # A select whose attributes could not be parsed has nothing to audit.
        next unless attrs

        # The first option (when there is one) provides the select's value;
        # a select without any options is still added, just without a value.
        first_option = Array(select['options']).first
        attrs['value'] = first_option['value'] if first_option

        merged << attrs
    end

    merged
end
538
+
539
+
540
+ #
541
+ # Parses the attributes inside the <form ....> tag
542
+ #
543
+ # @see #forms
544
+ # @see #attrs_from_tag
545
+ #
546
+ # @param [String] form HTML code for the form tag
547
+ #
548
+ # @return [Array<Hash<String, String>>]
549
+ #
550
def form_attrs( form )
    # Everything up to the first '>' is the attribute list of the opening tag.
    opening = form.scan(/(.*?)>/ixm).first.first
    attrs_from_tag('form', "<form #{opening}>").first
end
554
+
555
+
556
+ #
557
+ # Extracts HTML select elements, their attributes and their options
558
+ #
559
+ # @see #forms
560
+ # @see #form_selects_options
561
+ #
562
+ # @param [String] HTML
563
+ #
564
+ # @return [Array] array of select elements
565
+ #
566
def form_selects( html )
    # Capture, per select, its attribute list and its own body (up to the
    # closing tag, the next <select, or the end of the input) so that each
    # select gets only the options that belong to it rather than every
    # option found in the whole form.
    html.scan(/<select(.*?)>(.*?)(?=<\/select|<select|\z)/im).map do |attr_html, body|
        {
            'options' => form_selects_options(body),
            'attrs'   => attrs_from_tag('select', '<select ' + attr_html + '/>')[0]
        }
    end
end
583
+
584
+ #
585
+ # Extracts HTML option elements and their attributes
586
+ # from select elements
587
+ #
588
+ # @see #forms
589
+ # @see #form_selects
590
+ #
591
+ # @param [String] HTML selects
592
+ #
593
+ # @return [Array] array of option elements
594
+ #
595
def form_selects_options( html )
    # One attribute hash per <option ...> opening tag found in the markup.
    html.scan(/<option(.*?)>/ixm).map do |captured|
        attrs_from_tag('option', "<option #{captured.first}/>").first
    end
end
609
+
610
+ #
611
+ # Extracts HTML textarea elements and their attributes
612
+ # from forms
613
+ #
614
+ # @see #forms
615
+ #
616
+ # @param [String] HTML
617
+ #
618
+ # @return [Array] array of textarea elements
619
+ #
620
def form_textareas( html )
    # One attribute hash per <textarea ...> opening tag found in the markup.
    html.scan(/<textarea(.*?)>/ixm).map do |captured|
        attrs_from_tag('textarea', "<textarea #{captured.first}/>").first
    end
end
632
+
633
+ #
634
+ # Parses the attributes of input fields
635
+ #
636
+ # @see #forms
637
+ #
638
+ # @param [String] html HTML code for the form tag
639
+ #
640
+ # @return [Array<Hash<String, String>>]
641
+ #
642
def form_inputs( html )
    # One attribute hash per <input ...> tag found in the markup.
    html.scan(/<input(.*?)>/ixm).map do |captured|
        attrs_from_tag('input', "<input #{captured.first}/>").first
    end
end
655
+
656
+ #
657
+ # Gets attributes from HTML code of a tag
658
+ #
659
+ # @param [String] tag tag name (a, form, input)
660
+ # @param [String] html HTML code for the form tag
661
+ #
662
+ # @return [Array<Hash<String, String>>]
663
+ #
664
def attrs_from_tag( tag, html )
    # Parse the snippet and build one attribute hash per matching element.
    Nokogiri::HTML(html).search(tag).map do |node|
        attrs = {}

        node.each do |attr_name, attr_value|
            # Attributes whose value equals the seed are left out.
            next if attr_value == seed
            attrs[attr_name.downcase] = attr_value
        end

        attrs
    end
end
681
+
682
+ # Extracts elements by name from HTML document
683
+ #
684
+ # @param [String] name 'form', 'a', 'div', etc.
685
+ # @param [String] html
686
+ #
687
+ # @return [Array<Hash <String, String> >] of elements
688
+ #
689
def elements_by_name( name )
    collected = []

    begin
        doc.search(name).each do |node|
            attrs = {}
            collected << attrs

            # The element's own attributes first...
            node.each { |key, value| attrs[key] = value }

            # ...then those of its direct children, which overwrite same-named ones.
            node.children.each do |child|
                child.each { |key, value| attrs[key] = value }
            end
        end
    rescue StandardError
        # Best effort: whatever was gathered before the failure is still returned.
    end

    collected
end
712
+
713
def normalize_name( name )
    # Drop every '@' so an instance-variable name reads as a plain key.
    name.to_s.delete('@')
end
716
+ end
717
+ end