arachni 0.2.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (262) hide show
  1. data/ACKNOWLEDGMENTS.md +14 -0
  2. data/AUTHORS.md +6 -0
  3. data/CHANGELOG.md +162 -0
  4. data/CONTRIBUTORS.md +10 -0
  5. data/EXPLOITATION.md +429 -0
  6. data/HACKING.md +101 -0
  7. data/LICENSE.md +341 -0
  8. data/README.md +350 -0
  9. data/Rakefile +86 -0
  10. data/bin/arachni +22 -0
  11. data/bin/arachni_web +77 -0
  12. data/bin/arachni_xmlrpc +21 -0
  13. data/bin/arachni_xmlrpcd +82 -0
  14. data/bin/arachni_xmlrpcd_monitor +74 -0
  15. data/conf/README.webui.yaml.txt +44 -0
  16. data/conf/webui.yaml +11 -0
  17. data/external/metasploit/LICENSE +24 -0
  18. data/external/metasploit/modules/exploits/unix/webapp/arachni_exec.rb +142 -0
  19. data/external/metasploit/modules/exploits/unix/webapp/arachni_path_traversal.rb +113 -0
  20. data/external/metasploit/modules/exploits/unix/webapp/arachni_php_eval.rb +150 -0
  21. data/external/metasploit/modules/exploits/unix/webapp/arachni_php_include.rb +141 -0
  22. data/external/metasploit/modules/exploits/unix/webapp/arachni_sqlmap.rb +92 -0
  23. data/external/metasploit/plugins/arachni.rb +536 -0
  24. data/getoptslong.rb +241 -0
  25. data/lib/anemone.rb +2 -0
  26. data/lib/anemone/cookie_store.rb +35 -0
  27. data/lib/anemone/core.rb +371 -0
  28. data/lib/anemone/exceptions.rb +5 -0
  29. data/lib/anemone/http.rb +144 -0
  30. data/lib/anemone/page.rb +337 -0
  31. data/lib/anemone/page_store.rb +160 -0
  32. data/lib/anemone/storage.rb +34 -0
  33. data/lib/anemone/storage/base.rb +75 -0
  34. data/lib/anemone/storage/exceptions.rb +15 -0
  35. data/lib/anemone/storage/mongodb.rb +89 -0
  36. data/lib/anemone/storage/pstore.rb +50 -0
  37. data/lib/anemone/storage/redis.rb +90 -0
  38. data/lib/anemone/storage/tokyo_cabinet.rb +57 -0
  39. data/lib/anemone/tentacle.rb +40 -0
  40. data/lib/arachni.rb +16 -0
  41. data/lib/audit_store.rb +346 -0
  42. data/lib/component_manager.rb +293 -0
  43. data/lib/component_options.rb +395 -0
  44. data/lib/exceptions.rb +76 -0
  45. data/lib/framework.rb +637 -0
  46. data/lib/http.rb +809 -0
  47. data/lib/issue.rb +302 -0
  48. data/lib/module.rb +4 -0
  49. data/lib/module/auditor.rb +455 -0
  50. data/lib/module/base.rb +188 -0
  51. data/lib/module/element_db.rb +158 -0
  52. data/lib/module/key_filler.rb +87 -0
  53. data/lib/module/manager.rb +87 -0
  54. data/lib/module/output.rb +68 -0
  55. data/lib/module/trainer.rb +240 -0
  56. data/lib/module/utilities.rb +110 -0
  57. data/lib/options.rb +547 -0
  58. data/lib/parser.rb +2 -0
  59. data/lib/parser/auditable.rb +522 -0
  60. data/lib/parser/elements.rb +296 -0
  61. data/lib/parser/page.rb +149 -0
  62. data/lib/parser/parser.rb +717 -0
  63. data/lib/plugin.rb +4 -0
  64. data/lib/plugin/base.rb +110 -0
  65. data/lib/plugin/manager.rb +162 -0
  66. data/lib/report.rb +4 -0
  67. data/lib/report/base.rb +119 -0
  68. data/lib/report/manager.rb +92 -0
  69. data/lib/rpc/xml/client/base.rb +71 -0
  70. data/lib/rpc/xml/client/dispatcher.rb +49 -0
  71. data/lib/rpc/xml/client/instance.rb +88 -0
  72. data/lib/rpc/xml/server/base.rb +90 -0
  73. data/lib/rpc/xml/server/dispatcher.rb +357 -0
  74. data/lib/rpc/xml/server/framework.rb +206 -0
  75. data/lib/rpc/xml/server/instance.rb +191 -0
  76. data/lib/rpc/xml/server/module/manager.rb +46 -0
  77. data/lib/rpc/xml/server/options.rb +124 -0
  78. data/lib/rpc/xml/server/output.rb +299 -0
  79. data/lib/rpc/xml/server/plugin/manager.rb +58 -0
  80. data/lib/ruby.rb +5 -0
  81. data/lib/ruby/object.rb +32 -0
  82. data/lib/ruby/string.rb +74 -0
  83. data/lib/ruby/xmlrpc/server.rb +27 -0
  84. data/lib/spider.rb +200 -0
  85. data/lib/typhoeus/request.rb +91 -0
  86. data/lib/typhoeus/response.rb +34 -0
  87. data/lib/ui/cli/cli.rb +744 -0
  88. data/lib/ui/cli/output.rb +279 -0
  89. data/lib/ui/web/log.rb +82 -0
  90. data/lib/ui/web/output_stream.rb +94 -0
  91. data/lib/ui/web/report_manager.rb +222 -0
  92. data/lib/ui/web/server.rb +903 -0
  93. data/lib/ui/web/server/db/placeholder +0 -0
  94. data/lib/ui/web/server/public/banner.png +0 -0
  95. data/lib/ui/web/server/public/bodybg-small.png +0 -0
  96. data/lib/ui/web/server/public/bodybg.png +0 -0
  97. data/lib/ui/web/server/public/css/smoothness/images/pbar-ani.gif +0 -0
  98. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_flat_0_aaaaaa_40x100.png +0 -0
  99. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_flat_75_ffffff_40x100.png +0 -0
  100. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_55_fbf9ee_1x400.png +0 -0
  101. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_65_ffffff_1x400.png +0 -0
  102. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_75_dadada_1x400.png +0 -0
  103. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_75_e6e6e6_1x400.png +0 -0
  104. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_95_fef1ec_1x400.png +0 -0
  105. data/lib/ui/web/server/public/css/smoothness/images/ui-bg_highlight-soft_75_cccccc_1x100.png +0 -0
  106. data/lib/ui/web/server/public/css/smoothness/images/ui-icons_222222_256x240.png +0 -0
  107. data/lib/ui/web/server/public/css/smoothness/images/ui-icons_2e83ff_256x240.png +0 -0
  108. data/lib/ui/web/server/public/css/smoothness/images/ui-icons_454545_256x240.png +0 -0
  109. data/lib/ui/web/server/public/css/smoothness/images/ui-icons_888888_256x240.png +0 -0
  110. data/lib/ui/web/server/public/css/smoothness/images/ui-icons_cd0a0a_256x240.png +0 -0
  111. data/lib/ui/web/server/public/css/smoothness/jquery-ui-1.8.9.custom.css +573 -0
  112. data/lib/ui/web/server/public/favicon.ico +0 -0
  113. data/lib/ui/web/server/public/footer.jpg +0 -0
  114. data/lib/ui/web/server/public/icons/error.png +0 -0
  115. data/lib/ui/web/server/public/icons/info.png +0 -0
  116. data/lib/ui/web/server/public/icons/ok.png +0 -0
  117. data/lib/ui/web/server/public/icons/status.png +0 -0
  118. data/lib/ui/web/server/public/js/jquery-1.4.4.min.js +167 -0
  119. data/lib/ui/web/server/public/js/jquery-ui-1.8.9.custom.min.js +781 -0
  120. data/lib/ui/web/server/public/logo.png +0 -0
  121. data/lib/ui/web/server/public/nav-left.jpg +0 -0
  122. data/lib/ui/web/server/public/nav-right.jpg +0 -0
  123. data/lib/ui/web/server/public/nav-selected-left.jpg +0 -0
  124. data/lib/ui/web/server/public/nav-selected-right.jpg +0 -0
  125. data/lib/ui/web/server/public/reports/placeholder +1 -0
  126. data/lib/ui/web/server/public/sidebar-bottom.jpg +0 -0
  127. data/lib/ui/web/server/public/sidebar-h4.jpg +0 -0
  128. data/lib/ui/web/server/public/sidebar-top.jpg +0 -0
  129. data/lib/ui/web/server/public/spider.png +0 -0
  130. data/lib/ui/web/server/public/style.css +604 -0
  131. data/lib/ui/web/server/tmp/placeholder +0 -0
  132. data/lib/ui/web/server/views/dispatcher.erb +85 -0
  133. data/lib/ui/web/server/views/dispatcher_error.erb +14 -0
  134. data/lib/ui/web/server/views/error.erb +1 -0
  135. data/lib/ui/web/server/views/flash.erb +18 -0
  136. data/lib/ui/web/server/views/home.erb +14 -0
  137. data/lib/ui/web/server/views/instance.erb +213 -0
  138. data/lib/ui/web/server/views/layout.erb +95 -0
  139. data/lib/ui/web/server/views/log.erb +40 -0
  140. data/lib/ui/web/server/views/modules.erb +71 -0
  141. data/lib/ui/web/server/views/options.erb +23 -0
  142. data/lib/ui/web/server/views/output_results.erb +51 -0
  143. data/lib/ui/web/server/views/plugins.erb +42 -0
  144. data/lib/ui/web/server/views/report_formats.erb +30 -0
  145. data/lib/ui/web/server/views/reports.erb +55 -0
  146. data/lib/ui/web/server/views/settings.erb +120 -0
  147. data/lib/ui/web/server/views/welcome.erb +38 -0
  148. data/lib/ui/xmlrpc/dispatcher_monitor.rb +204 -0
  149. data/lib/ui/xmlrpc/xmlrpc.rb +843 -0
  150. data/logs/placeholder +0 -0
  151. data/metamodules/autothrottle.rb +74 -0
  152. data/metamodules/timeout_notice.rb +118 -0
  153. data/metamodules/uniformity.rb +98 -0
  154. data/modules/audit/code_injection.rb +136 -0
  155. data/modules/audit/code_injection_timing.rb +115 -0
  156. data/modules/audit/code_injection_timing/payloads.txt +4 -0
  157. data/modules/audit/csrf.rb +301 -0
  158. data/modules/audit/ldapi.rb +103 -0
  159. data/modules/audit/ldapi/errors.txt +26 -0
  160. data/modules/audit/os_cmd_injection.rb +103 -0
  161. data/modules/audit/os_cmd_injection/payloads.txt +2 -0
  162. data/modules/audit/os_cmd_injection_timing.rb +104 -0
  163. data/modules/audit/os_cmd_injection_timing/payloads.txt +3 -0
  164. data/modules/audit/path_traversal.rb +141 -0
  165. data/modules/audit/response_splitting.rb +105 -0
  166. data/modules/audit/rfi.rb +193 -0
  167. data/modules/audit/sqli.rb +120 -0
  168. data/modules/audit/sqli/regexp_ids.txt +90 -0
  169. data/modules/audit/sqli_blind_rdiff.rb +321 -0
  170. data/modules/audit/sqli_blind_timing.rb +103 -0
  171. data/modules/audit/sqli_blind_timing/payloads.txt +51 -0
  172. data/modules/audit/trainer.rb +89 -0
  173. data/modules/audit/unvalidated_redirect.rb +90 -0
  174. data/modules/audit/xpath.rb +104 -0
  175. data/modules/audit/xpath/errors.txt +26 -0
  176. data/modules/audit/xss.rb +99 -0
  177. data/modules/audit/xss_event.rb +134 -0
  178. data/modules/audit/xss_path.rb +125 -0
  179. data/modules/audit/xss_script_tag.rb +112 -0
  180. data/modules/audit/xss_tag.rb +112 -0
  181. data/modules/audit/xss_uri.rb +125 -0
  182. data/modules/recon/allowed_methods.rb +104 -0
  183. data/modules/recon/backdoors.rb +131 -0
  184. data/modules/recon/backdoors/filenames.txt +16 -0
  185. data/modules/recon/backup_files.rb +177 -0
  186. data/modules/recon/backup_files/extensions.txt +28 -0
  187. data/modules/recon/common_directories.rb +138 -0
  188. data/modules/recon/common_directories/directories.txt +265 -0
  189. data/modules/recon/common_files.rb +138 -0
  190. data/modules/recon/common_files/filenames.txt +17 -0
  191. data/modules/recon/directory_listing.rb +171 -0
  192. data/modules/recon/grep/captcha.rb +62 -0
  193. data/modules/recon/grep/credit_card.rb +85 -0
  194. data/modules/recon/grep/cvs_svn_users.rb +73 -0
  195. data/modules/recon/grep/emails.rb +59 -0
  196. data/modules/recon/grep/html_objects.rb +53 -0
  197. data/modules/recon/grep/private_ip.rb +54 -0
  198. data/modules/recon/grep/ssn.rb +53 -0
  199. data/modules/recon/htaccess_limit.rb +82 -0
  200. data/modules/recon/http_put.rb +95 -0
  201. data/modules/recon/interesting_responses.rb +118 -0
  202. data/modules/recon/unencrypted_password_forms.rb +119 -0
  203. data/modules/recon/webdav.rb +126 -0
  204. data/modules/recon/xst.rb +107 -0
  205. data/path_extractors/anchors.rb +35 -0
  206. data/path_extractors/forms.rb +35 -0
  207. data/path_extractors/frames.rb +38 -0
  208. data/path_extractors/generic.rb +39 -0
  209. data/path_extractors/links.rb +35 -0
  210. data/path_extractors/meta_refresh.rb +39 -0
  211. data/path_extractors/scripts.rb +37 -0
  212. data/path_extractors/sitemap.rb +31 -0
  213. data/plugins/autologin.rb +137 -0
  214. data/plugins/content_types.rb +90 -0
  215. data/plugins/cookie_collector.rb +99 -0
  216. data/plugins/form_dicattack.rb +185 -0
  217. data/plugins/healthmap.rb +94 -0
  218. data/plugins/http_dicattack.rb +133 -0
  219. data/plugins/metamodules.rb +118 -0
  220. data/plugins/proxy.rb +248 -0
  221. data/plugins/proxy/server.rb +66 -0
  222. data/plugins/waf_detector.rb +184 -0
  223. data/profiles/comprehensive.afp +74 -0
  224. data/profiles/full.afp +75 -0
  225. data/reports/afr.rb +59 -0
  226. data/reports/ap.rb +55 -0
  227. data/reports/html.rb +179 -0
  228. data/reports/html/default.erb +967 -0
  229. data/reports/metareport.rb +139 -0
  230. data/reports/metareport/arachni_metareport.rb +174 -0
  231. data/reports/plugin_formatters/html/content_types.rb +82 -0
  232. data/reports/plugin_formatters/html/cookie_collector.rb +66 -0
  233. data/reports/plugin_formatters/html/form_dicattack.rb +54 -0
  234. data/reports/plugin_formatters/html/healthmap.rb +76 -0
  235. data/reports/plugin_formatters/html/http_dicattack.rb +54 -0
  236. data/reports/plugin_formatters/html/metaformatters/timeout_notice.rb +65 -0
  237. data/reports/plugin_formatters/html/metaformatters/uniformity.rb +71 -0
  238. data/reports/plugin_formatters/html/metamodules.rb +93 -0
  239. data/reports/plugin_formatters/html/waf_detector.rb +54 -0
  240. data/reports/plugin_formatters/stdout/content_types.rb +73 -0
  241. data/reports/plugin_formatters/stdout/cookie_collector.rb +61 -0
  242. data/reports/plugin_formatters/stdout/form_dicattack.rb +52 -0
  243. data/reports/plugin_formatters/stdout/healthmap.rb +72 -0
  244. data/reports/plugin_formatters/stdout/http_dicattack.rb +53 -0
  245. data/reports/plugin_formatters/stdout/metaformatters/timeout_notice.rb +55 -0
  246. data/reports/plugin_formatters/stdout/metaformatters/uniformity.rb +68 -0
  247. data/reports/plugin_formatters/stdout/metamodules.rb +89 -0
  248. data/reports/plugin_formatters/stdout/waf_detector.rb +48 -0
  249. data/reports/plugin_formatters/xml/content_types.rb +91 -0
  250. data/reports/plugin_formatters/xml/cookie_collector.rb +70 -0
  251. data/reports/plugin_formatters/xml/form_dicattack.rb +57 -0
  252. data/reports/plugin_formatters/xml/healthmap.rb +82 -0
  253. data/reports/plugin_formatters/xml/http_dicattack.rb +57 -0
  254. data/reports/plugin_formatters/xml/metaformatters/timeout_notice.rb +67 -0
  255. data/reports/plugin_formatters/xml/metaformatters/uniformity.rb +82 -0
  256. data/reports/plugin_formatters/xml/metamodules.rb +91 -0
  257. data/reports/plugin_formatters/xml/waf_detector.rb +58 -0
  258. data/reports/stdout.rb +182 -0
  259. data/reports/txt.rb +77 -0
  260. data/reports/xml.rb +231 -0
  261. data/reports/xml/buffer.rb +98 -0
  262. metadata +516 -0
@@ -0,0 +1,160 @@
1
+ require 'forwardable'
2
+
3
module Anemone
  #
  # Hash-like collection of Pages. Entries are keyed by the String form of the
  # index they are stored under (typically a URI) and kept in a pluggable
  # storage object (a plain Hash by default).
  #
  class PageStore
    extend Forwardable

    # +values+ is intentionally not delegated: it is implemented on top of
    # +each+ below, so the storage object does not have to provide it.
    def_delegators :@storage, :keys, :size, :each

    def initialize(storage = {})
      @storage = storage
    end

    # We typically index the hash with a URI,
    # but convert it to a String for easier retrieval
    def [](index)
      @storage[index.to_s]
    end

    def []=(index, other)
      @storage[index.to_s] = other
    end

    def delete(key)
      @storage.delete key.to_s
    end

    def has_key?(key)
      @storage.has_key? key.to_s
    end

    # Yields every stored value, without its key.
    def each_value
      each { |key, value| yield value }
    end

    # Returns an Array with every stored value.
    def values
      result = []
      each { |key, value| result << value }
      result
    end

    # Stores a fresh Page for +key+, replacing whatever was there.
    def touch_key(key)
      self[key] = Page.new(key)
    end

    # Stores a fresh Page for each of +keys+ with a single merge! call.
    def touch_keys(keys)
      @storage.merge! keys.inject({}) { |h, k| h[k.to_s] = Page.new(k); h }
    end

    # Does this PageStore contain the specified URL?
    # HTTP and HTTPS versions of a URL are considered to be the same page.
    def has_page?(url)
      schemes = %w(http https)
      if schemes.include? url.scheme
        # work on a copy so the caller's URI keeps its scheme
        u = url.dup
        return schemes.any? { |s| u.scheme = s; has_key?(u) }
      end

      has_key? url
    end

    #
    # Use a breadth-first search to calculate the single-source
    # shortest paths from *root* to all pages in the PageStore
    #
    # Raises a RuntimeError when *root* is not in the store. Returns self.
    #
    def shortest_paths!(root)
      root = URI(root) if root.is_a?(String)
      raise "Root node not found" if !has_key?(root)

      q = Queue.new

      q.enq root
      root_page = self[root]
      root_page.depth = 0
      root_page.visited = true
      # write the page back: the storage may have handed out a copy
      self[root] = root_page
      while !q.empty?
        page = self[q.deq]
        page.links.each do |u|
          begin
            link = self[u]
            next if link.nil? || !link.fetched? || link.visited

            q << u unless link.redirect?
            link.visited = true
            link.depth = page.depth + 1
            self[u] = link

            if link.redirect?
              # follow the redirect at the same depth: +redo+ re-runs this
              # block body with +u+ now pointing at the redirect target
              u = link.redirect_to
              redo
            end
          end
        end
      end

      self
    end

    #
    # Removes all Pages from storage where redirect? is true
    #
    def uniq!
      # collect first, delete afterwards, so the storage is never modified
      # while it is being iterated
      redirects = []
      each_value { |page| redirects << page.url if page.redirect? }
      redirects.each { |url| delete url }
      self
    end

    #
    # If given a single URL (as a String or URI), returns an Array of Pages which link to that URL
    # If given an Array of URLs, returns a Hash (URI => [Page, Page...]) of Pages linking to those URLs
    #
    # URLs that cannot be converted to a URI are ignored; a single such URL
    # yields an empty Array. The argument itself is never modified.
    #
    def pages_linking_to(urls)
      single  = !urls.is_a?(Array)
      targets = (single ? [urls] : urls).map { |url| to_uri(url) }.compact

      links = {}
      targets.each { |url| links[url] = [] }
      values.each do |page|
        links.each { |url, pages| pages << page if page.links.include?(url) }
      end

      single ? links.fetch(targets.first, []) : links
    end

    #
    # If given a single URL (as a String or URI), returns an Array of URLs which link to that URL
    # If given an Array of URLs, returns a Hash (URI => [URI, URI...]) of URLs linking to those URLs
    #
    def urls_linking_to(urls)
      linking = pages_linking_to(urls)
      # single URL: pages_linking_to already returned a flat Array of Pages
      return linking.map { |page| page.url } unless urls.is_a?(Array)

      linking.each { |url, pages| linking[url] = pages.map { |page| page.url } }
      linking
    end

    private

    # Returns +url+ as a URI, or nil when it cannot be converted to one.
    def to_uri(url)
      url.is_a?(URI) ? url : URI(url)
    rescue StandardError
      nil
    end

  end
end
@@ -0,0 +1,34 @@
1
module Anemone
  #
  # Factory methods, one per storage back-end. Each adapter's source file is
  # only loaded when its factory method is called.
  #
  module Storage

    # Returns a new Hash (arguments are handed to Hash.new) that also answers
    # to a no-op +close+, for compatibility with Storage::Base.
    def self.Hash(*args)
      store = Hash.new(*args)
      def store.close; end
      store
    end

    # Loads the PStore adapter and instantiates it with +args+.
    def self.PStore(*args)
      lib_dir = Arachni::Options.instance.dir['lib']
      require lib_dir + 'anemone/storage/pstore'
      self::PStore.new(*args)
    end

    # Loads the Tokyo Cabinet adapter and instantiates it for +file+.
    def self.TokyoCabinet(file = 'anemone.tch')
      lib_dir = Arachni::Options.instance.dir['lib']
      require lib_dir + 'anemone/storage/tokyo_cabinet'
      self::TokyoCabinet.new(file)
    end

    # Loads the MongoDB adapter and instantiates it for +collection_name+.
    # Without +mongo_db+ a new connection to the 'anemone' database is opened.
    # Raises a RuntimeError unless the database is a Mongo::DB.
    def self.MongoDB(mongo_db = nil, collection_name = 'pages')
      lib_dir = Arachni::Options.instance.dir['lib']
      require lib_dir + 'anemone/storage/mongodb'
      mongo_db = Mongo::Connection.new.db('anemone') unless mongo_db
      unless mongo_db.is_a?(Mongo::DB)
        raise "First argument must be an instance of Mongo::DB"
      end
      self::MongoDB.new(mongo_db, collection_name)
    end

    # Loads the Redis adapter and instantiates it with +opts+.
    def self.Redis(opts = {})
      lib_dir = Arachni::Options.instance.dir['lib']
      require lib_dir + 'anemone/storage/redis'
      self::Redis.new(opts)
    end

  end
end
+ end
@@ -0,0 +1,75 @@
1
+ require Arachni::Options.instance.dir['lib'] + 'anemone/storage/exceptions'
2
+
3
module Anemone
  module Storage
    #
    # Wraps a storage adapter: every call is forwarded to the adapter and any
    # StandardError it raises is re-raised as one of the Storage error
    # classes, carrying the original message.
    #
    class Base

      # adapter:: object implementing every public method of this class.
      #
      # Raises a RuntimeError naming the first method the adapter lacks.
      def initialize(adapter)
        @adap = adapter

        # verify adapter conforms to this class's methods (only the ones
        # declared here, not everything inherited from Object)
        Base.public_instance_methods(false).each do |method|
          unless @adap.respond_to?(method.to_sym)
            raise "Storage adapter must support method #{method}"
          end
        end
      end

      # Raises RetrievalError when the adapter fails.
      def [](key)
        @adap[key]
      rescue StandardError => e
        raise RetrievalError, e.message
      end

      # Raises InsertionError when the adapter fails.
      def []=(key, value)
        @adap[key] = value
      rescue StandardError => e
        raise InsertionError, e.message
      end

      # Raises DeletionError when the adapter fails.
      def delete(key)
        @adap.delete(key)
      rescue StandardError => e
        raise DeletionError, e.message
      end

      # Yields each key/value pair of the adapter.
      # Raises GenericError when the adapter (or the given block) fails.
      def each
        @adap.each { |k, v| yield k, v }
      rescue StandardError => e
        raise GenericError, e.message
      end

      # Raises GenericError when the adapter fails.
      def merge!(hash)
        @adap.merge!(hash)
      rescue StandardError => e
        raise GenericError, e.message
      end

      # Raises CloseError when the adapter fails.
      def close
        @adap.close
      rescue StandardError => e
        raise CloseError, e.message
      end

      # Raises GenericError when the adapter fails.
      def size
        @adap.size
      rescue StandardError => e
        raise GenericError, e.message
      end

      # Raises GenericError when the adapter fails.
      def keys
        @adap.keys
      rescue StandardError => e
        raise GenericError, e.message
      end

      # Raises GenericError when the adapter fails.
      def has_key?(key)
        @adap.has_key?(key)
      rescue StandardError => e
        raise GenericError, e.message
      end

    end
  end
end
@@ -0,0 +1,15 @@
1
module Anemone
  module Storage

    # Error classes raised by the storage layer. They all derive from Error,
    # which is resolved from the enclosing namespace (defined outside this
    # file).

    # Failure that does not fit one of the more specific classes below.
    class GenericError < Error; end

    # NOTE(review): presumably for failures to reach the storage back-end;
    # confirm against the adapters that raise it.
    class ConnectionError < Error; end

    # Reading a value failed.
    class RetrievalError < Error; end

    # Writing a value failed.
    class InsertionError < Error; end

    # Removing a value failed. Storage::Base#delete raises this class, so it
    # has to exist for a failed delete to be reported as a storage error
    # rather than as a NameError.
    class DeletionError < Error; end

    # Closing the storage failed.
    class CloseError < Error; end

  end
end
@@ -0,0 +1,89 @@
1
+ begin
2
+ require 'mongo'
3
+ rescue LoadError
4
+ puts "You need the mongo gem to use Anemone::Storage::MongoDB"
5
+ exit
6
+ end
7
+
8
module Anemone
  module Storage
    #
    # Storage adapter that keeps every page as one document of a MongoDB
    # collection, looked up through the document's 'url' field.
    #
    class MongoDB

      # Page hash fields that are wrapped in BSON::Binary when written and
      # converted back with +to_s+ when read.
      BINARY_FIELDS = %w(body headers data)

      # Empties the collection and indexes it on 'url'.
      def initialize(mongo_db, collection_name)
        @db = mongo_db
        @collection = @db[collection_name]
        @collection.remove
        @collection.create_index 'url'
      end

      # Returns the Page stored for +url+, or nil when there is none.
      def [](url)
        doc = @collection.find_one('url' => url.to_s)
        load_page(doc) if doc
      end

      # Upserts +page+; the document is matched on the page's own URL.
      def []=(url, page)
        doc = page.to_hash
        BINARY_FIELDS.each do |field|
          raw = doc[field]
          doc[field] = BSON::Binary.new(raw) unless raw.nil?
        end
        @collection.update({ 'url' => page.url.to_s }, doc, :upsert => true)
      end

      # Removes the document for +url+ and returns the Page it held (or nil).
      def delete(url)
        removed = self[url]
        @collection.remove('url' => url.to_s)
        removed
      end

      # Yields the URL (as a String) and the Page of every stored document.
      def each
        @collection.find do |cursor|
          cursor.each do |doc|
            loaded = load_page(doc)
            yield loaded.url.to_s, loaded
          end
        end
      end

      # Stores every key/value pair of +hash+; returns self.
      def merge!(hash)
        hash.each { |url, page| self[url] = page }
        self
      end

      def size
        @collection.count
      end

      # Returns the URLs of all stored pages as Strings.
      def keys
        urls = []
        each { |url, _page| urls << url.to_s }
        urls
      end

      def has_key?(url)
        @collection.find_one('url' => url.to_s) ? true : false
      end

      def close
        @db.connection.close
      end

      private

      # Turns a stored document back into a Page. The binary fields are
      # converted in place on +hash+ before it is handed to Page.from_hash.
      def load_page(hash)
        BINARY_FIELDS.each { |field| hash[field] = hash[field].to_s }
        Page.from_hash(hash)
      end

    end
  end
end
89
+
@@ -0,0 +1,50 @@
1
+ require 'pstore'
2
+ require 'forwardable'
3
+
4
module Anemone
  module Storage
    #
    # Storage adapter backed by a ::PStore file. The set of stored keys is
    # tracked in an in-memory Hash, which answers has_key?, keys and size
    # without opening the file.
    #
    class PStore
      extend Forwardable

      def_delegators :@keys, :has_key?, :keys, :size

      # file:: path of the PStore file; an existing file at that path is
      #        deleted so the store starts out empty.
      def initialize(file)
        # File.exists? was removed in Ruby 3.2; File.exist? works everywhere
        File.delete(file) if File.exist?(file)
        @store = ::PStore.new(file)
        @keys = {}
      end

      def [](key)
        @store.transaction { |s| s[key] }
      end

      def []=(key,value)
        @keys[key] = nil
        @store.transaction { |s| s[key] = value }
      end

      # Removes +key+ and returns the value it held.
      def delete(key)
        @keys.delete(key)
        @store.transaction { |s| s.delete key}
      end

      # Yields every key with its value, reading each value in a transaction
      # of its own.
      def each
        @keys.each_key do |key|
          value = nil
          @store.transaction { |s| value = s[key] }
          yield key, value
        end
      end

      # Stores all pairs of +hash+ within a single transaction; returns self.
      def merge!(hash)
        @store.transaction do |s|
          hash.each { |key, value| s[key] = value; @keys[key] = nil }
        end
        self
      end

      # Nothing to release: every operation opens its own transaction.
      def close; end

    end
  end
end
@@ -0,0 +1,90 @@
1
+ require 'redis'
2
+
3
module Anemone
  module Storage
    #
    # Storage adapter that keeps every page as a Redis hash under the key
    # "<key_prefix>:pages:<page key>".
    #
    class Redis

      # Page hash fields that are run through Marshal on the way in and out.
      MARSHAL_FIELDS = %w(links visited fetched)

      # opts:: handed to ::Redis.new; +:key_prefix+ (default 'anemone')
      #        namespaces the keys used by this adapter.
      #
      # All page entries already stored under the prefix are removed.
      def initialize(opts = {})
        @redis = ::Redis.new(opts)
        @key_prefix = opts[:key_prefix] || 'anemone'
        # delete by raw key, so stale entries never have to be deserialized
        @redis.keys(page_key('*')).each { |rkey| @redis.del(rkey) }
      end

      # Returns the Page stored for +key+, or nil when there is none.
      def [](key)
        rget(page_key(key))
      end

      def []=(key, value)
        rkey = page_key(key)
        hash = value.to_hash
        MARSHAL_FIELDS.each do |field|
          hash[field] = Marshal.dump(hash[field])
        end
        hash.each do |field, field_value|
          @redis.hset(rkey, field, field_value)
        end
      end

      # Removes the entry for +key+ and returns the Page it held (or nil).
      def delete(key)
        page = self[key]
        @redis.del(page_key(key))
        page
      end

      # Yields the URL (as a String) and the Page of every stored entry.
      def each
        @redis.keys(page_key('*')).each do |rkey|
          page = rget(rkey)
          # the entry may have disappeared since the key list was fetched
          next if page.nil?
          yield page.url.to_s, page
        end
      end

      # Stores every key/value pair of +hash+; returns self.
      def merge!(hash)
        hash.each { |key, value| self[key] = value }
        self
      end

      def size
        @redis.keys(page_key('*')).size
      end

      # Returns the URLs of all stored pages as Strings.
      def keys
        keys = []
        self.each { |k, v| keys << k.to_s }
        keys
      end

      # Returns true or false. Prefers the client's boolean +exists?+ and
      # otherwise normalizes +exists+, which may answer with a count: 0 would
      # count as true in Ruby.
      def has_key?(key)
        rkey = page_key(key)
        return @redis.exists?(rkey) if @redis.respond_to?(:exists?)

        found = @redis.exists(rkey)
        found.is_a?(Integer) ? found > 0 : !!found
      end

      def close
        @redis.quit
      end

      private

      # Redis key under which the page for +key+ is stored.
      def page_key(key)
        "#{@key_prefix}:pages:#{key.to_s}"
      end

      # Rebuilds a Page from a stored hash.
      # NOTE(review): Marshal.load must only ever see data this adapter wrote
      # itself; it is unsafe if others can write to the same Redis keys.
      def load_value(hash)
        MARSHAL_FIELDS.each do |field|
          unless hash[field].nil? || hash[field] == ''
            hash[field] = Marshal.load(hash[field])
          end
        end
        Page.from_hash(hash)
      end

      # Loads the Page stored under the raw Redis key +rkey+. HGETALL answers
      # with an empty hash for a missing key, which is reported as nil.
      def rget(rkey)
        hash = @redis.hgetall(rkey)
        return nil if hash.nil? || hash.empty?

        load_value(hash)
      end

    end
  end
end