arachni 0.2.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/ACKNOWLEDGMENTS.md +14 -0
- data/AUTHORS.md +6 -0
- data/CHANGELOG.md +162 -0
- data/CONTRIBUTORS.md +10 -0
- data/EXPLOITATION.md +429 -0
- data/HACKING.md +101 -0
- data/LICENSE.md +341 -0
- data/README.md +350 -0
- data/Rakefile +86 -0
- data/bin/arachni +22 -0
- data/bin/arachni_web +77 -0
- data/bin/arachni_xmlrpc +21 -0
- data/bin/arachni_xmlrpcd +82 -0
- data/bin/arachni_xmlrpcd_monitor +74 -0
- data/conf/README.webui.yaml.txt +44 -0
- data/conf/webui.yaml +11 -0
- data/external/metasploit/LICENSE +24 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_exec.rb +142 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_path_traversal.rb +113 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_php_eval.rb +150 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_php_include.rb +141 -0
- data/external/metasploit/modules/exploits/unix/webapp/arachni_sqlmap.rb +92 -0
- data/external/metasploit/plugins/arachni.rb +536 -0
- data/getoptslong.rb +241 -0
- data/lib/anemone.rb +2 -0
- data/lib/anemone/cookie_store.rb +35 -0
- data/lib/anemone/core.rb +371 -0
- data/lib/anemone/exceptions.rb +5 -0
- data/lib/anemone/http.rb +144 -0
- data/lib/anemone/page.rb +337 -0
- data/lib/anemone/page_store.rb +160 -0
- data/lib/anemone/storage.rb +34 -0
- data/lib/anemone/storage/base.rb +75 -0
- data/lib/anemone/storage/exceptions.rb +15 -0
- data/lib/anemone/storage/mongodb.rb +89 -0
- data/lib/anemone/storage/pstore.rb +50 -0
- data/lib/anemone/storage/redis.rb +90 -0
- data/lib/anemone/storage/tokyo_cabinet.rb +57 -0
- data/lib/anemone/tentacle.rb +40 -0
- data/lib/arachni.rb +16 -0
- data/lib/audit_store.rb +346 -0
- data/lib/component_manager.rb +293 -0
- data/lib/component_options.rb +395 -0
- data/lib/exceptions.rb +76 -0
- data/lib/framework.rb +637 -0
- data/lib/http.rb +809 -0
- data/lib/issue.rb +302 -0
- data/lib/module.rb +4 -0
- data/lib/module/auditor.rb +455 -0
- data/lib/module/base.rb +188 -0
- data/lib/module/element_db.rb +158 -0
- data/lib/module/key_filler.rb +87 -0
- data/lib/module/manager.rb +87 -0
- data/lib/module/output.rb +68 -0
- data/lib/module/trainer.rb +240 -0
- data/lib/module/utilities.rb +110 -0
- data/lib/options.rb +547 -0
- data/lib/parser.rb +2 -0
- data/lib/parser/auditable.rb +522 -0
- data/lib/parser/elements.rb +296 -0
- data/lib/parser/page.rb +149 -0
- data/lib/parser/parser.rb +717 -0
- data/lib/plugin.rb +4 -0
- data/lib/plugin/base.rb +110 -0
- data/lib/plugin/manager.rb +162 -0
- data/lib/report.rb +4 -0
- data/lib/report/base.rb +119 -0
- data/lib/report/manager.rb +92 -0
- data/lib/rpc/xml/client/base.rb +71 -0
- data/lib/rpc/xml/client/dispatcher.rb +49 -0
- data/lib/rpc/xml/client/instance.rb +88 -0
- data/lib/rpc/xml/server/base.rb +90 -0
- data/lib/rpc/xml/server/dispatcher.rb +357 -0
- data/lib/rpc/xml/server/framework.rb +206 -0
- data/lib/rpc/xml/server/instance.rb +191 -0
- data/lib/rpc/xml/server/module/manager.rb +46 -0
- data/lib/rpc/xml/server/options.rb +124 -0
- data/lib/rpc/xml/server/output.rb +299 -0
- data/lib/rpc/xml/server/plugin/manager.rb +58 -0
- data/lib/ruby.rb +5 -0
- data/lib/ruby/object.rb +32 -0
- data/lib/ruby/string.rb +74 -0
- data/lib/ruby/xmlrpc/server.rb +27 -0
- data/lib/spider.rb +200 -0
- data/lib/typhoeus/request.rb +91 -0
- data/lib/typhoeus/response.rb +34 -0
- data/lib/ui/cli/cli.rb +744 -0
- data/lib/ui/cli/output.rb +279 -0
- data/lib/ui/web/log.rb +82 -0
- data/lib/ui/web/output_stream.rb +94 -0
- data/lib/ui/web/report_manager.rb +222 -0
- data/lib/ui/web/server.rb +903 -0
- data/lib/ui/web/server/db/placeholder +0 -0
- data/lib/ui/web/server/public/banner.png +0 -0
- data/lib/ui/web/server/public/bodybg-small.png +0 -0
- data/lib/ui/web/server/public/bodybg.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/pbar-ani.gif +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_flat_0_aaaaaa_40x100.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_flat_75_ffffff_40x100.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_55_fbf9ee_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_65_ffffff_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_75_dadada_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_75_e6e6e6_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_glass_95_fef1ec_1x400.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-bg_highlight-soft_75_cccccc_1x100.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_222222_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_2e83ff_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_454545_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_888888_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/images/ui-icons_cd0a0a_256x240.png +0 -0
- data/lib/ui/web/server/public/css/smoothness/jquery-ui-1.8.9.custom.css +573 -0
- data/lib/ui/web/server/public/favicon.ico +0 -0
- data/lib/ui/web/server/public/footer.jpg +0 -0
- data/lib/ui/web/server/public/icons/error.png +0 -0
- data/lib/ui/web/server/public/icons/info.png +0 -0
- data/lib/ui/web/server/public/icons/ok.png +0 -0
- data/lib/ui/web/server/public/icons/status.png +0 -0
- data/lib/ui/web/server/public/js/jquery-1.4.4.min.js +167 -0
- data/lib/ui/web/server/public/js/jquery-ui-1.8.9.custom.min.js +781 -0
- data/lib/ui/web/server/public/logo.png +0 -0
- data/lib/ui/web/server/public/nav-left.jpg +0 -0
- data/lib/ui/web/server/public/nav-right.jpg +0 -0
- data/lib/ui/web/server/public/nav-selected-left.jpg +0 -0
- data/lib/ui/web/server/public/nav-selected-right.jpg +0 -0
- data/lib/ui/web/server/public/reports/placeholder +1 -0
- data/lib/ui/web/server/public/sidebar-bottom.jpg +0 -0
- data/lib/ui/web/server/public/sidebar-h4.jpg +0 -0
- data/lib/ui/web/server/public/sidebar-top.jpg +0 -0
- data/lib/ui/web/server/public/spider.png +0 -0
- data/lib/ui/web/server/public/style.css +604 -0
- data/lib/ui/web/server/tmp/placeholder +0 -0
- data/lib/ui/web/server/views/dispatcher.erb +85 -0
- data/lib/ui/web/server/views/dispatcher_error.erb +14 -0
- data/lib/ui/web/server/views/error.erb +1 -0
- data/lib/ui/web/server/views/flash.erb +18 -0
- data/lib/ui/web/server/views/home.erb +14 -0
- data/lib/ui/web/server/views/instance.erb +213 -0
- data/lib/ui/web/server/views/layout.erb +95 -0
- data/lib/ui/web/server/views/log.erb +40 -0
- data/lib/ui/web/server/views/modules.erb +71 -0
- data/lib/ui/web/server/views/options.erb +23 -0
- data/lib/ui/web/server/views/output_results.erb +51 -0
- data/lib/ui/web/server/views/plugins.erb +42 -0
- data/lib/ui/web/server/views/report_formats.erb +30 -0
- data/lib/ui/web/server/views/reports.erb +55 -0
- data/lib/ui/web/server/views/settings.erb +120 -0
- data/lib/ui/web/server/views/welcome.erb +38 -0
- data/lib/ui/xmlrpc/dispatcher_monitor.rb +204 -0
- data/lib/ui/xmlrpc/xmlrpc.rb +843 -0
- data/logs/placeholder +0 -0
- data/metamodules/autothrottle.rb +74 -0
- data/metamodules/timeout_notice.rb +118 -0
- data/metamodules/uniformity.rb +98 -0
- data/modules/audit/code_injection.rb +136 -0
- data/modules/audit/code_injection_timing.rb +115 -0
- data/modules/audit/code_injection_timing/payloads.txt +4 -0
- data/modules/audit/csrf.rb +301 -0
- data/modules/audit/ldapi.rb +103 -0
- data/modules/audit/ldapi/errors.txt +26 -0
- data/modules/audit/os_cmd_injection.rb +103 -0
- data/modules/audit/os_cmd_injection/payloads.txt +2 -0
- data/modules/audit/os_cmd_injection_timing.rb +104 -0
- data/modules/audit/os_cmd_injection_timing/payloads.txt +3 -0
- data/modules/audit/path_traversal.rb +141 -0
- data/modules/audit/response_splitting.rb +105 -0
- data/modules/audit/rfi.rb +193 -0
- data/modules/audit/sqli.rb +120 -0
- data/modules/audit/sqli/regexp_ids.txt +90 -0
- data/modules/audit/sqli_blind_rdiff.rb +321 -0
- data/modules/audit/sqli_blind_timing.rb +103 -0
- data/modules/audit/sqli_blind_timing/payloads.txt +51 -0
- data/modules/audit/trainer.rb +89 -0
- data/modules/audit/unvalidated_redirect.rb +90 -0
- data/modules/audit/xpath.rb +104 -0
- data/modules/audit/xpath/errors.txt +26 -0
- data/modules/audit/xss.rb +99 -0
- data/modules/audit/xss_event.rb +134 -0
- data/modules/audit/xss_path.rb +125 -0
- data/modules/audit/xss_script_tag.rb +112 -0
- data/modules/audit/xss_tag.rb +112 -0
- data/modules/audit/xss_uri.rb +125 -0
- data/modules/recon/allowed_methods.rb +104 -0
- data/modules/recon/backdoors.rb +131 -0
- data/modules/recon/backdoors/filenames.txt +16 -0
- data/modules/recon/backup_files.rb +177 -0
- data/modules/recon/backup_files/extensions.txt +28 -0
- data/modules/recon/common_directories.rb +138 -0
- data/modules/recon/common_directories/directories.txt +265 -0
- data/modules/recon/common_files.rb +138 -0
- data/modules/recon/common_files/filenames.txt +17 -0
- data/modules/recon/directory_listing.rb +171 -0
- data/modules/recon/grep/captcha.rb +62 -0
- data/modules/recon/grep/credit_card.rb +85 -0
- data/modules/recon/grep/cvs_svn_users.rb +73 -0
- data/modules/recon/grep/emails.rb +59 -0
- data/modules/recon/grep/html_objects.rb +53 -0
- data/modules/recon/grep/private_ip.rb +54 -0
- data/modules/recon/grep/ssn.rb +53 -0
- data/modules/recon/htaccess_limit.rb +82 -0
- data/modules/recon/http_put.rb +95 -0
- data/modules/recon/interesting_responses.rb +118 -0
- data/modules/recon/unencrypted_password_forms.rb +119 -0
- data/modules/recon/webdav.rb +126 -0
- data/modules/recon/xst.rb +107 -0
- data/path_extractors/anchors.rb +35 -0
- data/path_extractors/forms.rb +35 -0
- data/path_extractors/frames.rb +38 -0
- data/path_extractors/generic.rb +39 -0
- data/path_extractors/links.rb +35 -0
- data/path_extractors/meta_refresh.rb +39 -0
- data/path_extractors/scripts.rb +37 -0
- data/path_extractors/sitemap.rb +31 -0
- data/plugins/autologin.rb +137 -0
- data/plugins/content_types.rb +90 -0
- data/plugins/cookie_collector.rb +99 -0
- data/plugins/form_dicattack.rb +185 -0
- data/plugins/healthmap.rb +94 -0
- data/plugins/http_dicattack.rb +133 -0
- data/plugins/metamodules.rb +118 -0
- data/plugins/proxy.rb +248 -0
- data/plugins/proxy/server.rb +66 -0
- data/plugins/waf_detector.rb +184 -0
- data/profiles/comprehensive.afp +74 -0
- data/profiles/full.afp +75 -0
- data/reports/afr.rb +59 -0
- data/reports/ap.rb +55 -0
- data/reports/html.rb +179 -0
- data/reports/html/default.erb +967 -0
- data/reports/metareport.rb +139 -0
- data/reports/metareport/arachni_metareport.rb +174 -0
- data/reports/plugin_formatters/html/content_types.rb +82 -0
- data/reports/plugin_formatters/html/cookie_collector.rb +66 -0
- data/reports/plugin_formatters/html/form_dicattack.rb +54 -0
- data/reports/plugin_formatters/html/healthmap.rb +76 -0
- data/reports/plugin_formatters/html/http_dicattack.rb +54 -0
- data/reports/plugin_formatters/html/metaformatters/timeout_notice.rb +65 -0
- data/reports/plugin_formatters/html/metaformatters/uniformity.rb +71 -0
- data/reports/plugin_formatters/html/metamodules.rb +93 -0
- data/reports/plugin_formatters/html/waf_detector.rb +54 -0
- data/reports/plugin_formatters/stdout/content_types.rb +73 -0
- data/reports/plugin_formatters/stdout/cookie_collector.rb +61 -0
- data/reports/plugin_formatters/stdout/form_dicattack.rb +52 -0
- data/reports/plugin_formatters/stdout/healthmap.rb +72 -0
- data/reports/plugin_formatters/stdout/http_dicattack.rb +53 -0
- data/reports/plugin_formatters/stdout/metaformatters/timeout_notice.rb +55 -0
- data/reports/plugin_formatters/stdout/metaformatters/uniformity.rb +68 -0
- data/reports/plugin_formatters/stdout/metamodules.rb +89 -0
- data/reports/plugin_formatters/stdout/waf_detector.rb +48 -0
- data/reports/plugin_formatters/xml/content_types.rb +91 -0
- data/reports/plugin_formatters/xml/cookie_collector.rb +70 -0
- data/reports/plugin_formatters/xml/form_dicattack.rb +57 -0
- data/reports/plugin_formatters/xml/healthmap.rb +82 -0
- data/reports/plugin_formatters/xml/http_dicattack.rb +57 -0
- data/reports/plugin_formatters/xml/metaformatters/timeout_notice.rb +67 -0
- data/reports/plugin_formatters/xml/metaformatters/uniformity.rb +82 -0
- data/reports/plugin_formatters/xml/metamodules.rb +91 -0
- data/reports/plugin_formatters/xml/waf_detector.rb +58 -0
- data/reports/stdout.rb +182 -0
- data/reports/txt.rb +77 -0
- data/reports/xml.rb +231 -0
- data/reports/xml/buffer.rb +98 -0
- metadata +516 -0
@@ -0,0 +1,160 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
|
3
|
+
module Anemone
  #
  # A URL-indexed collection of pages layered on top of a storage backend.
  #
  # The backend is any object exposing a Hash-like interface ([], []=, delete,
  # has_key?, each, keys, size, merge!); a plain Hash is used by default.
  # Keys are always converted to Strings before they reach the backend, so
  # pages can be looked up with either a URI or a String.
  #
  class PageStore
    extend Forwardable

    # +values+ is implemented below in terms of +each+, so it is not delegated.
    def_delegators :@storage, :keys, :size, :each

    #
    # @param storage [#[], #[]=, #delete, #has_key?, #each, #merge!]
    #   backend that actually holds the pages
    #
    def initialize(storage = {})
      @storage = storage
    end

    # We typically index the hash with a URI,
    # but convert it to a String for easier retrieval
    def [](index)
      @storage[index.to_s]
    end

    # Stores +other+ under the String form of +index+.
    def []=(index, other)
      @storage[index.to_s] = other
    end

    # Removes the entry stored under the String form of +key+.
    def delete(key)
      @storage.delete key.to_s
    end

    # Is there an entry stored under the String form of +key+?
    def has_key?(key)
      @storage.has_key? key.to_s
    end

    # Yields every stored page, without its key.
    def each_value
      each { |_key, value| yield value }
    end

    # Returns an Array with every stored page.
    def values
      result = []
      each { |_key, value| result << value }
      result
    end

    # Stores a fresh Page for +key+, replacing any existing entry.
    def touch_key(key)
      self[key] = Page.new(key)
    end

    # Stores a fresh Page for each of +keys+ with a single backend merge.
    def touch_keys(keys)
      @storage.merge! keys.inject({}) { |h, k| h[k.to_s] = Page.new(k); h }
    end

    # Does this PageStore contain the specified URL?
    # HTTP and HTTPS versions of a URL are considered to be the same page.
    def has_page?(url)
      schemes = %w(http https)
      if schemes.include? url.scheme
        # work on a copy so the caller's URI keeps its scheme
        u = url.dup
        return schemes.any? { |s| u.scheme = s; has_key?(u) }
      end

      has_key? url
    end

    #
    # Use a breadth-first search to calculate the single-source
    # shortest paths from *root* to all pages in the PageStore
    #
    # Every reachable, fetched page gets its +depth+ and +visited+ attributes
    # set and is written back to the store. Returns +self+.
    #
    # Raises a RuntimeError when *root* is not in the store.
    #
    def shortest_paths!(root)
      root = URI(root) if root.is_a?(String)
      raise "Root node not found" if !has_key?(root)

      # The traversal is single-threaded, so a plain Array is enough as a
      # FIFO queue.
      queue = [root]

      root_page = self[root]
      root_page.depth = 0
      root_page.visited = true
      self[root] = root_page

      until queue.empty?
        page = self[queue.shift]
        page.links.each do |u|
          target = u

          # Follow a chain of redirects: every hop is marked at the same
          # depth and only the final, non-redirecting page is enqueued.
          loop do
            link = self[target]
            break if link.nil? || !link.fetched? || link.visited

            queue << target unless link.redirect?
            link.visited = true
            link.depth = page.depth + 1
            self[target] = link

            break unless link.redirect?
            target = link.redirect_to
          end
        end
      end

      self
    end

    #
    # Removes all Pages from storage where redirect? is true
    #
    def uniq!
      each_value { |page| delete page.url if page.redirect? }
      self
    end

    #
    # If given a single URL (as a String or URI), returns an Array of Pages which link to that URL
    # If given an Array of URLs, returns a Hash (URI => [Page, Page...]) of Pages linking to those URLs
    #
    # URLs which cannot be parsed are skipped; a single unparsable URL
    # yields an empty Array. The Array passed in is left untouched.
    #
    def pages_linking_to(urls)
      single  = !urls.is_a?(Array)
      targets = (single ? [urls] : urls).map { |url| to_uri(url) }.compact

      links = {}
      targets.each { |url| links[url] = [] }
      each_value do |page|
        targets.each { |url| links[url] << page if page.links.include?(url) }
      end

      return links unless single

      links[targets.first] || []
    end

    #
    # If given a single URL (as a String or URI), returns an Array of URLs which link to that URL
    # If given an Array of URLs, returns a Hash (URI => [URI, URI...]) of URLs linking to those URLs
    #
    def urls_linking_to(urls)
      linking = pages_linking_to(urls)

      # single URL: pages_linking_to already unwrapped the result
      return linking.map { |page| page.url } if linking.is_a?(Array)

      linking.keys.each do |url|
        linking[url] = linking[url].map { |page| page.url }
      end
      linking
    end

    private

    # Returns +url+ as a URI, or nil when it cannot be parsed.
    def to_uri(url)
      return url if url.is_a?(URI)

      URI(url)
    rescue StandardError
      nil
    end

  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Anemone
  #
  # Factory methods for the available page storage backends.
  #
  # Every backend except Hash is loaded lazily from the Arachni library
  # directory the first time its factory method is called.
  #
  module Storage

    #
    # In-memory storage: a regular Hash with a no-op +close+ method.
    #
    # Arguments and an optional default-value block are handed to Hash.new
    # unchanged.
    #
    def self.Hash(*args, &block)
      hash = Hash.new(*args, &block)
      # add close method for compatibility with Storage::Base
      def hash.close; end
      hash
    end

    # File storage through the PStore adapter; +args+ go to its constructor.
    def self.PStore(*args)
      require Arachni::Options.instance.dir['lib'] + 'anemone/storage/pstore'
      self::PStore.new(*args)
    end

    # Tokyo Cabinet storage kept in +file+.
    def self.TokyoCabinet(file = 'anemone.tch')
      require Arachni::Options.instance.dir['lib'] + 'anemone/storage/tokyo_cabinet'
      self::TokyoCabinet.new(file)
    end

    #
    # MongoDB storage.
    #
    # When +mongo_db+ is nil, a new connection is opened and its 'anemone'
    # database is used.
    # Raises a RuntimeError unless +mongo_db+ is a Mongo::DB.
    #
    def self.MongoDB(mongo_db = nil, collection_name = 'pages')
      require Arachni::Options.instance.dir['lib'] + 'anemone/storage/mongodb'
      mongo_db ||= Mongo::Connection.new.db('anemone')
      raise "First argument must be an instance of Mongo::DB" unless mongo_db.is_a?(Mongo::DB)
      self::MongoDB.new(mongo_db, collection_name)
    end

    # Redis storage; +opts+ are passed on to the Redis adapter.
    def self.Redis(opts = {})
      require Arachni::Options.instance.dir['lib'] + 'anemone/storage/redis'
      self::Redis.new(opts)
    end

  end
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require Arachni::Options.instance.dir['lib'] + 'anemone/storage/exceptions'
|
2
|
+
|
3
|
+
module Anemone
  module Storage
    #
    # Wraps a storage adapter and converts any StandardError it raises into
    # one of the Storage error classes (RetrievalError, InsertionError,
    # DeletionError, CloseError, GenericError).
    #
    class Base

      #
      # @param adapter object implementing every public method of this class
      #
      # Raises a RuntimeError naming the first method the adapter lacks.
      #
      def initialize(adapter)
        @adap = adapter

        # verify adapter conforms to this class's methods
        # (methods inherited from Object are skipped)
        (methods - Object.instance_methods).each do |method|
          if !@adap.respond_to?(method.to_sym)
            raise "Storage adapter must support method #{method}"
          end
        end
      end

      # Fetches the value stored under +key+; failures raise RetrievalError.
      def [](key)
        @adap[key]
      rescue StandardError => e
        raise RetrievalError, e.message
      end

      # Stores +value+ under +key+; failures raise InsertionError.
      def []=(key, value)
        @adap[key] = value
      rescue StandardError => e
        raise InsertionError, e.message
      end

      # Removes +key+; failures raise DeletionError.
      def delete(key)
        @adap.delete(key)
      rescue StandardError => e
        raise DeletionError, e.message
      end

      #
      # Yields every key/value pair.
      #
      # Note that errors raised by the caller's block are wrapped in
      # GenericError as well, since they surface inside the adapter's +each+.
      #
      def each
        @adap.each { |k, v| yield k, v }
      rescue StandardError => e
        raise GenericError, e.message
      end

      # Merges +hash+ into the adapter; failures raise GenericError.
      def merge!(hash)
        @adap.merge!(hash)
      rescue StandardError => e
        raise GenericError, e.message
      end

      # Closes the adapter; failures raise CloseError.
      def close
        @adap.close
      rescue StandardError => e
        raise CloseError, e.message
      end

      # Number of stored entries; failures raise GenericError.
      def size
        @adap.size
      rescue StandardError => e
        raise GenericError, e.message
      end

      # All stored keys; failures raise GenericError.
      def keys
        @adap.keys
      rescue StandardError => e
        raise GenericError, e.message
      end

      # Is +key+ present in the adapter? Failures raise GenericError.
      def has_key?(key)
        @adap.has_key?(key)
      rescue StandardError => e
        raise GenericError, e.message
      end

    end
  end
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
begin
|
2
|
+
require 'mongo'
|
3
|
+
rescue LoadError
|
4
|
+
puts "You need the mongo gem to use Anemone::Storage::MongoDB"
|
5
|
+
exit
|
6
|
+
end
|
7
|
+
|
8
|
+
module Anemone
  module Storage
    #
    # Storage adapter which keeps pages as documents of a MongoDB
    # collection, looked up by their 'url' field.
    #
    class MongoDB

      # Page fields wrapped in BSON::Binary when written and turned back
      # into Strings when read.
      BINARY_FIELDS = %w(body headers data)

      #
      # Empties the collection named +collection_name+ in +mongo_db+ and
      # creates an index on 'url'.
      #
      def initialize(mongo_db, collection_name)
        @db = mongo_db
        @collection = @db[collection_name]
        @collection.remove
        @collection.create_index 'url'
      end

      # Returns the page stored for +url+, or nil when there is none.
      def [](url)
        document = @collection.find_one('url' => url.to_s)
        load_page(document) if document
      end

      #
      # Upserts +page+. The document is matched on the page's own URL;
      # the +url+ argument is not consulted.
      #
      def []=(url, page)
        document = page.to_hash
        BINARY_FIELDS.each do |field|
          next if document[field].nil?
          document[field] = BSON::Binary.new(document[field])
        end

        selector = { 'url' => page.url.to_s }
        @collection.update(selector, document, :upsert => true)
      end

      # Removes the document for +url+ and returns the page it held (or nil).
      def delete(url)
        removed = self[url]
        @collection.remove('url' => url.to_s)
        removed
      end

      # Yields the URL (as a String) and the page of every document.
      def each
        @collection.find do |cursor|
          cursor.each do |document|
            loaded = load_page(document)
            yield(loaded.url.to_s, loaded)
          end
        end
      end

      # Stores every value of +hash+ through #[]= and returns self.
      def merge!(hash)
        hash.each { |url, page| self[url] = page }
        self
      end

      # Number of documents in the collection.
      def size
        @collection.count
      end

      # URLs of all stored pages, as Strings.
      def keys
        urls = []
        each { |url, _page| urls << url.to_s }
        urls
      end

      # Is there a document whose 'url' equals +url+?
      def has_key?(url)
        @collection.find_one('url' => url.to_s) ? true : false
      end

      # Closes the database connection.
      def close
        @db.connection.close
      end

      private

      # Stringifies the binary fields of +hash+ in place and builds a Page.
      def load_page(hash)
        BINARY_FIELDS.each { |field| hash[field] = hash[field].to_s }
        Page.from_hash(hash)
      end

    end
  end
end
|
89
|
+
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'pstore'
|
2
|
+
require 'forwardable'
|
3
|
+
|
4
|
+
module Anemone
  module Storage
    #
    # Storage adapter which persists values in a PStore file.
    #
    # The set of stored keys is mirrored in an in-memory Hash so that
    # +has_key?+, +keys+ and +size+ never have to open the file.
    #
    class PStore
      extend Forwardable

      def_delegators :@keys, :has_key?, :keys, :size

      #
      # @param file [String] path of the store; an existing file at that
      #   path is deleted so every instance starts out empty
      #
      def initialize(file)
        # File.exists? was removed in Ruby 3.2; File.exist? works everywhere
        File.delete(file) if File.exist?(file)
        @store = ::PStore.new(file)
        @keys = {}
      end

      # Returns the value stored under +key+ (nil when absent).
      def [](key)
        # read-only transaction: the file is not rewritten for a lookup
        @store.transaction(true) { |s| s[key] }
      end

      # Stores +value+ under +key+.
      def []=(key,value)
        @keys[key] = nil
        @store.transaction { |s| s[key] = value }
      end

      # Removes +key+ and returns the value it held.
      def delete(key)
        @keys.delete(key)
        @store.transaction { |s| s.delete key}
      end

      #
      # Yields every key with its value. Each value is read in its own
      # read-only transaction which is finished before the block runs, so
      # the block is free to use the store itself.
      #
      def each
        @keys.each_key do |key|
          value = @store.transaction(true) { |s| s[key] }
          yield key, value
        end
      end

      # Stores every pair of +hash+ in a single transaction; returns self.
      def merge!(hash)
        @store.transaction do |s|
          hash.each { |key, value| s[key] = value; @keys[key] = nil }
        end
        self
      end

      # Nothing to release; present for compatibility with Storage::Base.
      def close; end

    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'redis'
|
2
|
+
|
3
|
+
module Anemone
  module Storage
    #
    # Storage adapter which keeps every page as a Redis hash under
    # "<key_prefix>:pages:<url>".
    #
    class Redis

      # Page fields which are not plain strings and are therefore stored
      # marshalled.
      MARSHAL_FIELDS = %w(links visited fetched).freeze

      #
      # @param opts [Hash] passed to ::Redis.new as-is; opts[:key_prefix]
      #   selects the key namespace and defaults to 'anemone'
      #
      # Every page already stored under the prefix is deleted.
      #
      def initialize(opts = {})
        @redis = ::Redis.new(opts)
        @key_prefix = opts[:key_prefix] || 'anemone'
        keys.each { |key| delete(key) }
      end

      # Returns the page stored for +key+, or nil when there is none.
      def [](key)
        rget(page_key(key))
      end

      # Writes every field of +value+'s hash form to the Redis hash for +key+.
      def []=(key, value)
        rkey = page_key(key)
        hash = value.to_hash
        MARSHAL_FIELDS.each do |field|
          hash[field] = Marshal.dump(hash[field])
        end
        hash.each do |field, field_value|
          @redis.hset(rkey, field, field_value)
        end
      end

      # Removes +key+ and returns the page it held (nil when absent).
      def delete(key)
        page = self[key]
        @redis.del(page_key(key))
        page
      end

      # Yields the URL (as a String) and the page of every stored entry.
      def each
        @redis.keys("#{@key_prefix}:pages:*").each do |rkey|
          page = rget(rkey)
          # the key may have disappeared since it was listed
          next if page.nil?
          yield page.url.to_s, page
        end
      end

      # Stores every value of +hash+ through #[]= and returns self.
      def merge!(hash)
        hash.each { |key, value| self[key] = value }
        self
      end

      # Number of keys under this adapter's prefix.
      def size
        @redis.keys("#{@key_prefix}:pages:*").size
      end

      # URLs of all stored pages, as Strings.
      def keys
        keys = []
        self.each { |k, v| keys << k.to_s }
        keys
      end

      #
      # Is there a page stored for +key+?
      #
      # Depending on its version the client answers EXISTS with a boolean
      # or with an Integer count; 0 is truthy in Ruby, so an Integer answer
      # is compared against zero.
      #
      def has_key?(key)
        found = @redis.exists(page_key(key))
        found.is_a?(Integer) ? found > 0 : !!found
      end

      # Ends the Redis connection.
      def close
        @redis.quit
      end

      private

      # Full Redis key under which the page for +key+ is stored.
      def page_key(key)
        "#{@key_prefix}:pages:#{key.to_s}"
      end

      #
      # Unmarshals the MARSHAL_FIELDS of +hash+ in place and builds a Page.
      #
      # NOTE(review): Marshal.load trusts whatever is stored in Redis; this
      # is only safe while nothing untrusted can write to these keys.
      #
      def load_value(hash)
        MARSHAL_FIELDS.each do |field|
          unless hash[field].nil? || hash[field] == ''
            hash[field] = Marshal.load(hash[field])
          end
        end
        Page.from_hash(hash)
      end

      #
      # Loads the page stored under the raw Redis key +rkey+.
      #
      # HGETALL answers with an empty hash for a missing key, which must
      # count as "not found" rather than as an empty page.
      #
      def rget(rkey)
        hash = @redis.hgetall(rkey)
        return nil if hash.nil? || hash.empty?

        load_value(hash)
      end

    end
  end
end
|