archipelago 0.2.5 → 0.2.6
Sign up to get free protection for your applications and to get access to all the features.
- data/README +1 -1
- data/TODO +8 -17
- data/lib/archipelago.rb +0 -12
- data/lib/archipelago/client.rb +154 -17
- data/lib/archipelago/current.rb +1 -1
- data/lib/archipelago/disco.rb +269 -74
- data/lib/archipelago/dump.rb +279 -0
- data/lib/archipelago/hashish.rb +264 -108
- data/lib/archipelago/pirate.rb +52 -43
- data/lib/archipelago/sanitation.rb +268 -0
- data/lib/archipelago/tranny.rb +2 -4
- data/lib/archipelago/treasure.rb +173 -27
- data/script/{console → console.rb} +1 -1
- data/script/officer.rb +10 -0
- data/script/pirate.rb +5 -1
- data/script/services.rb +12 -5
- data/tests/disco_benchmark.rb +2 -2
- data/tests/disco_test.rb +39 -7
- data/tests/dump_test.rb +71 -0
- data/tests/pirate_test.rb +74 -21
- data/tests/sanitation_benchmark.rb +50 -0
- data/tests/sanitation_test.rb +219 -0
- data/tests/test_helper.rb +15 -3
- data/tests/tranny_test.rb +0 -2
- data/tests/treasure_benchmark.rb +6 -3
- data/tests/treasure_test.rb +43 -7
- metadata +13 -7
- data/lib/archipelago/cove.rb +0 -68
- data/lib/archipelago/exxon.rb +0 -138
- data/lib/archipelago/oneline.rb +0 -641
data/lib/archipelago/pirate.rb
CHANGED
@@ -73,7 +73,6 @@ module Archipelago
|
|
73
73
|
# Archipelago::Treasure::Dubloons in them.
|
74
74
|
#
|
75
75
|
class Captain < Archipelago::Client::Base
|
76
|
-
attr_reader :chests, :trannies
|
77
76
|
#
|
78
77
|
# Will look for Archipelago::Treasure::Chests matching <i>:chest_description</i> or CHEST_DESCRIPTION and
|
79
78
|
# Archipelago::Tranny::Managers matching <i>:tranny_description</i> or TRANNY_DESCRIPTION.
|
@@ -82,29 +81,28 @@ module Archipelago
|
|
82
81
|
# of required classes and modules at the chest.
|
83
82
|
#
|
84
83
|
def initialize(options = {})
|
85
|
-
super(options)
|
86
|
-
|
87
84
|
@transaction = nil
|
88
|
-
|
89
|
-
start_service_updater
|
90
|
-
|
85
|
+
setup(options)
|
91
86
|
end
|
92
87
|
|
93
88
|
#
|
94
89
|
# Sets up this instance with the given +options+.
|
95
90
|
#
|
96
91
|
def setup(options = {})
|
97
|
-
super(options)
|
98
|
-
|
99
|
-
@chest_description = CHEST_DESCRIPTION.merge(options[:chest_description] || {})
|
100
|
-
@tranny_description = TRANNY_DESCRIPTION.merge(options[:tranny_description] || {})
|
101
|
-
|
102
92
|
@chest_eval_files ||= []
|
103
93
|
@chest_eval_files += options[:chest_eval_files] || []
|
104
|
-
|
105
94
|
@chests_having_evaluated ||= {}
|
106
95
|
|
107
96
|
@yar_counter = 0
|
97
|
+
|
98
|
+
options.merge!({
|
99
|
+
:service_descriptions => {
|
100
|
+
:chests => CHEST_DESCRIPTION.merge(options[:chest_description] || {}),
|
101
|
+
:trannies => TRANNY_DESCRIPTION.merge(options[:tranny_description] || {})
|
102
|
+
}
|
103
|
+
})
|
104
|
+
|
105
|
+
setup_client(options)
|
108
106
|
end
|
109
107
|
|
110
108
|
#
|
@@ -150,11 +148,11 @@ module Archipelago
|
|
150
148
|
# Return a clone of this instance bound to a newly created transaction.
|
151
149
|
#
|
152
150
|
def begin
|
153
|
-
raise NoTransactionManagerAvailableException.new(self) if
|
151
|
+
raise NoTransactionManagerAvailableException.new(self) if self.trannies.empty?
|
154
152
|
|
155
153
|
rval = self.clone
|
156
154
|
rval.instance_eval do
|
157
|
-
@transaction =
|
155
|
+
@transaction = self.trannies.values.first[:service].begin
|
158
156
|
end
|
159
157
|
|
160
158
|
return rval
|
@@ -175,21 +173,20 @@ module Archipelago
|
|
175
173
|
#
|
176
174
|
# Will abort! the transaction if any exception is raised.
|
177
175
|
#
|
178
|
-
def transaction(&block) #:yields: transaction
|
179
|
-
raise NoTransactionManagerAvailableException.new(self) if
|
176
|
+
def transaction(&block) #:yields: a clone of this Archipelago::Pirate::Captain with the given transaction as default @transaction.
|
177
|
+
raise NoTransactionManagerAvailableException.new(self) if self.trannies.empty?
|
180
178
|
|
181
|
-
|
179
|
+
my_clone = self.begin
|
180
|
+
transa = my_clone.active_transaction
|
182
181
|
begin
|
183
182
|
begin
|
184
|
-
return yield(
|
183
|
+
return yield(my_clone)
|
185
184
|
ensure
|
186
|
-
raise CommitFailedException.new(
|
185
|
+
raise CommitFailedException.new(my_clone, transa) unless transa.commit! == :commited
|
187
186
|
end
|
188
187
|
rescue Exception => e
|
189
|
-
|
188
|
+
transa.abort! unless transa.state == :aborted
|
190
189
|
raise e
|
191
|
-
ensure
|
192
|
-
@transaction = nil
|
193
190
|
end
|
194
191
|
end
|
195
192
|
|
@@ -204,17 +201,27 @@ module Archipelago
|
|
204
201
|
#
|
205
202
|
# Commit the transaction we are a member of and forget about it.
|
206
203
|
#
|
204
|
+
# Returns the new state of the transaction.
|
205
|
+
#
|
207
206
|
def commit!
|
208
|
-
|
209
|
-
|
207
|
+
begin
|
208
|
+
return @transaction.commit!
|
209
|
+
ensure
|
210
|
+
@transaction = nil
|
211
|
+
end
|
210
212
|
end
|
211
213
|
|
212
214
|
#
|
213
215
|
# Abort the transaction we are a member of and forget about it.
|
214
216
|
#
|
217
|
+
# Returns the new state of the transaction.
|
218
|
+
#
|
215
219
|
def abort!
|
216
|
-
|
217
|
-
|
220
|
+
begin
|
221
|
+
@transaction.abort!
|
222
|
+
ensure
|
223
|
+
@transaction = nil
|
224
|
+
end
|
218
225
|
end
|
219
226
|
|
220
227
|
#
|
@@ -233,34 +240,36 @@ module Archipelago
|
|
233
240
|
# for management or rescue!
|
234
241
|
#
|
235
242
|
def each(callable)
|
236
|
-
|
243
|
+
self.chests.t_each do |service_id, chest|
|
237
244
|
chest[:service].each(callable)
|
238
245
|
end
|
239
246
|
end
|
240
247
|
|
241
248
|
#
|
242
|
-
#
|
249
|
+
# Get the chest responsible for +key+.
|
243
250
|
#
|
244
|
-
def
|
245
|
-
|
246
|
-
|
247
|
-
|
251
|
+
def responsible_chest(key)
|
252
|
+
raise NoRemoteDatabaseAvailableException.new(self) if self.chests.empty?
|
253
|
+
|
254
|
+
return get_least_greater_than(:chests, Digest::SHA1.hexdigest(Marshal.dump(key)), 1).first
|
255
|
+
end
|
256
|
+
|
257
|
+
#
|
258
|
+
# Gets the successor of +service_id+ in the array of services.
|
259
|
+
#
|
260
|
+
def successor(service_id)
|
261
|
+
return nil if self.chests.empty?
|
262
|
+
return get_least_greater_than(:chests, service_id, 1).first
|
248
263
|
end
|
249
264
|
|
250
265
|
private
|
251
266
|
|
252
267
|
#
|
253
|
-
#
|
268
|
+
# Does an immediate update of our service lists.
|
254
269
|
#
|
255
|
-
def
|
256
|
-
|
257
|
-
|
258
|
-
key_id = Digest::SHA1.hexdigest(Marshal.dump(key))
|
259
|
-
sorted_chest_ids = @chests.keys.sort
|
260
|
-
sorted_chest_ids.each do |id|
|
261
|
-
return @chests[id] if id > key_id
|
262
|
-
end
|
263
|
-
return @chests[sorted_chest_ids.first]
|
270
|
+
def around_update_services(&block)
|
271
|
+
yield
|
272
|
+
evaluate_in_chests
|
264
273
|
end
|
265
274
|
|
266
275
|
#
|
@@ -271,7 +280,7 @@ module Archipelago
|
|
271
280
|
#
|
272
281
|
# For all chests
|
273
282
|
#
|
274
|
-
|
283
|
+
self.chests.values.each do |chest|
|
275
284
|
#
|
276
285
|
# Ensure that this chest has a Set of evaluated files
|
277
286
|
#
|
@@ -0,0 +1,268 @@
|
|
1
|
+
# Archipelago - a distributed computing toolkit for ruby
|
2
|
+
# Copyright (C) 2006 Martin Kihlgren <zond at troja dot ath dot cx>
|
3
|
+
#
|
4
|
+
# This program is free software; you can redistribute it and/or
|
5
|
+
# modify it under the terms of the GNU General Public License
|
6
|
+
# as published by the Free Software Foundation; either version 2
|
7
|
+
# of the License, or (at your option) any later version.
|
8
|
+
#
|
9
|
+
# This program is distributed in the hope that it will be useful,
|
10
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
11
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
12
|
+
# GNU General Public License for more details.
|
13
|
+
#
|
14
|
+
# You should have received a copy of the GNU General Public License
|
15
|
+
# along with this program; if not, write to the Free Software
|
16
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
17
|
+
|
18
|
+
require 'archipelago/client'
|
19
|
+
require 'archipelago/dump'
|
20
|
+
require 'monitor'
|
21
|
+
require 'drb'
|
22
|
+
|
23
|
+
module Archipelago
|
24
|
+
|
25
|
+
module Sanitation
|
26
|
+
|
27
|
+
#
|
28
|
+
# Description of the <i>Dump::Site</i>s that can
|
29
|
+
# store our data.
|
30
|
+
#
|
31
|
+
SITE_DESCRIPTION = {
|
32
|
+
:class => 'Archipelago::Dump::Site'
|
33
|
+
}
|
34
|
+
|
35
|
+
#
|
36
|
+
# The minimum size of data that is reasonable to
|
37
|
+
# recover without too much overhead.
|
38
|
+
#
|
39
|
+
MINIMUM_RECOVERABLE_SIZE = 128
|
40
|
+
#
|
41
|
+
# The minimum number of chunks we want to spread out
|
42
|
+
# to protect us against server failure.
|
43
|
+
#
|
44
|
+
MINIMUM_NR_OF_CHUNKS = 14
|
45
|
+
#
|
46
|
+
# The minimum ratio of redundancy (used diskspace / data size)
|
47
|
+
# we want to use.
|
48
|
+
#
|
49
|
+
MINIMUM_REDUNDANCY_RATIO = 2
|
50
|
+
#
|
51
|
+
# The extra bytes used by metadata in all check block chunks.
|
52
|
+
#
|
53
|
+
METADATA_OVERHEAD = 8
|
54
|
+
|
55
|
+
#
|
56
|
+
# Raised when you try to do stuff without any remote database
|
57
|
+
# available.
|
58
|
+
#
|
59
|
+
class NoRemoteDatabaseAvailableException < RuntimeError
|
60
|
+
def initialize(officer)
|
61
|
+
super("#{officer} can not find any remote database for you")
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
#
|
66
|
+
# Raised when you try to fetch data that we find traces of
|
67
|
+
# but are unable to fully restore.
|
68
|
+
#
|
69
|
+
class NotEnoughDataException < RuntimeError
|
70
|
+
def initialize(officer, key)
|
71
|
+
super("#{officer} can not find enough data to restore the value for #{key}")
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
#
|
76
|
+
# The client class for the redundant Archipelago::Dump network.
|
77
|
+
#
|
78
|
+
# Keeps track of our sites and writes and reads data.
|
79
|
+
#
|
80
|
+
# Also keeps track of all the redundancy work needed, but lets Site do the
|
81
|
+
# work.
|
82
|
+
#
|
83
|
+
class Officer < Archipelago::Client::Base
|
84
|
+
def initialize(options = {})
|
85
|
+
setup(options)
|
86
|
+
end
|
87
|
+
|
88
|
+
def setup(options = {})
|
89
|
+
@minimum_recoverable_size = options[:minimum_recoverable_size] || MINIMUM_RECOVERABLE_SIZE
|
90
|
+
@minimum_nr_of_chunks = options[:minimum_nr_of_chunks] || MINIMUM_NR_OF_CHUNKS
|
91
|
+
@minimum_redundancy_ratio = options[:minimum_redundancy_ratio] || MINIMUM_REDUNDANCY_RATIO
|
92
|
+
@metadata_overhead = options[:metadata_overhead] || METADATA_OVERHEAD
|
93
|
+
|
94
|
+
options.merge!({
|
95
|
+
:service_descriptions => {
|
96
|
+
:sites => Archipelago::Disco::Query.new(SITE_DESCRIPTION.merge(options[:site_description] || {}))
|
97
|
+
}
|
98
|
+
})
|
99
|
+
setup_client(options)
|
100
|
+
end
|
101
|
+
|
102
|
+
#
|
103
|
+
# Write +key+ and +value+ into the site network with a good level of redundancy etc.
|
104
|
+
#
|
105
|
+
# The key must be a SHA1 hash.
|
106
|
+
#
|
107
|
+
# Optionally the timestamp +t+ can be provided, but it defaults to now.
|
108
|
+
#
|
109
|
+
def []=(key, value, t = [Time.now.to_i].pack("I"))
|
110
|
+
super_string = Oneliner::SuperString.new(value)
|
111
|
+
nr_of_needed_chunks = @minimum_nr_of_chunks / @minimum_redundancy_ratio
|
112
|
+
chunk_size = (super_string.size / nr_of_needed_chunks) + @metadata_overhead
|
113
|
+
chunk_size = @minimum_recoverable_size / nr_of_needed_chunks if chunk_size < @minimum_recoverable_size / nr_of_needed_chunks
|
114
|
+
|
115
|
+
dump_hash = responsible_sites(key)
|
116
|
+
super_string.encode(8)
|
117
|
+
dump_hash.t_each do |dump_id, nr_of_chunks_needed|
|
118
|
+
self.sites[dump_id][:service].insert!(key,
|
119
|
+
(0...nr_of_chunks_needed).collect do |nr_of_chunks_needed|
|
120
|
+
super_string.encode(chunk_size)
|
121
|
+
end,
|
122
|
+
t)
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
def delete!(key)
|
127
|
+
dump_hash = responsible_sites(key)
|
128
|
+
dump_hash.t_each do |dump_id, nr_of_chunks_available|
|
129
|
+
self.sites[dump_id][:service].delete!(key)
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
#
|
134
|
+
# Get the data for +key+ in the site network.
|
135
|
+
#
|
136
|
+
# The key must be a SHA1 hash.
|
137
|
+
#
|
138
|
+
def [](key)
|
139
|
+
fetch(key).first
|
140
|
+
end
|
141
|
+
|
142
|
+
#
|
143
|
+
# Returns {service_id => nr_of_chunks_it_should_have}
|
144
|
+
# where sum(nr_of_chunks_it_should_have) == +n+
|
145
|
+
# from self.sites having service_id > +key+.
|
146
|
+
#
|
147
|
+
# Will wrap around to the beginning if the elements run out.
|
148
|
+
#
|
149
|
+
def responsible_sites(key)
|
150
|
+
raise NoRemoteDatabaseAvailableException.new(self) if self.sites.empty?
|
151
|
+
|
152
|
+
rval = {}
|
153
|
+
rval.extend(Archipelago::Current::ThreadedCollection)
|
154
|
+
get_least_greater_than(:sites, key, @minimum_nr_of_chunks).each do |desc|
|
155
|
+
rval[desc[:service_id]] ||= 0
|
156
|
+
rval[desc[:service_id]] += 1
|
157
|
+
end
|
158
|
+
return rval
|
159
|
+
end
|
160
|
+
|
161
|
+
#
|
162
|
+
# Returns whether the key belongs at the service with given id.
|
163
|
+
#
|
164
|
+
def belongs_at?(service_id, key)
|
165
|
+
responsible_sites(key).include?(service_id)
|
166
|
+
end
|
167
|
+
|
168
|
+
#
|
169
|
+
# Ensures that all the dumps responsible for +key+
|
170
|
+
# have chunks for that key without changing the timestamp
|
171
|
+
# for +key+.
|
172
|
+
#
|
173
|
+
def redistribute(key)
|
174
|
+
value, timestamp = fetch(key)
|
175
|
+
#
|
176
|
+
# Even if fetch didn't raise the exception we must, because this is serious business.
|
177
|
+
#
|
178
|
+
raise NotEnoughDataException.new(self, key) if value.nil?
|
179
|
+
self.[]=(key, value, timestamp)
|
180
|
+
end
|
181
|
+
|
182
|
+
#
|
183
|
+
# Returns whether +service_id1+ and +service_id2+ would come in that
|
184
|
+
# order in the site array if both existed.
|
185
|
+
#
|
186
|
+
def next_to?(service_id1, service_id2)
|
187
|
+
if self.sites.include?(service_id1)
|
188
|
+
return get_least_greater_than(:sites, service_id1, 1).first[:service_id] <= service_id2
|
189
|
+
elsif self.sites.include?(service_id2)
|
190
|
+
return get_greatest_less_than(:sites, service_id2, 1).first[:service_id] >= service_id1
|
191
|
+
else
|
192
|
+
return false
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
196
|
+
#
|
197
|
+
# Returns the site after the first one that has keys that
|
198
|
+
# will be stored in the site identified by +service_id+.
|
199
|
+
#
|
200
|
+
def second_master_to(service_id)
|
201
|
+
return get_greatest_less_than(:sites, service_id, @minimum_nr_of_chunks - 1).first[:service_id]
|
202
|
+
end
|
203
|
+
|
204
|
+
#
|
205
|
+
# Gets the predecessor of +service_id+ in the array of services.
|
206
|
+
#
|
207
|
+
def predecessor(service_id)
|
208
|
+
return get_greatest_less_than(:sites, service_id, 1).first[:service_id]
|
209
|
+
end
|
210
|
+
|
211
|
+
private
|
212
|
+
|
213
|
+
#
|
214
|
+
# Returns [the value for +key+, the timestamp for the value].
|
215
|
+
#
|
216
|
+
def fetch(key)
|
217
|
+
dump_hash = responsible_sites(key)
|
218
|
+
dump_ids = dump_hash.keys
|
219
|
+
newest_timestamp = "\000\000\000\000"
|
220
|
+
threads = []
|
221
|
+
rval = Oneliner::SuperString.new
|
222
|
+
lock = Archipelago::Current::Lock.new
|
223
|
+
|
224
|
+
dump_hash.t_each do |dump_id, nr_of_chunks_available|
|
225
|
+
site = self.sites[dump_id][:service]
|
226
|
+
begin
|
227
|
+
chunks = site.fetch(key)
|
228
|
+
lock.mon_synchronize do
|
229
|
+
while chunks.size > 0
|
230
|
+
t, data = chunks.shift
|
231
|
+
if t > newest_timestamp
|
232
|
+
rval = Oneliner::SuperString.new
|
233
|
+
newest_timestamp = t
|
234
|
+
end
|
235
|
+
|
236
|
+
if t == newest_timestamp
|
237
|
+
rval.decode!(data)
|
238
|
+
end
|
239
|
+
end
|
240
|
+
end
|
241
|
+
rescue DRb::DRbConnError => e
|
242
|
+
self.sites.delete(dump_id)
|
243
|
+
end
|
244
|
+
end
|
245
|
+
|
246
|
+
if rval.decode_done?
|
247
|
+
return [rval.to_s, newest_timestamp]
|
248
|
+
else
|
249
|
+
raise NotEnoughDataException.new(self, key) if newest_timestamp != "\000\000\000\000"
|
250
|
+
return [nil, nil]
|
251
|
+
end
|
252
|
+
|
253
|
+
end
|
254
|
+
|
255
|
+
end
|
256
|
+
|
257
|
+
#
|
258
|
+
# The default Archipelago::Sanitation::Officer that is always available for lookups is Archipelago::Sanitation::CLEANER.
|
259
|
+
#
|
260
|
+
# If you really need to you can customize it by defining CLEANER_OPTIONS before loading sanitation.rb, and if you REALLY
|
261
|
+
# need to you can disable it completely by setting CLEANER_DISABLED to true.
|
262
|
+
#
|
263
|
+
CLEANER = Officer.new(defined?(CLEANER_OPTIONS) ? CLEANER_OPTIONS : {}) unless defined?(CLEANER_DISABLED) && CLEANER_DISABLED
|
264
|
+
|
265
|
+
|
266
|
+
end
|
267
|
+
|
268
|
+
end
|
data/lib/archipelago/tranny.rb
CHANGED
@@ -99,7 +99,7 @@ module Archipelago
|
|
99
99
|
|
100
100
|
#
|
101
101
|
# Will use a BerkeleyHashishProvider using tranny_manager.db in the same dir to get its hashes
|
102
|
-
# if not <i>:
|
102
|
+
# if <i>:persistence_directory</i> is not given.
|
103
103
|
#
|
104
104
|
# Will create Transactions timing out after <i>:transaction_timeout</i> seconds or TRANSACTION_TIMEOUT
|
105
105
|
# if none is given.
|
@@ -107,13 +107,11 @@ module Archipelago
|
|
107
107
|
# Will use Archipelago::Disco::Publishable by calling <b>initialize_publishable</b> with +options+.
|
108
108
|
#
|
109
109
|
def initialize(options = {})
|
110
|
-
@persistence_provider = options[:persistence_provider] || Archipelago::Hashish::BerkeleyHashishProvider.new(Pathname.new(File.expand_path(__FILE__)).parent.join("tranny_manager.db"))
|
111
|
-
|
112
110
|
initialize_publishable(options)
|
113
111
|
|
114
112
|
@transaction_timeout = options[:transaction_timeout] || TRANSACTION_TIMEOUT
|
115
113
|
|
116
|
-
@db = @persistence_provider.get_cached_hashish("db")
|
114
|
+
@db = @persistence_provider.get_cached_hashish(:name => "db")
|
117
115
|
end
|
118
116
|
|
119
117
|
#
|