archipelago 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +1 -1
- data/TODO +8 -17
- data/lib/archipelago.rb +0 -12
- data/lib/archipelago/client.rb +154 -17
- data/lib/archipelago/current.rb +1 -1
- data/lib/archipelago/disco.rb +269 -74
- data/lib/archipelago/dump.rb +279 -0
- data/lib/archipelago/hashish.rb +264 -108
- data/lib/archipelago/pirate.rb +52 -43
- data/lib/archipelago/sanitation.rb +268 -0
- data/lib/archipelago/tranny.rb +2 -4
- data/lib/archipelago/treasure.rb +173 -27
- data/script/{console → console.rb} +1 -1
- data/script/officer.rb +10 -0
- data/script/pirate.rb +5 -1
- data/script/services.rb +12 -5
- data/tests/disco_benchmark.rb +2 -2
- data/tests/disco_test.rb +39 -7
- data/tests/dump_test.rb +71 -0
- data/tests/pirate_test.rb +74 -21
- data/tests/sanitation_benchmark.rb +50 -0
- data/tests/sanitation_test.rb +219 -0
- data/tests/test_helper.rb +15 -3
- data/tests/tranny_test.rb +0 -2
- data/tests/treasure_benchmark.rb +6 -3
- data/tests/treasure_test.rb +43 -7
- metadata +13 -7
- data/lib/archipelago/cove.rb +0 -68
- data/lib/archipelago/exxon.rb +0 -138
- data/lib/archipelago/oneline.rb +0 -641
data/lib/archipelago/pirate.rb
CHANGED
@@ -73,7 +73,6 @@ module Archipelago
     # Archipelago::Treasure:Dubloons in them.
     #
     class Captain < Archipelago::Client::Base
-      attr_reader :chests, :trannies
       #
       # Will look for Archipelago::Treasure::Chests matching <i>:chest_description</i> or CHEST_DESCRIPTION and
       # Archipelago::Tranny::Managers matching <i>:tranny_description</i> or TRANNY_DESCRIPTION.
@@ -82,29 +81,28 @@ module Archipelago
       # of required classes and modules at the chest.
       #
       def initialize(options = {})
-        super(options)
-
         @transaction = nil
-
-        start_service_updater
-
+        setup(options)
       end

       #
       # Sets up this instance with the given +options+.
       #
       def setup(options = {})
-        super(options)
-
-        @chest_description = CHEST_DESCRIPTION.merge(options[:chest_description] || {})
-        @tranny_description = TRANNY_DESCRIPTION.merge(options[:tranny_description] || {})
-
         @chest_eval_files ||= []
         @chest_eval_files += options[:chest_eval_files] || []
-
         @chests_having_evaluated ||= {}

         @yar_counter = 0
+
+        options.merge!({
+          :service_descriptions => {
+            :chests => CHEST_DESCRIPTION.merge(options[:chest_description] || {}),
+            :trannies => TRANNY_DESCRIPTION.merge(options[:tranny_description] || {})
+          }
+        })
+
+        setup_client(options)
       end

       #
@@ -150,11 +148,11 @@ module Archipelago
       # Return a clone of this instance bound to a newly created transaction.
       #
       def begin
-        raise NoTransactionManagerAvailableException.new(self) if
+        raise NoTransactionManagerAvailableException.new(self) if self.trannies.empty?

         rval = self.clone
         rval.instance_eval do
-          @transaction =
+          @transaction = self.trannies.values.first[:service].begin
         end

         return rval
@@ -175,21 +173,20 @@ module Archipelago
       #
       # Will abort! the transaction if any exception is raised.
       #
-      def transaction(&block) #:yields: transaction
-        raise NoTransactionManagerAvailableException.new(self) if
+      def transaction(&block) #:yields: a clone of this Archipelago::Pirate::Captain with the given transaction as default @transaction.
+        raise NoTransactionManagerAvailableException.new(self) if self.trannies.empty?

-
+        my_clone = self.begin
+        transa = my_clone.active_transaction
         begin
           begin
-            return yield(
+            return yield(my_clone)
           ensure
-            raise CommitFailedException.new(
+            raise CommitFailedException.new(my_clone, transa) unless transa.commit! == :commited
           end
         rescue Exception => e
-
+          transa.abort! unless transa.state == :aborted
           raise e
-        ensure
-          @transaction = nil
         end
       end

@@ -204,17 +201,27 @@ module Archipelago
       #
       # Commit the transaction we are a member of and forget about it.
       #
+      # Returns the new state of the transaction.
+      #
       def commit!
-
-
+        begin
+          return @transaction.commit!
+        ensure
+          @transaction = nil
+        end
       end

       #
       # Abort the transaction we are a member of and forget about it.
       #
+      # Returns the new state of the transaction.
+      #
       def abort!
-
-
+        begin
+          @transaction.abort!
+        ensure
+          @transaction = nil
+        end
       end

       #
@@ -233,34 +240,36 @@ module Archipelago
       # for management or rescue!
       #
       def each(callable)
-
+        self.chests.t_each do |service_id, chest|
           chest[:service].each(callable)
         end
       end

       #
-      #
+      # Get the chest responsible for +key+.
       #
-      def
-
-
-
+      def responsible_chest(key)
+        raise NoRemoteDatabaseAvailableException.new(self) if self.chests.empty?
+
+        return get_least_greater_than(:chests, Digest::SHA1.hexdigest(Marshal.dump(key)), 1).first
+      end
+
+      #
+      # Gets the successor of +service_id+ in the array of services.
+      #
+      def successor(service_id)
+        return nil if self.chests.empty?
+        return get_least_greater_than(:chests, service_id, 1).first
       end

       private

       #
-      #
+      # Does an immediate update of our service lists.
       #
-      def
-
-
-        key_id = Digest::SHA1.hexdigest(Marshal.dump(key))
-        sorted_chest_ids = @chests.keys.sort
-        sorted_chest_ids.each do |id|
-          return @chests[id] if id > key_id
-        end
-        return @chests[sorted_chest_ids.first]
+      def around_update_services(&block)
+        yield
+        evaluate_in_chests
       end

       #
@@ -271,7 +280,7 @@ module Archipelago
         #
         # For all chests
         #
-
+        self.chests.values.each do |chest|
           #
           # Ensure that this chest has a Set of evaluated files
           #
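
The pirate.rb changes above rework the transaction helpers: transaction now yields a clone of the Captain bound to a fresh transaction (rather than the raw transaction), and commit!/abort! return the new transaction state. A minimal usage sketch, assuming a Captain can be built with default options and that chests/trannies are discoverable on the network; the work inside the blocks is illustrative only:

  require 'archipelago/pirate'

  captain = Archipelago::Pirate::Captain.new

  # Block form: yields the transaction-bound clone, commits on success,
  # aborts (and re-raises) if the block raises.
  captain.transaction do |t_captain|
    # ... work against t_captain here ...
  end

  # Explicit form, mirroring the new commit!/abort! bodies:
  t_captain = captain.begin
  begin
    # ... work against t_captain ...
    t_captain.commit!   # returns the new transaction state
  rescue Exception
    t_captain.abort!    # returns the new transaction state
    raise
  end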
data/lib/archipelago/sanitation.rb
ADDED
@@ -0,0 +1,268 @@
+# Archipelago - a distributed computing toolkit for ruby
+# Copyright (C) 2006 Martin Kihlgren <zond at troja dot ath dot cx>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+require 'archipelago/client'
+require 'archipelago/dump'
+require 'monitor'
+require 'drb'
+
+module Archipelago
+
+  module Sanitation
+
+    #
+    # Description of the <i>Dump::Site</i>s that can
+    # store our data.
+    #
+    SITE_DESCRIPTION = {
+      :class => 'Archipelago::Dump::Site'
+    }
+
+    #
+    # The minimum size of data that is reasonable to
+    # recover without too much overhead.
+    #
+    MINIMUM_RECOVERABLE_SIZE = 128
+    #
+    # The minimum number of chunks we want to spread out
+    # to ensure us against server failure.
+    #
+    MINIMUM_NR_OF_CHUNKS = 14
+    #
+    # The minimum ratio of redundancy (used diskspace / data size)
+    # we want to use.
+    #
+    MINIMUM_REDUNDANCY_RATIO = 2
+    #
+    # The extra bytes used by metadata in all check block chunks.
+    #
+    METADATA_OVERHEAD = 8
+
+    #
+    # Raised when you try to do stuff without any remote database
+    # available.
+    #
+    class NoRemoteDatabaseAvailableException < RuntimeError
+      def initialize(officer)
+        super("#{officer} can not find any remote database for you")
+      end
+    end
+
+    #
+    # Raised when you try to fetch data that we find traces of
+    # but are unable to fully restore.
+    #
+    class NotEnoughDataException < RuntimeError
+      def initialize(officer, key)
+        super("#{officer} can not find enough data to restore the value for #{key}")
+      end
+    end
+
+    #
+    # The client class for the redundant Archipelago::Dump network.
+    #
+    # Keeps track of our sites and writes and reads data.
+    #
+    # Also keeps track of all the redundancy work needed, but lets Site do the
+    # work.
+    #
+    class Officer < Archipelago::Client::Base
+      def initialize(options = {})
+        setup(options)
+      end
+
+      def setup(options = {})
+        @minimum_recoverable_size = options[:minimum_recoverable_size] || MINIMUM_RECOVERABLE_SIZE
+        @minimum_nr_of_chunks = options[:minimum_nr_of_chunks] || MINIMUM_NR_OF_CHUNKS
+        @minimum_redundancy_ratio = options[:minimum_redundancy_ratio] || MINIMUM_REDUNDANCY_RATIO
+        @metadata_overhead = options[:metadata_overhead] || METADATA_OVERHEAD
+
+        options.merge!({
+          :service_descriptions => {
+            :sites => Archipelago::Disco::Query.new(SITE_DESCRIPTION.merge(options[:site_description] || {}))
+          }
+        })
+        setup_client(options)
+      end
+
+      #
+      # Write +key+ and +value+ into the site network with a good level of redundancy etc.
+      #
+      # The key should must be a SHA1 hash.
+      #
+      # Optionally the timestamp +t+ can be provided, but it defaults to now.
+      #
+      def []=(key, value, t = [Time.now.to_i].pack("I"))
+        super_string = Oneliner::SuperString.new(value)
+        nr_of_needed_chunks = @minimum_nr_of_chunks / @minimum_redundancy_ratio
+        chunk_size = (super_string.size / nr_of_needed_chunks) + @metadata_overhead
+        chunk_size = @minimum_recoverable_size / nr_of_needed_chunks if chunk_size < @minimum_recoverable_size / nr_of_needed_chunks
+
+        dump_hash = responsible_sites(key)
+        super_string.encode(8)
+        dump_hash.t_each do |dump_id, nr_of_chunks_needed|
+          self.sites[dump_id][:service].insert!(key,
+            (0...nr_of_chunks_needed).collect do |nr_of_chunks_needed|
+              super_string.encode(chunk_size)
+            end,
+            t)
+        end
+      end
+
+      def delete!(key)
+        dump_hash = responsible_sites(key)
+        dump_hash.t_each do |dump_id, nr_of_chunks_available|
+          self.sites[dump_id][:service].delete!(key)
+        end
+      end
+
+      #
+      # Get the data for +key+ in the site network.
+      #
+      # The key must be a SHA1 hash.
+      #
+      def [](key)
+        fetch(key).first
+      end
+
+      #
+      # Returns {service_id => nr_of_chunks_it_should_have}
+      # where sum(nr_of_chunks_it_should_have) == +n+
+      # from self.sites having service_id > +key+.
+      #
+      # Will loop to the beginning if the number of elements run out.
+      #
+      def responsible_sites(key)
+        raise NoRemoteDatabaseAvailableException.new(self) if self.sites.empty?
+
+        rval = {}
+        rval.extend(Archipelago::Current::ThreadedCollection)
+        get_least_greater_than(:sites, key, @minimum_nr_of_chunks).each do |desc|
+          rval[desc[:service_id]] ||= 0
+          rval[desc[:service_id]] += 1
+        end
+        return rval
+      end
+
+      #
+      # Returns whether the key belongs at the service with given id.
+      #
+      def belongs_at?(service_id, key)
+        responsible_sites(key).include?(service_id)
+      end
+
+      #
+      # Ensures that all the dumps responsible for +key+
+      # has chunks for that key without changing the timestamp
+      # for +key+.
+      #
+      def redistribute(key)
+        value, timestamp = fetch(key)
+        #
+        # Even if fetch didnt raise the exception we must, cause this is serious business.
+        #
+        raise NotEnoughDataException.new(self, key) if value.nil?
+        self.[]=(key, value, timestamp)
+      end
+
+      #
+      # Returns whether +service_id1+ and +service_id2+ would come in that
+      # order in the site array if both existed.
+      #
+      def next_to?(service_id1, service_id2)
+        if self.sites.include?(service_id1)
+          return get_least_greater_than(:sites, service_id1, 1).first[:service_id] <= service_id2
+        elsif self.sites.include?(service_id2)
+          return get_greatest_less_than(:sites, service_id2, 1).first[:service_id] >= service_id1
+        else
+          return false
+        end
+      end
+
+      #
+      # Returns the site after the first one that has keys that
+      # will be stored in the site identified by +service_id+.
+      #
+      def second_master_to(service_id)
+        return get_greatest_less_than(:sites, service_id, @minimum_nr_of_chunks - 1).first[:service_id]
+      end
+
+      #
+      # Gets the predecessor of +service_id+ in the array of services.
+      #
+      def predecessor(service_id)
+        return get_greatest_less_than(:sites, service_id, 1).first[:service_id]
+      end
+
+      private
+
+      #
+      # Returns [the value for +key+, the timestamp for the value].
+      #
+      def fetch(key)
+        dump_hash = responsible_sites(key)
+        dump_ids = dump_hash.keys
+        newest_timestamp = "\000\000\000\000"
+        threads = []
+        rval = Oneliner::SuperString.new
+        lock = Archipelago::Current::Lock.new
+
+        dump_hash.t_each do |dump_id, nr_of_chunks_available|
+          site = self.sites[dump_id][:service]
+          begin
+            chunks = site.fetch(key)
+            lock.mon_synchronize do
+              while chunks.size > 0
+                t, data = chunks.shift
+                if t > newest_timestamp
+                  rval = Oneliner::SuperString.new
+                  newest_timestamp = t
+                end

+                if t == newest_timestamp
+                  rval.decode!(data)
+                end
+              end
+            end
+          rescue DRb::DRbConnError => e
+            self.sites.delete(dump_id)
+          end
+        end
+
+        if rval.decode_done?
+          return [rval.to_s, newest_timestamp]
+        else
+          raise NotEnoughDataException.new(self, key) if newest_timestamp != "\000\000\000\000"
+          return [nil, nil]
+        end
+
+      end
+
+    end
+
+    #
+    # The default Archipelago::Sanitation::Officer that is always available for lookups is Archipelago::Sanitation::CLEANER.
+    #
+    # If you really need to you can customize it by defining CLEANER_OPTIONS before loading sanitation.rb, and if you REALLY
+    # need to you can disable it completely by setting CLEANER_DISABLED to true.
+    #
+    CLEANER = Officer.new(defined?(CLEANER_OPTIONS) ? CLEANER_OPTIONS : {}) unless defined?(CLEANER_DISABLED) && CLEANER_DISABLED
+
+
+  end
+
+end
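
The new sanitation.rb is the client side of the redundant Dump network: values are chunked, spread over the responsible Dump::Sites, and reassembled on read. A rough usage sketch, assuming at least one Archipelago::Dump::Site is running and discoverable; the key and payload below are made up:

  require 'digest/sha1'
  require 'archipelago/sanitation'

  # The default Officer, created on load unless CLEANER_DISABLED is set.
  officer = Archipelago::Sanitation::CLEANER

  key = Digest::SHA1.hexdigest("my-document-id")   # keys are SHA1 digests, per the comments above
  officer[key] = "payload to store redundantly"    # chunked and written to the responsible sites
  value = officer[key]                             # reassembled from the chunks, or nil if unknown
  officer.delete!(key)                             # removes the chunks from all responsible sites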
data/lib/archipelago/tranny.rb
CHANGED
@@ -99,7 +99,7 @@ module Archipelago

       #
       # Will use a BerkeleyHashishProvider using tranny_manager.db in the same dir to get its hashes
-      # if not <i>:
+      # if not <i>:persistence_directory</i> is given.
       #
       # Will create Transactions timing out after <i>:transaction_timeout</i> seconds or TRANSACTION_TIMEOUT
       # if none is given.
@@ -107,13 +107,11 @@ module Archipelago
       # Will use Archipelago::Disco::Publishable by calling <b>initialize_publishable</b> with +options+.
       #
       def initialize(options = {})
-        @persistence_provider = options[:persistence_provider] || Archipelago::Hashish::BerkeleyHashishProvider.new(Pathname.new(File.expand_path(__FILE__)).parent.join("tranny_manager.db"))
-
         initialize_publishable(options)

         @transaction_timeout = options[:transaction_timeout] || TRANSACTION_TIMEOUT

-        @db = @persistence_provider.get_cached_hashish("db")
+        @db = @persistence_provider.get_cached_hashish(:name => "db")
       end

       #
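
The tranny.rb change drops the hard-wired BerkeleyHashishProvider default and switches get_cached_hashish from a bare name to an options hash. A hypothetical sketch of the new call shape, reusing the provider construction from the removed line; the 0.2.6 hashish.rb API (which also changed in this release) may differ in detail:

  require 'pathname'
  require 'archipelago/hashish'

  # Hypothetical: build a provider the way the removed line did, then request
  # the cached hashish with the options-hash form shown in the added line.
  provider = Archipelago::Hashish::BerkeleyHashishProvider.new(Pathname.new("/tmp").join("tranny_manager.db"))
  db = provider.get_cached_hashish(:name => "db")   # 0.2.5 took a bare name: get_cached_hashish("db")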