hammerspace-fork 0.1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.chef/cookbooks/hammerspace-development/attributes/default.rb +7 -0
- data/.chef/cookbooks/hammerspace-development/attributes/essential.rb +6 -0
- data/.chef/cookbooks/hammerspace-development/attributes/sparkey.rb +7 -0
- data/.chef/cookbooks/hammerspace-development/recipes/default.rb +32 -0
- data/.chef/cookbooks/hammerspace-development/recipes/essential.rb +9 -0
- data/.chef/cookbooks/hammerspace-development/recipes/ruby.rb +21 -0
- data/.chef/cookbooks/hammerspace-development/recipes/sparkey.rb +56 -0
- data/.chef/cookbooks/hammerspace-development/templates/default/.bash_profile.erb +2 -0
- data/.chef/roles/hammerspace-development.rb +6 -0
- data/.gitignore +8 -0
- data/CHANGELOG.md +30 -0
- data/Gemfile +10 -0
- data/LICENSE.txt +22 -0
- data/README.md +523 -0
- data/Vagrantfile +30 -0
- data/hammerspace-fork.gemspec +21 -0
- data/lib/hammerspace.rb +12 -0
- data/lib/hammerspace/backend.rb +106 -0
- data/lib/hammerspace/backend/sparkey.rb +319 -0
- data/lib/hammerspace/hash.rb +62 -0
- data/lib/hammerspace/hash_methods.rb +234 -0
- data/lib/hammerspace/version.rb +3 -0
- data/script/write_concurrency_test.rb +36 -0
- data/spec/features/hash_spec.rb +1487 -0
- data/spec/lib/hammerspace/backend/sparkey_spec.rb +191 -0
- data/spec/lib/hammerspace/hash_spec.rb +143 -0
- data/spec/lib/hammerspace_spec.rb +27 -0
- data/spec/spec_helper.rb +25 -0
- data/spec/support/sparkey_directory_helper.rb +26 -0
- data/spec/support/write_concurrency_test.rb +38 -0
- metadata +96 -0
data/Vagrantfile
ADDED
@@ -0,0 +1,30 @@
# Vagrant configuration for the hammerspace development VM.
#
# Boots a single Ubuntu 12.04 box, syncs this repository into the guest at
# /home/vagrant/hammerspace, and provisions it with chef-solo using the
# cookbooks and roles stored under .chef/ in this repository.

VAGRANTFILE_API_VERSION = "2"
VM_BOX_URL = "https://s3.amazonaws.com/gsc-vagrant-boxes/ubuntu-12.04-omnibus-chef.box"

# Host-side path of this checkout and its mount point inside the guest.
HOST_SRC = File.dirname(File.expand_path __FILE__)
LOCAL_SRC = '/home/vagrant/hammerspace'

Vagrant.configure(VAGRANTFILE_API_VERSION) do |master_config|
  master_config.vm.define 'hammerspace-development' do |config|
    # base image is our standard airbase -- ubuntu 12.04
    config.vm.box = "airbase"
    config.vm.box_url = VM_BOX_URL

    # Share the repository checkout with the guest so edits on the host are
    # immediately visible inside the VM.
    config.vm.synced_folder HOST_SRC, LOCAL_SRC

    # Modest resources: 2 vCPUs, 512 MB RAM.
    config.vm.provider "virtualbox" do |v|
      v.customize ['modifyvm', :id,
        '--cpus', 2,
        '--memory', 512
      ]
    end

    # Provision with chef-solo using the in-repo cookbooks, roles, and data
    # bags; the single role pulls in everything the dev VM needs.
    config.vm.provision :chef_solo do |chef|
      chef.cookbooks_path = '.chef/cookbooks'
      chef.roles_path = '.chef/roles'
      chef.data_bags_path = '.chef/data_bags'

      chef.add_role('hammerspace-development')
    end
  end
end
|
@@ -0,0 +1,21 @@
# encoding: utf-8
$:.push File.expand_path("../lib", __FILE__)

require 'hammerspace/version'

# Gem specification for hammerspace-fork: a hash-like interface to
# persistent, concurrent, off-heap storage (Sparkey, via the gnista gem).
#
# NOTE(review): the file lists below are built by shelling out to
# `git ls-files`, so `gem build` must be run from inside a git checkout —
# confirm this is intended for the release workflow.
Gem::Specification.new do |s|
  s.name = "hammerspace-fork"
  s.version = Hammerspace::VERSION
  s.platform = Gem::Platform::RUBY
  s.authors = ["Jon Tai", "Nelson Gauthier"]
  s.email = ["jon.tai@airbnb.com", "nelson@airbnb.com"]
  s.homepage = "https://github.com/akerl/hammerspace"
  s.summary = "Hash-like interface to persistent, concurrent, off-heap storage"
  s.description = "A convenient place to store giant hammers"

  s.files = `git ls-files`.split("\n")
  s.test_files = `git ls-files -- spec/*`.split("\n")
  s.require_path = 'lib'

  # Runtime dependency: gnista provides the Ruby bindings to Sparkey.
  s.add_runtime_dependency 'gnista', '~> 1.0.0'
end
data/lib/hammerspace.rb
ADDED
@@ -0,0 +1,12 @@
require 'hammerspace/version'
require 'hammerspace/hash_methods'
require 'hammerspace/backend'
require 'hammerspace/hash'

# Top-level namespace for the hammerspace gem.
module Hammerspace
  # Convenience constructor so callers can write Hammerspace.new(...)
  # instead of Hammerspace::Hash.new(...). All arguments, including any
  # trailing args and a default-value block, are forwarded unchanged.
  #
  # @param path [String] directory used by the backend for its files
  # @param options [Hash] backend options (e.g. which backend class to use)
  # @return [Hammerspace::Hash]
  def self.new(path, options = {}, *args, &block)
    Hammerspace::Hash.new(path, options, *args, &block)
  end
end
@@ -0,0 +1,106 @@
require 'fileutils'
require 'tempfile'

module Hammerspace
  module Backend

    # "Backend" class from which concrete backends extend
    #
    # Mixes in Enumerable and HashMethods to provide default implementations of
    # most methods that Ruby's hash supports. Also provides some basic file and
    # lock handling methods common to backends.
    class Base
      include Enumerable
      include HashMethods

      attr_reader :frontend
      attr_reader :path
      attr_reader :options

      # @param frontend [Hammerspace::Hash] the hash facade that delegates to
      #   this backend (used, e.g., to resolve default values)
      # @param path [String] directory that holds this backend's files
      # @param options [Hash] backend-specific options
      #
      # Filesystem sanity checks run only on first use: once the lockfile
      # exists, a previous process has already run check_fs here.
      def initialize(frontend, path, options={})
        @frontend = frontend
        @path = path
        @options = options

        check_fs unless File.exist?(lockfile_path)
      end

      # HashMethods (mixed in above) defines four methods that must be
      # overridden. The default implementations simply raise
      # NotImplementedError. The four methods are: [], []=, delete, and each.

      def close
        # No-op, should probably be overridden
      end

      def uid
        # No-op, should probably be overridden
      end

      # Check filesystem capabilities; warns (does not raise) when flock(2)
      # appears broken. Subclasses extend this with additional checks.
      def check_fs
        warn_flock unless flock_works?
      end

      # Probe whether flock(2) actually provides mutual exclusion on path's
      # filesystem (some network filesystems silently no-op it). Takes an
      # exclusive lock on a temp file through one descriptor, then expects a
      # non-blocking exclusive lock through a second descriptor to be refused.
      #
      # @return [Boolean] true when the second lock attempt fails as expected
      def flock_works?
        flock_works = false
        ensure_path_exists(path)
        lockfile = Tempfile.new(['flock_works.', '.lock'], path)
        begin
          lockfile.close
          File.open(lockfile.path) do |outer|
            outer.flock(File::LOCK_EX)
            File.open(lockfile.path) do |inner|
              # LOCK_NB makes flock return false instead of blocking; false
              # here means the outer lock is actually being enforced.
              flock_works = inner.flock(File::LOCK_EX | File::LOCK_NB) == false
            end
          end
        rescue
          # Best-effort probe: any failure is treated as "flock does not work".
        ensure
          lockfile.unlink
        end
        flock_works
      end

      protected

      def ensure_path_exists(path)
        FileUtils.mkdir_p(path)
      end

      # Run the given block while holding an exclusive (writer) lock on the
      # shared lockfile. The lock is released when File.open's block exits;
      # returns the value of the yielded block.
      def lock_for_write
        ensure_path_exists(path)
        File.open(lockfile_path, File::CREAT) do |lockfile|
          lockfile.flock(File::LOCK_EX)
          yield
        end
      end

      # Run the given block while holding a shared (reader) lock on the
      # shared lockfile; returns the value of the yielded block.
      def lock_for_read
        ensure_path_exists(path)
        File.open(lockfile_path, File::CREAT) do |lockfile|
          lockfile.flock(File::LOCK_SH)
          yield
        end
      end

      # Emit a red, backend-prefixed warning on stderr. Note this shadows
      # Kernel#warn for instances of this class and its subclasses.
      def warn(message)
        Kernel.warn "\e[31m#{self.class}: Warning: #{message}\e[0m"
      end

      private

      # Single lockfile used by lock_for_read/lock_for_write for all
      # cross-process coordination within this hammerspace directory.
      def lockfile_path
        File.join(path, 'hammerspace.lock')
      end

      def warn_flock
        warn "filesystem does not appear to support flock(2). " \
             "Concurrent access may not behave as expected."
      end

    end

  end
end

# Require all backends
Dir[File.expand_path("../backend/*.rb", __FILE__)].each { |f| require f }
@@ -0,0 +1,319 @@
require 'gnista'
require 'fileutils'
require 'securerandom'

module Hammerspace
  module Backend

    # Sparkey-backed storage engine.
    #
    # Writes go to a Sparkey log file in a private, uniquely named directory;
    # on flush, an index (hash file) is built and a "current" symlink is
    # atomically repointed at the new directory. Readers open the files behind
    # the "current" symlink. Reads and writes therefore alternate between
    # "logwriter open" and "hash open" states, managed by the open_*/close_*
    # helpers below.
    class Sparkey < Base

      # Extends Base#check_fs with a Sparkey-specific probe: verifying that
      # obsolete directories can be removed while files in them are open.
      def check_fs
        super
        warn_dir_cleanup unless dir_cleanup_works?
      end

      # Probe whether the filesystem allows removing a directory whose files
      # are still held open (required by the symlink-swap cleanup strategy in
      # close_logwriter). Simulates the full promote-then-delete cycle inside
      # a throwaway temp directory.
      #
      # NOTE(review): Dir.mktmpdir is defined by stdlib 'tmpdir', which is not
      # required here; it is pulled in indirectly via backend.rb's
      # require 'tempfile' — confirm.
      #
      # @return [Boolean] true when the obsolete directory was removed
      def dir_cleanup_works?
        dir_cleanup_works = false
        ensure_path_exists(path)
        begin
          Dir.mktmpdir('dir_cleanup_works.', path) do |tmpdir|
            test = File.join(tmpdir, 'test')
            test_tmp = File.join(tmpdir, 'test.tmp')
            test_file = File.join(test, 'file')
            test1 = File.join(tmpdir, 'test.1')
            test1_file = File.join(test1, 'file')
            test2 = File.join(tmpdir, 'test.2')
            test2_file = File.join(test2, 'file')

            # Create test.1 with a file in it and point the "test" symlink at
            # it (mirrors the "current" symlink in real operation).
            Dir.mkdir(test1)
            FileUtils.touch(test1_file)
            File.symlink(File.basename(test1), test)

            # Hold the file open while swapping the symlink to test.2 and
            # deleting test.1 -- exactly what happens when a writer promotes
            # a new directory while a reader still has files open.
            File.open(test_file) do
              Dir.mkdir(test2)
              FileUtils.touch(test2_file)
              File.symlink(File.basename(test2), test_tmp)
              File.rename(test_tmp, test)

              FileUtils.rm_rf(test1, :secure => true)

              dir_cleanup_works = File.directory?(test1) == false
            end
          end
        rescue
          # Best-effort probe: any failure means "cleanup does not work".
        end
        dir_cleanup_works
      end

      # Read a single key. Flushes any pending writes first, then falls back
      # to the frontend's default value semantics on a miss.
      def [](key)
        close_logwriter
        open_hash

        return @hash[key] if @hash && @hash.include?(key)
        frontend.default(key)
      end

      # Write a single key. Switches to write mode (closing any open reader
      # hash, opening a logwriter seeded with the current contents).
      def []=(key, value)
        close_hash
        open_logwriter

        @logwriter[key] = value
      end

      # Remove all keys; discards any pending writes and unlinks "current".
      # Returns the frontend to match Ruby's Hash#clear, which returns self.
      def clear
        close_hash
        close_logwriter_clear

        frontend
      end

      # Flush pending writes (promoting them to "current") and release all
      # open file handles.
      def close
        close_logwriter
        close_hash
      end

      # TODO: This currently always returns nil. If the key is not found,
      # return the default value. Also, support block usage.
      def delete(key)
        close_hash
        open_logwriter

        @logwriter.del(key)
      end

      def each(&block)
        close_logwriter

        # Open a private copy of the hash to ensure isolation during iteration.
        # Further, Gnista segfaults if the hash is closed during iteration (e.g.,
        # from interleaved reads and writes), so a private copy ensures that
        # the hash is only closed once iteration is complete.
        hash = open_hash_private

        # No data yet: behave like an empty hash (nil with a block, empty
        # Enumerator without one).
        unless hash
          return block_given? ? nil : Enumerator.new {}
        end

        if block_given?
          begin
            hash.each(&block)
          ensure
            hash.close
          end
          frontend
        else
          # Gnista does not support each w/o a block; emulate the behavior here.
          Enumerator.new do |y|
            begin
              hash.each { |*args| y << args }
            ensure
              hash.close
            end
          end
        end
      end

      def has_key?(key)
        close_logwriter
        open_hash

        @hash ? @hash.include?(key) : false
      end

      def keys
        close_logwriter
        open_hash

        @hash ? @hash.keys : []
      end

      # Replace the entire contents with `hash`. Unlike []=, the logwriter is
      # opened empty (not seeded with current contents) since everything is
      # being overwritten anyway.
      def replace(hash)
        close_hash
        open_logwriter_replace

        merge!(hash)
      end

      def size
        close_logwriter
        open_hash

        @hash ? @hash.size : 0
      end

      # Unique id of the directory currently backing reads; regenerated on
      # every write cycle, so it changes whenever the data changes.
      def uid
        close_logwriter
        open_hash

        @uid
      end

      def values
        close_logwriter
        open_hash

        @hash ? @hash.values : []
      end

      private

      # Enter write mode, seeding the new log file with the current contents
      # so that single-key updates preserve existing data.
      def open_logwriter
        @logwriter ||= begin
          # Create a new log file in a new, private directory and copy the
          # contents of the current hash over to it. Writes to this new file
          # can happen independently of all other writers, so no locking is
          # required.
          regenerate_uid
          ensure_path_exists(new_path)
          logwriter = Gnista::Logwriter.new(new_log_path)
          each { |key,value| logwriter[key] = value }
          logwriter
        end
      end

      # Enter write mode with an EMPTY log file (used by replace/clear paths).
      def open_logwriter_replace
        @logwriter ||= begin
          # Create a new log file in a new, private directory. Writes to this
          # new file can happen independently of all other writers, so no
          # locking is required.
          regenerate_uid
          ensure_path_exists(new_path)
          Gnista::Logwriter.new(new_log_path)
        end
      end

      # Leave write mode: index the log, then atomically promote the private
      # directory to "current" and remove the directory it replaced.
      def close_logwriter
        if @logwriter
          @logwriter.close
          @logwriter = nil

          # Create an index of the log file and write it to a hash file in the
          # same private directory. Again, this happens independently of all
          # other writers, so no locking is required.
          Gnista::Hash.write(new_hash_path, new_log_path)

          # Create a symlink pointed at the private directory. Give the symlink
          # a temporary name for now. Note that the target of the symlink is
          # the raw uid, not a full path, since symlink targets are relative.
          File.symlink(@uid, "#{new_path}.tmp")

          # Rename the symlink pointed at the new directory to "current", which
          # atomically promotes the new directory to be the current directory.
          # Only one process should do this at a time, and no readers should
          # try to open files while this is happening, so we need to take an
          # exclusive lock for this operation. While we are holding the lock,
          # note the old target of the "current" symlink if it exists.
          old_path = nil
          lock_for_write do
            old_path = File.readlink(cur_path) if File.symlink?(cur_path)
            File.rename("#{new_path}.tmp", cur_path)
          end

          # If there was an existing "current" symlink, the directory it
          # pointed to is now obsolete. Remove it and its contents.
          FileUtils.rm_rf(File.join(path, old_path), :secure => true) if old_path
        end
      end

      # Abort write mode and drop ALL data: discard the pending log file and
      # unlink "current" (used by clear).
      def close_logwriter_clear
        if @logwriter
          @logwriter.close
          @logwriter = nil

          # Delete the private directory and the new log file inside it.
          FileUtils.rm_rf(new_path, :secure => true)
        end

        # Remove the "current" symlink if it exists. Only one process should
        # do this at a time, and no readers should try to open files while
        # this is happening, so we need to take an exclusive lock for this
        # operation. While we are holding the lock, note the old target of
        # the "current" symlink if it exists.
        old_path = nil
        lock_for_write do
          if File.symlink?(cur_path)
            old_path = File.readlink(cur_path)
            File.unlink(cur_path)
          end
        end

        # If there was an existing "current" symlink, the directory it
        # pointed to is now obsolete. Remove it and its contents.
        FileUtils.rm_rf(File.join(path, old_path), :secure => true) if old_path
      end

      # Open (and memoize) the current hash for reads; leaves @hash nil when
      # no data has been written yet (GnistaException is swallowed).
      def open_hash
        # Take a shared lock before opening files. This avoids a situation
        # where a writer updates the files after we have opened the hash file
        # but before we have opened the log file. Once we have open file
        # descriptors it doesn't matter what happens to the files, so we can
        # release the lock immediately after opening. While we are holding the
        # lock, note the target of the "current" symlink.
        @hash ||= lock_for_read do
          begin
            hash = Gnista::Hash.new(cur_hash_path, cur_log_path)
            @uid = File.readlink(cur_path)
            hash
          rescue GnistaException
          end
        end
      end

      # Open a NON-memoized hash for iteration; caller owns closing it.
      # Returns nil when no data has been written yet.
      def open_hash_private
        # Take a shared lock before opening files. This avoids a situation
        # where a writer updates the files after we have opened the hash file
        # but before we have opened the log file. Once we have open file
        # descriptors it doesn't matter what happens to the files, so we can
        # release the lock immediately after opening.
        lock_for_read do
          begin
            Gnista::Hash.new(cur_hash_path, cur_log_path)
          rescue GnistaException
          end
        end
      end

      def close_hash
        if @hash
          @hash.close
          @hash = nil
        end
      end

      # New uid per write cycle; pid prefix plus UUID keeps concurrent
      # writers' private directories from colliding.
      def regenerate_uid
        @uid = "#{Process.pid}_#{SecureRandom.uuid}"
      end

      # --- Path helpers: "new" paths are this writer's private directory,
      # --- "cur" paths go through the shared "current" symlink.

      def new_path
        File.join(path, @uid)
      end

      def new_log_path
        File.join(new_path, 'hammerspace.spl')
      end

      def new_hash_path
        File.join(new_path, 'hammerspace.spi')
      end

      def cur_path
        File.join(path, 'current')
      end

      def cur_log_path
        File.join(cur_path, 'hammerspace.spl')
      end

      def cur_hash_path
        File.join(cur_path, 'hammerspace.spi')
      end

      def warn_dir_cleanup
        warn "filesystem does not appear to allow removing directories when files " \
             "within are still in use. Directory cleanup may not behave as expected."
      end

    end

  end
end