hammerspace 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Vagrantfile ADDED
@@ -0,0 +1,30 @@
1
VAGRANTFILE_API_VERSION = "2"
VM_BOX_URL = "https://s3.amazonaws.com/gsc-vagrant-boxes/ubuntu-12.04-omnibus-chef.box"

# Share this checkout into the VM at a fixed location.
HOST_SRC = File.expand_path('..', __FILE__)
LOCAL_SRC = '/home/vagrant/hammerspace'

Vagrant.configure(VAGRANTFILE_API_VERSION) do |outer|
  outer.vm.define 'hammerspace-development' do |node|
    # base image is our standard airbase -- ubuntu 12.04
    node.vm.box = "airbase"
    node.vm.box_url = VM_BOX_URL

    node.vm.synced_folder HOST_SRC, LOCAL_SRC

    # Modest VM resources: 2 CPUs, 512 MB RAM.
    node.vm.provider "virtualbox" do |vbox|
      vbox.customize ['modifyvm', :id, '--cpus', 2, '--memory', 512]
    end

    # Provision with chef-solo using the cookbooks/roles bundled in .chef/.
    node.vm.provision :chef_solo do |solo|
      solo.cookbooks_path = '.chef/cookbooks'
      solo.roles_path = '.chef/roles'
      solo.data_bags_path = '.chef/data_bags'

      solo.add_role('hammerspace-development')
    end
  end
end
@@ -0,0 +1,21 @@
1
# encoding: utf-8
# Gem specification for hammerspace. Adds lib/ to the load path so the
# version constant can be read straight from the source tree.
$LOAD_PATH.push File.expand_path("../lib", __FILE__)

require 'hammerspace/version'

Gem::Specification.new do |spec|
  spec.name        = "hammerspace"
  spec.version     = Hammerspace::VERSION
  spec.platform    = Gem::Platform::RUBY

  spec.authors     = ["Jon Tai", "Nelson Gauthier"]
  spec.email       = ["jon.tai@airbnb.com", "nelson@airbnb.com"]
  spec.homepage    = "https://github.com/airbnb/hammerspace"
  spec.summary     = "Hash-like interface to persistent, concurrent, off-heap storage"
  spec.description = "A convenient place to store giant hammers"

  # File lists come from git so the gem contents always match the repo.
  spec.files        = `git ls-files`.split("\n")
  spec.test_files   = `git ls-files -- spec/*`.split("\n")
  spec.require_path = 'lib'

  spec.add_runtime_dependency 'gnista', '0.0.4'
end
@@ -0,0 +1,12 @@
1
require 'hammerspace/version'
require 'hammerspace/hash_methods'
require 'hammerspace/backend'
require 'hammerspace/hash'

# Top-level namespace for the hammerspace gem.
module Hammerspace

  # Convenience constructor: Hammerspace.new(...) is shorthand for
  # Hammerspace::Hash.new(...). All arguments and any block are
  # forwarded unchanged.
  def self.new(path, options = {}, *args, &block)
    Hammerspace::Hash.new(path, options, *args, &block)
  end

end
@@ -0,0 +1,106 @@
1
require 'fileutils'
require 'tempfile'

module Hammerspace
  module Backend

    # "Backend" class from which concrete backends extend
    #
    # Mixes in Enumerable and HashMethods to provide default implementations of
    # most methods that Ruby's hash supports. Also provides some basic file and
    # lock handling methods common to backends.
    class Base
      include Enumerable
      include HashMethods

      attr_reader :frontend
      attr_reader :path
      attr_reader :options

      # frontend - the Hammerspace::Hash that owns this backend
      # path     - directory that holds the data files and the lockfile
      # options  - backend-specific options hash
      def initialize(frontend, path, options={})
        @frontend = frontend
        @path     = path
        @options  = options

        # Probe the filesystem only on first use of this directory; an
        # existing lockfile implies the check already ran here.
        check_fs unless File.exist?(lockfile_path)
      end

      # HashMethods (mixed in above) defines four methods that must be
      # overridden; their defaults raise NotImplementedError. The four
      # methods are: [], []=, delete, and each.

      def close
        # No-op, should probably be overridden
      end

      def uid
        # No-op, should probably be overridden
      end

      # Emits a warning if the filesystem fails the flock(2) probe.
      def check_fs
        warn_flock unless flock_works?
      end

      # Returns true when flock(2) provides real mutual exclusion here:
      # while one handle holds an exclusive lock, a second handle's
      # non-blocking exclusive lock attempt must fail (return false).
      def flock_works?
        works = false
        ensure_path_exists(path)
        probe = Tempfile.new(['flock_works.', '.lock'], path)
        begin
          probe.close
          File.open(probe.path) do |first|
            first.flock(File::LOCK_EX)
            File.open(probe.path) do |second|
              works = second.flock(File::LOCK_EX | File::LOCK_NB) == false
            end
          end
        rescue
          # Best-effort probe: any failure is reported as "flock broken".
        ensure
          probe.unlink
        end
        works
      end

      protected

      def ensure_path_exists(path)
        FileUtils.mkdir_p(path)
      end

      # Runs the given block while holding an exclusive lock on the
      # shared lockfile. Returns the block's value.
      def lock_for_write
        ensure_path_exists(path)
        File.open(lockfile_path, File::CREAT) do |lockfile|
          lockfile.flock(File::LOCK_EX)
          yield
        end
      end

      # Runs the given block while holding a shared lock on the shared
      # lockfile. Returns the block's value.
      def lock_for_read
        ensure_path_exists(path)
        File.open(lockfile_path, File::CREAT) do |lockfile|
          lockfile.flock(File::LOCK_SH)
          yield
        end
      end

      # Prefixes the message with the class name and colors it red.
      def warn(message)
        Kernel.warn "\e[31m#{self.class}: Warning: #{message}\e[0m"
      end

      private

      def lockfile_path
        File.join(path, 'hammerspace.lock')
      end

      def warn_flock
        warn "filesystem does not appear to support flock(2). " \
             "Concurrent access may not behave as expected."
      end

    end

  end
end

# Require all backends
Dir[File.expand_path("../backend/*.rb", __FILE__)].each { |f| require f }
@@ -0,0 +1,319 @@
1
require 'gnista'
require 'fileutils'
require 'securerandom'

module Hammerspace
  module Backend

    # Sparkey-backed storage (via the Gnista bindings).
    #
    # On-disk layout under +path+: each generation of data lives in its own
    # private directory named "<pid>_<uuid>" containing a Sparkey log file
    # (hammerspace.spl) and hash/index file (hammerspace.spi). A symlink
    # named "current" points at the live generation; it is swapped
    # atomically (symlink + rename) under an exclusive lock when a writer
    # finishes, and the superseded directory is then removed.
    class Sparkey < Base

      # Extends the base filesystem probe with a check that obsolete
      # directories can be removed while files inside are still open.
      def check_fs
        super
        warn_dir_cleanup unless dir_cleanup_works?
      end

      # Simulates the promote-then-cleanup sequence used by
      # close_logwriter: create dir test.1 behind symlink "test", hold a
      # file in it open, atomically repoint the symlink at test.2, then
      # try to rm -rf test.1. Returns true only if the removal succeeded
      # while the file was open. Any exception is swallowed and reported
      # as "cleanup does not work".
      #
      # NOTE(review): Dir.mktmpdir is defined by stdlib 'tmpdir', which is
      # only loaded transitively (backend.rb requires 'tempfile') --
      # consider an explicit require 'tmpdir' here.
      def dir_cleanup_works?
        dir_cleanup_works = false
        ensure_path_exists(path)
        begin
          Dir.mktmpdir('dir_cleanup_works.', path) do |tmpdir|
            test = File.join(tmpdir, 'test')
            test_tmp = File.join(tmpdir, 'test.tmp')
            test_file = File.join(test, 'file')
            test1 = File.join(tmpdir, 'test.1')
            test1_file = File.join(test1, 'file')
            test2 = File.join(tmpdir, 'test.2')
            test2_file = File.join(test2, 'file')

            Dir.mkdir(test1)
            FileUtils.touch(test1_file)
            # Symlink targets are relative (basename), matching the way
            # close_logwriter creates the real "current" symlink.
            File.symlink(File.basename(test1), test)

            File.open(test_file) do
              Dir.mkdir(test2)
              FileUtils.touch(test2_file)
              File.symlink(File.basename(test2), test_tmp)
              File.rename(test_tmp, test)

              FileUtils.rm_rf(test1, :secure => true)

              dir_cleanup_works = File.directory?(test1) == false
            end
          end
        rescue
        end
        dir_cleanup_works
      end

      # Read access: flushes any in-progress writes, then reads from the
      # current hash. Falls back to the frontend's default value/proc
      # when the key is absent (or no hash exists yet).
      def [](key)
        close_logwriter
        open_hash

        return @hash[key] if @hash && @hash.include?(key)
        frontend.default(key)
      end

      # Write access: switches to writer mode (closing any open reader)
      # and appends the key/value pair to the private log file.
      def []=(key, value)
        close_hash
        open_logwriter

        @logwriter[key] = value
      end

      # Discards any pending writes and removes the current generation.
      # Returns the frontend, mirroring Ruby's Hash#clear return value.
      def clear
        close_hash
        close_logwriter_clear

        frontend
      end

      # Flushes pending writes (promoting them to "current") and releases
      # all open file handles.
      def close
        close_logwriter
        close_hash
      end

      # TODO: This currently always returns nil. If the key is not found,
      # return the default value. Also, support block usage.
      def delete(key)
        close_hash
        open_logwriter

        @logwriter.del(key)
      end

      def each(&block)
        close_logwriter

        # Open a private copy of the hash to ensure isolation during iteration.
        # Further, Gnista segfaults if the hash is closed during iteration (e.g.,
        # from interleaved reads and writes), so a private copy ensures that
        # the hash is only closed once iteration is complete.
        hash = open_hash_private

        unless hash
          return block_given? ? nil : Enumerator.new {}
        end

        if block_given?
          begin
            hash.each(&block)
          ensure
            hash.close
          end
          frontend
        else
          # Gnista does not support each w/o a block; emulate the behavior here.
          Enumerator.new do |y|
            begin
              hash.each { |*args| y << args }
            ensure
              hash.close
            end
          end
        end
      end

      def has_key?(key)
        close_logwriter
        open_hash

        @hash ? @hash.include?(key) : false
      end

      def keys
        close_logwriter
        open_hash

        @hash ? @hash.keys : []
      end

      # Replaces the entire contents with +hash+: starts a fresh, empty
      # generation (ignoring current contents) and merges the new pairs in.
      def replace(hash)
        close_hash
        open_logwriter_replace

        merge!(hash)
      end

      def size
        close_logwriter
        open_hash

        @hash ? @hash.size : 0
      end

      # Unique id of the generation currently being read; changes whenever
      # a writer promotes a new generation.
      def uid
        close_logwriter
        open_hash

        @uid
      end

      def values
        close_logwriter
        open_hash

        @hash ? @hash.values : []
      end

      private

      def open_logwriter
        @logwriter ||= begin
          # Create a new log file in a new, private directory and copy the
          # contents of the current hash over to it. Writes to this new file
          # can happen independently of all other writers, so no locking is
          # required.
          regenerate_uid
          ensure_path_exists(new_path)
          logwriter = Gnista::Logwriter.new(new_log_path)
          each { |key,value| logwriter[key] = value }
          logwriter
        end
      end

      def open_logwriter_replace
        @logwriter ||= begin
          # Create a new log file in a new, private directory. Writes to this
          # new file can happen independently of all other writers, so no
          # locking is required.
          regenerate_uid
          ensure_path_exists(new_path)
          Gnista::Logwriter.new(new_log_path)
        end
      end

      # Finalizes pending writes: indexes the log, then atomically promotes
      # the private directory to "current" and deletes the old generation.
      def close_logwriter
        if @logwriter
          @logwriter.close
          @logwriter = nil

          # Create an index of the log file and write it to a hash file in the
          # same private directory. Again, this happens independently of all
          # other writers, so no locking is required.
          Gnista::Hash.write(new_hash_path, new_log_path)

          # Create a symlink pointed at the private directory. Give the symlink
          # a temporary name for now. Note that the target of the symlink is
          # the raw uid, not a full path, since symlink targets are relative.
          File.symlink(@uid, "#{new_path}.tmp")

          # Rename the symlink pointed at the new directory to "current", which
          # atomically promotes the new directory to be the current directory.
          # Only one process should do this at a time, and no readers should
          # try to open files while this is happening, so we need to take an
          # exclusive lock for this operation. While we are holding the lock,
          # note the old target of the "current" symlink if it exists.
          old_path = nil
          lock_for_write do
            old_path = File.readlink(cur_path) if File.symlink?(cur_path)
            File.rename("#{new_path}.tmp", cur_path)
          end

          # If there was an existing "current" symlink, the directory it
          # pointed to is now obsolete. Remove it and its contents.
          FileUtils.rm_rf(File.join(path, old_path), :secure => true) if old_path
        end
      end

      # Discards pending writes and removes the "current" generation
      # entirely; used by #clear.
      def close_logwriter_clear
        if @logwriter
          @logwriter.close
          @logwriter = nil

          # Delete the private directory and the new log file inside it.
          FileUtils.rm_rf(new_path, :secure => true)
        end

        # Remove the "current" symlink if it exists. Only one process should
        # do this at a time, and no readers should try to open files while
        # this is happening, so we need to take an exclusive lock for this
        # operation. While we are holding the lock, note the old target of
        # the "current" symlink if it exists.
        old_path = nil
        lock_for_write do
          if File.symlink?(cur_path)
            old_path = File.readlink(cur_path)
            File.unlink(cur_path)
          end
        end

        # If there was an existing "current" symlink, the directory it
        # pointed to is now obsolete. Remove it and its contents.
        FileUtils.rm_rf(File.join(path, old_path), :secure => true) if old_path
      end

      # Opens the current hash into @hash (memoized). Leaves @hash nil if
      # no generation exists yet (GnistaException is swallowed).
      def open_hash
        # Take a shared lock before opening files. This avoids a situation
        # where a writer updates the files after we have opened the hash file
        # but before we have opened the log file. Once we have open file
        # descriptors it doesn't matter what happens to the files, so we can
        # release the lock immediately after opening. While we are holding the
        # lock, note the target of the "current" symlink.
        @hash ||= lock_for_read do
          begin
            hash = Gnista::Hash.new(cur_hash_path, cur_log_path)
            @uid = File.readlink(cur_path)
            hash
          rescue GnistaException
          end
        end
      end

      # Like open_hash, but returns an unmemoized handle that the caller
      # owns and must close; used by #each for isolated iteration.
      def open_hash_private
        # Take a shared lock before opening files. This avoids a situation
        # where a writer updates the files after we have opened the hash file
        # but before we have opened the log file. Once we have open file
        # descriptors it doesn't matter what happens to the files, so we can
        # release the lock immediately after opening.
        lock_for_read do
          begin
            Gnista::Hash.new(cur_hash_path, cur_log_path)
          rescue GnistaException
          end
        end
      end

      def close_hash
        if @hash
          @hash.close
          @hash = nil
        end
      end

      # Uids embed the pid plus a UUID so concurrent writers in different
      # processes never collide on directory names.
      def regenerate_uid
        @uid = "#{Process.pid}_#{SecureRandom.uuid}"
      end

      # Paths for the in-progress private generation (based on @uid).
      def new_path
        File.join(path, @uid)
      end

      def new_log_path
        File.join(new_path, 'hammerspace.spl')
      end

      def new_hash_path
        File.join(new_path, 'hammerspace.spi')
      end

      # Paths reached through the "current" symlink.
      def cur_path
        File.join(path, 'current')
      end

      def cur_log_path
        File.join(cur_path, 'hammerspace.spl')
      end

      def cur_hash_path
        File.join(cur_path, 'hammerspace.spi')
      end

      def warn_dir_cleanup
        warn "filesystem does not appear to allow removing directories when files " \
             "within are still in use. Directory cleanup may not behave as expected."
      end

    end

  end
end