hammerspace 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ ZGMzNzRmZWE0Yjc1NjQ4NDJhMTc3NzA0YmZiYzAxZDIyMWNkNDRiYQ==
5
+ data.tar.gz: !binary |-
6
+ ZjdjYjY2NzI4MDQxYTA3YzYzZmM5MjVlOTJjY2RhMzI4Mzk3OGU5MQ==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ NDU0NzZhYThhNWQ4YTdhYTQ2MDkzMmI2NTFlM2IzNjkwYTdlMDk0MjllNTgw
10
+ MzQwNDdkOWYxMmZlMzI0OTI3M2NmYjQ4ZjlhMDMwYWEwZThjYjNiMTIzNGUw
11
+ ZmM0NjAyNDBmYmE1NzkwMjdmMmU0MTA3YTdmYjljZTQwOTA5ODM=
12
+ data.tar.gz: !binary |-
13
+ NTZhNTU2YjUxNjY0ZDYwNDQyYTFjMmIxODE4MzY5NjE5Y2RmZDcyOWIxMDg0
14
+ ZTQ1NzE5OTIwMTUzZTk2ZjgwNTY0MWVhOTJmOTNjN2Q3Njk2MGNiYjE1MDcw
15
+ NTlmODY5MTI5ZjlkYWQ2MWJhZGE3NTc1NzMyYTBiODQ5ZmI3Mzk=
@@ -0,0 +1,7 @@
1
+ default.hammerspace_development.user = 'vagrant'
2
+ default.hammerspace_development.hammerspace.home = '/home/vagrant/hammerspace'
3
+ default.hammerspace_development.hammerspace.gem_home = '/home/vagrant/.gems'
4
+ default.hammerspace_development.hammerspace.root = '/var/lib/hammerspace'
5
+
6
+ default.hammerspace_development.ruby.version = '1.9.1'
7
+ default.hammerspace_development.sparkey.version = '3b3b061a706c33ae764c2bb8e61888b7a7a26c12'
@@ -0,0 +1,6 @@
1
+ default.hammerspace_development.essential.packages = [
2
+ 'build-essential',
3
+ 'autoconf',
4
+ 'libtool',
5
+ 'git',
6
+ ]
@@ -0,0 +1,7 @@
1
+ default.hammerspace_development.sparkey.home = "/home/#{default.hammerspace_development.user}/sparkey"
2
+ default.hammerspace_development.sparkey.source_file = "https://github.com/spotify/sparkey/archive/#{node.hammerspace_development.sparkey.version}.tar.gz"
3
+ default.hammerspace_development.sparkey.local_dir = File.join(default.hammerspace_development.sparkey.home, "sparkey-#{node.hammerspace_development.sparkey.version}")
4
+
5
+ default.hammerspace_development.sparkey.packages = [
6
+ 'libsnappy-dev',
7
+ ]
@@ -0,0 +1,32 @@
1
+ include_recipe "hammerspace-development::essential"
2
+ include_recipe "hammerspace-development::sparkey"
3
+ include_recipe "hammerspace-development::ruby"
4
+
5
+ template "/home/#{node.hammerspace_development.user}/.bash_profile" do
6
+ owner node.hammerspace_development.user
7
+ group node.hammerspace_development.user
8
+ mode '0755'
9
+ end
10
+
11
+ directory node.hammerspace_development.hammerspace.gem_home do
12
+ owner node.hammerspace_development.user
13
+ group node.hammerspace_development.user
14
+ mode '0755'
15
+ recursive true
16
+ action :create
17
+ end
18
+
19
+ execute "hammerspace-bundle-install" do
20
+ cwd node.hammerspace_development.hammerspace.home
21
+ user node.hammerspace_development.user
22
+ group node.hammerspace_development.user
23
+ command "bundle install --path #{node.hammerspace_development.hammerspace.gem_home}"
24
+ end
25
+
26
+ directory node.hammerspace_development.hammerspace.root do
27
+ owner node.hammerspace_development.user
28
+ group node.hammerspace_development.user
29
+ mode '0755'
30
+ recursive true
31
+ action :create
32
+ end
@@ -0,0 +1,9 @@
1
+ execute "first-apt-get-update" do
2
+ command "apt-get update"
3
+ end
4
+
5
+ node.hammerspace_development.essential.packages.each do |p|
6
+ package p do
7
+ action :upgrade
8
+ end
9
+ end
@@ -0,0 +1,21 @@
1
+ package "ruby#{node.hammerspace_development.ruby.version}" do
2
+ action :upgrade
3
+ options "--force-yes"
4
+ end
5
+
6
+ package "ruby#{node.hammerspace_development.ruby.version}-dev" do
7
+ action :upgrade
8
+ options "--force-yes"
9
+ end
10
+
11
+ # other common packages needed by ruby gems
12
+ ["libxslt-dev", "libxml2-dev"].each do |p|
13
+ package p do
14
+ action :upgrade
15
+ end
16
+ end
17
+
18
+ gem_package 'bundler' do
19
+ action :install
20
+ end
21
+
@@ -0,0 +1,56 @@
1
+ sparkey_local_file = File.join(
2
+ node.hammerspace_development.sparkey.home,
3
+ File.basename(node.hammerspace_development.sparkey.source_file))
4
+
5
+ node.hammerspace_development.sparkey.packages.each do |p|
6
+ package p do
7
+ action :upgrade
8
+ end
9
+ end
10
+
11
+ directory node.hammerspace_development.sparkey.home do
12
+ owner node.hammerspace_development.user
13
+ group node.hammerspace_development.user
14
+ mode '0755'
15
+ recursive true
16
+ action :create
17
+ end
18
+
19
+ remote_file sparkey_local_file do
20
+ source node.hammerspace_development.sparkey.source_file
21
+ owner node.hammerspace_development.user
22
+ group node.hammerspace_development.user
23
+ mode "644"
24
+
25
+ action :create_if_missing
26
+ notifies :run, "execute[extract-sparkey-#{node.hammerspace_development.sparkey.version}]", :immediately
27
+ end
28
+
29
+ execute "extract-sparkey-#{node.hammerspace_development.sparkey.version}" do
30
+ cwd node.hammerspace_development.sparkey.home
31
+ user node.hammerspace_development.user
32
+ command "tar -xvzf #{sparkey_local_file}"
33
+
34
+ action :nothing
35
+ notifies :run, "bash[build-sparkey-#{node.hammerspace_development.sparkey.version}]", :immediately
36
+ end
37
+
38
+ bash "build-sparkey-#{node.hammerspace_development.sparkey.version}" do
39
+ cwd node.hammerspace_development.sparkey.local_dir
40
+ user node.hammerspace_development.user
41
+ code <<-EOS
42
+ autoreconf --install
43
+ ./configure
44
+ make
45
+ EOS
46
+
47
+ action :nothing
48
+ notifies :run, "execute[install-sparkey-#{node.hammerspace_development.sparkey.version}]", :immediately
49
+ end
50
+
51
+ execute "install-sparkey-#{node.hammerspace_development.sparkey.version}" do
52
+ cwd node.hammerspace_development.sparkey.local_dir
53
+ command "make install && sudo ldconfig"
54
+
55
+ action :nothing
56
+ end
@@ -0,0 +1,2 @@
1
+ export HAMMERSPACE_ROOT=<%= node.hammerspace_development.hammerspace.root %>
2
+ cd <%= node.hammerspace_development.hammerspace.home %>
@@ -0,0 +1,6 @@
1
+ name "hammerspace-development"
2
+ description "development virtual machine for hammerspace"
3
+
4
+ run_list(
5
+ "recipe[hammerspace-development]",
6
+ )
data/.gitignore ADDED
@@ -0,0 +1,8 @@
1
+ *.gem
2
+ Gemfile.lock
3
+ .rvmrc
4
+ .DS_Store
5
+ /coverage
6
+
7
+ # IDEs
8
+ .idea
data/CHANGELOG.md ADDED
@@ -0,0 +1,22 @@
1
+ # v0.1.2
2
+ * Support vagrant for local development.
3
+ * Remove dependency on colored gem.
4
+ * Add MIT license.
5
+ * Documentation updates.
6
+
7
+ # v0.1.1
8
+ * Expose the uid of the directory that the current reader is reading from.
9
+ * Documentation updates.
10
+
11
+ # v0.1.0
12
+ * Change semantics of block passed to constructor, now used to specify default_proc.
13
+ * Add support for most Ruby Hash methods.
14
+ * Major internal refactor, new HashMethods module allows new backends to be written more easily.
15
+ * Add documentation.
16
+
17
+ # v0.0.2
18
+ * Add support for multiple writers with last-write-wins semantics.
19
+ * Implement `clear` method.
20
+
21
+ # v0.0.1
22
+ * Initial release.
data/Gemfile ADDED
@@ -0,0 +1,10 @@
1
+ source 'https://rubygems.org'
2
+ gemspec
3
+
4
+ gem 'trollop', '~> 2.0'
5
+
6
+ group :test do
7
+ gem 'rspec', '~> 2.13.0'
8
+ gem 'rspec-instafail', '~> 0.2'
9
+ gem 'simplecov', :require => false, :group => :test
10
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Airbnb, Inc.
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,522 @@
1
+ Hammerspace
2
+ ===========
3
+
4
+ Hash-like interface to persistent, concurrent, off-heap storage
5
+
6
+
7
+ ## What is Hammerspace?
8
+
9
+ _[Hammerspace](http://en.wikipedia.org/wiki/Hammerspace) ... is a
10
+ fan-envisioned extradimensional, instantly accessible storage area in fiction,
11
+ which is used to explain how animated, comic, and game characters can produce
12
+ objects out of thin air._
13
+
14
+ This gem provides persistent, concurrently-accessible off-heap storage of
15
+ strings with a familiar hash-like interface. It is optimized for bulk writes
16
+ and random reads.
17
+
18
+
19
+ ## Motivation
20
+
21
+ Applications often use data that never changes or changes very infrequently. In
22
+ many cases, some latency is acceptable when accessing this data. For example, a
23
+ user's profile may be loaded from a web service, a database, or an external
24
+ shared cache like memcache. In other cases, latency is much more sensitive. For
25
+ example, translations may be used many times and incurring even a ~2ms delay to
26
+ access them from an external cache would be prohibitively slow.
27
+
28
+ To work around the performance issue, this type of data is often loaded into
29
+ the application at startup. Unfortunately, this means the data is stored on the
30
+ heap, where the garbage collector must scan over the objects on every run (at
31
+ least in the case of Ruby MRI). Further, for application servers that utilize
32
+ multiple processes, each process has its own copy of the data which is an
33
+ inefficient use of memory.
34
+
35
+ Hammerspace solves these problems by moving the data off the heap onto disk.
36
+ Leveraging libraries and data structures optimized for bulk writes and random
37
+ reads allows an acceptable level of performance to be maintained. Because the
38
+ data is persistent, it does not need to be reloaded from an external cache or
39
+ service on application startup unless the data has changed.
40
+
41
+ Unfortunately, these low-level libraries don't always support concurrent
42
+ writers. Hammerspace adds concurrency control to allow multiple processes to
43
+ update and read from a single shared copy of the data safely. Finally,
44
+ hammerspace's interface is designed to mimic Ruby's `Hash` to make integrating
45
+ with existing applications simple and straightforward. Different low-level
46
+ libraries can be used by implementing a new backend that uses the library.
47
+ (Currently, only [Sparkey](https://github.com/spotify/sparkey) is supported.)
48
+ Backends only need to implement a small set of methods (`[]`, `[]=`, `close`,
49
+ `delete`, `each`, `uid`), but can override the default implementation of other
50
+ methods if the underlying library supports more efficient implementations.
51
+
52
+ ## Installation
53
+
54
+ ### Requirements
55
+
56
+ * [Gnista](https://github.com/emnl/gnista), Ruby bindings for Sparkey
57
+ * [Sparkey](https://github.com/spotify/sparkey), constant key/value storage library
58
+ * [Snappy](https://code.google.com/p/snappy/), compression/decompression library (unused, but required to compile Sparkey)
59
+ * A filesystem that supports `flock(2)` and unlinking files/directories with outstanding file descriptors (ext3/4 will do just fine)
60
+
61
+
62
+ ### Installation
63
+
64
+ Add the following line to your Gemfile:
65
+
66
+ gem 'hammerspace'
67
+
68
+ Then run:
69
+
70
+ bundle
71
+
72
+ ### Vagrant
73
+
74
+ To make development easier, the source tree contains a Vagrantfile and a small
75
+ cookbook to install all the prerequisites. The vagrant environment also serves
76
+ as a consistent environment to run the test suite.
77
+
78
+ To use it, make sure you have vagrant installed, then:
79
+
80
+ vagrant up
81
+ vagrant ssh
82
+ bundle exec rspec
83
+
84
+
85
+ ## Usage
86
+
87
+ ### Getting Started
88
+
89
+ For the most part, hammerspace acts like a Ruby hash. But since it's a hash
90
+ that persists on disk, you have to tell it where to store the files. The
91
+ enclosing directory and any parent directories are created if they don't
92
+ already exist.
93
+
94
+ ```ruby
95
+ h = Hammerspace.new("/tmp/hammerspace")
96
+
97
+ h["cartoons"] = "mallets"
98
+ h["games"] = "inventory"
99
+ h["rubyists"] = "data"
100
+
101
+ h.size #=> 3
102
+ h["cartoons"] #=> "mallets"
103
+
104
+ h.map { |k,v| "#{k.capitalize} use hammerspace to store #{v}." }
105
+
106
+ h.close
107
+ ```
108
+
109
+ You should call `close` on the hammerspace object when you're done with it.
110
+ This flushes any pending writes to disk and closes any open file handles.
111
+
112
+
113
+ ### Options
114
+
115
+ The constructor takes a hash of options as an optional second argument.
116
+ Currently the only option supported is `:backend` which specifies which backend
117
+ class to use. Since there is only one backend supported at this time, there is
118
+ currently no reason to pass this argument.
119
+
120
+ ```ruby
121
+ h = Hammerspace.new("/tmp/hammerspace", {:backend => Hammerspace::Backend::Sparkey})
122
+ ```
123
+
124
+
125
+ ### Default Values
126
+
127
+ The constructor takes a default value as an optional third argument. This
128
+ functions the same as Ruby's `Hash`, except with `Hash` it is the first
129
+ argument.
130
+
131
+ ```ruby
132
+ h = Hammerspace.new("/tmp/hammerspace", {}, "default")
133
+ h["foo"] = "bar"
134
+ h["foo"] #=> "bar"
135
+ h["new"] #=> "default"
136
+ h.close
137
+ ```
138
+
139
+ The constructor also takes a block to specify a default Proc, which works the
140
+ same way as Ruby's `Hash`. As with `Hash`, it is the block's responsibility to
141
+ store the value in the hash if required.
142
+
143
+ ```ruby
144
+ h = Hammerspace.new("/tmp/hammerspace") { |hash, key| hash[key] = "#{key} (default)" }
145
+ h["new"] #=> "new (default)"
146
+ h.has_key?("new") #=> true
147
+ h.close
148
+ ```
149
+
150
+
151
+ ### Supported Data Types
152
+
153
+ Only string keys and values are supported.
154
+
155
+ ```ruby
156
+ h = Hammerspace.new("/tmp/hammerspace")
157
+ h[1] = "foo" #=> TypeError
158
+ h["fixnum"] = 8 #=> TypeError
159
+ h["nil"] = nil #=> TypeError
160
+ h.close
161
+ ```
162
+
163
+ Ruby hashes store references to objects, but hammerspace stores raw bytes. A
164
+ new Ruby `String` object is created from those bytes when a key is accessed.
165
+
166
+ ```ruby
167
+ value = "bar"
168
+
169
+ hash = {"foo" => value}
170
+ hash["foo"] == value #=> true
171
+ hash["foo"].equal?(value) #=> true
172
+
173
+ hammerspace = Hammerspace.new("/tmp/hammerspace")
174
+ hammerspace["foo"] = value
175
+ hammerspace["foo"] == value #=> true
176
+ hammerspace["foo"].equal?(value) #=> false
177
+ hammerspace.close
178
+ ```
179
+
180
+ Since every access results in a new `String` object, mutating values doesn't
181
+ work unless you create an explicit reference to the string.
182
+
183
+ ```ruby
184
+ h = Hammerspace.new("/tmp/hammerspace")
185
+ h["foo"] = "bar"
186
+
187
+ # This doesn't work like Ruby's Hash because every access creates a new object
188
+ h["foo"].upcase!
189
+ h["foo"] #=> "bar"
190
+
191
+ # An explicit reference is required
192
+ value = h["foo"]
193
+ value.upcase!
194
+ value #=> "BAR"
195
+
196
+ # Another access, another new object
197
+ h["foo"] #=> "bar"
198
+
199
+ h.close
200
+ ```
201
+
202
+ This also implies that strings "lose" their encoding when retrieved from
203
+ hammerspace.
204
+
205
+ ```ruby
206
+ value = "bar"
207
+ value.encoding #=> #<Encoding:UTF-8>
208
+
209
+ h = Hammerspace.new("/tmp/hammerspace")
210
+ h["foo"] = value
211
+ h["foo"].encoding #=> #<Encoding:ASCII-8BIT>
212
+ h.close
213
+ ```
214
+
215
+ If you require strings in UTF-8, make sure strings are encoded as UTF-8 when
216
+ storing the key, then force the encoding to be UTF-8 when accessing the key.
217
+
218
+ ```ruby
219
+ h[key] = value.encode('utf-8')
220
+ value = h[key].force_encoding('utf-8')
221
+ ```
222
+
223
+
224
+ ### Persistence
225
+
226
+ Hammerspace objects are backed by files on disk, so even a new object may
227
+ already have data in it.
228
+
229
+ ```ruby
230
+ h = Hammerspace.new("/tmp/hammerspace")
231
+ h["foo"] = "bar"
232
+ h.close
233
+
234
+ h = Hammerspace.new("/tmp/hammerspace")
235
+ h["foo"] #=> "bar"
236
+ h.close
237
+ ```
238
+
239
+ Calling `clear` deletes the data files on disk. The parent directory is not
240
+ removed, nor is it guaranteed to be empty. Some files containing metadata may
241
+ still be present, e.g., lock files.
242
+
243
+
244
+ ### Concurrency
245
+
246
+ Multiple concurrent readers are supported. Readers are isolated from writers,
247
+ i.e., reads are consistent to the time that the reader was opened. Note that
248
+ the reader opens its files lazily on first read, not when the hammerspace
249
+ object is created.
250
+
251
+ ```ruby
252
+ h = Hammerspace.new("/tmp/hammerspace")
253
+ h["foo"] = "bar"
254
+ h.close
255
+
256
+ reader1 = Hammerspace.new("/tmp/hammerspace")
257
+ reader1["foo"] #=> "bar"
258
+
259
+ writer = Hammerspace.new("/tmp/hammerspace")
260
+ writer["foo"] = "updated"
261
+ writer.close
262
+
263
+ # Still "bar" because reader1 opened its files before the write
264
+ reader1["foo"] #=> "bar"
265
+
266
+ # Updated key is visible because reader2 opened its files after the write
267
+ reader2 = Hammerspace.new("/tmp/hammerspace")
268
+ reader2["foo"] #=> "updated"
269
+ reader2.close
270
+
271
+ reader1.close
272
+ ```
273
+
274
+ A new hammerspace object does not necessarily need to be created. Calling
275
+ `close` will close the files, then the reader will open them lazily again on
276
+ the next read.
277
+
278
+ ```ruby
279
+ h = Hammerspace.new("/tmp/hammerspace")
280
+ h["foo"] = "bar"
281
+ h.close
282
+
283
+ reader = Hammerspace.new("/tmp/hammerspace")
284
+ reader["foo"] #=> "bar"
285
+
286
+ writer = Hammerspace.new("/tmp/hammerspace")
287
+ writer["foo"] = "updated"
288
+ writer.close
289
+
290
+ reader["foo"] #=> "bar"
291
+
292
+ # Close files now, re-open lazily on next read
293
+ reader.close
294
+
295
+ reader["foo"] #=> "updated"
296
+ reader.close
297
+ ```
298
+
299
+ If no hammerspace files exist on disk yet, the reader will fail to open the
300
+ files. It will try again on next read.
301
+
302
+ ```ruby
303
+ reader = Hammerspace.new("/tmp/hammerspace")
304
+ reader.has_key?("foo") #=> false
305
+
306
+ writer = Hammerspace.new("/tmp/hammerspace")
307
+ writer["foo"] = "bar"
308
+ writer.close
309
+
310
+ # Files are opened here
311
+ reader.has_key?("foo") #=> true
312
+ reader.close
313
+ ```
314
+
315
+ You can call `uid` to get a unique id that identifies the version of the files
316
+ being read. `uid` will be `nil` if no hammerspace files exist on disk yet.
317
+
318
+ ```ruby
319
+ reader = Hammerspace.new("/tmp/hammerspace")
320
+ reader.uid #=> nil
321
+
322
+ writer = Hammerspace.new("/tmp/hammerspace")
323
+ writer["foo"] = "bar"
324
+ writer.close
325
+
326
+ reader.close
327
+ reader.uid #=> "24913_53943df0-e784-4873-ade6-d1cccc848a70"
328
+
329
+ # The uid changes on every write, even if the content is the same, i.e., it's
330
+ # an identifier, not a checksum
331
+ writer["foo"] = "bar"
332
+ writer.close
333
+
334
+ reader.close
335
+ reader.uid #=> "24913_9371024e-8c80-477b-8558-7c292bfcbfc1"
336
+
337
+ reader.close
338
+ ```
339
+
340
+ Multiple concurrent writers are also supported. When a writer flushes its
341
+ changes it will overwrite any previous versions of the hammerspace.
342
+
343
+ In practice, this works because hammerspace is designed to hold data that is
344
+ bulk-loaded from some authoritative external source. Rather than block writers
345
+ to enforce consistency, it is simpler to allow writers to concurrently attempt
346
+ to load the data. The last writer to finish loading the data and flush its
347
+ writes will have its data persisted.
348
+
349
+ ```ruby
350
+ writer1 = Hammerspace.new("/tmp/hammerspace")
351
+ writer1["color"] = "red"
352
+
353
+ # Can start while writer1 is still open
354
+ writer2 = Hammerspace.new("/tmp/hammerspace")
355
+ writer2["color"] = "blue"
356
+ writer2["fruit"] = "banana"
357
+ writer2.close
358
+
359
+ # Reads at this point see writer2's data
360
+ reader1 = Hammerspace.new("/tmp/hammerspace")
361
+ reader1["color"] #=> "blue"
362
+ reader1["fruit"] #=> "banana"
363
+ reader1.close
364
+
365
+ # Replaces writer2's data
366
+ writer1.close
367
+
368
+ # Reads at this point see writer1's data; note that "fruit" key is absent
369
+ reader2 = Hammerspace.new("/tmp/hammerspace")
370
+ reader2["color"] #=> "red"
371
+ reader2["fruit"] #=> nil
372
+ reader2.close
373
+ ```
374
+
375
+
376
+ ### Flushing Writes
377
+
378
+ Flushing a write incurs some overhead to build the on-disk hash structures that
379
+ allow fast lookup later. To avoid the overhead of rebuilding the hash after
380
+ every write, most write operations do not implicitly flush. Writes can be
381
+ flushed explicitly by calling `close`.
382
+
383
+ Delaying flushing of writes has the side effect of allowing "transactions" --
384
+ all unflushed writes are private to the hammerspace object doing the writing.
385
+
386
+ One exception is the `clear` method which deletes the files on disk. If a
387
+ reader attempts to open the files immediately after they are deleted, it will
388
+ perceive the hammerspace to be empty.
389
+
390
+ ```ruby
391
+ h = Hammerspace.new("/tmp/hammerspace")
392
+ h["yesterday"] = "foo"
393
+ h["today"] = "bar"
394
+ h.close
395
+
396
+ reader1 = Hammerspace.new("/tmp/hammerspace")
397
+ reader1.keys #=> ["yesterday", "today"]
398
+ reader1.close
399
+
400
+ # Writer wants to remove everything except "today"
401
+ writer = Hammerspace.new("/tmp/hammerspace")
402
+ writer.clear
403
+
404
+ # Effect of clear is immediately visible to readers
405
+ reader2 = Hammerspace.new("/tmp/hammerspace")
406
+ reader2.keys #=> []
407
+ reader2.close
408
+
409
+ writer["today"] = "bar"
410
+ writer.close
411
+
412
+ reader3 = Hammerspace.new("/tmp/hammerspace")
413
+ reader3.keys #=> ["today"]
414
+ reader3.close
415
+ ```
416
+
417
+ If you want to replace the existing data with new data without flushing in
418
+ between (i.e., in a "transaction"), use `replace` instead.
419
+
420
+ ```ruby
421
+ h = Hammerspace.new("/tmp/hammerspace")
422
+ h["yesterday"] = "foo"
423
+ h["today"] = "bar"
424
+ h.close
425
+
426
+ reader1 = Hammerspace.new("/tmp/hammerspace")
427
+ reader1.keys #=> ["yesterday", "today"]
428
+ reader1.close
429
+
430
+ # Writer wants to remove everything except "today"
431
+ writer = Hammerspace.new("/tmp/hammerspace")
432
+ writer.replace({"today" => "bar"})
433
+
434
+ # Old keys still present because writer has not flushed yet
435
+ reader2 = Hammerspace.new("/tmp/hammerspace")
436
+ reader2.keys #=> ["yesterday", "today"]
437
+ reader2.close
438
+
439
+ writer.close
440
+
441
+ reader3 = Hammerspace.new("/tmp/hammerspace")
442
+ reader3.keys #=> ["today"]
443
+ reader3.close
444
+ ```
445
+
446
+
447
+ ### Interleaving Reads and Writes
448
+
449
+ To ensure writes are available to subsequent reads, every read operation
450
+ implicitly flushes any previous writes.
451
+
452
+ ```ruby
453
+ h = Hammerspace.new("/tmp/hammerspace")
454
+ h["foo"] = "bar"
455
+
456
+ # Implicitly flushes write (builds on-disk hash for fast lookup), then opens
457
+ # newly written on-disk hash for reading
458
+ h["foo"] #=> "bar"
459
+
460
+ h.close
461
+ ```
462
+
463
+ While batch reads or writes are relatively fast, interleaved reads and writes
464
+ are slow because the hash is rebuilt very often.
465
+
466
+ ```ruby
467
+ # One flush, fast
468
+ h = Hammerspace.new("/tmp/hammerspace")
469
+ h["a"] = "100"
470
+ h["b"] = "200"
471
+ h["c"] = "300"
472
+ h["a"] #=> "100"
473
+ h["b"] #=> "200"
474
+ h["c"] #=> "300"
475
+ h.close
476
+
477
+ # Three flushes, slow
478
+ h = Hammerspace.new("/tmp/hammerspace")
479
+ h["a"] = "100"
480
+ h["a"] #=> "100"
481
+ h["b"] = "200"
482
+ h["b"] #=> "200"
483
+ h["c"] = "300"
484
+ h["c"] #=> "300"
485
+ h.close
486
+ ```
487
+
488
+ To avoid this overhead, and to ensure consistency during iteration, the `each`
489
+ method opens its own private reader for the duration of the iteration. This is
490
+ also true for any method that uses `each`, including all methods provided by
491
+ `Enumerable`.
492
+
493
+ ```ruby
494
+ h = Hammerspace.new("/tmp/hammerspace")
495
+ h["a"] = "100"
496
+ h["b"] = "200"
497
+ h["c"] = "300"
498
+
499
+ # Flushes the above writes, then opens a private reader for the each call
500
+ h.each do |key, value|
501
+ # Writes are done in bulk without flushing in between
502
+ h[key] = value[0]
503
+ end
504
+
505
+ # Flushes the above writes, then opens the reader
506
+ h.to_hash #=> {"a"=>"1", "b"=>"2", "c"=>"3"}
507
+
508
+ h.close
509
+ ```
510
+
511
+
512
+ ### Unsupported Methods
513
+
514
+ Besides the incompatibilities with Ruby's `Hash` discussed above, there are
515
+ some `Hash` methods that are not supported.
516
+
517
+ * Methods that return a copy of the hash: `invert`, `merge`, `reject`, `select`
518
+ * `rehash` is not needed, since hammerspace only supports string keys, and keys are effectively `dup`d
519
+ * `delete` does not return the value deleted, and it does not support block usage
520
+ * `hash` and `to_s` are not overridden, so the behavior is that of `Object#hash` and `Object#to_s`
521
+ * `compare_by_identity`, `compare_by_identity?`
522
+ * `pretty_print`, `pretty_print_cycle`