hammerspace 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.chef/cookbooks/hammerspace-development/attributes/default.rb +7 -0
- data/.chef/cookbooks/hammerspace-development/attributes/essential.rb +6 -0
- data/.chef/cookbooks/hammerspace-development/attributes/sparkey.rb +7 -0
- data/.chef/cookbooks/hammerspace-development/recipes/default.rb +32 -0
- data/.chef/cookbooks/hammerspace-development/recipes/essential.rb +9 -0
- data/.chef/cookbooks/hammerspace-development/recipes/ruby.rb +21 -0
- data/.chef/cookbooks/hammerspace-development/recipes/sparkey.rb +56 -0
- data/.chef/cookbooks/hammerspace-development/templates/default/.bash_profile.erb +2 -0
- data/.chef/roles/hammerspace-development.rb +6 -0
- data/.gitignore +8 -0
- data/CHANGELOG.md +22 -0
- data/Gemfile +10 -0
- data/LICENSE.txt +22 -0
- data/README.md +522 -0
- data/Vagrantfile +30 -0
- data/hammerspace.gemspec +21 -0
- data/lib/hammerspace.rb +12 -0
- data/lib/hammerspace/backend.rb +106 -0
- data/lib/hammerspace/backend/sparkey.rb +319 -0
- data/lib/hammerspace/hash.rb +62 -0
- data/lib/hammerspace/hash_methods.rb +234 -0
- data/lib/hammerspace/version.rb +3 -0
- data/script/write_concurrency_test.rb +36 -0
- data/spec/features/hash_spec.rb +1487 -0
- data/spec/lib/hammerspace/backend/sparkey_spec.rb +191 -0
- data/spec/lib/hammerspace/hash_spec.rb +143 -0
- data/spec/lib/hammerspace_spec.rb +27 -0
- data/spec/spec_helper.rb +25 -0
- data/spec/support/sparkey_directory_helper.rb +26 -0
- data/spec/support/write_concurrency_test.rb +38 -0
- metadata +96 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
ZGMzNzRmZWE0Yjc1NjQ4NDJhMTc3NzA0YmZiYzAxZDIyMWNkNDRiYQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
ZjdjYjY2NzI4MDQxYTA3YzYzZmM5MjVlOTJjY2RhMzI4Mzk3OGU5MQ==
|
7
|
+
!binary "U0hBNTEy":
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
NDU0NzZhYThhNWQ4YTdhYTQ2MDkzMmI2NTFlM2IzNjkwYTdlMDk0MjllNTgw
|
10
|
+
MzQwNDdkOWYxMmZlMzI0OTI3M2NmYjQ4ZjlhMDMwYWEwZThjYjNiMTIzNGUw
|
11
|
+
ZmM0NjAyNDBmYmE1NzkwMjdmMmU0MTA3YTdmYjljZTQwOTA5ODM=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
NTZhNTU2YjUxNjY0ZDYwNDQyYTFjMmIxODE4MzY5NjE5Y2RmZDcyOWIxMDg0
|
14
|
+
ZTQ1NzE5OTIwMTUzZTk2ZjgwNTY0MWVhOTJmOTNjN2Q3Njk2MGNiYjE1MDcw
|
15
|
+
NTlmODY5MTI5ZjlkYWQ2MWJhZGE3NTc1NzMyYTBiODQ5ZmI3Mzk=
|
@@ -0,0 +1,7 @@
|
|
1
|
+
default.hammerspace_development.user = 'vagrant'
|
2
|
+
default.hammerspace_development.hammerspace.home = '/home/vagrant/hammerspace'
|
3
|
+
default.hammerspace_development.hammerspace.gem_home = '/home/vagrant/.gems'
|
4
|
+
default.hammerspace_development.hammerspace.root = '/var/lib/hammerspace'
|
5
|
+
|
6
|
+
default.hammerspace_development.ruby.version = '1.9.1'
|
7
|
+
default.hammerspace_development.sparkey.version = '3b3b061a706c33ae764c2bb8e61888b7a7a26c12'
|
@@ -0,0 +1,7 @@
|
|
1
|
+
default.hammerspace_development.sparkey.home = "/home/#{default.hammerspace_development.user}/sparkey"
|
2
|
+
default.hammerspace_development.sparkey.source_file = "https://github.com/spotify/sparkey/archive/#{node.hammerspace_development.sparkey.version}.tar.gz"
|
3
|
+
default.hammerspace_development.sparkey.local_dir = File.join(default.hammerspace_development.sparkey.home, "sparkey-#{node.hammerspace_development.sparkey.version}")
|
4
|
+
|
5
|
+
default.hammerspace_development.sparkey.packages = [
|
6
|
+
'libsnappy-dev',
|
7
|
+
]
|
@@ -0,0 +1,32 @@
|
|
1
|
+
include_recipe "hammerspace-development::essential"
|
2
|
+
include_recipe "hammerspace-development::sparkey"
|
3
|
+
include_recipe "hammerspace-development::ruby"
|
4
|
+
|
5
|
+
template "/home/#{node.hammerspace_development.user}/.bash_profile" do
|
6
|
+
owner node.hammerspace_development.user
|
7
|
+
group node.hammerspace_development.user
|
8
|
+
mode '0755'
|
9
|
+
end
|
10
|
+
|
11
|
+
directory node.hammerspace_development.hammerspace.gem_home do
|
12
|
+
owner node.hammerspace_development.user
|
13
|
+
group node.hammerspace_development.user
|
14
|
+
mode '0755'
|
15
|
+
recursive true
|
16
|
+
action :create
|
17
|
+
end
|
18
|
+
|
19
|
+
execute "hammerspace-bundle-install" do
|
20
|
+
cwd node.hammerspace_development.hammerspace.home
|
21
|
+
user node.hammerspace_development.user
|
22
|
+
group node.hammerspace_development.user
|
23
|
+
command "bundle install --path #{node.hammerspace_development.hammerspace.gem_home}"
|
24
|
+
end
|
25
|
+
|
26
|
+
directory node.hammerspace_development.hammerspace.root do
|
27
|
+
owner node.hammerspace_development.user
|
28
|
+
group node.hammerspace_development.user
|
29
|
+
mode '0755'
|
30
|
+
recursive true
|
31
|
+
action :create
|
32
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
package "ruby#{node.hammerspace_development.ruby.version}" do
|
2
|
+
action :upgrade
|
3
|
+
options "--force-yes"
|
4
|
+
end
|
5
|
+
|
6
|
+
package "ruby#{node.hammerspace_development.ruby.version}-dev" do
|
7
|
+
action :upgrade
|
8
|
+
options "--force-yes"
|
9
|
+
end
|
10
|
+
|
11
|
+
# other common packages needed by ruby gems
|
12
|
+
["libxslt-dev", "libxml2-dev"].each do |p|
|
13
|
+
package p do
|
14
|
+
action :upgrade
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
gem_package 'bundler' do
|
19
|
+
action :install
|
20
|
+
end
|
21
|
+
|
@@ -0,0 +1,56 @@
|
|
1
|
+
sparkey_local_file = File.join(
|
2
|
+
node.hammerspace_development.sparkey.home,
|
3
|
+
File.basename(node.hammerspace_development.sparkey.source_file))
|
4
|
+
|
5
|
+
node.hammerspace_development.sparkey.packages.each do |p|
|
6
|
+
package p do
|
7
|
+
action :upgrade
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
directory node.hammerspace_development.sparkey.home do
|
12
|
+
owner node.hammerspace_development.user
|
13
|
+
group node.hammerspace_development.user
|
14
|
+
mode '0755'
|
15
|
+
recursive true
|
16
|
+
action :create
|
17
|
+
end
|
18
|
+
|
19
|
+
remote_file sparkey_local_file do
|
20
|
+
source node.hammerspace_development.sparkey.source_file
|
21
|
+
owner node.hammerspace_development.user
|
22
|
+
group node.hammerspace_development.user
|
23
|
+
mode "644"
|
24
|
+
|
25
|
+
action :create_if_missing
|
26
|
+
notifies :run, "execute[extract-sparkey-#{node.hammerspace_development.sparkey.version}]", :immediately
|
27
|
+
end
|
28
|
+
|
29
|
+
execute "extract-sparkey-#{node.hammerspace_development.sparkey.version}" do
|
30
|
+
cwd node.hammerspace_development.sparkey.home
|
31
|
+
user node.hammerspace_development.user
|
32
|
+
command "tar -xvzf #{sparkey_local_file}"
|
33
|
+
|
34
|
+
action :nothing
|
35
|
+
notifies :run, "bash[build-sparkey-#{node.hammerspace_development.sparkey.version}]", :immediately
|
36
|
+
end
|
37
|
+
|
38
|
+
bash "build-sparkey-#{node.hammerspace_development.sparkey.version}" do
|
39
|
+
cwd node.hammerspace_development.sparkey.local_dir
|
40
|
+
user node.hammerspace_development.user
|
41
|
+
code <<-EOS
|
42
|
+
autoreconf --install
|
43
|
+
./configure
|
44
|
+
make
|
45
|
+
EOS
|
46
|
+
|
47
|
+
action :nothing
|
48
|
+
notifies :run, "execute[install-sparkey-#{node.hammerspace_development.sparkey.version}]", :immediately
|
49
|
+
end
|
50
|
+
|
51
|
+
execute "install-sparkey-#{node.hammerspace_development.sparkey.version}" do
|
52
|
+
cwd node.hammerspace_development.sparkey.local_dir
|
53
|
+
command "make install && sudo ldconfig"
|
54
|
+
|
55
|
+
action :nothing
|
56
|
+
end
|
data/.gitignore
ADDED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# v0.1.2
|
2
|
+
* Support vagrant for local development.
|
3
|
+
* Remove dependency on colored gem.
|
4
|
+
* Add MIT license.
|
5
|
+
* Documentation updates.
|
6
|
+
|
7
|
+
# v0.1.1
|
8
|
+
* Expose the uid of the directory that the current reader is reading from.
|
9
|
+
* Documentation updates.
|
10
|
+
|
11
|
+
# v0.1.0
|
12
|
+
* Change semantics of block passed to constructor, now used to specify default_proc.
|
13
|
+
* Add support for most Ruby Hash methods.
|
14
|
+
* Major internal refactor, new HashMethods module allows new backends to be written more easily.
|
15
|
+
* Add documentation.
|
16
|
+
|
17
|
+
# v0.0.2
|
18
|
+
* Add support for multiple writers with last-write-wins semantics.
|
19
|
+
* Implement `clear` method.
|
20
|
+
|
21
|
+
# v0.0.1
|
22
|
+
* Initial release.
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Airbnb, Inc.
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,522 @@
|
|
1
|
+
Hammerspace
|
2
|
+
===========
|
3
|
+
|
4
|
+
Hash-like interface to persistent, concurrent, off-heap storage
|
5
|
+
|
6
|
+
|
7
|
+
## What is Hammerspace?
|
8
|
+
|
9
|
+
_[Hammerspace](http://en.wikipedia.org/wiki/Hammerspace) ... is a
|
10
|
+
fan-envisioned extradimensional, instantly accessible storage area in fiction,
|
11
|
+
which is used to explain how animated, comic, and game characters can produce
|
12
|
+
objects out of thin air._
|
13
|
+
|
14
|
+
This gem provides persistent, concurrently-accessible off-heap storage of
|
15
|
+
strings with a familiar hash-like interface. It is optimized for bulk writes
|
16
|
+
and random reads.
|
17
|
+
|
18
|
+
|
19
|
+
## Motivation
|
20
|
+
|
21
|
+
Applications often use data that never changes or changes very infrequently. In
|
22
|
+
many cases, some latency is acceptable when accessing this data. For example, a
|
23
|
+
user's profile may be loaded from a web service, a database, or an external
|
24
|
+
shared cache like memcache. In other cases, latency is much more sensitive. For
|
25
|
+
example, translations may be used many times and incurring even a ~2ms delay to
|
26
|
+
access them from an external cache would be prohibitively slow.
|
27
|
+
|
28
|
+
To work around the performance issue, this type of data is often loaded into
|
29
|
+
the application at startup. Unfortunately, this means the data is stored on the
|
30
|
+
heap, where the garbage collector must scan over the objects on every run (at
|
31
|
+
least in the case of Ruby MRI). Further, for application servers that utilize
|
32
|
+
multiple processes, each process has its own copy of the data which is an
|
33
|
+
inefficient use of memory.
|
34
|
+
|
35
|
+
Hammerspace solves these problems by moving the data off the heap onto disk.
|
36
|
+
Leveraging libraries and data structures optimized for bulk writes and random
|
37
|
+
reads allows an acceptable level of performance to be maintained. Because the
|
38
|
+
data is persistent, it does not need to be reloaded from an external cache or
|
39
|
+
service on application startup unless the data has changed.
|
40
|
+
|
41
|
+
Unfortunately, these low-level libraries don't always support concurrent
|
42
|
+
writers. Hammerspace adds concurrency control to allow multiple processes to
|
43
|
+
update and read from a single shared copy of the data safely. Finally,
|
44
|
+
hammerspace's interface is designed to mimic Ruby's `Hash` to make integrating
|
45
|
+
with existing applications simple and straightforward. Different low-level
|
46
|
+
libraries can be used by implementing a new backend that uses the library.
|
47
|
+
(Currently, only [Sparkey](https://github.com/spotify/sparkey) is supported.)
|
48
|
+
Backends only need to implement a small set of methods (`[]`, `[]=`, `close`,
|
49
|
+
`delete`, `each`, `uid`), but can override the default implementation of other
|
50
|
+
methods if the underlying library supports more efficient implementations.
|
51
|
+
|
52
|
+
## Installation
|
53
|
+
|
54
|
+
### Requirements
|
55
|
+
|
56
|
+
* [Gnista](https://github.com/emnl/gnista), Ruby bindings for Sparkey
|
57
|
+
* [Sparkey](https://github.com/spotify/sparkey), constant key/value storage library
|
58
|
+
* [Snappy](https://code.google.com/p/snappy/), compression/decompression library (unused, but required to compile Sparkey)
|
59
|
+
* A filesystem that supports `flock(2)` and unlinking files/directories with outstanding file descriptors (ext3/4 will do just fine)
|
60
|
+
|
61
|
+
|
62
|
+
### Installation
|
63
|
+
|
64
|
+
Add the following line to your Gemfile:
|
65
|
+
|
66
|
+
gem 'hammerspace'
|
67
|
+
|
68
|
+
Then run:
|
69
|
+
|
70
|
+
bundle
|
71
|
+
|
72
|
+
### Vagrant
|
73
|
+
|
74
|
+
To make development easier, the source tree contains a Vagrantfile and a small
|
75
|
+
cookbook to install all the prerequisites. The vagrant environment also serves
|
76
|
+
as a consistent environment to run the test suite.
|
77
|
+
|
78
|
+
To use it, make sure you have vagrant installed, then:
|
79
|
+
|
80
|
+
vagrant up
|
81
|
+
vagrant ssh
|
82
|
+
bundle exec rspec
|
83
|
+
|
84
|
+
|
85
|
+
## Usage
|
86
|
+
|
87
|
+
### Getting Started
|
88
|
+
|
89
|
+
For the most part, hammerspace acts like a Ruby hash. But since it's a hash
|
90
|
+
that persists on disk, you have to tell it where to store the files. The
|
91
|
+
enclosing directory and any parent directories are created if they don't
|
92
|
+
already exist.
|
93
|
+
|
94
|
+
```ruby
|
95
|
+
h = Hammerspace.new("/tmp/hammerspace")
|
96
|
+
|
97
|
+
h["cartoons"] = "mallets"
|
98
|
+
h["games"] = "inventory"
|
99
|
+
h["rubyists"] = "data"
|
100
|
+
|
101
|
+
h.size #=> 3
|
102
|
+
h["cartoons"] #=> "mallets"
|
103
|
+
|
104
|
+
h.map { |k,v| "#{k.capitalize} use hammerspace to store #{v}." }
|
105
|
+
|
106
|
+
h.close
|
107
|
+
```
|
108
|
+
|
109
|
+
You should call `close` on the hammerspace object when you're done with it.
|
110
|
+
This flushes any pending writes to disk and closes any open file handles.
|
111
|
+
|
112
|
+
|
113
|
+
### Options
|
114
|
+
|
115
|
+
The constructor takes a hash of options as an optional second argument.
|
116
|
+
Currently the only option supported is `:backend` which specifies which backend
|
117
|
+
class to use. Since there is only one backend supported at this time, there is
|
118
|
+
currently no reason to pass this argument.
|
119
|
+
|
120
|
+
```ruby
|
121
|
+
h = Hammerspace.new("/tmp/hammerspace", {:backend => Hammerspace::Backend::Sparkey})
|
122
|
+
```
|
123
|
+
|
124
|
+
|
125
|
+
### Default Values
|
126
|
+
|
127
|
+
The constructor takes a default value as an optional third argument. This
|
128
|
+
functions the same as Ruby's `Hash`, except with `Hash` it is the first
|
129
|
+
argument.
|
130
|
+
|
131
|
+
```ruby
|
132
|
+
h = Hammerspace.new("/tmp/hammerspace", {}, "default")
|
133
|
+
h["foo"] = "bar"
|
134
|
+
h["foo"] #=> "bar"
|
135
|
+
h["new"] #=> "default"
|
136
|
+
h.close
|
137
|
+
```
|
138
|
+
|
139
|
+
The constructor also takes a block to specify a default Proc, which works the
|
140
|
+
same way as Ruby's `Hash`. As with `Hash`, it is the block's responsibility to
|
141
|
+
store the value in the hash if required.
|
142
|
+
|
143
|
+
```ruby
|
144
|
+
h = Hammerspace.new("/tmp/hammerspace") { |hash, key| hash[key] = "#{key} (default)" }
|
145
|
+
h["new"] #=> "new (default)"
|
146
|
+
h.has_key?("new") #=> true
|
147
|
+
h.close
|
148
|
+
```
|
149
|
+
|
150
|
+
|
151
|
+
### Supported Data Types
|
152
|
+
|
153
|
+
Only string keys and values are supported.
|
154
|
+
|
155
|
+
```ruby
|
156
|
+
h = Hammerspace.new("/tmp/hammerspace")
|
157
|
+
h[1] = "foo" #=> TypeError
|
158
|
+
h["fixnum"] = 8 #=> TypeError
|
159
|
+
h["nil"] = nil #=> TypeError
|
160
|
+
h.close
|
161
|
+
```
|
162
|
+
|
163
|
+
Ruby hashes store references to objects, but hammerspace stores raw bytes. A
|
164
|
+
new Ruby `String` object is created from those bytes when a key is accessed.
|
165
|
+
|
166
|
+
```ruby
|
167
|
+
value = "bar"
|
168
|
+
|
169
|
+
hash = {"foo" => value}
|
170
|
+
hash["foo"] == value #=> true
|
171
|
+
hash["foo"].equal?(value) #=> true
|
172
|
+
|
173
|
+
hammerspace = Hammerspace.new("/tmp/hammerspace")
|
174
|
+
hammerspace["foo"] = value
|
175
|
+
hammerspace["foo"] == value #=> true
|
176
|
+
hammerspace["foo"].equal?(value) #=> false
|
177
|
+
hammerspace.close
|
178
|
+
```
|
179
|
+
|
180
|
+
Since every access results in a new `String` object, mutating values doesn't
|
181
|
+
work unless you create an explicit reference to the string.
|
182
|
+
|
183
|
+
```ruby
|
184
|
+
h = Hammerspace.new("/tmp/hammerspace")
|
185
|
+
h["foo"] = "bar"
|
186
|
+
|
187
|
+
# This doesn't work like Ruby's Hash because every access creates a new object
|
188
|
+
h["foo"].upcase!
|
189
|
+
h["foo"] #=> "bar"
|
190
|
+
|
191
|
+
# An explicit reference is required
|
192
|
+
value = h["foo"]
|
193
|
+
value.upcase!
|
194
|
+
value #=> "BAR"
|
195
|
+
|
196
|
+
# Another access, another new object
|
197
|
+
h["foo"] #=> "bar"
|
198
|
+
|
199
|
+
h.close
|
200
|
+
```
|
201
|
+
|
202
|
+
This also implies that strings "lose" their encoding when retrieved from
|
203
|
+
hammerspace.
|
204
|
+
|
205
|
+
```ruby
|
206
|
+
value = "bar"
|
207
|
+
value.encoding #=> #<Encoding:UTF-8>
|
208
|
+
|
209
|
+
h = Hammerspace.new("/tmp/hammerspace")
|
210
|
+
h["foo"] = value
|
211
|
+
h["foo"].encoding #=> #<Encoding:ASCII-8BIT>
|
212
|
+
h.close
|
213
|
+
```
|
214
|
+
|
215
|
+
If you require strings in UTF-8, make sure strings are encoded as UTF-8 when
|
216
|
+
storing the key, then force the encoding to be UTF-8 when accessing the key.
|
217
|
+
|
218
|
+
```ruby
|
219
|
+
h[key] = value.encode('utf-8')
|
220
|
+
value = h[key].force_encoding('utf-8')
|
221
|
+
```
|
222
|
+
|
223
|
+
|
224
|
+
### Persistence
|
225
|
+
|
226
|
+
Hammerspace objects are backed by files on disk, so even a new object may
|
227
|
+
already have data in it.
|
228
|
+
|
229
|
+
```ruby
|
230
|
+
h = Hammerspace.new("/tmp/hammerspace")
|
231
|
+
h["foo"] = "bar"
|
232
|
+
h.close
|
233
|
+
|
234
|
+
h = Hammerspace.new("/tmp/hammerspace")
|
235
|
+
h["foo"] #=> "bar"
|
236
|
+
h.close
|
237
|
+
```
|
238
|
+
|
239
|
+
Calling `clear` deletes the data files on disk. The parent directory is not
|
240
|
+
removed, nor is it guaranteed to be empty. Some files containing metadata may
|
241
|
+
still be present, e.g., lock files.
|
242
|
+
|
243
|
+
|
244
|
+
### Concurrency
|
245
|
+
|
246
|
+
Multiple concurrent readers are supported. Readers are isolated from writers,
|
247
|
+
i.e., reads are consistent to the time that the reader was opened. Note that
|
248
|
+
the reader opens its files lazily on first read, not when the hammerspace
|
249
|
+
object is created.
|
250
|
+
|
251
|
+
```ruby
|
252
|
+
h = Hammerspace.new("/tmp/hammerspace")
|
253
|
+
h["foo"] = "bar"
|
254
|
+
h.close
|
255
|
+
|
256
|
+
reader1 = Hammerspace.new("/tmp/hammerspace")
|
257
|
+
reader1["foo"] #=> "bar"
|
258
|
+
|
259
|
+
writer = Hammerspace.new("/tmp/hammerspace")
|
260
|
+
writer["foo"] = "updated"
|
261
|
+
writer.close
|
262
|
+
|
263
|
+
# Still "bar" because reader1 opened its files before the write
|
264
|
+
reader1["foo"] #=> "bar"
|
265
|
+
|
266
|
+
# Updated key is visible because reader2 opened its files after the write
|
267
|
+
reader2 = Hammerspace.new("/tmp/hammerspace")
|
268
|
+
reader2["foo"] #=> "updated"
|
269
|
+
reader2.close
|
270
|
+
|
271
|
+
reader1.close
|
272
|
+
```
|
273
|
+
|
274
|
+
A new hammerspace object does not necessarily need to be created. Calling
|
275
|
+
`close` will close the files, then the reader will open them lazily again on
|
276
|
+
the next read.
|
277
|
+
|
278
|
+
```ruby
|
279
|
+
h = Hammerspace.new("/tmp/hammerspace")
|
280
|
+
h["foo"] = "bar"
|
281
|
+
h.close
|
282
|
+
|
283
|
+
reader = Hammerspace.new("/tmp/hammerspace")
|
284
|
+
reader["foo"] #=> "bar"
|
285
|
+
|
286
|
+
writer = Hammerspace.new("/tmp/hammerspace")
|
287
|
+
writer["foo"] = "updated"
|
288
|
+
writer.close
|
289
|
+
|
290
|
+
reader["foo"] #=> "bar"
|
291
|
+
|
292
|
+
# Close files now, re-open lazily on next read
|
293
|
+
reader.close
|
294
|
+
|
295
|
+
reader["foo"] #=> "updated"
|
296
|
+
reader.close
|
297
|
+
```
|
298
|
+
|
299
|
+
If no hammerspace files exist on disk yet, the reader will fail to open the
|
300
|
+
files. It will try again on next read.
|
301
|
+
|
302
|
+
```ruby
|
303
|
+
reader = Hammerspace.new("/tmp/hammerspace")
|
304
|
+
reader.has_key?("foo") #=> false
|
305
|
+
|
306
|
+
writer = Hammerspace.new("/tmp/hammerspace")
|
307
|
+
writer["foo"] = "bar"
|
308
|
+
writer.close
|
309
|
+
|
310
|
+
# Files are opened here
|
311
|
+
reader.has_key?("foo") #=> true
|
312
|
+
reader.close
|
313
|
+
```
|
314
|
+
|
315
|
+
You can call `uid` to get a unique id that identifies the version of the files
|
316
|
+
being read. `uid` will be `nil` if no hammerspace files exist on disk yet.
|
317
|
+
|
318
|
+
```ruby
|
319
|
+
reader = Hammerspace.new("/tmp/hammerspace")
|
320
|
+
reader.uid #=> nil
|
321
|
+
|
322
|
+
writer = Hammerspace.new("/tmp/hammerspace")
|
323
|
+
writer["foo"] = "bar"
|
324
|
+
writer.close
|
325
|
+
|
326
|
+
reader.close
|
327
|
+
reader.uid #=> "24913_53943df0-e784-4873-ade6-d1cccc848a70"
|
328
|
+
|
329
|
+
# The uid changes on every write, even if the content is the same, i.e., it's
|
330
|
+
# an identifier, not a checksum
|
331
|
+
writer["foo"] = "bar"
|
332
|
+
writer.close
|
333
|
+
|
334
|
+
reader.close
|
335
|
+
reader.uid #=> "24913_9371024e-8c80-477b-8558-7c292bfcbfc1"
|
336
|
+
|
337
|
+
reader.close
|
338
|
+
```
|
339
|
+
|
340
|
+
Multiple concurrent writers are also supported. When a writer flushes its
|
341
|
+
changes it will overwrite any previous versions of the hammerspace.
|
342
|
+
|
343
|
+
In practice, this works because hammerspace is designed to hold data that is
|
344
|
+
bulk-loaded from some authoritative external source. Rather than block writers
|
345
|
+
to enforce consistency, it is simpler to allow writers to concurrently attempt
|
346
|
+
to load the data. The last writer to finish loading the data and flush its
|
347
|
+
writes will have its data persisted.
|
348
|
+
|
349
|
+
```ruby
|
350
|
+
writer1 = Hammerspace.new("/tmp/hammerspace")
|
351
|
+
writer1["color"] = "red"
|
352
|
+
|
353
|
+
# Can start while writer1 is still open
|
354
|
+
writer2 = Hammerspace.new("/tmp/hammerspace")
|
355
|
+
writer2["color"] = "blue"
|
356
|
+
writer2["fruit"] = "banana"
|
357
|
+
writer2.close
|
358
|
+
|
359
|
+
# Reads at this point see writer2's data
|
360
|
+
reader1 = Hammerspace.new("/tmp/hammerspace")
|
361
|
+
reader1["color"] #=> "blue"
|
362
|
+
reader1["fruit"] #=> "banana"
|
363
|
+
reader1.close
|
364
|
+
|
365
|
+
# Replaces writer2's data
|
366
|
+
writer1.close
|
367
|
+
|
368
|
+
# Reads at this point see writer1's data; note that "fruit" key is absent
|
369
|
+
reader2 = Hammerspace.new("/tmp/hammerspace")
|
370
|
+
reader2["color"] #=> "red"
|
371
|
+
reader2["fruit"] #=> nil
|
372
|
+
reader2.close
|
373
|
+
```
|
374
|
+
|
375
|
+
|
376
|
+
### Flushing Writes
|
377
|
+
|
378
|
+
Flushing a write incurs some overhead to build the on-disk hash structures that
|
379
|
+
allow fast lookup later. To avoid the overhead of rebuilding the hash after
|
380
|
+
every write, most write operations do not implicitly flush. Writes can be
|
381
|
+
flushed explicitly by calling `close`.
|
382
|
+
|
383
|
+
Delaying flushing of writes has the side effect of allowing "transactions" --
|
384
|
+
all unflushed writes are private to the hammerspace object doing the writing.
|
385
|
+
|
386
|
+
One exception is the `clear` method which deletes the files on disk. If a
|
387
|
+
reader attempts to open the files immediately after they are deleted, it will
|
388
|
+
perceive the hammerspace to be empty.
|
389
|
+
|
390
|
+
```ruby
|
391
|
+
h = Hammerspace.new("/tmp/hammerspace")
|
392
|
+
h["yesterday"] = "foo"
|
393
|
+
h["today"] = "bar"
|
394
|
+
h.close
|
395
|
+
|
396
|
+
reader1 = Hammerspace.new("/tmp/hammerspace")
|
397
|
+
reader1.keys #=> ["yesterday", "today"]
|
398
|
+
reader1.close
|
399
|
+
|
400
|
+
# Writer wants to remove everything except "today"
|
401
|
+
writer = Hammerspace.new("/tmp/hammerspace")
|
402
|
+
writer.clear
|
403
|
+
|
404
|
+
# Effect of clear is immediately visible to readers
|
405
|
+
reader2 = Hammerspace.new("/tmp/hammerspace")
|
406
|
+
reader2.keys #=> []
|
407
|
+
reader2.close
|
408
|
+
|
409
|
+
writer["today"] = "bar"
|
410
|
+
writer.close
|
411
|
+
|
412
|
+
reader3 = Hammerspace.new("/tmp/hammerspace")
|
413
|
+
reader3.keys #=> ["today"]
|
414
|
+
reader3.close
|
415
|
+
```
|
416
|
+
|
417
|
+
If you want to replace the existing data with new data without flushing in
|
418
|
+
between (i.e., in a "transaction"), use `replace` instead.
|
419
|
+
|
420
|
+
```ruby
|
421
|
+
h = Hammerspace.new("/tmp/hammerspace")
|
422
|
+
h["yesterday"] = "foo"
|
423
|
+
h["today"] = "bar"
|
424
|
+
h.close
|
425
|
+
|
426
|
+
reader1 = Hammerspace.new("/tmp/hammerspace")
|
427
|
+
reader1.keys #=> ["yesterday", "today"]
|
428
|
+
reader1.close
|
429
|
+
|
430
|
+
# Writer wants to remove everything except "today"
|
431
|
+
writer = Hammerspace.new("/tmp/hammerspace")
|
432
|
+
writer.replace({"today" => "bar"})
|
433
|
+
|
434
|
+
# Old keys still present because writer has not flushed yet
|
435
|
+
reader2 = Hammerspace.new("/tmp/hammerspace")
|
436
|
+
reader2.keys #=> ["yesterday", "today"]
|
437
|
+
reader2.close
|
438
|
+
|
439
|
+
writer.close
|
440
|
+
|
441
|
+
reader3 = Hammerspace.new("/tmp/hammerspace")
|
442
|
+
reader3.keys #=> ["today"]
|
443
|
+
reader3.close
|
444
|
+
```
|
445
|
+
|
446
|
+
|
447
|
+
### Interleaving Reads and Writes
|
448
|
+
|
449
|
+
To ensure writes are available to subsequent reads, every read operation
|
450
|
+
implicitly flushes any previous writes.
|
451
|
+
|
452
|
+
```ruby
|
453
|
+
h = Hammerspace.new("/tmp/hammerspace")
|
454
|
+
h["foo"] = "bar"
|
455
|
+
|
456
|
+
# Implicitly flushes write (builds on-disk hash for fast lookup), then opens
|
457
|
+
# newly written on-disk hash for reading
|
458
|
+
h["foo"] #=> "bar"
|
459
|
+
|
460
|
+
h.close
|
461
|
+
```
|
462
|
+
|
463
|
+
While batch reads or writes are relatively fast, interleaved reads and writes
|
464
|
+
are slow because the hash is rebuilt very often.
|
465
|
+
|
466
|
+
```ruby
|
467
|
+
# One flush, fast
|
468
|
+
h = Hammerspace.new("/tmp/hammerspace")
|
469
|
+
h["a"] = "100"
|
470
|
+
h["b"] = "200"
|
471
|
+
h["c"] = "300"
|
472
|
+
h["a"] #=> "100"
|
473
|
+
h["b"] #=> "200"
|
474
|
+
h["c"] #=> "300"
|
475
|
+
h.close
|
476
|
+
|
477
|
+
# Three flushes, slow
|
478
|
+
h = Hammerspace.new("/tmp/hammerspace")
|
479
|
+
h["a"] = "100"
|
480
|
+
h["a"] #=> "100"
|
481
|
+
h["b"] = "200"
|
482
|
+
h["b"] #=> "200"
|
483
|
+
h["c"] = "300"
|
484
|
+
h["c"] #=> "300"
|
485
|
+
h.close
|
486
|
+
```
|
487
|
+
|
488
|
+
To avoid this overhead, and to ensure consistency during iteration, the `each`
|
489
|
+
method opens its own private reader for the duration of the iteration. This is
|
490
|
+
also true for any method that uses `each`, including all methods provided by
|
491
|
+
`Enumerable`.
|
492
|
+
|
493
|
+
```ruby
|
494
|
+
h = Hammerspace.new("/tmp/hammerspace")
|
495
|
+
h["a"] = "100"
|
496
|
+
h["b"] = "200"
|
497
|
+
h["c"] = "300"
|
498
|
+
|
499
|
+
# Flushes the above writes, then opens a private reader for the each call
|
500
|
+
h.each do |key, value|
|
501
|
+
# Writes are done in bulk without flushing in between
|
502
|
+
h[key] = value[0]
|
503
|
+
end
|
504
|
+
|
505
|
+
# Flushes the above writes, then opens the reader
|
506
|
+
h.to_hash #=> {"a"=>"1", "b"=>"2", "c"=>"3"}
|
507
|
+
|
508
|
+
h.close
|
509
|
+
```
|
510
|
+
|
511
|
+
|
512
|
+
### Unsupported Methods
|
513
|
+
|
514
|
+
Besides the incompatibilities with Ruby's `Hash` discussed above, there are
|
515
|
+
some `Hash` methods that are not supported.
|
516
|
+
|
517
|
+
* Methods that return a copy of the hash: `invert`, `merge`, `reject`, `select`
|
518
|
+
* `rehash` is not needed, since hammerspace only supports string keys, and keys are effectively `dup`d
|
519
|
+
* `delete` does not return the value deleted, and it does not support block usage
|
520
|
+
* `hash` and `to_s` are not overridden, so the behavior is that of `Object#hash` and `Object#to_s`
|
521
|
+
* `compare_by_identity`, `compare_by_identity?`
|
522
|
+
* `pretty_print`, `pretty_print_cycle`
|