spool_pool 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.rdoc +15 -0
- data/LICENSE.txt +27 -0
- data/README.rdoc +53 -0
- data/TODOs +1 -0
- data/lib/spool_pool.rb +53 -0
- data/lib/spool_pool/file.rb +161 -0
- data/lib/spool_pool/pool.rb +172 -0
- data/lib/spool_pool/spool.rb +113 -0
- data/scripts/perf_test.rb +16 -0
- data/spec/spec_helper.rb +10 -0
- data/spec/spool_pool/file_spec.rb +119 -0
- data/spec/spool_pool/pool_spec.rb +492 -0
- data/spec/spool_pool/spool_spec.rb +245 -0
- metadata +68 -0
data/History.rdoc
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
= 0.2.1
|
2
|
+
* Add upfront sanity checking of the pools directory environment and permissions
|
3
|
+
|
4
|
+
= 0.2
|
5
|
+
* Add safe behaviour for get: supply operations in a block; the spoolfile
|
6
|
+
only gets deleted if the block completes without an exception
|
7
|
+
* Include into SpoolPool::File adapted Tempfile code from the ruby stdlib,
|
8
|
+
resulting in ~5x speed improvement for put operations
|
9
|
+
* Change the naming scheme of the spool files
|
10
|
+
* Sort files by name, not by ctime
|
11
|
+
* Cache sorted list of spooled files, resulting in a massive speed up for
|
12
|
+
get/flush operations (10000 files took about 14000 seconds, now 4.4 seconds)
|
13
|
+
|
14
|
+
= 0.1
|
15
|
+
* First version with a basic implementation of all core features
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Copyright 2010 Sven Riedel
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions
|
6
|
+
are met:
|
7
|
+
|
8
|
+
1. Redistributions of source code must retain the above copyright
|
9
|
+
notice, this list of conditions and the following disclaimer.
|
10
|
+
2. Redistributions in binary form must reproduce the above copyright
|
11
|
+
notice, this list of conditions and the following disclaimer in the
|
12
|
+
documentation and/or other materials provided with the distribution.
|
13
|
+
3. Neither the names of the authors nor the names of their contributors
|
14
|
+
may be used to endorse or promote products derived from this software
|
15
|
+
without specific prior written permission.
|
16
|
+
|
17
|
+
THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS
|
18
|
+
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
19
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
20
|
+
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
|
21
|
+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
|
22
|
+
OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
23
|
+
OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
24
|
+
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
25
|
+
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
26
|
+
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
27
|
+
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
= Introduction
|
2
|
+
This is a simple implementation of a file spooler. You can think of it as
|
3
|
+
a filesystem based queueing service without a service running behind it.
|
4
|
+
Like the spools used in unix for mail servers, print jobs, etc.
|
5
|
+
|
6
|
+
In this module, a Pool instance can contain several different Spool instances,
|
7
|
+
each of which can store files. Data is retrieved from the Spool in a
|
8
|
+
non-strict order, oldest first.
|
9
|
+
|
10
|
+
Data is serialized and deserialized on storage/retrieval (currently using
|
11
|
+
YAML).
|
12
|
+
|
13
|
+
Most users will want to start using this library by instantiating a Pool
|
14
|
+
object, pointing it to a directory that will act as the parent directory
|
15
|
+
for all subsequent Spools.
|
16
|
+
|
17
|
+
= Note
|
18
|
+
This library has currently only been tested with Ruby 1.9.1. It uses Pathname
|
19
|
+
extensively, and while it might work with Ruby 1.8.7, it probably will not
|
20
|
+
work with Ruby 1.8.6 and older.
|
21
|
+
|
22
|
+
= Usage Example
|
23
|
+
# instantiate a pool, pointing to a directory with
|
24
|
+
# read/write permissions for the effective user of
|
25
|
+
# the current process
|
26
|
+
|
27
|
+
require 'spool_pool'
|
28
|
+
pool = SpoolPool::Pool.new( "/path/to/my/spool/root" )
|
29
|
+
|
30
|
+
# store data in one spool
|
31
|
+
pool.put :my_spool, "some data here"
|
32
|
+
|
33
|
+
|
34
|
+
# retrieve the data
|
35
|
+
|
36
|
+
pool.get :my_spool
|
37
|
+
# -> "some data here"
|
38
|
+
|
39
|
+
# store data in another spool,
|
40
|
+
# demonstrating the ordered retrieval
|
41
|
+
|
42
|
+
pool.put :my_other_spool, :foo
|
43
|
+
|
44
|
+
sleep 1
|
45
|
+
|
46
|
+
pool.put :my_other_spool, :bar
|
47
|
+
|
48
|
+
pool.get :my_other_spool # -> :foo
|
49
|
+
|
50
|
+
pool.get :my_other_spool # -> :bar
|
51
|
+
|
52
|
+
= Feedback/Suggestions
|
53
|
+
By email to: sr@gimp.org
|
data/TODOs
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
- clean up specs
|
data/lib/spool_pool.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
=begin rdoc
|
2
|
+
= Introduction
|
3
|
+
This is a simple implementation of a file spooler. You can think of it as
|
4
|
+
a filesystem based queueing service without a service running behind it.
|
5
|
+
Like the spools used in unix for mail servers, print jobs etc.
|
6
|
+
|
7
|
+
In this module, a Pool instance can contain several different Spool instances,
|
8
|
+
each of which can store files. Data is retrieved from the spool in a
|
9
|
+
non-strict order, oldest first.
|
10
|
+
|
11
|
+
Data is serialized and deserialized on storage/retrieval (currently using
|
12
|
+
YAML).
|
13
|
+
|
14
|
+
Most users will want to start using this library by instantiating a Pool
|
15
|
+
object, pointing it to a directory that will act as the parent directory
|
16
|
+
for all subsequent Spools.
|
17
|
+
|
18
|
+
= Usage Example
|
19
|
+
# instantiate a pool, pointing to a directory with read/write permissions
|
20
|
+
# for the effective user of the current process
|
21
|
+
|
22
|
+
require 'spool_pool'
|
23
|
+
pool = SpoolPool::Pool.new( "/path/to/my/spool/root" )
|
24
|
+
|
25
|
+
# store data in one spool
|
26
|
+
pool.put :my_spool, "some data here"
|
27
|
+
|
28
|
+
|
29
|
+
# retrieve the data
|
30
|
+
|
31
|
+
pool.get :my_spool
|
32
|
+
# -> "some data here"
|
33
|
+
|
34
|
+
# store data in another spool, demonstrating the ordered retrieval
|
35
|
+
|
36
|
+
pool.put :my_other_spool, :foo
|
37
|
+
sleep 1
|
38
|
+
pool.put :my_other_spool, :bar
|
39
|
+
|
40
|
+
pool.get :my_other_spool
|
41
|
+
# -> :foo
|
42
|
+
pool.get :my_other_spool
|
43
|
+
# -> :bar
|
44
|
+
|
45
|
+
=end
|
46
|
+
# Top-level namespace of the library. It is declared empty here so that the
# files required afterwards can reopen it and add their classes.
module SpoolPool; end
|
48
|
+
|
49
|
+
$: << File.expand_path( File.dirname( __FILE__ ) )
|
50
|
+
require 'spool_pool/pool'
|
51
|
+
require 'spool_pool/spool'
|
52
|
+
require 'spool_pool/file'
|
53
|
+
|
@@ -0,0 +1,161 @@
|
|
1
|
+
require 'tempfile'
|
2
|
+
require 'delegate'
|
3
|
+
require 'tmpdir'
|
4
|
+
require 'thread'
|
5
|
+
|
6
|
+
module SpoolPool
=begin rdoc
A class to deal with the writing of spool files. It wraps a regular ::File
(via DelegateClass) that is created exclusively under a unique name inside a
given directory.

Most of this file has been adapted from the Tempfile code in the Ruby 1.9.1
class library, written by yugui.
=end
  class File < DelegateClass( ::File )
    # Path of the spool file on disk; +nil+ once the file has been unlinked.
    attr_reader :path

=begin rdoc
Returns the data read from the given +filename+, and deletes the file
before returning.

Yields the read data also to an optionally given block. If you give a block
to process your data and your code throws an exception, the file will not
be deleted and another processing of the data can be attempted in the
future.
=end
    def self.safe_read( filename )
      data = ::File.read( filename )
      yield data if block_given?
      # Only reached when the block (if any) completed without raising.
      ::File.unlink( filename )
      data
    end

=begin rdoc
Stores the given +data+ in a unique file in the directory +basepath+.
+basepath+ can be either a file path as a String or a Pathname.

If the data can't be written to the file (permissions, quota, I/O errors...),
it will attempt to delete the file before re-raising the original exception.

Returns the path of the file storing the data.
=end
    def self.write( basepath, data )
      file = nil
      begin
        file = new( basepath.to_s )
        file.write data
      rescue
        file.unlink if file
        raise
      else
        file.path
      ensure
        # +file+ is still nil when the spool file could not even be created;
        # closing unconditionally would hide the real error behind a
        # NoMethodError.
        file.close if file
      end
    end

    # If no block is given, this is a synonym for new().
    #
    # If a block is given, it will be passed the spool file as an argument,
    # and the spool file will automatically be closed when the block
    # terminates. The call returns the value of the block.
    def self.open(*args)
      file = new(*args)
      return file unless block_given?

      begin
        yield(file)
      ensure
        file.close
      end
    end

    # Number of attempts at creating the lock directory before giving up.
    MAX_TRY = 10
    # Mode the spool files are created with (owner read/write only).
    FILE_PERMISSIONS = 0600
    # Serializes spool name generation between threads of this process.
    @@lock = Mutex.new

    # Creates a spool file of mode 0600 in the directory +basedir+,
    # opens it read/write, and returns a SpoolPool::File object which
    # represents the created spool file. A SpoolPool::File object can be
    # treated just like a normal File object.
    #
    def initialize( basedir )
      create_threadsafe_spoolname( basedir ) do |spoolname|
        # EXCL makes the creation fail instead of reusing an existing file.
        @spoolfile = ::File.open( spoolname,
                                  ::File::RDWR | ::File::CREAT | ::File::EXCL,
                                  FILE_PERMISSIONS )
        @path = spoolname

        super(@spoolfile)
        # Now we have all the File/IO methods defined, you must not
        # carelessly put bare puts(), etc. after this.
      end
    end

    # Opens or reopens the file with mode "r+".
    def open
      @spoolfile.close if @spoolfile
      @spoolfile = ::File.open(@path, 'r+')
      __setobj__(@spoolfile)
    end

    # Closes the file. Closing an already closed file is a no-op.
    def close
      @spoolfile.close if @spoolfile
      @spoolfile = nil
    end

    # Unlinks the file, closing it first if necessary. Calling it again after
    # the file has been unlinked does nothing.
    def unlink
      # @path is reset to nil below once the file is gone.
      return unless @path

      # keep this order for thread safeness
      begin
        if ::File.exist?(@path)
          close unless closed?
          ::File.unlink(@path)
        end
        @path = nil
      rescue Errno::EACCES
        # may not be able to unlink on Windows; just ignore
      end
    end

    # Returns the size of the file, or 0 if the file has been closed.
    # As a side effect, the IO buffer is flushed before determining the size.
    def size
      return 0 unless @spoolfile

      @spoolfile.flush
      @spoolfile.stat.size
    end
    alias length size

    private

    # Candidate file name for attempt number +n+: timestamp, process id and
    # the attempt counter.
    def spoolfilename_for_try(n)
      "#{Time.now.to_f}-#{$$}-#{n}"
    end

    # Picks a spool file name inside +basedir+ that is not in use, guards it
    # with a "<name>.lock" directory and yields the name to the block. The
    # lock directory is removed again afterwards, even if the block raises.
    #
    # Raises a RuntimeError if the lock directory could not be created after
    # MAX_TRY attempts.
    def create_threadsafe_spoolname( basedir )
      lock = spoolname = nil
      n = failure = 0

      @@lock.synchronize do
        begin
          loop do
            spoolname = ::File.join( basedir, spoolfilename_for_try(n) )
            lock = spoolname + '.lock'
            n += 1
            break unless ::File.exist?(lock) || ::File.exist?(spoolname)
          end
          Dir.mkdir(lock)
        rescue
          failure += 1
          retry if failure < MAX_TRY
          # Plain interpolation: a '%' inside the underlying error message
          # must not be interpreted as a format directive. Kernel.raise is
          # called explicitly because the delegate target is not set up yet
          # at this point.
          ::Kernel.raise "cannot generate spool file `#{spoolname}': #{$!}"
        end
      end

      begin
        yield spoolname
      ensure
        # Never leave the lock directory behind, not even on failure.
        Dir.rmdir(lock)
      end
    end

  end
end
|
@@ -0,0 +1,172 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
require 'spool_pool/spool'
|
3
|
+
|
4
|
+
module SpoolPool
=begin rdoc
This is a container class used to manage the interaction with the
individual Spool instances. Spool directories are created using the name
given in the put/get methods on demand as subdirectories of the +spool_dir+
passed to the initializer.

= Security Note
Some naive tests are in place to catch the most blatant directory traversal
attempts. But for real security you should never blindly pass any
user-supplied or computed queue name to these methods. Always validate
user input!

=end
  class Pool
    # Pathname of the directory containing all spool directories.
    attr_reader :spool_dir
    # Hash of the Spool instances used so far, keyed by the spool name as it
    # was passed to put/get/flush.
    attr_reader :spools

=begin rdoc
Sanity checking of the given pool +directory+ and its children (and parent,
if the +directory+ itself doesn't exist yet).

Will throw an exception if anything permission-wise looks fishy.
=end
    def self.validate_pool_dir( directory )
      pool_dir = Pathname.new( directory )

      begin
        unless pool_dir.exist?
          # The directory will have to be created, so only the parent matters.
          parent = pool_dir.parent
          raise Errno::EACCES unless parent.writable? && parent.executable?
          return
        end

        raise Errno::EACCES unless pool_dir.readable? &&
                                   pool_dir.writable? &&
                                   pool_dir.executable?

        entries = pool_dir.children
        return if entries.empty?

        entries.select { |d| d.dir? }.each do |spool_dir|
          raise Errno::EACCES unless spool_dir.readable? &&
                                     spool_dir.writable? &&
                                     spool_dir.executable?

          spool_dir.children.select { |f| f.file? }.each do |spool_file|
            raise Errno::EACCES unless spool_file.readable?
          end
        end
      rescue Errno::EACCES
        # Replace the bare error with one that tells the user what to do.
        raise Errno::EACCES.new( "Something doesn't look right permission wise. Consider running 'chmod -R 0755 #{directory}' or the equivalent. If the #{directory} itself doesn't exist, check to make sure it's parent exists, and is write- and executable for the current process owner." )
      end
    end

=begin rdoc
Sets up a spooling pool in the +spool_path+ given.
If the directory does not exist, it will try to create it for you.

Will throw an exception if it can't create the directory, or if the
directory exists and is not read- and writeable by the effective user id
of the process.
=end
    def initialize( spool_path )
      @spool_dir = Pathname.new spool_path
      @spools = {}

      self.class.validate_pool_dir( spool_path )

      setup_spooldir unless @spool_dir.exist?
      assert_readable @spool_dir
      assert_writeable @spool_dir
    end

=begin rdoc
Serializes and stores the +data+ in the given +spool+. If the +spool+
doesn't exist yet, it will try to create a new spool and directory.

Returns the path of the file storing the data.

This method performs a naive check on the spool name for directory
traversal attempts. *DO NOT* rely on this for security relevant systems,
always validate user supplied queue names yourself before handing them
off to this method!
=end
    def put( spool, data )
      validate_spool_path spool
      @spools[spool] ||= SpoolPool::Spool.new( @spool_dir + spool.to_s )
      @spools[spool].put( data )
    end

=begin rdoc
Retrieves and deserializes oldest data in the given +spool+, yielding it to
an optional block as well. The spool file is deleted just before the method
returns. If a block was given, and an exception was raised within the block,
the spool file is not deleted and another try at processing can be attempted
in the future.

Returns +nil+ if the spool is not known and has no directory on disk.

Note that while data is retrieved oldest first, the order is non-strict, i.e.
different data written during the same second to the storage will be
retrieved in a random order. Or to put it another way: Ordering is exact down
to the second, but sub-second ordering is random.

This method performs a naive check on the spool name for directory
traversal attempts. *DO NOT* rely on this for security relevant systems,
always validate user supplied queue names yourself before handing them
off to this method!
=end
    def get( spool, &block )
      validate_spool_path spool

      missing_spool_on_read_handler( spool ) unless @spools.has_key?( spool )

      known = @spools[spool]
      known ? known.get( &block ) : nil
    end

=begin rdoc
Retrieves and deserializes all data in the given +spool+, yielding
each deserialized data to the supplied block. Ordering is oldest data first.

Note that while data is retrieved oldest first, the order is non-strict, i.e.
different data written during the same second to the storage will be
retrieved in a random order. Or to put it another way: Ordering is
exact down to the second, but sub-second ordering is random.

This method performs a naive check on the spool name for directory
traversal attempts. *DO NOT* rely on this for security relevant systems,
always validate user supplied queue names yourself before handing them
off to this method!
=end
    def flush( spool, &block )
      validate_spool_path spool

      missing_spool_on_read_handler( spool ) unless @spools.has_key?( spool )

      @spools[spool].flush( &block ) if @spools[spool]
    end

    private

    # Creates the pool directory (including missing intermediate
    # directories) and sets its mode to 0755.
    def setup_spooldir
      raise Errno::EACCES.new("The directory '#{@spool_dir}' does not exist and I don't have enough permissions to create it!") unless @spool_dir.parent.writable?
      @spool_dir.mkpath
      @spool_dir.chmod 0755
    end

    # Returns a Spool for +pathname+ if that path exists, +nil+ otherwise.
    def create_spool_for_existing_path( pathname )
      pathname.exist? ? SpoolPool::Spool.new( pathname ) : nil
    end

    # Registers a Spool for +spool+ when a directory of that name already
    # exists below the pool directory (e.g. written by another process).
    def missing_spool_on_read_handler( spool )
      spool_instance = create_spool_for_existing_path( @spool_dir + spool.to_s )
      @spools[spool] = spool_instance if spool_instance
    end

    # Raises Errno::EACCES unless +pathname+ is readable.
    def assert_readable( pathname )
      raise Errno::EACCES.new( "I can't read in the directory '#{pathname}'!" ) unless pathname.readable?
    end

    # Raises Errno::EACCES unless +pathname+ is writable.
    def assert_writeable( pathname )
      raise Errno::EACCES.new( "I can't write to the directory '#{pathname}'!" ) unless pathname.writable?
    end

    # Naive directory traversal check: raises a RuntimeError if any
    # '/'-separated component of the spool name is "..". This covers a bare
    # "..", a leading "../x", a trailing "x/.." and an embedded "x/../y".
    #
    # NOTE(review): absolute spool names (e.g. "/etc") are still accepted and
    # make Pathname#+ discard the pool directory - callers must validate
    # untrusted names themselves.
    def validate_spool_path( spool )
      raise "Directory traversal attempt" if spool.to_s.split( '/' ).include?( '..' )
    end
  end
end
|