perobs 0.0.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,242 +0,0 @@
1
- # encoding: UTF-8
2
- #
3
- # = BlockDB.rb -- Persistent Ruby Object Store
4
- #
5
- # Copyright (c) 2015 by Chris Schlaeger <chris@taskjuggler.org>
6
- #
7
- # MIT License
8
- #
9
- # Permission is hereby granted, free of charge, to any person obtaining
10
- # a copy of this software and associated documentation files (the
11
- # "Software"), to deal in the Software without restriction, including
12
- # without limitation the rights to use, copy, modify, merge, publish,
13
- # distribute, sublicense, and/or sell copies of the Software, and to
14
- # permit persons to whom the Software is furnished to do so, subject to
15
- # the following conditions:
16
- #
17
- # The above copyright notice and this permission notice shall be
18
- # included in all copies or substantial portions of the Software.
19
- #
20
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
- # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
- # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
- # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
-
28
- require 'json'
29
- require 'json/add/core'
30
- require 'json/add/struct'
31
-
32
module PEROBS

  # This class manages the usage of the data blocks in the corresponding
  # HashedBlocks object.
  #
  # A BlockDB lives in one directory and uses two files there:
  # 'index.json' holds an Array of entries (Hashes with the String keys
  # 'id', 'bytes', 'first_block', 'blocks' and 'marked') and 'data' holds
  # the raw bytes of all objects. Space in 'data' is handed out in units of
  # _block_size_ bytes. The index is kept sorted by 'first_block'.
  class BlockDB

    # Create a new BlockDB object and load the index if one exists.
    # @param dir [String] directory that holds the index and data file
    # @param block_size [Fixnum] size of a storage block in bytes
    # @raise [RuntimeError] if an existing index file cannot be parsed
    def initialize(dir, block_size)
      @dir = dir
      @block_size = block_size

      @index_file_name = File.join(dir, 'index.json')
      @block_file_name = File.join(dir, 'data')
      read_index
    end

    # Write the given bytes with the given ID into the DB.
    # @param id [Fixnum or Bignum] ID
    # @param raw [String] sequence of bytes
    # @raise [RuntimeError] if fewer bytes were written than requested or
    #   the index could not be saved
    # @raise [IOError] if the block file could not be written
    def write_object(id, raw)
      bytes = raw.bytesize
      start_address = reserve_blocks(id, bytes)
      if write_to_block_file(raw, start_address) != bytes
        raise RuntimeError, 'Object length does not match written bytes'
      end
      # The index is only persisted after the data has been written.
      write_index
    end

    # Read the entry for the given ID and return it as bytes.
    # @param id [Fixnum or Bignum] ID
    # @return [String] sequence of bytes
    # @raise [ArgumentError] if there is no entry for the given ID
    # @raise [IOError] if the block file could not be read
    def read_object(id)
      location = find(id)
      # Without this check a missing ID would splat nil into the call below
      # and surface as a confusing "wrong number of arguments" error.
      unless location
        raise ArgumentError, "Cannot find an entry for ID #{id} to read"
      end

      read_from_block_file(*location)
    end

    # Find the data for the object with given id.
    # @param id [Fixnum or Bignum] Object ID
    # @return [Array] Returns an Array with two Fixnum entries. The first is
    #         the number of bytes and the second is the starting offset in the
    #         block storage file. Returns nil if no entry has the given ID.
    def find(id)
      entry = find_entry(id)
      entry && [ entry['bytes'], entry['first_block'] * @block_size ]
    end

    # Write a string of bytes into the file at the given address.
    # @param raw [String] bytes to write
    # @param address [Fixnum] offset in the file
    # @return [Fixnum] number of bytes written
    # @raise [IOError] if the write failed for any reason
    def write_to_block_file(raw, address)
      File.write(@block_file_name, raw, address)
    rescue => e
      raise IOError,
            "Cannot write block file #{@block_file_name}: #{e.message}"
    end

    # Read _bytes_ bytes from the file starting at offset _address_.
    # @param bytes [Fixnum] number of bytes to read
    # @param address [Fixnum] offset in the file
    # @return [String] the bytes read
    # @raise [IOError] if the read failed for any reason
    def read_from_block_file(bytes, address)
      File.read(@block_file_name, bytes, address)
    rescue => e
      raise IOError,
            "Cannot read block file #{@block_file_name}: #{e.message}"
    end

    # Clear the mark on all entries in the index and save the index.
    def clear_marks
      @entries.each { |e| e['marked'] = false }
      write_index
    end

    # Set a mark on the entry with the given ID and save the index.
    # @param id [Fixnum or Bignum] ID of the entry
    # @raise [ArgumentError] if there is no entry for the given ID
    def mark(id)
      entry = find_entry(id)
      unless entry
        raise ArgumentError, "Cannot find an entry for ID #{id} to mark"
      end

      entry['marked'] = true
      write_index
    end

    # Check if the entry for a given ID is marked.
    # @param id [Fixnum or Bignum] ID of the entry
    # @return [TrueClass or FalseClass] true if marked, false otherwise
    # @raise [ArgumentError] if there is no entry for the given ID
    def is_marked?(id)
      entry = find_entry(id)
      unless entry
        raise ArgumentError, "Cannot find an entry for ID #{id} to check"
      end

      entry['marked']
    end

    # Remove all entries from the index that have not been marked and save
    # the index. The data file itself is not touched; the blocks simply
    # become available for later reservations.
    def delete_unmarked_entries
      @entries.delete_if { |e| e['marked'] == false }
      write_index
    end

    private

    # Look up the index entry for the given ID.
    # @param id [Fixnum or Bignum] ID of the entry
    # @return [Hash or nil] the first entry with that ID or nil
    def find_entry(id)
      @entries.find { |entry| entry['id'] == id }
    end

    # Reserve the blocks needed for the specified number of bytes with the
    # given ID. An existing entry for the ID is reused when it is large
    # enough. Otherwise the smallest gap between entries that can hold the
    # data is used, or the space after the last entry if no gap fits.
    # @param id [Fixnum or Bignum] ID of the entry
    # @param bytes [Fixnum] number of bytes for this entry
    # @return [Fixnum] the start address of the reserved block
    def reserve_blocks(id, bytes)
      # size of the entry in blocks
      blocks = size_in_blocks(bytes)
      # index of first block after the last seen entry
      end_of_last_entry = 0
      # block index of best fit segment
      best_fit_start = nil
      # best fit segment size in blocks
      best_fit_blocks = nil
      # If there is already an entry for an object with the _id_, we mark it
      # for deletion.
      entry_to_delete = nil

      @entries.each do |entry|
        if entry['id'] == id
          # We've found an old entry for this ID.
          if entry['blocks'] >= blocks
            # The old entry still fits. Let's just reuse it.
            entry['bytes'] = bytes
            entry['blocks'] = blocks
            return entry['first_block'] * @block_size
          end
          # It does not fit. Ignore the entry and mark it for deletion.
          # end_of_last_entry is deliberately not advanced, so the space of
          # the old entry counts as part of the following gap.
          entry_to_delete = entry
          next
        end

        gap = entry['first_block'] - end_of_last_entry
        if gap >= blocks &&
           (best_fit_blocks.nil? || gap < best_fit_blocks)
          # We've found a segment that fits the requested bytes and fits
          # better than any previous find.
          best_fit_start = end_of_last_entry
          best_fit_blocks = gap
        end
        end_of_last_entry = entry['first_block'] + entry['blocks']
      end

      # Delete the old entry if requested.
      @entries.delete(entry_to_delete) if entry_to_delete

      # Create a new entry and insert it.
      entry = {
        'id' => id,
        'bytes' => bytes,
        'first_block' => best_fit_start || end_of_last_entry,
        'blocks' => blocks,
        'marked' => false
      }
      @entries << entry
      # The gap search above relies on the entries being ordered by position.
      @entries.sort! { |e1, e2| e1['first_block'] <=> e2['first_block'] }

      entry['first_block'] * @block_size
    end

    # Load the index from the index file. A missing file yields an empty
    # index.
    # @raise [RuntimeError] if the file exists but cannot be read or parsed
    def read_index
      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
      unless File.exist?(@index_file_name)
        @entries = []
        return
      end

      begin
        @entries = JSON.parse(File.read(@index_file_name))
      rescue => e
        raise RuntimeError,
              "BlockDB file #{@index_file_name} corrupted: #{e.message}"
      end
    end

    # Save the complete index as JSON to the index file.
    # @raise [RuntimeError] if the file cannot be written
    def write_index
      File.write(@index_file_name, @entries.to_json)
    rescue => e
      raise RuntimeError,
            "Cannot write BlockDB index file #{@index_file_name}: " +
            e.message
    end

    # Convert a size in bytes into the number of blocks needed to hold it,
    # rounding up to whole blocks.
    # @param bytes [Fixnum] number of bytes
    # @return [Fixnum] number of blocks
    def size_in_blocks(bytes)
      bytes / @block_size + (bytes % @block_size != 0 ? 1 : 0)
    end

  end

end
242
-
@@ -1,171 +0,0 @@
1
- # encoding: UTF-8
2
- #
3
- # = FileSystemDB.rb -- Persistent Ruby Object Store
4
- #
5
- # Copyright (c) 2015 by Chris Schlaeger <chris@taskjuggler.org>
6
- #
7
- # MIT License
8
- #
9
- # Permission is hereby granted, free of charge, to any person obtaining
10
- # a copy of this software and associated documentation files (the
11
- # "Software"), to deal in the Software without restriction, including
12
- # without limitation the rights to use, copy, modify, merge, publish,
13
- # distribute, sublicense, and/or sell copies of the Software, and to
14
- # permit persons to whom the Software is furnished to do so, subject to
15
- # the following conditions:
16
- #
17
- # The above copyright notice and this permission notice shall be
18
- # included in all copies or substantial portions of the Software.
19
- #
20
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
- # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
- # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
- # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
-
28
- require 'time'
29
- require 'json'
30
- require 'json/add/core'
31
- require 'json/add/struct'
32
- require 'yaml'
33
- require 'fileutils'
34
-
35
- require 'perobs/DataBase'
36
- require 'perobs/ObjectBase'
37
-
38
module PEROBS

  # This class provides a filesystem based database store for objects.
  # Every object is kept in a file of its own. The file name is the object
  # ID as 16 digit hexadecimal number plus a serializer specific extension.
  # The files are spread over sub-directories named after the first two
  # hex digits of the ID.
  #
  # NOTE(review): serialize, deserialize and @serializer are not defined in
  # this class; presumably they are provided by DataBase -- confirm.
  class FileSystemDB < DataBase

    # File name extension for each supported serializer.
    @@Extensions = {
      :marshal => '.mshl',
      :json => '.json',
      :yaml => '.yml'
    }

    # Create a new FileSystemDB object. This will create a DB with the given
    # name. A database will live in a directory of that name.
    # @param db_name [String] name of the DB directory
    # @param options [Hash] options to customize the behavior. Currently only
    #        the following option is supported:
    #        :serializer : Can be :marshal, :json, :yaml. Defaults to :json.
    # @raise [IOError] if the DB directory cannot be created
    def initialize(db_name, options = {})
      super(options[:serializer] || :json)
      @db_dir = db_name

      # Create the database directory if it doesn't exist yet.
      ensure_dir_exists(@db_dir)
    end

    # Return true if the object with given ID exists
    # @param id [Fixnum or Bignum]
    def include?(id)
      # File.exists? was removed in Ruby 3.2.
      File.exist?(object_file_name(id))
    end

    # Store the given object into the filesystem.
    # @param obj [Hash] Object as defined by PEROBS::ObjectBase
    # @param id [Fixnum or Bignum] object ID
    def put_object(obj, id)
      File.write(object_file_name(id), serialize(obj))
    end

    # Load the given object from the filesystem.
    # @param id [Fixnum or Bignum] object ID
    # @return [Hash] Object as defined by PEROBS::ObjectBase
    # @raise [RuntimeError] if the object file cannot be located or read
    def get_object(id)
      file_name = nil
      begin
        # Determining the name stays inside the begin block so that a failure
        # there is reported as RuntimeError as well.
        file_name = object_file_name(id)
        raw = File.read(file_name)
      rescue => e
        raise RuntimeError, "Error in #{file_name}: #{e.message}"
      end
      deserialize(raw)
    end

    # This method must be called to initiate the marking process.
    def clear_marks
      @mark_start = Time.now
      # The filesystem stores access times with second granularity. We need to
      # wait 1 sec. to ensure that all marks are noticeable.
      sleep(1)
    end

    # Permanently delete all objects that have not been marked. Those are
    # orphaned and are no longer referenced by any actively used object.
    # An object counts as unmarked when the access time of its file is not
    # later than the time recorded by clear_marks.
    def delete_unmarked_objects
      Dir.glob(File.join(@db_dir, '*')) do |dir|
        # Only the per-prefix sub-directories hold object files.
        next unless Dir.exist?(dir)

        Dir.glob(File.join(dir, '*')) do |file|
          File.delete(file) if File.atime(file) <= @mark_start
        end
      end
    end

    # Mark an object by touching its file.
    # @param id [Fixnum or Bignum] ID of the object to mark
    def mark(id)
      FileUtils.touch(object_file_name(id))
    end

    # Check if the object is marked, i. e. its file was accessed after the
    # last call to clear_marks.
    # @param id [Fixnum or Bignum] ID of the object to check
    # @return [TrueClass/FalseClass] true if marked
    def is_marked?(id)
      File.atime(object_file_name(id)) > @mark_start
    end

    # Check if the stored object is syntactically correct. Problems are
    # reported on $stderr.
    # @param id [Fixnum/Bignum] Object ID
    # @param repair [TrueClass/FalseClass] True if a repair attempt should be
    #        made. Currently not used by this implementation.
    # @return [TrueClass/FalseClass] True if the object is OK, otherwise
    #         false.
    def check(id, repair)
      file_name = object_file_name(id)
      unless File.exist?(file_name)
        $stderr.puts "Object file for ID #{id} does not exist"
        return false
      end

      begin
        get_object(id)
      rescue => e
        $stderr.puts "Cannot read object file #{file_name}: #{e.message}"
        return false
      end

      true
    end

    private

    # Ensure that we have a directory to store the DB items.
    # @param dir [String] directory to create if it is missing
    # @raise [IOError] if the directory cannot be created
    def ensure_dir_exists(dir)
      return if Dir.exist?(dir)

      begin
        Dir.mkdir(dir)
      rescue SystemCallError, IOError => e
        # Dir.mkdir reports failures as SystemCallError (Errno::*), so those
        # must be caught to produce the intended IOError.
        raise IOError, "Cannot create DB directory '#{dir}': #{e.message}"
      end
    end

    # Determine the file name to store the object. The object ID determines
    # the directory and file name inside the store. The directory is created
    # if it does not exist yet.
    # @param id [Fixnum or Bignum] ID of the object
    # @return [String] path of the object file
    def object_file_name(id)
      hex_id = "%016X" % id
      # The first two hex digits select the sub-directory.
      dir = hex_id[0..1]
      ensure_dir_exists(File.join(@db_dir, dir))

      File.join(@db_dir, dir, hex_id + @@Extensions[@serializer])
    end

  end

end
171
-
@@ -1,153 +0,0 @@
1
- # encoding: UTF-8
2
- #
3
- # = HashedBlocksDB.rb -- Persistent Ruby Object Store
4
- #
5
- # Copyright (c) 2015 by Chris Schlaeger <chris@taskjuggler.org>
6
- #
7
- # MIT License
8
- #
9
- # Permission is hereby granted, free of charge, to any person obtaining
10
- # a copy of this software and associated documentation files (the
11
- # "Software"), to deal in the Software without restriction, including
12
- # without limitation the rights to use, copy, modify, merge, publish,
13
- # distribute, sublicense, and/or sell copies of the Software, and to
14
- # permit persons to whom the Software is furnished to do so, subject to
15
- # the following conditions:
16
- #
17
- # The above copyright notice and this permission notice shall be
18
- # included in all copies or substantial portions of the Software.
19
- #
20
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
- # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
- # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
- # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
-
28
- require 'time'
29
- require 'json'
30
- require 'json/add/core'
31
- require 'json/add/struct'
32
- require 'yaml'
33
- require 'fileutils'
34
-
35
- require 'perobs/DataBase'
36
- require 'perobs/BlockDB'
37
-
38
module PEROBS

  # This class provides a filesystem based database store for objects. The
  # objects are spread over several directories that are selected by the
  # leading hex digits of the object ID. Inside each directory a BlockDB
  # stores the serialized objects.
  #
  # NOTE(review): serialize, deserialize and ensure_dir_exists are not
  # defined in this class; presumably they are provided by DataBase --
  # confirm.
  class HashedBlocksDB < DataBase

    # File name extension for each supported serializer.
    @@Extensions = {
      :marshal => '.mshl',
      :json => '.json',
      :yaml => '.yml'
    }

    # Create a new HashedBlocksDB object. This will create a DB with the
    # given name. A database will live in a directory of that name.
    # @param db_name [String] name of the DB directory
    # @param options [Hash] options to customize the behavior. Currently only
    #        the following options are supported:
    #        :serializer  : Can be :marshal, :json, :yaml. Defaults to :json.
    #        :dir_nibbles : The number of nibbles to use for directory names.
    #                       Meaningful values are 1, 2, and 3. The larger the
    #                       number the more back-end files are used. Each
    #                       nibble provides 16 times more directories.
    #                       Defaults to 2.
    #        :block_size  : The size of the blocks inside the storage files in
    #                       bytes. This should roughly correspond to the size
    #                       of the smallest serialized objects you want to
    #                       store in quantities. It also should be a fraction
    #                       of 4096, the native storage system block size.
    #                       Defaults to 256.
    def initialize(db_name, options = {})
      super(options[:serializer] || :json)
      @db_dir = db_name
      @dir_nibbles = options[:dir_nibbles] || 2
      @block_size = options[:block_size] || 256

      # Create the database directory if it doesn't exist yet.
      ensure_dir_exists(@db_dir)
    end

    # Return true if the object with given ID exists
    # @param id [Fixnum or Bignum]
    def include?(id)
      !block_db(id).find(id).nil?
    end

    # Store the given object into the cluster files.
    # @param obj [Hash] Object as defined by PEROBS::ObjectBase
    # @param id [Fixnum or Bignum] object ID
    def put_object(obj, id)
      block_db(id).write_object(id, serialize(obj))
    end

    # Load the given object from the filesystem.
    # @param id [Fixnum or Bignum] object ID
    # @return [Hash] Object as defined by PEROBS::ObjectBase
    def get_object(id)
      deserialize(block_db(id).read_object(id))
    end

    # This method must be called to initiate the marking process.
    def clear_marks
      each_block_db { |db| db.clear_marks }
    end

    # Permanently delete all objects that have not been marked. Those are
    # orphaned and are no longer referenced by any actively used object.
    def delete_unmarked_objects
      each_block_db { |db| db.delete_unmarked_entries }
    end

    # Mark an object.
    # @param id [Fixnum or Bignum] ID of the object to mark
    def mark(id)
      block_db(id).mark(id)
    end

    # Check if the object is marked.
    # @param id [Fixnum or Bignum] ID of the object to check
    def is_marked?(id)
      block_db(id).is_marked?(id)
    end

    # Check if the stored object is syntactically correct. Problems are
    # reported on $stderr.
    # @param id [Fixnum/Bignum] Object ID
    # @param repair [TrueClass/FalseClass] True if a repair attempt should be
    #        made. Currently not used by this implementation.
    # @return [TrueClass/FalseClass] True if the object is OK, otherwise
    #         false.
    def check(id, repair)
      begin
        get_object(id)
      rescue => e
        $stderr.puts "Cannot read object with ID #{id}: #{e.message}"
        return false
      end

      true
    end

    private

    # Determine the directory to store the object in. The leading
    # @dir_nibbles hex digits of the 16 digit object ID select the directory
    # inside the store. The directory is created if it does not exist yet.
    # @param id [Fixnum or Bignum] ID of the object
    # @return [String] path of the directory
    def directory(id)
      hex_id = "%016X" % id
      dir = hex_id[0..(@dir_nibbles - 1)]
      ensure_dir_exists(dir_name = File.join(@db_dir, dir))

      dir_name
    end

    # Open the BlockDB that is responsible for the object with the given ID.
    # @param id [Fixnum or Bignum] ID of the object
    # @return [BlockDB]
    def block_db(id)
      BlockDB.new(directory(id), @block_size)
    end

    # Yield a BlockDB for every sub-directory of the DB directory.
    def each_block_db
      Dir.glob(File.join(@db_dir, '*')) do |dir|
        # Stray plain files in the DB directory cannot hold a BlockDB.
        next unless File.directory?(dir)

        yield BlockDB.new(dir, @block_size)
      end
    end

  end

end
153
-