perobs 0.0.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,242 +0,0 @@
1
- # encoding: UTF-8
2
- #
3
- # = BlockDB.rb -- Persistent Ruby Object Store
4
- #
5
- # Copyright (c) 2015 by Chris Schlaeger <chris@taskjuggler.org>
6
- #
7
- # MIT License
8
- #
9
- # Permission is hereby granted, free of charge, to any person obtaining
10
- # a copy of this software and associated documentation files (the
11
- # "Software"), to deal in the Software without restriction, including
12
- # without limitation the rights to use, copy, modify, merge, publish,
13
- # distribute, sublicense, and/or sell copies of the Software, and to
14
- # permit persons to whom the Software is furnished to do so, subject to
15
- # the following conditions:
16
- #
17
- # The above copyright notice and this permission notice shall be
18
- # included in all copies or substantial portions of the Software.
19
- #
20
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
- # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
- # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
- # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
-
28
- require 'json'
29
- require 'json/add/core'
30
- require 'json/add/struct'
31
-
32
- module PEROBS
33
-
34
# This class manages the usage of the data blocks of a single storage
# directory. The directory holds two files:
#
# * 'index.json': a JSON Array of entries. Each entry is a Hash with the keys
#   'id', 'bytes', 'first_block', 'blocks' and 'marked'. The Array is kept
#   sorted by 'first_block'.
# * 'data': the block file. An entry occupies 'blocks' consecutive blocks of
#   _block_size_ bytes each, starting at block index 'first_block'.
class BlockDB

  # Create a new BlockDB object and load the index of the given directory.
  # @param dir [String] directory that holds the index and the block file
  # @param block_size [Integer] size of a single block in bytes
  # @raise [ArgumentError] if block_size is not a positive Integer
  # @raise [RuntimeError] if the index file exists but cannot be parsed
  def initialize(dir, block_size)
    # A zero or negative block size would only surface much later as a
    # ZeroDivisionError or as a nonsensical block layout.
    unless block_size.is_a?(Integer) && block_size > 0
      raise ArgumentError,
            "Block size must be a positive Integer, not #{block_size.inspect}"
    end

    @dir = dir
    @block_size = block_size

    @index_file_name = File.join(dir, 'index.json')
    @block_file_name = File.join(dir, 'data')
    read_index
  end

  # Write the given bytes with the given ID into the DB.
  # @param id [Integer] ID
  # @param raw [String] sequence of bytes
  # @raise [RuntimeError] if the number of written bytes does not match the
  #        size of _raw_ or if the index cannot be written
  # @raise [IOError] if the block file cannot be written
  def write_object(id, raw)
    bytes = raw.bytesize
    start_address = reserve_blocks(id, bytes)
    if write_to_block_file(raw, start_address) != bytes
      raise RuntimeError, 'Object length does not match written bytes'
    end
    write_index
  end

  # Read the entry for the given ID and return it as bytes.
  # @param id [Integer] ID
  # @return [String] sequence of bytes
  # @raise [ArgumentError] if there is no entry for the given ID
  # @raise [IOError] if the block file cannot be read
  def read_object(id)
    location = find(id)
    unless location
      raise ArgumentError, "Cannot find an entry for ID #{id} to read"
    end

    read_from_block_file(*location)
  end

  # Find the data for the object with given id.
  # @param id [Integer] Object ID
  # @return [Array, nil] Returns an Array with two Integer entries. The first
  #         is the number of bytes and the second is the starting offset in
  #         the block storage file. Returns nil if there is no such entry.
  def find(id)
    entry = entry_for(id)
    return nil unless entry

    [ entry['bytes'], entry['first_block'] * @block_size ]
  end

  # Write a string of bytes into the file at the given address.
  # @param raw [String] bytes to write
  # @param address [Integer] offset in the file
  # @return [Integer] number of bytes written
  # @raise [IOError] if the block file cannot be written
  def write_to_block_file(raw, address)
    # The block file holds raw bytes at byte offsets, so it must be accessed
    # in binary mode. With an offset the file is not truncated.
    File.binwrite(@block_file_name, raw, address)
  rescue => e
    raise IOError,
          "Cannot write block file #{@block_file_name}: #{e.message}"
  end

  # Read _bytes_ bytes from the file starting at offset _address_.
  # @param bytes [Integer] number of bytes to read
  # @param address [Integer] offset in the file
  # @return [String] the bytes that were read
  # @raise [IOError] if the block file cannot be read
  def read_from_block_file(bytes, address)
    File.binread(@block_file_name, bytes, address)
  rescue => e
    raise IOError,
          "Cannot read block file #{@block_file_name}: #{e.message}"
  end

  # Clear the mark on all entries in the index and persist the index.
  def clear_marks
    @entries.each { |entry| entry['marked'] = false }
    write_index
  end

  # Set a mark on the entry with the given ID and persist the index.
  # @param id [Integer] ID of the entry
  # @raise [ArgumentError] if there is no entry for the given ID
  def mark(id)
    entry = entry_for(id)
    unless entry
      raise ArgumentError, "Cannot find an entry for ID #{id} to mark"
    end

    entry['marked'] = true
    write_index
  end

  # Check if the entry for a given ID is marked.
  # @param id [Integer] ID of the entry
  # @return [TrueClass or FalseClass] true if marked, false otherwise
  # @raise [ArgumentError] if there is no entry for the given ID
  def is_marked?(id)
    entry = entry_for(id)
    unless entry
      raise ArgumentError, "Cannot find an entry for ID #{id} to check"
    end

    entry['marked']
  end

  # Remove all entries from the index that have not been marked and persist
  # the index. The block file itself is not touched; the freed blocks simply
  # become available for later reservations.
  def delete_unmarked_entries
    @entries.delete_if { |entry| entry['marked'] == false }
    write_index
  end

  private

  # Return the index entry for the given ID.
  # @param id [Integer] ID of the entry
  # @return [Hash, nil] the first matching entry or nil if there is none
  def entry_for(id)
    @entries.find { |entry| entry['id'] == id }
  end

  # Reserve the blocks needed for the specified number of bytes with the
  # given ID. An existing reservation for the ID is reused if it is large
  # enough. Otherwise the smallest gap between existing entries that can
  # hold the data is used (best fit). If no gap fits, the blocks are
  # appended after the last entry.
  # @param id [Integer] ID of the entry
  # @param bytes [Integer] number of bytes for this entry
  # @return [Integer] the start address of the reserved block
  def reserve_blocks(id, bytes)
    # size of the entry in blocks
    blocks = size_in_blocks(bytes)
    # index of first block after the last seen entry
    end_of_last_entry = 0
    # block index of best fit segment
    best_fit_start = nil
    # best fit segment size in blocks
    best_fit_blocks = nil
    # If there is already an entry for an object with the _id_ that is too
    # small, we remember it for deletion.
    entry_to_delete = nil

    @entries.each do |entry|
      if entry['id'] == id
        # We've found an old entry for this ID.
        if entry['blocks'] >= blocks
          # The old entry still fits. Let's just reuse it.
          entry['bytes'] = bytes
          entry['blocks'] = blocks
          return entry['first_block'] * @block_size
        end
        # It does not fit. Skipping it without advancing end_of_last_entry
        # makes its blocks count as part of the surrounding gap.
        entry_to_delete = entry
        next
      end

      gap = entry['first_block'] - end_of_last_entry
      if gap >= blocks && (best_fit_blocks.nil? || gap < best_fit_blocks)
        # We've found a segment that fits the requested bytes and fits
        # better than any previous find.
        best_fit_start = end_of_last_entry
        best_fit_blocks = gap
      end
      end_of_last_entry = entry['first_block'] + entry['blocks']
    end

    # Delete the old entry if requested.
    @entries.delete(entry_to_delete) if entry_to_delete

    # Create a new entry and insert it. The index must stay sorted by
    # 'first_block' since the gap search above relies on that order.
    entry = {
      'id' => id,
      'bytes' => bytes,
      'first_block' => best_fit_start || end_of_last_entry,
      'blocks' => blocks,
      'marked' => false
    }
    @entries << entry
    @entries.sort! { |e1, e2| e1['first_block'] <=> e2['first_block'] }

    entry['first_block'] * @block_size
  end

  # Load the index from the index file. A missing index file is treated as
  # an empty DB.
  # @raise [RuntimeError] if the index file cannot be read or parsed or does
  #        not contain an Array
  def read_index
    # File.exists? was removed in Ruby 3.2. File.exist? works everywhere.
    unless File.exist?(@index_file_name)
      @entries = []
      return
    end

    begin
      entries = JSON.parse(File.read(@index_file_name))
      raise 'index is not an Array' unless entries.is_a?(Array)
      @entries = entries
    rescue => e
      raise RuntimeError,
            "BlockDB file #{@index_file_name} corrupted: #{e.message}"
    end
  end

  # Write the index as JSON into the index file.
  # @raise [RuntimeError] if the index file cannot be written
  def write_index
    File.write(@index_file_name, @entries.to_json)
  rescue => e
    raise RuntimeError,
          "Cannot write BlockDB index file #{@index_file_name}: " +
          e.message
  end

  # Determine how many blocks are needed to hold the given number of bytes.
  # @param bytes [Integer] number of bytes
  # @return [Integer] number of blocks, rounded up
  def size_in_blocks(bytes)
    (bytes + @block_size - 1) / @block_size
  end

end
240
-
241
- end
242
-
@@ -1,171 +0,0 @@
1
- # encoding: UTF-8
2
- #
3
- # = FileSystemDB.rb -- Persistent Ruby Object Store
4
- #
5
- # Copyright (c) 2015 by Chris Schlaeger <chris@taskjuggler.org>
6
- #
7
- # MIT License
8
- #
9
- # Permission is hereby granted, free of charge, to any person obtaining
10
- # a copy of this software and associated documentation files (the
11
- # "Software"), to deal in the Software without restriction, including
12
- # without limitation the rights to use, copy, modify, merge, publish,
13
- # distribute, sublicense, and/or sell copies of the Software, and to
14
- # permit persons to whom the Software is furnished to do so, subject to
15
- # the following conditions:
16
- #
17
- # The above copyright notice and this permission notice shall be
18
- # included in all copies or substantial portions of the Software.
19
- #
20
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
- # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
- # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
- # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
-
28
- require 'time'
29
- require 'json'
30
- require 'json/add/core'
31
- require 'json/add/struct'
32
- require 'yaml'
33
- require 'fileutils'
34
-
35
- require 'perobs/DataBase'
36
- require 'perobs/ObjectBase'
37
-
38
- module PEROBS
39
-
40
# This class provides a filesystem based database store for objects. Every
# object is stored in a file of its own. The files are spread over
# sub-directories of the DB directory that are named after the first two
# hex digits of the zero-padded, 16 digit object ID.
#
# Marking is implemented with file access times: clear_marks remembers the
# current time, mark touches the object file, and an object counts as marked
# if its access time is later than the remembered time.
# NOTE(review): serialize, deserialize and @serializer are not defined in
# this class; presumably they are provided by DataBase -- confirm.
class FileSystemDB < DataBase

  # File name extensions for the supported serializers.
  @@Extensions = {
    :marshal => '.mshl',
    :json => '.json',
    :yaml => '.yml'
  }

  # Create a new FileSystemDB object. This will create a DB with the given
  # name. A database will live in a directory of that name.
  # @param db_name [String] name of the DB directory
  # @param options [Hash] options to customize the behavior. Currently only
  #        the following option is supported:
  #        :serializer : Can be :marshal, :json, :yaml. Defaults to :json.
  # @raise [IOError] if the DB directory cannot be created
  def initialize(db_name, options = {})
    super(options[:serializer] || :json)
    @db_dir = db_name

    # Create the database directory if it doesn't exist yet.
    ensure_dir_exists(@db_dir)
  end

  # Return true if the object with given ID exists
  # @param id [Integer]
  # @return [TrueClass or FalseClass]
  def include?(id)
    # File.exists? was removed in Ruby 3.2.
    File.exist?(object_file_name(id))
  end

  # Store the given object into the filesystem.
  # @param obj [Hash] Object as defined by PEROBS::ObjectBase
  # @param id [Integer] object ID
  def put_object(obj, id)
    File.write(object_file_name(id), serialize(obj))
  end

  # Load the given object from the filesystem.
  # @param id [Integer] object ID
  # @return [Hash] Object as defined by PEROBS::ObjectBase
  # @raise [RuntimeError] if the object file cannot be read
  def get_object(id)
    file_name = nil
    begin
      # Resolve the name first so that the error message can always
      # include it when the read fails.
      file_name = object_file_name(id)
      raw = File.read(file_name)
    rescue => e
      raise RuntimeError, "Error in #{file_name}: #{e.message}"
    end
    deserialize(raw)
  end

  # This method must be called to initiate the marking process.
  def clear_marks
    @mark_start = Time.now
    # The filesystem stores access times with second granularity. We need to
    # wait 1 sec. to ensure that all marks are noticeable.
    sleep(1)
  end

  # Permanently delete all objects that have not been marked. Those are
  # orphaned and are no longer referenced by any actively used object.
  # clear_marks must have been called before since the access times are
  # compared against the time stored by it.
  def delete_unmarked_objects
    Dir.glob(File.join(@db_dir, '*')) do |dir|
      # Only the sub-directories hold object files.
      next unless Dir.exist?(dir)

      Dir.glob(File.join(dir, '*')) do |file|
        File.delete(file) if File.atime(file) <= @mark_start
      end
    end
  end

  # Mark an object.
  # @param id [Integer] ID of the object to mark
  def mark(id)
    FileUtils.touch(object_file_name(id))
  end

  # Check if the object is marked.
  # @param id [Integer] ID of the object to check
  # @return [TrueClass or FalseClass] true if the object file was accessed
  #         after the last call to clear_marks
  def is_marked?(id)
    File.atime(object_file_name(id)) > @mark_start
  end

  # Check if the stored object is syntactically correct.
  # @param id [Integer] Object ID
  # @param repair [TrueClass/FalseClass] True if a repair attempt should be
  #        made. Currently not used by this implementation.
  # @return [TrueClass/FalseClass] True if the object is OK, otherwise
  #         false.
  def check(id, repair)
    file_name = object_file_name(id)
    unless File.exist?(file_name)
      $stderr.puts "Object file for ID #{id} does not exist"
      return false
    end

    begin
      get_object(id)
    rescue => e
      $stderr.puts "Cannot read object file #{file_name}: #{e.message}"
      return false
    end

    true
  end

  private

  # Ensure that we have a directory to store the DB items.
  # @param dir [String] name of the directory
  # @raise [IOError] if the directory does not exist and cannot be created
  def ensure_dir_exists(dir)
    return if Dir.exist?(dir)

    begin
      Dir.mkdir(dir)
    rescue IOError, SystemCallError => e
      # Dir.mkdir reports failures as SystemCallError (Errno::*), not as
      # IOError. Both are converted into the IOError that is raised here.
      raise IOError, "Cannot create DB directory '#{dir}': #{e.message}"
    end
  end

  # Determine the file name to store the object. The object ID determines
  # the directory and file name inside the store. The directory is created
  # if it does not exist yet.
  # @param id [Integer] ID of the object
  # @return [String] name of the object file
  def object_file_name(id)
    hex_id = "%016X" % id
    dir = hex_id[0..1]
    ensure_dir_exists(File.join(@db_dir, dir))

    File.join(@db_dir, dir, hex_id + @@Extensions[@serializer])
  end

end
169
-
170
- end
171
-
@@ -1,153 +0,0 @@
1
- # encoding: UTF-8
2
- #
3
- # = HashedBlocksDB.rb -- Persistent Ruby Object Store
4
- #
5
- # Copyright (c) 2015 by Chris Schlaeger <chris@taskjuggler.org>
6
- #
7
- # MIT License
8
- #
9
- # Permission is hereby granted, free of charge, to any person obtaining
10
- # a copy of this software and associated documentation files (the
11
- # "Software"), to deal in the Software without restriction, including
12
- # without limitation the rights to use, copy, modify, merge, publish,
13
- # distribute, sublicense, and/or sell copies of the Software, and to
14
- # permit persons to whom the Software is furnished to do so, subject to
15
- # the following conditions:
16
- #
17
- # The above copyright notice and this permission notice shall be
18
- # included in all copies or substantial portions of the Software.
19
- #
20
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
- # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
- # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
- # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
-
28
- require 'time'
29
- require 'json'
30
- require 'json/add/core'
31
- require 'json/add/struct'
32
- require 'yaml'
33
- require 'fileutils'
34
-
35
- require 'perobs/DataBase'
36
- require 'perobs/BlockDB'
37
-
38
- module PEROBS
39
-
40
# This class provides a filesystem based database store for objects. The
# objects are distributed over sub-directories of the DB directory. The
# sub-directory is selected by the leading hex digits (nibbles) of the
# zero-padded, 16 digit object ID. Inside each sub-directory the objects are
# managed by a BlockDB.
# NOTE(review): serialize, deserialize and ensure_dir_exists are not defined
# in this class; presumably they are provided by DataBase -- confirm.
class HashedBlocksDB < DataBase

  # File name extensions for the supported serializers. Not referenced by
  # any method of this class.
  @@Extensions = {
    :marshal => '.mshl',
    :json => '.json',
    :yaml => '.yml'
  }

  # Create a new HashedBlocksDB object. This will create a DB with the given
  # name. A database will live in a directory of that name.
  # @param db_name [String] name of the DB directory
  # @param options [Hash] options to customize the behavior. Currently only
  #        the following options are supported:
  #        :serializer  : Can be :marshal, :json, :yaml. Defaults to :json.
  #        :dir_nibbles : The number of nibbles to use for directory names.
  #                       Meaningful values are 1, 2, and 3. The larger the
  #                       number the more back-end files are used. Each
  #                       nibble provides 16 times more directories.
  #                       Defaults to 2.
  #        :block_size  : The size of the blocks inside the storage files in
  #                       bytes. This should roughly correspond to the size
  #                       of the smallest serialized objects you want to
  #                       store in quantities. It also should be a fraction
  #                       of 4096, the native storage system block size.
  #                       Defaults to 256.
  def initialize(db_name, options = {})
    super(options[:serializer] || :json)
    @db_dir = db_name
    @dir_nibbles = options[:dir_nibbles] || 2
    @block_size = options[:block_size] || 256

    # Create the database directory if it doesn't exist yet.
    ensure_dir_exists(@db_dir)
  end

  # Return true if the object with given ID exists
  # @param id [Integer]
  # @return [TrueClass or FalseClass]
  def include?(id)
    !block_db_for(id).find(id).nil?
  end

  # Store the given object into the cluster files.
  # @param obj [Hash] Object as defined by PEROBS::ObjectBase
  # @param id [Integer] object ID
  def put_object(obj, id)
    block_db_for(id).write_object(id, serialize(obj))
  end

  # Load the given object from the filesystem.
  # @param id [Integer] object ID
  # @return [Hash] Object as defined by PEROBS::ObjectBase
  def get_object(id)
    deserialize(block_db_for(id).read_object(id))
  end

  # This method must be called to initiate the marking process.
  def clear_marks
    each_block_db { |db| db.clear_marks }
  end

  # Permanently delete all objects that have not been marked. Those are
  # orphaned and are no longer referenced by any actively used object.
  def delete_unmarked_objects
    each_block_db { |db| db.delete_unmarked_entries }
  end

  # Mark an object.
  # @param id [Integer] ID of the object to mark
  def mark(id)
    block_db_for(id).mark(id)
  end

  # Check if the object is marked.
  # @param id [Integer] ID of the object to check
  # @return [TrueClass or FalseClass] true if marked, false otherwise
  def is_marked?(id)
    block_db_for(id).is_marked?(id)
  end

  # Check if the stored object is syntactically correct.
  # @param id [Integer] Object ID
  # @param repair [TrueClass/FalseClass] True if a repair attempt should be
  #        made. Currently not used by this implementation.
  # @return [TrueClass/FalseClass] True if the object is OK, otherwise
  #         false.
  def check(id, repair)
    begin
      get_object(id)
    rescue => e
      $stderr.puts "Cannot read object with ID #{id}: #{e.message}"
      return false
    end

    true
  end

  private

  # Open the BlockDB that is responsible for the object with the given ID.
  # @param id [Integer] ID of the object
  # @return [BlockDB]
  def block_db_for(id)
    BlockDB.new(directory(id), @block_size)
  end

  # Call the block with a BlockDB for every sub-directory of the DB
  # directory.
  # @yieldparam db [BlockDB]
  def each_block_db
    Dir.glob(File.join(@db_dir, '*')) do |dir|
      # Stray regular files in the DB directory are not BlockDB stores.
      # Opening them as a directory would fail when the index is written.
      next unless File.directory?(dir)

      yield BlockDB.new(dir, @block_size)
    end
  end

  # Determine the directory that stores the object. The leading
  # @dir_nibbles hex digits of the object ID select the directory inside
  # the store. The directory is created if it does not exist yet.
  # @param id [Integer] ID of the object
  # @return [String] name of the directory
  def directory(id)
    hex_id = "%016X" % id
    dir = hex_id[0..(@dir_nibbles - 1)]
    ensure_dir_exists(dir_name = File.join(@db_dir, dir))

    dir_name
  end

end
151
-
152
- end
153
-