perobs 2.5.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,570 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = EquiBlobsFile.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ require 'perobs/Log'
29
+
30
module PEROBS

  # This class implements persistent storage space for same size data blobs.
  # The blobs can be stored and retrieved and can be deleted again. The
  # EquiBlobsFile manages the storage of the blobs and free storage
  # spaces. The file grows and shrinks as needed. A blob is referenced by its
  # address. The address is an Integer that must be larger than 0. The value 0
  # is used to represent an undefined address or nil.
  #
  # The file has a 4 * 8 bytes long header that stores the total entry count,
  # the total space count, the first entry value and the offset of the first
  # space. The header is followed by cells of 1 + entry_bytes bytes each. The
  # first byte of a cell is the marker byte:
  #
  #   0: empty cell (a space). The first 8 payload bytes hold the file offset
  #      of the next space or 0 at the end of the space list.
  #   1: reserved cell. It was handed out by free_address but no blob has
  #      been stored yet.
  #   2: cell that holds a blob.
  class EquiBlobsFile

    TOTAL_ENTRIES_OFFSET = 0
    TOTAL_SPACES_OFFSET = 8
    FIRST_ENTRY_OFFSET = 2 * 8
    FIRST_SPACE_OFFSET = 3 * 8
    HEADER_SIZE = 4 * 8

    attr_reader :total_entries, :total_spaces, :file_name
    # Assigning first_entry only changes the in-memory value. It reaches the
    # file the next time the header is written.
    attr_accessor :first_entry

    # Create a new blob file object for the given directory and file name.
    # The file itself is not touched until open() is called.
    # @param dir [String] Directory
    # @param name [String] File name without the '.blobs' suffix
    # @param entry_bytes [Integer] Number of bytes each entry must have
    # @param first_entry_default [Integer] Value that first_entry is set to
    #        whenever the counters are reset
    def initialize(dir, name, entry_bytes, first_entry_default = 0)
      @file_name = File.join(dir, name + '.blobs')
      # An empty cell stores the 8 bytes long offset of the next space in its
      # payload. Smaller entries could not hold that offset.
      if entry_bytes < 8
        PEROBS.log.fatal "EquiBlobsFile entry size must be at least 8"
      end
      @entry_bytes = entry_bytes
      @first_entry_default = first_entry_default
      reset_counters

      # The File handle.
      @f = nil
    end

    # Open the blob file. An existing file is opened and its header is read.
    # Otherwise a new file with a fresh header is created. The file is then
    # locked exclusively without blocking.
    def open
      begin
        if File.exist?(@file_name)
          # Open an existing file.
          @f = File.open(@file_name, 'rb+')
          read_header
        else
          # Create a new file by writing a new header.
          @f = File.open(@file_name, 'wb+')
          write_header
        end
      rescue IOError => e
        # NOTE(review): File.open failures are reported by Ruby as
        # SystemCallError (Errno::*) and are not intercepted here -- confirm
        # whether callers rely on that.
        PEROBS.log.fatal "Cannot open blob file #{@file_name}: #{e.message}"
      end
      unless @f.flock(File::LOCK_NB | File::LOCK_EX)
        PEROBS.log.fatal 'Database blob file is locked by another process'
      end
    end

    # Close the blob file. This method must be called before the program is
    # terminated to avoid data loss. Calling it on a file that is not open
    # does nothing.
    def close
      begin
        if @f
          @f.flush
          @f.flock(File::LOCK_UN)
          @f.close
          @f = nil
        end
      rescue IOError => e
        PEROBS.log.fatal "Cannot close blob file #{@file_name}: #{e.message}"
      end
    end

    # Erase the backing store. This method should only be called when the file
    # is not currently open.
    def erase
      PEROBS.log.fatal 'Cannot call EquiBlobsFile::erase while it is open' if @f
      File.delete(@file_name) if File.exist?(@file_name)
      reset_counters
    end

    # Flush out all unwritten data.
    def sync
      begin
        @f.flush if @f
      rescue IOError => e
        PEROBS.log.fatal "Cannot sync blob file #{@file_name}: #{e.message}"
      end
    end

    # Delete all data. The file must be open. Afterwards it only contains a
    # header with reset counters.
    def clear
      @f.truncate(0)
      @f.flush
      reset_counters
      write_header
    end

    # Return the address of a free blob storage space. The cell is marked as
    # reserved. Addresses start at 1 and increase linearly.
    # @return [Integer] address of a free blob space
    def free_address
      if @first_space == 0
        # There is currently no free entry. Create a new reserved entry at the
        # end of the file.
        begin
          offset = @f.size
          @f.seek(offset)
          write_n_bytes([1] + ::Array.new(@entry_bytes, 0))
          # No counter has changed, but this also flushes the new cell.
          write_header
          return offset_to_address(offset)
        rescue IOError => e
          PEROBS.log.fatal "Cannot create reserved space at #{@first_space} " +
            "in EquiBlobsFile #{@file_name}: #{e.message}"
        end
      else
        begin
          free_space_address = offset_to_address(@first_space)
          @f.seek(@first_space)
          marker = read_char
          next_space = read_unsigned_int
          # Validate the cell before touching any in-memory state so that the
          # error names the offending cell and @first_space stays intact.
          unless marker == 0
            PEROBS.log.fatal "Free space list of EquiBlobsFile #{@file_name} " +
              "points to non-empty entry at address #{free_space_address}"
          end
          @first_space = next_space
          # Mark entry as reserved by setting the mark byte to 1.
          @f.seek(-(1 + 8), IO::SEEK_CUR)
          write_char(1)

          # Update the file header
          @total_spaces -= 1
          write_header
          return free_space_address
        rescue IOError => e
          PEROBS.log.fatal "Cannot mark reserved space at " +
            "#{free_space_address} in EquiBlobsFile #{@file_name}: " +
            "#{e.message}"
        end
      end
    end

    # Store the given byte blob at the specified address. If the blob space is
    # already in use the content will be overwritten.
    # @param address [Integer] Address to store the blob
    # @param bytes [String] bytes to store. Must be exactly entry_bytes bytes
    #        long.
    def store_blob(address, bytes)
      # Address 0 is the nil address. It would map into the file header.
      unless address > 0
        PEROBS.log.fatal "Blob storage address must be larger than 0, " +
          "not #{address}"
      end
      # Compare the byte size, not the character count. A multibyte String
      # would otherwise spill into the following cell.
      if bytes.bytesize != @entry_bytes
        PEROBS.log.fatal "All stack entries must be #{@entry_bytes} " +
          "long. This entry is #{bytes.bytesize} bytes long."
      end

      # A cell appended right at the end of the file counts as a new entry.
      marker = 1
      begin
        offset = address_to_offset(address)
        if offset > (file_size = @f.size)
          PEROBS.log.fatal "Cannot store blob at address #{address} in " +
            "EquiBlobsFile #{@file_name}. Address is larger than file size. " +
            "Offset: #{offset} File size: #{file_size}"
        end

        @f.seek(offset)
        # The first byte is the marker byte. It's set to 2 for cells that hold
        # a blob. 1 for reserved cells and 0 for empty cells. The cell must
        # either already be in use or be reserved. It must not be 0.
        if file_size > offset &&
           (marker = read_char) != 1 && marker != 2
          PEROBS.log.fatal "Marker for entry at address #{address} of " +
            "EquiBlobsFile #{@file_name} must be 1 or 2 but is #{marker}"
        end
        @f.seek(offset)
        write_char(2)
        @f.write(bytes)
        @f.flush
      rescue IOError => e
        PEROBS.log.fatal "Cannot store blob at address #{address} in " +
          "EquiBlobsFile #{@file_name}: #{e.message}"
      end

      # Update the entries counter if we inserted a new blob.
      if marker == 1
        @total_entries += 1
        write_header
      end
    end

    # Retrieve a blob from the given address.
    # @param address [Integer] Address of the blob to retrieve
    # @return [String] blob bytes
    def retrieve_blob(address)
      unless address > 0
        PEROBS.log.fatal "Blob retrieval address must be larger than 0, " +
          "not #{address}"
      end

      begin
        if (offset = address_to_offset(address)) >= @f.size
          PEROBS.log.fatal "Cannot retrieve blob at address #{address} " +
            "of EquiBlobsFile #{@file_name}. Address is beyond end of file."
        end

        @f.seek(offset)
        if (marker = read_char) != 2
          PEROBS.log.fatal "Cannot retrieve blob at address #{address} " +
            "of EquiBlobsFile #{@file_name}. Blob is " +
            (marker == 0 ? 'empty' : marker == 1 ? 'reserved' : 'corrupted') +
            '.'
        end
        bytes = @f.read(@entry_bytes)
      rescue IOError => e
        PEROBS.log.fatal "Cannot retrieve blob at adress #{address} " +
          "of EquiBlobsFile #{@file_name}: " + e.message
      end

      bytes
    end

    # Delete the blob at the given address. The cell may either hold a blob
    # or be reserved. It is pushed onto the front of the space list. If it was
    # the last cell of the file, trailing empty cells are trimmed.
    # @param address [Integer] Address of blob to delete
    def delete_blob(address)
      # Address 0 is the nil address. It would map into the file header.
      unless address > 0
        PEROBS.log.fatal "Blob address must be larger than 0, " +
          "not #{address}"
      end

      offset = address_to_offset(address)
      begin
        if offset >= @f.size
          PEROBS.log.fatal "Cannot delete blob at address #{address} " +
            "of EquiBlobsFile #{@file_name}. Address is beyond end of file."
        end

        @f.seek(offset)
        if (marker = read_char) != 1 && marker != 2
          PEROBS.log.fatal "Cannot delete blob stored at address #{address} " +
            "of EquiBlobsFile #{@file_name}. Blob is " +
            (marker == 0 ? 'empty' : 'corrupted') + '.'
        end
        @f.seek(offset)
        write_char(0)
        write_unsigned_int(@first_space)
      rescue IOError => e
        PEROBS.log.fatal "Cannot delete blob at address #{address}: " +
          e.message
      end

      @first_space = offset
      @total_spaces += 1
      # Reserved cells were never counted as entries. Only cells that held a
      # blob reduce the entry counter.
      @total_entries -= 1 if marker == 2
      write_header

      if offset == @f.size - 1 - @entry_bytes
        # We have deleted the last entry in the file. Make sure that all empty
        # entries are removed up to the now new last used entry.
        trim_file
      end
    end

    # Check the file for logical errors.
    # @return [Boolean] true if file has no errors, false otherwise.
    def check
      return false unless check_spaces
      return false unless check_entries

      if @f.size != HEADER_SIZE + (@total_entries + @total_spaces) *
         (1 + @entry_bytes)
        PEROBS.log.error "Size mismatch in EquiBlobsFile #{@file_name}"
        return false
      end

      true
    end

    # Check if the file exists and is larger than 0.
    def file_exist?
      File.exist?(@file_name) && File.size(@file_name) > 0
    end

    private

    # Set all in-memory header values back to their initial state.
    def reset_counters
      # The total number of entries stored in the file.
      @total_entries = 0
      # The total number of spaces (empty entries) in the file.
      @total_spaces = 0
      # The address of the first entry.
      @first_entry = @first_entry_default
      # The file offset of the first empty entry.
      @first_space = 0
    end

    # Load the four header values from the start of the file.
    def read_header
      header = nil
      begin
        @f.seek(0)
        header = @f.read(HEADER_SIZE)
      rescue IOError => e
        PEROBS.log.fatal "Cannot read EquiBlobsFile header: #{e.message}"
      end

      # IO#read returns nil or a short String for an empty or truncated file.
      unless header && header.bytesize == HEADER_SIZE
        PEROBS.log.fatal "Cannot read EquiBlobsFile header: file " +
          "#{@file_name} is shorter than #{HEADER_SIZE} bytes"
      end

      @total_entries, @total_spaces, @first_entry, @first_space =
        header.unpack('QQQQ')
    end

    # Write the four in-memory header values to the start of the file and
    # flush them out.
    def write_header
      header_ary = [ @total_entries, @total_spaces, @first_entry, @first_space ]
      begin
        @f.seek(0)
        @f.write(header_ary.pack('QQQQ'))
        @f.flush
      rescue IOError => e
        PEROBS.log.fatal "Cannot write EquiBlobsFile header: #{e.message}"
      end
    end

    # Verify the space counter in the header and walk the space list to
    # ensure that it only contains empty cells and has the expected length.
    # @return [Boolean] true if no error was found
    def check_spaces
      begin
        # Read and check total space count
        @f.seek(TOTAL_SPACES_OFFSET)
        total_spaces = read_unsigned_int
        unless total_spaces == @total_spaces
          PEROBS.log.error "Mismatch in total space count in EquiBlobsFile " +
            "#{@file_name}. Memory: #{@total_spaces} File: #{total_spaces}"
          return false
        end

        # Read offset of first empty space
        @f.seek(FIRST_SPACE_OFFSET)
        next_offset = read_unsigned_int
      rescue IOError => e
        PEROBS.log.error "Cannot check header of EquiBlobsFile " +
          "#{@file_name}: #{e.message}"
        return false
      end

      total_spaces = 0
      begin
        while next_offset != 0
          # Check that the marker byte is 0
          @f.seek(next_offset)
          if (marker = read_char) != 0
            PEROBS.log.error "Marker byte at address " +
              "#{offset_to_address(next_offset)} is #{marker} instead of 0."
            return false
          end
          # Read offset of next empty space
          next_offset = read_unsigned_int

          total_spaces += 1
        end
      rescue IOError => e
        PEROBS.log.error "Cannot check space list of EquiBlobsFile " +
          "#{@file_name}: #{e.message}"
        return false
      end

      unless total_spaces == @total_spaces
        PEROBS.log.error "Mismatch between space counter and entries in " +
          "EquiBlobsFile #{@file_name}. Counter: #{@total_spaces} " +
          "Entries: #{total_spaces}"
        return false
      end

      true
    end

    # Verify the entry counter in the header and scan all cells of the file
    # to ensure that the markers are legal and match the counters.
    # @return [Boolean] true if no error was found
    def check_entries
      begin
        # Read total entry count
        @f.seek(TOTAL_ENTRIES_OFFSET)
        total_entries = read_unsigned_int
        unless total_entries == @total_entries
          PEROBS.log.error "Mismatch in total entry count in EquiBlobsFile " +
            "#{@file_name}. Memory: #{@total_entries} File: #{total_entries}"
          return false
        end
      rescue IOError => e
        PEROBS.log.error "Cannot check header of EquiBlobsFile " +
          "#{@file_name}: #{e.message}"
        return false
      end

      next_offset = HEADER_SIZE
      total_entries = 0
      total_spaces = 0
      begin
        @f.seek(next_offset)
        until @f.eof
          # Consume the whole cell but only look at its marker byte.
          marker = @f.read(1 + @entry_bytes).getbyte(0)
          case marker
          when 0
            total_spaces += 1
          when 1
            PEROBS.log.error "Entry at address " +
              "#{offset_to_address(next_offset)} in EquiBlobsFile " +
              "#{@file_name} has reserved marker"
            return false
          when 2
            total_entries += 1
          else
            PEROBS.log.error "Entry at address " +
              "#{offset_to_address(next_offset)} in EquiBlobsFile " +
              "#{@file_name} has illegal marker #{marker}"
            return false
          end
          next_offset += 1 + @entry_bytes
        end
      rescue StandardError => e
        # The exception must be bound to a name, otherwise building the
        # message below raises a NameError.
        PEROBS.log.error "Cannot check entries of EquiBlobsFile " +
          "#{@file_name}: #{e.message}"
        return false
      end

      unless total_spaces == @total_spaces
        PEROBS.log.error "Mismatch between space counter and spaces in " +
          "EquiBlobsFile #{@file_name}. Counter: #{@total_spaces} " +
          "Found spaces: #{total_spaces}"
        return false
      end
      unless total_entries == @total_entries
        PEROBS.log.error "Mismatch between entries counter and entries in " +
          "EquiBlobsFile #{@file_name}. Counter: #{@total_entries} " +
          "Found entries: #{total_entries}"
        return false
      end

      true
    end

    # Remove all empty cells from the end of the file. Each removed cell is
    # taken out of the space list before the file is shortened.
    def trim_file
      offset = @f.size - 1 - @entry_bytes
      while offset >= HEADER_SIZE
        @f.seek(offset)
        begin
          if read_char == 0
            # This entry is a deleted entry
            unlink_space(offset)
          else
            # No more empty entries at the end of the file to trim.
            return
          end
          @f.truncate(offset)
          @f.flush
        rescue IOError => e
          PEROBS.log.fatal "Error while trimming EquiBlobsFile " +
            "#{@file_name}: #{e.message}"
        end

        # Push offset to the previous entry
        offset -= 1 + @entry_bytes
      end
    end

    # Remove the cell at the given file offset from the space list.
    # @param offset_to_unlink [Integer] File offset of the cell to remove
    def unlink_space(offset_to_unlink)
      # The space list entry that points to offset_to_unlink will be
      # eliminated.
      #
      # This is the offset in the file that potentially holds the offset to
      # the entry that we want to eliminate. If so, it will be overwritten
      # with the offset that the eliminated entry was pointing to.
      offset_to_modify = FIRST_SPACE_OFFSET

      begin
        # Read the offset that potentially points to the entry to be
        # eliminated.
        @f.seek(offset_to_modify)
        if (next_entry_offset = read_unsigned_int) == 0
          PEROBS.log.fatal "Cannot delete space from an empty space list"
        end

        loop do
          # Get the offset of the successor in the chain.
          @f.seek(next_entry_offset)
          # Check that it really is a empty space entry.
          unless read_char == 0
            PEROBS.log.fatal "Marker of space must be 0"
          end
          offset_to_insert = read_unsigned_int

          if next_entry_offset == offset_to_unlink
            # We've found the entry that we want to eliminate.
            # Rewind to the location of the offset that pointed to the entry
            # to eliminate.
            @f.seek(offset_to_modify)
            # Write the offset of the successor to this offset.
            write_unsigned_int(offset_to_insert)
            if offset_to_modify == FIRST_SPACE_OFFSET
              # When we modify the first space entry we also have to update
              # the in-memory value.
              @first_space = offset_to_insert
            end

            # Reduce the space counter.
            @total_spaces -= 1
            write_header

            return
          end

          # Check if we've reached the end of the chain.
          return if offset_to_insert == 0

          # The next_entry_offset did not match the offset to unlink. Shift
          # offset_to_modify and next_entry_offset to their successor entries.
          # The link of a cell is stored right after its marker byte.
          offset_to_modify = next_entry_offset + 1
          next_entry_offset = offset_to_insert
        end
      rescue IOError => e
        PEROBS.log.fatal "Cannot unlink space of EquiBlobsFile " +
          "#{@file_name}: #{e.message}"
      end
    end

    # Translate a blob address to the actual offset in the file.
    def address_to_offset(address)
      HEADER_SIZE + (address - 1) * (1 + @entry_bytes)
    end

    # Translate the file offset to the address of a blob.
    def offset_to_address(offset)
      (offset - HEADER_SIZE) / (1 + @entry_bytes) + 1
    end

    # Write a single unsigned byte at the current file position.
    def write_char(c)
      @f.write([ c ].pack('C'))
    end

    # Read a single unsigned byte from the current file position.
    def read_char
      @f.read(1).unpack('C')[0]
    end

    # Write an 8 byte unsigned integer at the current file position.
    def write_unsigned_int(uint)
      @f.write([ uint ].pack('Q'))
    end

    # Write the given Array of byte values at the current file position.
    def write_n_bytes(bytes)
      @f.write(bytes.pack("C#{bytes.size}"))
    end

    # Read an 8 byte unsigned integer from the current file position.
    def read_unsigned_int
      @f.read(8).unpack('Q')[0]
    end

  end

end
570
+