perobs 2.5.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,570 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = EquiBlobsFile.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ require 'perobs/Log'
29
+
30
module PEROBS

  # This class implements persistent storage space for same size data blobs.
  # The blobs can be stored and retrieved and can be deleted again. The
  # EquiBlobsFile manages the storage of the blobs and free storage
  # spaces. The file grows and shrinks as needed. A blob is referenced by its
  # address. The address is an Integer that must be larger than 0. The value 0
  # is used to represent an undefined address or nil.
  #
  # File layout:
  # * A 4 * 8 bytes long header that stores the total entry count, the total
  #   space count, the address of the first entry and the file offset of the
  #   first space. All values are written with the pack directive 'Q'.
  # * A sequence of cells. Each cell is 1 + entry_bytes long. The first byte
  #   is the marker: 0 for an empty cell (space), 1 for a reserved cell and 2
  #   for a cell that holds a blob. An empty cell stores the file offset of
  #   the next empty cell (or 0) in the 8 bytes following the marker, so all
  #   spaces form a singly linked list that starts in the header.
  #
  # Errors are reported through PEROBS.log.fatal. The code assumes that this
  # call does not return -- TODO confirm against perobs/Log.
  class EquiBlobsFile

    TOTAL_ENTRIES_OFFSET = 0
    TOTAL_SPACES_OFFSET = 8
    FIRST_ENTRY_OFFSET = 2 * 8
    FIRST_SPACE_OFFSET = 3 * 8
    HEADER_SIZE = 4 * 8

    attr_reader :total_entries, :total_spaces, :file_name
    attr_accessor :first_entry

    # Create a new blob file object for the given directory and file name.
    # The file itself is not touched until open() is called.
    # @param dir [String] Directory
    # @param name [String] File name without the '.blobs' extension
    # @param entry_bytes [Integer] Number of bytes each entry must have. Must
    #        be at least 8 since empty cells store an 8 byte offset.
    # @param first_entry_default [Integer] Value used for the first_entry
    #        header field whenever the counters are reset.
    def initialize(dir, name, entry_bytes, first_entry_default = 0)
      @file_name = File.join(dir, name + '.blobs')
      if entry_bytes < 8
        PEROBS.log.fatal "EquiBlobsFile entry size must be at least 8"
      end
      @entry_bytes = entry_bytes
      @first_entry_default = first_entry_default
      reset_counters

      # The File handle.
      @f = nil
    end

    # Open the blob file. An existing, non-empty file is opened and its
    # header is read. A missing or zero-length file is (re-)created with a
    # fresh header. The file is then locked exclusively.
    def open
      begin
        if file_exist?
          # Open an existing file.
          @f = File.open(@file_name, 'rb+')
          read_header
        else
          # Create a new file by writing a new header. A zero-length file
          # has no header to read and is treated like a missing file.
          @f = File.open(@file_name, 'wb+')
          write_header
        end
      rescue IOError => e
        PEROBS.log.fatal "Cannot open blob file #{@file_name}: #{e.message}"
      end
      unless @f.flock(File::LOCK_NB | File::LOCK_EX)
        # Don't keep a handle to a file that we are not allowed to use.
        @f.close
        @f = nil
        PEROBS.log.fatal 'Database blob file is locked by another process'
      end
    end

    # Close the blob file. This method must be called before the program is
    # terminated to avoid data loss. Calling it on a closed file is a no-op.
    def close
      begin
        if @f
          @f.flush
          @f.flock(File::LOCK_UN)
          @f.close
          @f = nil
        end
      rescue IOError => e
        PEROBS.log.fatal "Cannot close blob file #{@file_name}: #{e.message}"
      end
    end

    # Erase the backing store. This method should only be called when the file
    # is not currently open.
    def erase
      PEROBS.log.fatal 'Cannot call EquiBlobsFile::erase while it is open' if @f
      File.delete(@file_name) if File.exist?(@file_name)
      reset_counters
    end

    # Flush out all unwritten data.
    def sync
      begin
        @f.flush if @f
      rescue IOError => e
        PEROBS.log.fatal "Cannot sync blob file #{@file_name}: #{e.message}"
      end
    end

    # Delete all data. The file must be open. Afterwards it only contains a
    # header with reset counters.
    def clear
      @f.truncate(0)
      @f.flush
      reset_counters
      write_header
    end

    # Return the address of a free blob storage space. Addresses start at 1
    # and increase linearly. The returned cell is marked as reserved. It is
    # only counted as an entry once a blob has been stored in it.
    # @return [Integer] address of a free blob space
    def free_address
      if @first_space == 0
        # There is currently no free entry. Create a new reserved entry at the
        # end of the file.
        append_reserved_space
      else
        reuse_first_space
      end
    end

    # Store the given byte blob at the specified address. If the blob space is
    # already in use the content will be overwritten. The cell must either be
    # reserved, already hold a blob or be located directly at the end of the
    # file.
    # @param address [Integer] Address to store the blob. Must be larger
    #        than 0.
    # @param bytes [String] bytes to store. Must be exactly entry_bytes bytes
    #        long.
    def store_blob(address, bytes)
      # Address 0 would map to an offset inside the file header.
      unless address > 0
        PEROBS.log.fatal "Blob storage address must be larger than 0, " +
          "not #{address}"
      end
      # Compare bytes, not characters. A multi-byte encoded String would
      # otherwise overrun the cell.
      if bytes.bytesize != @entry_bytes
        PEROBS.log.fatal "All stack entries must be #{@entry_bytes} " +
          "long. This entry is #{bytes.bytesize} bytes long."
      end

      # A cell that gets appended at the end of the file is treated like a
      # reserved cell.
      marker = 1
      begin
        offset = address_to_offset(address)
        if offset > (file_size = @f.size)
          PEROBS.log.fatal "Cannot store blob at address #{address} in " +
            "EquiBlobsFile #{@file_name}. Address is larger than file size. " +
            "Offset: #{offset} File size: #{file_size}"
        end

        # The first byte is the marker byte. It's set to 2 for cells that hold
        # a blob. 1 for reserved cells and 0 for empty cells. The cell must
        # either already be in use or be reserved. It must not be 0.
        if file_size > offset
          @f.seek(offset)
          marker = read_char
          unless marker == 1 || marker == 2
            PEROBS.log.fatal "Marker for entry at address #{address} of " +
              "EquiBlobsFile #{@file_name} must be 1 or 2 but is #{marker}"
          end
        end
        @f.seek(offset)
        write_char(2)
        @f.write(bytes)
        @f.flush
      rescue IOError => e
        PEROBS.log.fatal "Cannot store blob at address #{address} in " +
          "EquiBlobsFile #{@file_name}: #{e.message}"
      end

      # Update the entries counter if we inserted a new blob.
      if marker == 1
        @total_entries += 1
        write_header
      end
    end

    # Retrieve a blob from the given address.
    # @param address [Integer] Address of the blob to read. Must be larger
    #        than 0.
    # @return [String] blob bytes
    def retrieve_blob(address)
      unless address > 0
        PEROBS.log.fatal "Blob retrieval address must be larger than 0, " +
          "not #{address}"
      end

      begin
        offset = address_to_offset(address)
        if offset >= @f.size
          PEROBS.log.fatal "Cannot retrieve blob at address #{address} " +
            "of EquiBlobsFile #{@file_name}. Address is beyond end of file."
        end

        @f.seek(offset)
        if (marker = read_char) != 2
          state = case marker
                  when 0 then 'empty'
                  when 1 then 'reserved'
                  else 'corrupted'
                  end
          PEROBS.log.fatal "Cannot retrieve blob at address #{address} " +
            "of EquiBlobsFile #{@file_name}. Blob is " + state + '.'
        end
        bytes = @f.read(@entry_bytes)
      rescue IOError => e
        PEROBS.log.fatal "Cannot retrieve blob at address #{address} " +
          "of EquiBlobsFile #{@file_name}: " + e.message
      end

      bytes
    end

    # Delete the blob at the given address. The cell is pushed to the front
    # of the space list. If it was the last cell of the file, the file is
    # shrunk by all trailing empty cells.
    # @param address [Integer] Address of blob to delete. Must be larger
    #        than 0.
    def delete_blob(address)
      # Address 0 would map to an offset inside the file header.
      unless address > 0
        PEROBS.log.fatal "Blob address must be larger than 0, " +
          "not #{address}"
      end

      offset = address_to_offset(address)
      begin
        if offset >= @f.size
          PEROBS.log.fatal "Cannot delete blob at address #{address} " +
            "of EquiBlobsFile #{@file_name}. Address is beyond end of file."
        end

        @f.seek(offset)
        if (marker = read_char) != 1 && marker != 2
          PEROBS.log.fatal "Cannot delete blob stored at address #{address} " +
            "of EquiBlobsFile #{@file_name}. Blob is " +
            (marker == 0 ? 'empty' : 'corrupted') + '.'
        end
        # Turn the cell into the new head of the space list.
        @f.seek(offset)
        write_char(0)
        write_unsigned_int(@first_space)
      rescue IOError => e
        PEROBS.log.fatal "Cannot delete blob at address #{address}: " +
          e.message
      end

      @first_space = offset
      @total_spaces += 1
      # Reserved cells (marker 1) have never been counted as entries. Only
      # cells that actually held a blob reduce the entry counter.
      @total_entries -= 1 if marker == 2
      write_header

      if offset == @f.size - 1 - @entry_bytes
        # We have deleted the last entry in the file. Make sure that all empty
        # entries are removed up to the now new last used entry.
        trim_file
      end
    end

    # Check the file for logical errors. Problems are reported via
    # PEROBS.log.error.
    # @return [Boolean] true if file has no errors, false otherwise.
    def check
      return false unless check_spaces
      return false unless check_entries

      expected_size = HEADER_SIZE +
        (@total_entries + @total_spaces) * (1 + @entry_bytes)
      if @f.size != expected_size
        PEROBS.log.error "Size mismatch in EquiBlobsFile #{@file_name}"
        return false
      end

      true
    end

    # Check if the file exists and is larger than 0.
    # @return [Boolean]
    def file_exist?
      File.exist?(@file_name) && File.size(@file_name) > 0
    end

    private

    # Set all in-memory header values back to their initial state.
    def reset_counters
      # The total number of entries stored in the file.
      @total_entries = 0
      # The total number of spaces (empty entries) in the file.
      @total_spaces = 0
      # The address of the first entry.
      @first_entry = @first_entry_default
      # The file offset of the first empty entry.
      @first_space = 0
    end

    # Load the 4 header values from the start of the file into memory.
    def read_header
      begin
        @f.seek(0)
        header = @f.read(HEADER_SIZE)
        # IO#read returns nil or a short String when the file is truncated.
        unless header && header.bytesize == HEADER_SIZE
          PEROBS.log.fatal "Cannot read EquiBlobsFile header: " +
            "#{@file_name} is shorter than #{HEADER_SIZE} bytes"
        end
        @total_entries, @total_spaces, @first_entry, @first_space =
          header.unpack('QQQQ')
      rescue IOError => e
        PEROBS.log.fatal "Cannot read EquiBlobsFile header: #{e.message}"
      end
    end

    # Write the 4 in-memory header values to the start of the file and flush.
    def write_header
      header_ary = [ @total_entries, @total_spaces, @first_entry, @first_space ]
      begin
        @f.seek(0)
        @f.write(header_ary.pack('QQQQ'))
        @f.flush
      rescue IOError => e
        PEROBS.log.fatal "Cannot write EquiBlobsFile header: #{e.message}"
      end
    end

    # Append a new reserved cell to the end of the file.
    # @return [Integer] address of the new cell
    def append_reserved_space
      begin
        offset = @f.size
        @f.seek(offset)
        write_n_bytes([1] + ::Array.new(@entry_bytes, 0))
        write_header
        return offset_to_address(offset)
      rescue IOError => e
        PEROBS.log.fatal "Cannot create reserved space at the end of " +
          "EquiBlobsFile #{@file_name}: #{e.message}"
      end
    end

    # Take the first cell off the space list and mark it as reserved.
    # @return [Integer] address of the reserved cell
    def reuse_first_space
      offset = @first_space
      free_space_address = offset_to_address(offset)
      begin
        @f.seek(offset)
        # Validate the cell before any in-memory state is changed.
        unless read_char == 0
          PEROBS.log.fatal "Free space list of EquiBlobsFile #{@file_name} " +
            "points to non-empty entry at address #{free_space_address}"
        end
        next_space = read_unsigned_int

        # Mark entry as reserved by setting the mark byte to 1.
        @f.seek(offset)
        write_char(1)

        # Update the file header
        @first_space = next_space
        @total_spaces -= 1
        write_header
        return free_space_address
      rescue IOError => e
        PEROBS.log.fatal "Cannot mark reserved space at " +
          "#{free_space_address} in EquiBlobsFile #{@file_name}: " +
          "#{e.message}"
      end
    end

    # Verify that the space counter in the header, the in-memory counter and
    # the length of the space list all agree and that every list member is
    # marked as empty.
    # @return [Boolean] true if no problem was found
    def check_spaces
      begin
        # Read and check total space count
        @f.seek(TOTAL_SPACES_OFFSET)
        total_spaces = read_unsigned_int
        unless total_spaces == @total_spaces
          PEROBS.log.error "Mismatch in total space count in EquiBlobsFile " +
            "#{@file_name}. Memory: #{@total_spaces} File: #{total_spaces}"
          return false
        end

        # Read offset of first empty space
        @f.seek(FIRST_SPACE_OFFSET)
        next_offset = read_unsigned_int
      rescue IOError => e
        PEROBS.log.error "Cannot check header of EquiBlobsFile " +
          "#{@file_name}: #{e.message}"
        return false
      end

      total_spaces = 0
      begin
        while next_offset != 0
          # A list that is longer than the counter is corrupted and may even
          # be cyclic. Stop walking it; the mismatch is reported below.
          break if total_spaces > @total_spaces

          # Check that the marker byte is 0
          @f.seek(next_offset)
          if (marker = read_char) != 0
            PEROBS.log.error "Marker byte at address " +
              "#{offset_to_address(next_offset)} is #{marker} instead of 0."
            return false
          end
          # Read offset of next empty space
          next_offset = read_unsigned_int

          total_spaces += 1
        end
      rescue IOError => e
        PEROBS.log.error "Cannot check space list of EquiBlobsFile " +
          "#{@file_name}: #{e.message}"
        return false
      end

      unless total_spaces == @total_spaces
        PEROBS.log.error "Mismatch between space counter and entries in " +
          "EquiBlobsFile #{@file_name}. Counter: #{@total_spaces} " +
          "Entries: #{total_spaces}"
        return false
      end

      true
    end

    # Verify that the entry counter in the header matches the in-memory
    # counter and that a linear scan over all cells finds exactly the counted
    # number of entries and spaces. Reserved or unknown markers are errors.
    # @return [Boolean] true if no problem was found
    def check_entries
      begin
        # Read total entry count
        @f.seek(TOTAL_ENTRIES_OFFSET)
        total_entries = read_unsigned_int
        unless total_entries == @total_entries
          PEROBS.log.error "Mismatch in total entry count in EquiBlobsFile " +
            "#{@file_name}. Memory: #{@total_entries} File: #{total_entries}"
          return false
        end
      rescue IOError => e
        PEROBS.log.error "Cannot check header of EquiBlobsFile " +
          "#{@file_name}: #{e.message}"
        return false
      end

      next_offset = HEADER_SIZE
      total_entries = 0
      total_spaces = 0
      begin
        @f.seek(next_offset)
        until @f.eof?
          # Only the marker byte of each cell is of interest here.
          cell = @f.read(1 + @entry_bytes)
          case (marker = cell.getbyte(0))
          when 0
            total_spaces += 1
          when 1
            PEROBS.log.error "Entry at address " +
              "#{offset_to_address(next_offset)} in EquiBlobsFile " +
              "#{@file_name} has reserved marker"
            return false
          when 2
            total_entries += 1
          else
            PEROBS.log.error "Entry at address " +
              "#{offset_to_address(next_offset)} in EquiBlobsFile " +
              "#{@file_name} has illegal marker #{marker}"
            return false
          end
          next_offset += 1 + @entry_bytes
        end
      rescue IOError => e
        PEROBS.log.error "Cannot check entries of EquiBlobsFile " +
          "#{@file_name}: #{e.message}"
        return false
      end

      unless total_spaces == @total_spaces
        PEROBS.log.error "Mismatch between space counter and spaces in " +
          "EquiBlobsFile #{@file_name}. Counter: #{@total_spaces} " +
          "Found spaces: #{total_spaces}"
        return false
      end
      unless total_entries == @total_entries
        PEROBS.log.error "Mismatch between entries counter and entries in " +
          "EquiBlobsFile #{@file_name}. Counter: #{@total_entries} " +
          "Found entries: #{total_entries}"
        return false
      end

      true
    end

    # Remove all empty cells from the end of the file. Walks backwards from
    # the last cell and stops at the first cell that is not empty.
    def trim_file
      offset = @f.size - 1 - @entry_bytes
      while offset >= HEADER_SIZE
        begin
          @f.seek(offset)
          # No more empty entries at the end of the file to trim.
          return unless read_char == 0

          # This entry is a deleted entry. Take it out of the space list
          # before the file is shortened.
          unlink_space(offset)
          @f.truncate(offset)
          @f.flush
        rescue IOError => e
          PEROBS.log.fatal "Error while trimming EquiBlobsFile " +
            "#{@file_name}: #{e.message}"
        end

        # Push offset to the previous entry
        offset -= 1 + @entry_bytes
      end
    end

    # Remove the empty cell at the given file offset from the space list.
    # Returns without any change when the cell is not part of the list.
    # @param offset_to_unlink [Integer] file offset of the cell to remove
    def unlink_space(offset_to_unlink)
      # The space list entry that points to offset_to_unlink will be
      # eliminated.
      #
      # This is the offset in the file that potentially holds the offset to
      # the entry that we want to eliminate. If so, it will be overwritten
      # with the offset that the eliminated entry was pointing to.
      offset_to_modify = FIRST_SPACE_OFFSET

      begin
        # Read the offset that potentially points to the entry to be
        # eliminated.
        @f.seek(offset_to_modify)
        if (next_entry_offset = read_unsigned_int) == 0
          PEROBS.log.fatal "Cannot delete space from an empty space list"
        end

        loop do
          # Get the offset of the successor in the chain.
          @f.seek(next_entry_offset)
          # Check that it really is an empty space entry.
          unless read_char == 0
            PEROBS.log.fatal "Marker of space must be 0"
          end
          offset_to_insert = read_unsigned_int

          if next_entry_offset == offset_to_unlink
            # We've found the entry that we want to eliminate.
            # Rewind to the location of the offset that pointed to the entry
            # to eliminate.
            @f.seek(offset_to_modify)
            # Write the offset of the successor to this offset.
            write_unsigned_int(offset_to_insert)
            if offset_to_modify == FIRST_SPACE_OFFSET
              # When we modify the first space entry we also have to update
              # the in-memory value.
              @first_space = offset_to_insert
            end

            # Reduce the space counter.
            @total_spaces -= 1
            write_header

            return
          end

          # Check if we've reached the end of the chain.
          return if offset_to_insert == 0

          # The next_entry_offset did not match the offset to unlink. Shift
          # offset_to_modify and next_entry_offset to their successor entries.
          # The successor offset is stored right after the marker byte.
          offset_to_modify = next_entry_offset + 1
          next_entry_offset = offset_to_insert
        end
      rescue IOError => e
        PEROBS.log.fatal "Cannot unlink space of EquiBlobsFile " +
          "#{@file_name}: #{e.message}"
      end
    end

    # Translate a blob address to the actual offset in the file.
    def address_to_offset(address)
      HEADER_SIZE + (address - 1) * (1 + @entry_bytes)
    end

    # Translate the file offset to the address of a blob.
    def offset_to_address(offset)
      (offset - HEADER_SIZE) / (1 + @entry_bytes) + 1
    end

    # Write a single byte at the current file position.
    def write_char(c)
      @f.write([ c ].pack('C'))
    end

    # Read a single byte from the current file position.
    # @return [Integer] value between 0 and 255
    # @raise [EOFError] at the end of the file. EOFError is an IOError, so
    #        the callers' IOError handlers take care of it.
    def read_char
      @f.readbyte
    end

    # Write an 8 byte unsigned integer at the current file position.
    def write_unsigned_int(uint)
      @f.write([ uint ].pack('Q'))
    end

    # Write the given list of byte values at the current file position.
    def write_n_bytes(bytes)
      @f.write(bytes.pack("C#{bytes.size}"))
    end

    # Read an 8 byte unsigned integer from the current file position.
    # @return [Integer]
    # @raise [EOFError] if fewer than 8 bytes are available
    def read_unsigned_int
      bytes = @f.read(8)
      unless bytes && bytes.bytesize == 8
        raise EOFError, "unexpected end of file #{@file_name}"
      end
      bytes.unpack('Q')[0]
    end

  end

end
570
+