perobs 3.0.1 → 4.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +19 -18
  3. data/lib/perobs.rb +2 -0
  4. data/lib/perobs/Array.rb +68 -21
  5. data/lib/perobs/BTree.rb +110 -54
  6. data/lib/perobs/BTreeBlob.rb +14 -13
  7. data/lib/perobs/BTreeDB.rb +11 -10
  8. data/lib/perobs/BTreeNode.rb +551 -197
  9. data/lib/perobs/BTreeNodeCache.rb +10 -8
  10. data/lib/perobs/BTreeNodeLink.rb +11 -1
  11. data/lib/perobs/BigArray.rb +285 -0
  12. data/lib/perobs/BigArrayNode.rb +1002 -0
  13. data/lib/perobs/BigHash.rb +246 -0
  14. data/lib/perobs/BigTree.rb +197 -0
  15. data/lib/perobs/BigTreeNode.rb +873 -0
  16. data/lib/perobs/Cache.rb +47 -22
  17. data/lib/perobs/ClassMap.rb +2 -2
  18. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  19. data/lib/perobs/DataBase.rb +4 -3
  20. data/lib/perobs/DynamoDB.rb +62 -20
  21. data/lib/perobs/EquiBlobsFile.rb +174 -59
  22. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  23. data/lib/perobs/FlatFile.rb +536 -242
  24. data/lib/perobs/FlatFileBlobHeader.rb +120 -84
  25. data/lib/perobs/FlatFileDB.rb +58 -27
  26. data/lib/perobs/FuzzyStringMatcher.rb +175 -0
  27. data/lib/perobs/Hash.rb +129 -35
  28. data/lib/perobs/IDList.rb +144 -0
  29. data/lib/perobs/IDListPage.rb +107 -0
  30. data/lib/perobs/IDListPageFile.rb +180 -0
  31. data/lib/perobs/IDListPageRecord.rb +142 -0
  32. data/lib/perobs/LockFile.rb +3 -0
  33. data/lib/perobs/Object.rb +28 -20
  34. data/lib/perobs/ObjectBase.rb +53 -10
  35. data/lib/perobs/PersistentObjectCache.rb +142 -0
  36. data/lib/perobs/PersistentObjectCacheLine.rb +99 -0
  37. data/lib/perobs/ProgressMeter.rb +97 -0
  38. data/lib/perobs/SpaceManager.rb +273 -0
  39. data/lib/perobs/SpaceTree.rb +63 -47
  40. data/lib/perobs/SpaceTreeNode.rb +134 -115
  41. data/lib/perobs/SpaceTreeNodeLink.rb +1 -1
  42. data/lib/perobs/StackFile.rb +1 -1
  43. data/lib/perobs/Store.rb +180 -70
  44. data/lib/perobs/version.rb +1 -1
  45. data/perobs.gemspec +4 -4
  46. data/test/Array_spec.rb +48 -39
  47. data/test/BTreeDB_spec.rb +2 -2
  48. data/test/BTree_spec.rb +50 -1
  49. data/test/BigArray_spec.rb +261 -0
  50. data/test/BigHash_spec.rb +152 -0
  51. data/test/BigTreeNode_spec.rb +153 -0
  52. data/test/BigTree_spec.rb +259 -0
  53. data/test/EquiBlobsFile_spec.rb +105 -5
  54. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  55. data/test/FlatFileDB_spec.rb +199 -15
  56. data/test/FuzzyStringMatcher_spec.rb +261 -0
  57. data/test/Hash_spec.rb +27 -16
  58. data/test/IDList_spec.rb +77 -0
  59. data/test/LegacyDBs/LegacyDB.rb +155 -0
  60. data/test/LegacyDBs/version_3/class_map.json +1 -0
  61. data/test/LegacyDBs/version_3/config.json +1 -0
  62. data/test/LegacyDBs/version_3/database.blobs +0 -0
  63. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  64. data/test/LegacyDBs/version_3/index.blobs +0 -0
  65. data/test/LegacyDBs/version_3/version +1 -0
  66. data/test/LockFile_spec.rb +9 -6
  67. data/test/Object_spec.rb +5 -5
  68. data/test/SpaceManager_spec.rb +176 -0
  69. data/test/SpaceTree_spec.rb +27 -9
  70. data/test/Store_spec.rb +353 -206
  71. data/test/perobs_spec.rb +7 -3
  72. data/test/spec_helper.rb +9 -4
  73. metadata +59 -16
  74. data/lib/perobs/SpaceTreeNodeCache.rb +0 -76
  75. data/lib/perobs/TreeDB.rb +0 -277
@@ -0,0 +1,107 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = IDListPage.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ module PEROBS
29
+
30
+ class IDListPage
31
+
32
+ attr_reader :uid, :values
33
+ attr_accessor :record
34
+
35
+ def initialize(page_file, record, uid, values = [])
36
+ @page_file = page_file
37
+ @record = record
38
+ @uid = uid
39
+ @values = values
40
+ @record.page_entries = @values.length
41
+ end
42
+
43
+ def IDListPage::load(page_file, uid, ref)
44
+ page_file.load(uid, ref)
45
+ end
46
+
47
+ def is_full?
48
+ @values.length >= @page_file.page_size
49
+ end
50
+
51
+ def length
52
+ @values.length
53
+ end
54
+
55
+ def save
56
+ @page_file.save_page(self)
57
+ end
58
+
59
+ def insert(id)
60
+ if is_full?
61
+ raise ArgumentError, "IDListPage is already full"
62
+ end
63
+ index = @values.bsearch_index { |v| v >= id } || @values.length
64
+
65
+ # If the value isn't stored already, insert it.
66
+ if @values[index] != id
67
+ @values.insert(index, id)
68
+ @record.page_entries = @values.length
69
+ @page_file.mark_page_as_modified(self)
70
+ end
71
+ end
72
+
73
+ def include?(id)
74
+ !(v = @values.bsearch { |v| v >= id }).nil? && v == id
75
+ end
76
+
77
+ def delete(max_id)
78
+ a = []
79
+ @values.delete_if { |v| v > max_id ? a << v : false }
80
+
81
+ unless a.empty?
82
+ @record.page_entries = @values.length
83
+ @page_file.mark_page_as_modified(self)
84
+ end
85
+
86
+ a
87
+ end
88
+
89
+ def check
90
+ last_value = nil
91
+ @values.each_with_index do |v, i|
92
+ if last_value && last_value >= v
93
+ raise RuntimeError, "The values #{last_value} and #{v} must be " +
94
+ "strictly ascending: #{@values.inspect}"
95
+ end
96
+ last_value = v
97
+ end
98
+ end
99
+
100
+ def to_s
101
+ "[ #{@values.join(', ')} ]"
102
+ end
103
+
104
+ end
105
+
106
+ end
107
+
@@ -0,0 +1,180 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = IDListPageFile.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ require 'perobs/IDListPage'
29
+ require 'perobs/IDListPageRecord'
30
+ require 'perobs/Log'
31
+ require 'perobs/PersistentObjectCache'
32
+
33
+ module PEROBS
34
+
35
+ # The IDListPageFile class provides filesystem based cache for the
36
+ # IDListPage objects. The IDListPageRecord objects only hold the index of the
37
+ # page in this cache. This allows the pages to be garbage collected and
38
+ # swapped to the file. If accessed, the pages will be swapped in again. While
39
+ # this process is similar to the demand paging of the OS it has absolutely
40
+ # nothing to do with it.
41
+ class IDListPageFile
42
+
43
+ attr_reader :page_size, :pages
44
+
45
+ # Create a new IDListPageFile object that uses the given file in the given
46
+ # directory as cache file.
47
+ # @param list [IDList] The IDList object that caches pages here
48
+ # @param dir [String] An existing directory
49
+ # @param name [String] A file name (without path)
50
+ # @param max_in_memory [Integer] Maximum number of pages to keep in memory
51
+ # @param page_size [Integer] The number of values in each page
52
+ def initialize(list, dir, name, max_in_memory, page_size)
53
+ @list = list
54
+ @file_name = File.join(dir, name + '.cache')
55
+ @page_size = page_size
56
+ open
57
+ @pages = PersistentObjectCache.new(max_in_memory, max_in_memory,
58
+ IDListPage, self)
59
+ @page_counter = 0
60
+ end
61
+
62
+ # Load the IDListPage from the cache file.
63
+ # @param page_idx [Integer] The page index in the page file
64
+ # @param record [IDListPageRecord] the corresponding IDListPageRecord
65
+ # @return [IDListPage] The loaded values
66
+ def load(page_idx, record)
67
+ # The IDListPageRecord will tell us the actual number of values stored
68
+ # in this page.
69
+ values = []
70
+ unless (entries = record.page_entries) == 0
71
+ begin
72
+ @f.seek(page_idx * @page_size * 8)
73
+ values = @f.read(entries * 8).unpack("Q#{entries}")
74
+ rescue IOError => e
75
+ PEROBS.log.fatal "Cannot read cache file #{@file_name}: #{e.message}"
76
+ end
77
+ end
78
+
79
+ # Create the IDListPage object with the given values.
80
+ p = IDListPage.new(self, record, page_idx, values)
81
+ @pages.insert(p, false)
82
+
83
+ p
84
+ end
85
+
86
+ # Return the number of registered pages.
87
+ def page_count
88
+ @page_counter
89
+ end
90
+
91
+ # Create a new IDListPage and register it.
92
+ # @param record [IDListPageRecord] The corresponding record.
93
+ # @param values [Array of Integer] The values stored in the page
94
+ # @return [IDListPage]
95
+ def new_page(record, values = [])
96
+ idx = @page_counter
97
+ @page_counter += 1
98
+ mark_page_as_modified(IDListPage.new(self, record, idx, values))
99
+ idx
100
+ end
101
+
102
+ # Return the IDListPage object with the given index.
103
+ # @param record [IDListPageRecord] the corresponding IDListPageRecord
104
+ # @return [IDListPage] The page corresponding to the index.
105
+ def page(record)
106
+ p = @pages.get(record.page_idx, record) || load(record.page_idx, record)
107
+ unless p.uid == record.page_idx
108
+ raise RuntimeError, "Page reference mismatch. Record " +
109
+ "#{record.page_idx} points to page #{p.uid}"
110
+ end
111
+
112
+ p
113
+ end
114
+
115
+ # Mark a page as modified. This means it has to be written into the cache
116
+ # before it is removed from memory.
117
+ # @param p [IDListPage] page reference
118
+ def mark_page_as_modified(p)
119
+ @pages.insert(p)
120
+ @pages.flush
121
+ end
122
+
123
+ # Clear all pages, erase the cache and re-open it again.
124
+ def clear
125
+ @pages.clear
126
+ @page_counter = 0
127
+ begin
128
+ @f.truncate(0)
129
+ rescue IOError => e
130
+ raise RuntimeError, "Cannote truncate cache file #{@file_name}: " +
131
+ e.message
132
+ end
133
+ end
134
+
135
+ # Discard all pages and erase the cache file.
136
+ def erase
137
+ @pages.clear
138
+ @page_counter = 0
139
+ close
140
+ end
141
+
142
+ # Save the given IDListPage into the cache file.
143
+ # @param p [IDListPage] page to store
144
+ def save_page(p)
145
+ if p.record.page_entries != p.values.length
146
+ raise RuntimeError, "page_entries mismatch for node #{p.uid}"
147
+ end
148
+ begin
149
+ @f.seek(p.uid * @page_size * 8)
150
+ @f.write(p.values.pack('Q*'))
151
+ rescue IOError => e
152
+ PEROBS.log.fatal "Cannot write cache file #{@file_name}: #{e.message}"
153
+ end
154
+ end
155
+
156
+ private
157
+
158
+ def open
159
+ begin
160
+ # Create a new file by writing a new header.
161
+ @f = File.open(@file_name, 'wb+')
162
+ rescue IOError => e
163
+ PEROBS.log.fatal "Cannot open cache file #{@file_name}: #{e.message}"
164
+ end
165
+ end
166
+
167
+ def close
168
+ begin
169
+ @f.close
170
+ File.delete(@file_name) if File.exist?(@file_name)
171
+ rescue IOError => e
172
+ PEROBS.log.fatal "Cannot erase cache file #{@file_name}: #{e.message}"
173
+ end
174
+ @f = nil
175
+ end
176
+
177
+ end
178
+
179
+ end
180
+
@@ -0,0 +1,142 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = IDListPageRecord.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ module PEROBS
29
+
30
+ # The IDListPageRecord class models the elements of the IDList. Each page
31
+ # holds up to a certain number of IDs that can be cached into a file if
32
+ # needed. Each page holds IDs within a given interval. The cache is managed
33
+ # by the IDListPageFile object.
34
+ class IDListPageRecord
35
+
36
+ attr_reader :min_id, :max_id, :page_idx
37
+ attr_accessor :page_entries
38
+
39
+ # Create a new IDListPageRecord object.
40
+ # @param page_file [IDListPageFile] The page file that manages the cache.
41
+ # @param min_id [Integer] The smallest ID that can be stored in this page
42
+ # @param max_id [Integer] the largest ID that can be stored in this page
43
+ # @param values [Array] An array of IDs to be stored in this page
44
+ def initialize(page_file, min_id, max_id, values = [])
45
+ @page_file = page_file
46
+ @min_id = min_id
47
+ @max_id = max_id
48
+ @page_entries = 0
49
+ @page_idx = @page_file.new_page(self, values)
50
+ end
51
+
52
+ # Check if the given ID is included in this page.
53
+ # @param id [Integer]
54
+ # @return [True of False] Return true if found, false otherwise.
55
+ def include?(id)
56
+ return false if id < @min_id || @max_id < id
57
+
58
+ page.include?(id)
59
+ end
60
+
61
+ # Check if the page is full and can't store any more IDs.
62
+ # @return [True or False]
63
+ def is_full?
64
+ page.is_full?
65
+ end
66
+
67
+ # Insert an ID into the page.
68
+ # @param ID [Integer] The ID to store
69
+ def insert(id)
70
+ unless @min_id <= id && id <= @max_id
71
+ raise ArgumentError, "IDs for this page must be between #{@min_id} " +
72
+ "and #{@max_id}. #{id} is outside this range."
73
+ end
74
+
75
+ page.insert(id)
76
+ end
77
+
78
+ # Split the current page. This split is done by splitting the ID range in
79
+ # half. This page will keep the first half, the newly created page will
80
+ # get the second half. This may not actually yield an empty page as all
81
+ # values could remain with one of the pages. In this case further splits
82
+ # need to be issued by the caller.
83
+ # @return [IDListPageRecord] A new IDListPageRecord object.
84
+ def split
85
+ # Determine the new max_id for the old page.
86
+ max_id = @min_id + (@max_id - @min_id) / 2
87
+ # Create a new page that stores the upper half of the ID range. Remove
88
+ # all IDs from this page that now belong into the new page and transfer
89
+ # them.
90
+ new_page_record = IDListPageRecord.new(@page_file, max_id + 1, @max_id,
91
+ page.delete(max_id))
92
+ # Adjust the max_id of the current page.
93
+ @max_id = max_id
94
+
95
+ new_page_record
96
+ end
97
+
98
+ def values
99
+ page.values
100
+ end
101
+
102
+ def <=>(pr)
103
+ @min_id <=> pr.min_id
104
+ end
105
+
106
+ def check
107
+ unless @min_id < @max_id
108
+ raise RuntimeError, "min_id must be smaller than max_id"
109
+ end
110
+
111
+ p = page
112
+ values = p.values
113
+ unless @page_entries == values.length
114
+ raise RuntimeError, "Mismatch between node page_entries " +
115
+ "(#{@page_entries}) and number of values (#{p.values.length})"
116
+ end
117
+
118
+ values.each do |v|
119
+ if v < @min_id
120
+ raise RuntimeError, "Page value #{v} is smaller than min_id " +
121
+ "#{@min_id}"
122
+ end
123
+ if v > @max_id
124
+ raise RuntimeError, "Page value #{v} is larger than max_id #{@max_id}"
125
+ end
126
+ end
127
+
128
+ p.check
129
+ end
130
+
131
+ private
132
+
133
+ def page
134
+ # The leaf pages reference the IDListPage objects only by their index.
135
+ # This method will convert the index into a reference to the actual
136
+ # object. These references should be very short-lived as a life
137
+ # reference prevents the page object from being collected.
138
+ @page_file.page(self)
139
+ end
140
+ end
141
+
142
+ end
@@ -70,12 +70,14 @@ module PEROBS
70
70
  while retries > 0
71
71
  begin
72
72
  @file = File.open(@file_name, File::RDWR | File::CREAT, 0644)
73
+ @file.sync = true
73
74
 
74
75
  if @file.flock(File::LOCK_EX | File::LOCK_NB)
75
76
  # We have taken the lock. Write the PID into the file and leave it
76
77
  # open.
77
78
  @file.write($$)
78
79
  @file.flush
80
+ @file.fsync
79
81
  @file.truncate(@file.pos)
80
82
  PEROBS.log.debug "Lock file #{@file_name} has been taken for " +
81
83
  "process #{$$}"
@@ -129,6 +131,7 @@ module PEROBS
129
131
 
130
132
  begin
131
133
  @file.flock(File::LOCK_UN)
134
+ @file.fsync
132
135
  @file.close
133
136
  forced_unlock
134
137
  PEROBS.log.debug "Lock file #{@file_name} for PID #{$$} has been " +