perobs 3.0.1 → 4.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +19 -18
  3. data/lib/perobs.rb +2 -0
  4. data/lib/perobs/Array.rb +68 -21
  5. data/lib/perobs/BTree.rb +110 -54
  6. data/lib/perobs/BTreeBlob.rb +14 -13
  7. data/lib/perobs/BTreeDB.rb +11 -10
  8. data/lib/perobs/BTreeNode.rb +551 -197
  9. data/lib/perobs/BTreeNodeCache.rb +10 -8
  10. data/lib/perobs/BTreeNodeLink.rb +11 -1
  11. data/lib/perobs/BigArray.rb +285 -0
  12. data/lib/perobs/BigArrayNode.rb +1002 -0
  13. data/lib/perobs/BigHash.rb +246 -0
  14. data/lib/perobs/BigTree.rb +197 -0
  15. data/lib/perobs/BigTreeNode.rb +873 -0
  16. data/lib/perobs/Cache.rb +47 -22
  17. data/lib/perobs/ClassMap.rb +2 -2
  18. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  19. data/lib/perobs/DataBase.rb +4 -3
  20. data/lib/perobs/DynamoDB.rb +62 -20
  21. data/lib/perobs/EquiBlobsFile.rb +174 -59
  22. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  23. data/lib/perobs/FlatFile.rb +536 -242
  24. data/lib/perobs/FlatFileBlobHeader.rb +120 -84
  25. data/lib/perobs/FlatFileDB.rb +58 -27
  26. data/lib/perobs/FuzzyStringMatcher.rb +175 -0
  27. data/lib/perobs/Hash.rb +129 -35
  28. data/lib/perobs/IDList.rb +144 -0
  29. data/lib/perobs/IDListPage.rb +107 -0
  30. data/lib/perobs/IDListPageFile.rb +180 -0
  31. data/lib/perobs/IDListPageRecord.rb +142 -0
  32. data/lib/perobs/LockFile.rb +3 -0
  33. data/lib/perobs/Object.rb +28 -20
  34. data/lib/perobs/ObjectBase.rb +53 -10
  35. data/lib/perobs/PersistentObjectCache.rb +142 -0
  36. data/lib/perobs/PersistentObjectCacheLine.rb +99 -0
  37. data/lib/perobs/ProgressMeter.rb +97 -0
  38. data/lib/perobs/SpaceManager.rb +273 -0
  39. data/lib/perobs/SpaceTree.rb +63 -47
  40. data/lib/perobs/SpaceTreeNode.rb +134 -115
  41. data/lib/perobs/SpaceTreeNodeLink.rb +1 -1
  42. data/lib/perobs/StackFile.rb +1 -1
  43. data/lib/perobs/Store.rb +180 -70
  44. data/lib/perobs/version.rb +1 -1
  45. data/perobs.gemspec +4 -4
  46. data/test/Array_spec.rb +48 -39
  47. data/test/BTreeDB_spec.rb +2 -2
  48. data/test/BTree_spec.rb +50 -1
  49. data/test/BigArray_spec.rb +261 -0
  50. data/test/BigHash_spec.rb +152 -0
  51. data/test/BigTreeNode_spec.rb +153 -0
  52. data/test/BigTree_spec.rb +259 -0
  53. data/test/EquiBlobsFile_spec.rb +105 -5
  54. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  55. data/test/FlatFileDB_spec.rb +199 -15
  56. data/test/FuzzyStringMatcher_spec.rb +261 -0
  57. data/test/Hash_spec.rb +27 -16
  58. data/test/IDList_spec.rb +77 -0
  59. data/test/LegacyDBs/LegacyDB.rb +155 -0
  60. data/test/LegacyDBs/version_3/class_map.json +1 -0
  61. data/test/LegacyDBs/version_3/config.json +1 -0
  62. data/test/LegacyDBs/version_3/database.blobs +0 -0
  63. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  64. data/test/LegacyDBs/version_3/index.blobs +0 -0
  65. data/test/LegacyDBs/version_3/version +1 -0
  66. data/test/LockFile_spec.rb +9 -6
  67. data/test/Object_spec.rb +5 -5
  68. data/test/SpaceManager_spec.rb +176 -0
  69. data/test/SpaceTree_spec.rb +27 -9
  70. data/test/Store_spec.rb +353 -206
  71. data/test/perobs_spec.rb +7 -3
  72. data/test/spec_helper.rb +9 -4
  73. metadata +59 -16
  74. data/lib/perobs/SpaceTreeNodeCache.rb +0 -76
  75. data/lib/perobs/TreeDB.rb +0 -277
@@ -0,0 +1,107 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = IDListPage.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
module PEROBS

  # An IDListPage holds one chunk of IDs as a sorted Array without
  # duplicates. The binary searches in #insert, #include? and #delete rely on
  # that ordering; #check verifies it. Every change of the number of stored
  # values is mirrored into the page_entries attribute of the associated
  # record and reported to the page file via mark_page_as_modified.
  class IDListPage

    attr_reader :uid, :values
    attr_accessor :record

    # Create a new IDListPage object.
    # @param page_file [IDListPageFile] The page file that owns this page. It
    #        must respond to page_size, mark_page_as_modified and save_page.
    # @param record [IDListPageRecord] The record describing this page. Its
    #        page_entries attribute is set to the number of given values.
    # @param uid [Integer] The identifier of this page
    # @param values [Array of Integer] The IDs stored in this page. The Array
    #        is used as-is (not copied) and is expected to be sorted in
    #        strictly ascending order.
    def initialize(page_file, record, uid, values = [])
      @page_file = page_file
      @record = record
      @uid = uid
      @values = values
      @record.page_entries = @values.length
    end

    # Load the page with the given uid by delegating to the page file.
    # @param page_file [IDListPageFile] The page file to load from
    # @param uid [Integer] The identifier of the page
    # @param ref [IDListPageRecord] The record that is passed on to
    #        page_file.load
    # @return whatever page_file.load returns
    def self.load(page_file, uid, ref)
      page_file.load(uid, ref)
    end

    # @return [True or False] true if the page holds at least as many values
    #         as the page size of the page file allows.
    def is_full?
      @values.length >= @page_file.page_size
    end

    # @return [Integer] The number of values stored in this page
    def length
      @values.length
    end

    # Ask the page file to save this page.
    def save
      @page_file.save_page(self)
    end

    # Insert the given ID at its sorted position unless it is already stored.
    # @param id [Integer] The ID to store
    # @raise [ArgumentError] if the page is already full. This is checked
    #        before looking for duplicates, so it is also raised when the ID
    #        is already present in a full page.
    def insert(id)
      raise ArgumentError, "IDListPage is already full" if is_full?

      # Position of the first value that is not smaller than id. If there is
      # none, the ID belongs at the end of the list.
      index = @values.bsearch_index { |v| v >= id } || @values.length

      # If the value isn't stored already, insert it.
      if @values[index] != id
        @values.insert(index, id)
        @record.page_entries = @values.length
        @page_file.mark_page_as_modified(self)
      end
    end

    # Check if the given ID is stored in this page.
    # @param id [Integer]
    # @return [True or False]
    def include?(id)
      # The first value that is not smaller than id is the only candidate.
      @values.bsearch { |v| v >= id } == id
    end

    # Remove all values that are larger than max_id from the page.
    # @param max_id [Integer] The largest ID that remains in this page
    # @return [Array of Integer] The removed values in ascending order. The
    #         Array is empty if nothing was removed.
    def delete(max_id)
      # The values are sorted, so everything from the first value larger than
      # max_id up to the end has to go.
      first = @values.bsearch_index { |v| v > max_id }
      return [] unless first

      removed = @values.slice!(first..-1)
      @record.page_entries = @values.length
      @page_file.mark_page_as_modified(self)

      removed
    end

    # Verify that the values are stored in strictly ascending order.
    # @raise [RuntimeError] if two neighbouring values are equal or in
    #        descending order
    def check
      last_value = nil
      @values.each do |v|
        if last_value && last_value >= v
          raise RuntimeError, "The values #{last_value} and #{v} must be " +
            "strictly ascending: #{@values.inspect}"
        end
        last_value = v
      end
    end

    # @return [String] The values as a human readable list
    def to_s
      "[ #{@values.join(', ')} ]"
    end

  end

end
107
+
@@ -0,0 +1,180 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = IDListPageFile.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ require 'perobs/IDListPage'
29
+ require 'perobs/IDListPageRecord'
30
+ require 'perobs/Log'
31
+ require 'perobs/PersistentObjectCache'
32
+
33
module PEROBS

  # The IDListPageFile class provides filesystem based cache for the
  # IDListPage objects. The IDListRecord objects only hold the index of the
  # page in this cache. This allows the pages to be garbage collected and
  # swapped to the file. If accessed, the pages will be swapped in again.
  # While this process is similar to the demand paging of the OS it has
  # absolutely nothing to do with it.
  #
  # Each page occupies a slot of page_size * 8 bytes in the file. The values
  # are stored as unsigned 64 bit integers in native byte order (pack format
  # 'Q').
  class IDListPageFile

    attr_reader :page_size, :pages

    # Create a new IDListPageFile object that uses the given file in the given
    # directory as cache file.
    # @param list [IDList] The IDList object that caches pages here
    # @param dir [String] An existing directory
    # @param name [String] A file name (without path)
    # @param max_in_memory [Integer] Maximum number of pages to keep in memory
    # @param page_size [Integer] The number of values in each page
    def initialize(list, dir, name, max_in_memory, page_size)
      @list = list
      @file_name = File.join(dir, name + '.cache')
      @page_size = page_size
      open
      @pages = PersistentObjectCache.new(max_in_memory, max_in_memory,
                                         IDListPage, self)
      @page_counter = 0
    end

    # Load the IDListPage from the cache file.
    # @param page_idx [Integer] The page index in the page file
    # @param record [IDListPageRecord] the corresponding IDListPageRecord
    # @return [IDListPage] The loaded values
    def load(page_idx, record)
      # The IDListPageRecord will tell us the actual number of values stored
      # in this page.
      values = []
      unless (entries = record.page_entries) == 0
        bytes = entries * 8
        begin
          @f.seek(page_idx * @page_size * 8)
          data = @f.read(bytes)
          # IO#read returns nil at end of file and may return fewer bytes
          # than requested. Treat both like any other read error instead of
          # failing on nil or decoding missing values as nil.
          unless data && data.bytesize == bytes
            raise EOFError, "page #{page_idx} is truncated"
          end
          values = data.unpack("Q#{entries}")
        rescue IOError, SystemCallError => e
          PEROBS.log.fatal "Cannot read cache file #{@file_name}: #{e.message}"
        end
      end

      # Create the IDListPage object with the given values.
      loaded = IDListPage.new(self, record, page_idx, values)
      @pages.insert(loaded, false)

      loaded
    end

    # Return the number of registered pages.
    def page_count
      @page_counter
    end

    # Create a new IDListPage and register it.
    # @param record [IDListPageRecord] The corresponding record.
    # @param values [Array of Integer] The values stored in the page
    # @return [Integer] The index of the new page
    def new_page(record, values = [])
      idx = @page_counter
      @page_counter += 1
      mark_page_as_modified(IDListPage.new(self, record, idx, values))
      idx
    end

    # Return the IDListPage object with the given index.
    # @param record [IDListPageRecord] the corresponding IDListPageRecord
    # @return [IDListPage] The page corresponding to the index.
    # @raise [RuntimeError] if the uid of the found page does not match the
    #        page index of the record
    def page(record)
      found = @pages.get(record.page_idx, record) ||
              load(record.page_idx, record)
      unless found.uid == record.page_idx
        raise RuntimeError, "Page reference mismatch. Record " +
          "#{record.page_idx} points to page #{found.uid}"
      end

      found
    end

    # Mark a page as modified. This means it has to be written into the cache
    # before it is removed from memory.
    # @param p [IDListPage] page reference
    def mark_page_as_modified(p)
      @pages.insert(p)
      @pages.flush
    end

    # Clear all pages and truncate the cache file to zero length. The file
    # stays open.
    # @raise [RuntimeError] if the file cannot be truncated
    def clear
      @pages.clear
      @page_counter = 0
      begin
        @f.truncate(0)
      rescue IOError, SystemCallError => e
        raise RuntimeError, "Cannot truncate cache file #{@file_name}: " +
          e.message
      end
    end

    # Discard all pages and erase the cache file.
    def erase
      @pages.clear
      @page_counter = 0
      close
    end

    # Save the given IDListPage into the cache file.
    # @param p [IDListPage] page to store
    # @raise [RuntimeError] if the page_entries of the page's record differ
    #        from the number of values in the page
    def save_page(p)
      if p.record.page_entries != p.values.length
        raise RuntimeError, "page_entries mismatch for node #{p.uid}"
      end
      begin
        @f.seek(p.uid * @page_size * 8)
        @f.write(p.values.pack('Q*'))
      rescue IOError, SystemCallError => e
        PEROBS.log.fatal "Cannot write cache file #{@file_name}: #{e.message}"
      end
    end

    private

    # Open the cache file for reading and writing. An existing file of the
    # same name is truncated ('wb+' mode).
    def open
      begin
        @f = File.open(@file_name, 'wb+')
      rescue IOError, SystemCallError => e
        # File.open reports most failures (missing directory, permissions) as
        # Errno::* exceptions which are SystemCallErrors, not IOErrors.
        PEROBS.log.fatal "Cannot open cache file #{@file_name}: #{e.message}"
      end
    end

    # Close the cache file and delete it from the file system.
    def close
      begin
        # @f is nil if the file was never opened or is already erased.
        @f.close if @f
        File.delete(@file_name) if File.exist?(@file_name)
      rescue IOError, SystemCallError => e
        PEROBS.log.fatal "Cannot erase cache file #{@file_name}: #{e.message}"
      end
      @f = nil
    end

  end

end
180
+
@@ -0,0 +1,142 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = IDListPageRecord.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
module PEROBS

  # The IDListPageRecord class models the elements of the IDList. Each page
  # holds up to a certain number of IDs that can be cached into a file if
  # needed. Each page holds IDs within a given interval. The cache is managed
  # by the IDListPageFile object.
  class IDListPageRecord

    attr_reader :min_id, :max_id, :page_idx
    attr_accessor :page_entries

    # Create a new IDListPageRecord object.
    # @param page_file [IDListPageFile] The page file that manages the cache.
    # @param min_id [Integer] The smallest ID that can be stored in this page
    # @param max_id [Integer] the largest ID that can be stored in this page
    # @param values [Array] An array of IDs to be stored in this page
    def initialize(page_file, min_id, max_id, values = [])
      @page_file = page_file
      @min_id = min_id
      @max_id = max_id
      @page_entries = 0
      @page_idx = @page_file.new_page(self, values)
    end

    # Check if the given ID is included in this page.
    # @param id [Integer]
    # @return [True or False] Return true if found, false otherwise.
    def include?(id)
      return false if id < @min_id || @max_id < id

      page.include?(id)
    end

    # Check if the page is full and can't store any more IDs.
    # @return [True or False]
    def is_full?
      page.is_full?
    end

    # Insert an ID into the page.
    # @param id [Integer] The ID to store
    # @raise [ArgumentError] if the ID is outside of the range of this page
    def insert(id)
      unless @min_id <= id && id <= @max_id
        raise ArgumentError, "IDs for this page must be between #{@min_id} " +
          "and #{@max_id}. #{id} is outside this range."
      end

      page.insert(id)
    end

    # Split the current page. This split is done by splitting the ID range in
    # half. This page will keep the first half, the newly created page will
    # get the second half. This may not actually yield an empty page as all
    # values could remain with one of the pages. In this case further splits
    # need to be issued by the caller.
    #
    # NOTE(review): splitting a record whose range is a single ID
    # (min_id == max_id) would create an upper record with min_id > max_id.
    # Callers presumably never split such a record -- confirm.
    # @return [IDListPageRecord] A new IDListPageRecord object.
    def split
      # Determine the new max_id for the old page.
      lower_max_id = @min_id + (@max_id - @min_id) / 2
      # Remove all IDs from this page that now belong into the new page.
      upper_values = page.delete(lower_max_id)
      # Create a new page that stores the upper half of the ID range and
      # transfer the removed IDs to it.
      new_page_record = IDListPageRecord.new(@page_file, lower_max_id + 1,
                                             @max_id, upper_values)
      # Adjust the max_id of the current page.
      @max_id = lower_max_id

      new_page_record
    end

    # @return [Array of Integer] The IDs stored in the page of this record
    def values
      page.values
    end

    # Records are ordered by the lower bound of their ID range.
    def <=>(pr)
      @min_id <=> pr.min_id
    end

    # Check the consistency of the record and its page.
    # @raise [RuntimeError] if the ID range is inverted, page_entries does not
    #        match the number of stored values or a value is outside of the
    #        ID range
    def check
      # A range of exactly one ID is valid. #split produces such records when
      # it halves a range of two IDs and #insert accepts IDs for them.
      if @min_id > @max_id
        raise RuntimeError, "min_id must not be larger than max_id"
      end

      p = page
      page_values = p.values
      unless @page_entries == page_values.length
        raise RuntimeError, "Mismatch between node page_entries " +
          "(#{@page_entries}) and number of values (#{page_values.length})"
      end

      page_values.each do |v|
        if v < @min_id
          raise RuntimeError, "Page value #{v} is smaller than min_id " +
            "#{@min_id}"
        end
        if v > @max_id
          raise RuntimeError, "Page value #{v} is larger than max_id #{@max_id}"
        end
      end

      p.check
    end

    private

    def page
      # The leaf pages reference the IDListPage objects only by their index.
      # This method will convert the index into a reference to the actual
      # object. These references should be very short-lived as a live
      # reference prevents the page object from being collected.
      @page_file.page(self)
    end
  end

end
@@ -70,12 +70,14 @@ module PEROBS
70
70
  while retries > 0
71
71
  begin
72
72
  @file = File.open(@file_name, File::RDWR | File::CREAT, 0644)
73
+ @file.sync = true
73
74
 
74
75
  if @file.flock(File::LOCK_EX | File::LOCK_NB)
75
76
  # We have taken the lock. Write the PID into the file and leave it
76
77
  # open.
77
78
  @file.write($$)
78
79
  @file.flush
80
+ @file.fsync
79
81
  @file.truncate(@file.pos)
80
82
  PEROBS.log.debug "Lock file #{@file_name} has been taken for " +
81
83
  "process #{$$}"
@@ -129,6 +131,7 @@ module PEROBS
129
131
 
130
132
  begin
131
133
  @file.flock(File::LOCK_UN)
134
+ @file.fsync
132
135
  @file.close
133
136
  forced_unlock
134
137
  PEROBS.log.debug "Lock file #{@file_name} for PID #{$$} has been " +