tigerbeetle-node 0.11.13 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/aarch64-linux-gnu/client.node +0 -0
- package/dist/bin/aarch64-linux-musl/client.node +0 -0
- package/dist/bin/aarch64-macos/client.node +0 -0
- package/dist/bin/x86_64-linux-gnu/client.node +0 -0
- package/dist/bin/x86_64-linux-musl/client.node +0 -0
- package/dist/bin/x86_64-macos/client.node +0 -0
- package/dist/index.js +33 -1
- package/dist/index.js.map +1 -1
- package/package-lock.json +66 -0
- package/package.json +6 -16
- package/src/index.ts +56 -1
- package/src/node.zig +9 -9
- package/dist/.client.node.sha256 +0 -1
- package/scripts/build_lib.sh +0 -61
- package/scripts/download_node_headers.sh +0 -32
- package/src/tigerbeetle/scripts/benchmark.bat +0 -55
- package/src/tigerbeetle/scripts/benchmark.sh +0 -66
- package/src/tigerbeetle/scripts/confirm_image.sh +0 -44
- package/src/tigerbeetle/scripts/fail_on_diff.sh +0 -9
- package/src/tigerbeetle/scripts/fuzz_loop.sh +0 -15
- package/src/tigerbeetle/scripts/fuzz_loop_hash_log.sh +0 -12
- package/src/tigerbeetle/scripts/fuzz_unique_errors.sh +0 -7
- package/src/tigerbeetle/scripts/install.bat +0 -7
- package/src/tigerbeetle/scripts/install.sh +0 -21
- package/src/tigerbeetle/scripts/install_zig.bat +0 -113
- package/src/tigerbeetle/scripts/install_zig.sh +0 -90
- package/src/tigerbeetle/scripts/lint.zig +0 -199
- package/src/tigerbeetle/scripts/pre-commit.sh +0 -9
- package/src/tigerbeetle/scripts/scripts/benchmark.bat +0 -55
- package/src/tigerbeetle/scripts/scripts/benchmark.sh +0 -66
- package/src/tigerbeetle/scripts/scripts/confirm_image.sh +0 -44
- package/src/tigerbeetle/scripts/scripts/fail_on_diff.sh +0 -9
- package/src/tigerbeetle/scripts/scripts/fuzz_loop.sh +0 -15
- package/src/tigerbeetle/scripts/scripts/fuzz_loop_hash_log.sh +0 -12
- package/src/tigerbeetle/scripts/scripts/fuzz_unique_errors.sh +0 -7
- package/src/tigerbeetle/scripts/scripts/install.bat +0 -7
- package/src/tigerbeetle/scripts/scripts/install.sh +0 -21
- package/src/tigerbeetle/scripts/scripts/install_zig.bat +0 -113
- package/src/tigerbeetle/scripts/scripts/install_zig.sh +0 -90
- package/src/tigerbeetle/scripts/scripts/lint.zig +0 -199
- package/src/tigerbeetle/scripts/scripts/pre-commit.sh +0 -9
- package/src/tigerbeetle/scripts/scripts/shellcheck.sh +0 -5
- package/src/tigerbeetle/scripts/scripts/tests_on_alpine.sh +0 -10
- package/src/tigerbeetle/scripts/scripts/tests_on_ubuntu.sh +0 -14
- package/src/tigerbeetle/scripts/scripts/upgrade_ubuntu_kernel.sh +0 -48
- package/src/tigerbeetle/scripts/scripts/validate_docs.sh +0 -23
- package/src/tigerbeetle/scripts/scripts/vr_state_enumerate +0 -46
- package/src/tigerbeetle/scripts/shellcheck.sh +0 -5
- package/src/tigerbeetle/scripts/tests_on_alpine.sh +0 -10
- package/src/tigerbeetle/scripts/tests_on_ubuntu.sh +0 -14
- package/src/tigerbeetle/scripts/upgrade_ubuntu_kernel.sh +0 -48
- package/src/tigerbeetle/scripts/validate_docs.sh +0 -23
- package/src/tigerbeetle/scripts/vr_state_enumerate +0 -46
- package/src/tigerbeetle/src/benchmark.zig +0 -336
- package/src/tigerbeetle/src/config.zig +0 -233
- package/src/tigerbeetle/src/constants.zig +0 -428
- package/src/tigerbeetle/src/ewah.zig +0 -286
- package/src/tigerbeetle/src/ewah_benchmark.zig +0 -120
- package/src/tigerbeetle/src/ewah_fuzz.zig +0 -130
- package/src/tigerbeetle/src/fifo.zig +0 -120
- package/src/tigerbeetle/src/io/benchmark.zig +0 -213
- package/src/tigerbeetle/src/io/darwin.zig +0 -814
- package/src/tigerbeetle/src/io/linux.zig +0 -1071
- package/src/tigerbeetle/src/io/test.zig +0 -643
- package/src/tigerbeetle/src/io/windows.zig +0 -1183
- package/src/tigerbeetle/src/io.zig +0 -34
- package/src/tigerbeetle/src/iops.zig +0 -107
- package/src/tigerbeetle/src/lsm/README.md +0 -308
- package/src/tigerbeetle/src/lsm/binary_search.zig +0 -341
- package/src/tigerbeetle/src/lsm/bloom_filter.zig +0 -125
- package/src/tigerbeetle/src/lsm/compaction.zig +0 -603
- package/src/tigerbeetle/src/lsm/composite_key.zig +0 -77
- package/src/tigerbeetle/src/lsm/direction.zig +0 -11
- package/src/tigerbeetle/src/lsm/eytzinger.zig +0 -587
- package/src/tigerbeetle/src/lsm/eytzinger_benchmark.zig +0 -330
- package/src/tigerbeetle/src/lsm/forest.zig +0 -205
- package/src/tigerbeetle/src/lsm/forest_fuzz.zig +0 -450
- package/src/tigerbeetle/src/lsm/grid.zig +0 -573
- package/src/tigerbeetle/src/lsm/groove.zig +0 -1036
- package/src/tigerbeetle/src/lsm/k_way_merge.zig +0 -474
- package/src/tigerbeetle/src/lsm/level_iterator.zig +0 -332
- package/src/tigerbeetle/src/lsm/manifest.zig +0 -617
- package/src/tigerbeetle/src/lsm/manifest_level.zig +0 -878
- package/src/tigerbeetle/src/lsm/manifest_log.zig +0 -789
- package/src/tigerbeetle/src/lsm/manifest_log_fuzz.zig +0 -691
- package/src/tigerbeetle/src/lsm/merge_iterator.zig +0 -106
- package/src/tigerbeetle/src/lsm/node_pool.zig +0 -235
- package/src/tigerbeetle/src/lsm/posted_groove.zig +0 -381
- package/src/tigerbeetle/src/lsm/segmented_array.zig +0 -1329
- package/src/tigerbeetle/src/lsm/segmented_array_benchmark.zig +0 -148
- package/src/tigerbeetle/src/lsm/segmented_array_fuzz.zig +0 -9
- package/src/tigerbeetle/src/lsm/set_associative_cache.zig +0 -850
- package/src/tigerbeetle/src/lsm/table.zig +0 -1009
- package/src/tigerbeetle/src/lsm/table_immutable.zig +0 -192
- package/src/tigerbeetle/src/lsm/table_iterator.zig +0 -340
- package/src/tigerbeetle/src/lsm/table_mutable.zig +0 -203
- package/src/tigerbeetle/src/lsm/test.zig +0 -439
- package/src/tigerbeetle/src/lsm/tree.zig +0 -1169
- package/src/tigerbeetle/src/lsm/tree_fuzz.zig +0 -479
- package/src/tigerbeetle/src/message_bus.zig +0 -1013
- package/src/tigerbeetle/src/message_pool.zig +0 -156
- package/src/tigerbeetle/src/ring_buffer.zig +0 -399
- package/src/tigerbeetle/src/simulator.zig +0 -580
- package/src/tigerbeetle/src/state_machine/auditor.zig +0 -578
- package/src/tigerbeetle/src/state_machine/workload.zig +0 -883
- package/src/tigerbeetle/src/state_machine.zig +0 -2099
- package/src/tigerbeetle/src/static_allocator.zig +0 -65
- package/src/tigerbeetle/src/stdx.zig +0 -171
- package/src/tigerbeetle/src/storage.zig +0 -393
- package/src/tigerbeetle/src/testing/cluster/message_bus.zig +0 -82
- package/src/tigerbeetle/src/testing/cluster/network.zig +0 -237
- package/src/tigerbeetle/src/testing/cluster/state_checker.zig +0 -169
- package/src/tigerbeetle/src/testing/cluster/storage_checker.zig +0 -202
- package/src/tigerbeetle/src/testing/cluster.zig +0 -444
- package/src/tigerbeetle/src/testing/fuzz.zig +0 -140
- package/src/tigerbeetle/src/testing/hash_log.zig +0 -66
- package/src/tigerbeetle/src/testing/id.zig +0 -99
- package/src/tigerbeetle/src/testing/packet_simulator.zig +0 -374
- package/src/tigerbeetle/src/testing/priority_queue.zig +0 -645
- package/src/tigerbeetle/src/testing/reply_sequence.zig +0 -139
- package/src/tigerbeetle/src/testing/state_machine.zig +0 -250
- package/src/tigerbeetle/src/testing/storage.zig +0 -757
- package/src/tigerbeetle/src/testing/table.zig +0 -247
- package/src/tigerbeetle/src/testing/time.zig +0 -84
- package/src/tigerbeetle/src/tigerbeetle.zig +0 -227
- package/src/tigerbeetle/src/time.zig +0 -112
- package/src/tigerbeetle/src/tracer.zig +0 -529
- package/src/tigerbeetle/src/unit_tests.zig +0 -40
- package/src/tigerbeetle/src/vopr.zig +0 -495
- package/src/tigerbeetle/src/vsr/README.md +0 -209
- package/src/tigerbeetle/src/vsr/client.zig +0 -544
- package/src/tigerbeetle/src/vsr/clock.zig +0 -855
- package/src/tigerbeetle/src/vsr/journal.zig +0 -2415
- package/src/tigerbeetle/src/vsr/journal_format_fuzz.zig +0 -111
- package/src/tigerbeetle/src/vsr/marzullo.zig +0 -309
- package/src/tigerbeetle/src/vsr/replica.zig +0 -6616
- package/src/tigerbeetle/src/vsr/replica_format.zig +0 -219
- package/src/tigerbeetle/src/vsr/superblock.zig +0 -1631
- package/src/tigerbeetle/src/vsr/superblock_client_table.zig +0 -256
- package/src/tigerbeetle/src/vsr/superblock_free_set.zig +0 -929
- package/src/tigerbeetle/src/vsr/superblock_free_set_fuzz.zig +0 -334
- package/src/tigerbeetle/src/vsr/superblock_fuzz.zig +0 -390
- package/src/tigerbeetle/src/vsr/superblock_manifest.zig +0 -615
- package/src/tigerbeetle/src/vsr/superblock_quorums.zig +0 -394
- package/src/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +0 -314
- package/src/tigerbeetle/src/vsr.zig +0 -1425
|
@@ -1,617 +0,0 @@
|
|
|
1
|
-
const std = @import("std");
|
|
2
|
-
const mem = std.mem;
|
|
3
|
-
const math = std.math;
|
|
4
|
-
const assert = std.debug.assert;
|
|
5
|
-
|
|
6
|
-
const constants = @import("../constants.zig");
|
|
7
|
-
const growth_factor = constants.lsm_growth_factor;
|
|
8
|
-
|
|
9
|
-
const table_count_max = @import("tree.zig").table_count_max;
|
|
10
|
-
const table_count_max_for_level = @import("tree.zig").table_count_max_for_level;
|
|
11
|
-
const snapshot_latest = @import("tree.zig").snapshot_latest;
|
|
12
|
-
|
|
13
|
-
const Direction = @import("direction.zig").Direction;
|
|
14
|
-
const GridType = @import("grid.zig").GridType;
|
|
15
|
-
const ManifestLogType = @import("manifest_log.zig").ManifestLogType;
|
|
16
|
-
const ManifestLevelType = @import("manifest_level.zig").ManifestLevelType;
|
|
17
|
-
const NodePool = @import("node_pool.zig").NodePool(constants.lsm_manifest_node_size, 16);
|
|
18
|
-
|
|
19
|
-
/// Builds the manifest's metadata record describing a single table of type `Table`.
///
/// The record is an `extern struct`; its size, alignment and absence of padding are checked
/// at comptime inside the struct.
pub fn TableInfoType(comptime Table: type) type {
    const Key = Table.Key;
    const compare_keys = Table.compare_keys;

    return extern struct {
        const TableInfo = @This();

        /// Checksum of the table's index block.
        checksum: u128,
        /// Address of the table's index block.
        address: u64,
        /// Unused.
        flags: u64 = 0,

        /// The smallest snapshot that can see this table.
        /// - `visible()` compares with `<=`, so this bound is inclusive.
        /// - This value is set to the current snapshot tick on table creation.
        snapshot_min: u64,

        /// The largest snapshot that can see this table (inclusive).
        /// - This value is `maxInt(u64)` when the table is created (output) by compaction.
        /// - This value is set to the current snapshot tick when the table is processed
        ///   (input) by compaction.
        snapshot_max: u64 = math.maxInt(u64),

        key_min: Key, // Inclusive.
        key_max: Key, // Inclusive.

        comptime {
            // 48 bytes of fixed fields plus the two keys, 16-byte aligned, with no padding.
            assert(@sizeOf(TableInfo) == 48 + Table.key_size * 2);
            assert(@alignOf(TableInfo) == 16);
            assert(@bitSizeOf(TableInfo) == @sizeOf(TableInfo) * 8);
        }

        /// Reports whether this table belongs to the view seen by `snapshot`.
        ///
        /// Every query targets a particular snapshot, and the snapshot decides which tables
        /// are accessed to answer the query. A table is visible when the snapshot lies within
        /// `[snapshot_min, snapshot_max]`, with both ends inclusive:
        /// - New tables are inserted with `snapshot_min = compaction.snapshot + 1`.
        /// - Tables are made invisible by setting `snapshot_max = compaction.snapshot`.
        ///
        /// Prefetch does not query the output tables of an ongoing compaction: output tables
        /// are added to the manifest before being written to disk, so they are not ready.
        /// Instead, prefetch keeps querying the compaction's input tables until the half-bar
        /// of compaction completes. At that point `tree.prefetch_snapshot_max` is updated (to
        /// the compaction's `compaction_op`), which simultaneously hides the old (input)
        /// tables and reveals the new (output) tables.
        pub fn visible(table: *const TableInfo, snapshot: u64) bool {
            assert(table.address != 0);
            assert(table.snapshot_min <= table.snapshot_max);
            assert(snapshot <= snapshot_latest);

            const at_or_after_min = table.snapshot_min <= snapshot;
            const at_or_before_max = snapshot <= table.snapshot_max;
            return at_or_after_min and at_or_before_max;
        }

        /// Reports whether this table is visible neither to `snapshot_latest` nor to any of
        /// the given `snapshots`.
        pub fn invisible(table: *const TableInfo, snapshots: []const u64) bool {
            // A table that was never deleted is still visible to the latest snapshot, so
            // there is no need to walk the snapshot list for it.
            if (table.visible(snapshot_latest)) return false;

            for (snapshots) |snapshot| {
                if (table.visible(snapshot)) return false;
            }

            assert(table.snapshot_max < math.maxInt(u64));
            return true;
        }

        /// Field-by-field equality; the keys are compared with `compare_keys`.
        pub fn equal(table: *const TableInfo, other: *const TableInfo) bool {
            // TODO Since the layout of TableInfo is well defined, a direct memcmp may be
            // faster here. However, it's not clear if we can make the assumption that
            // compare_keys() will return .eq exactly when the memory of the keys are equal.
            // Consider defining the API to allow this.
            if (table.checksum != other.checksum) return false;
            if (table.address != other.address) return false;
            if (table.flags != other.flags) return false;
            if (table.snapshot_min != other.snapshot_min) return false;
            if (table.snapshot_max != other.snapshot_max) return false;
            if (compare_keys(table.key_min, other.key_min) != .eq) return false;
            return compare_keys(table.key_max, other.key_max) == .eq;
        }
    };
}
|
|
103
|
-
|
|
104
|
-
/// Builds the manifest type for a tree whose tables are described by `Table` and whose blocks
/// live in `Storage`.
///
/// The returned struct owns one `Level` per LSM level (`constants.lsm_levels` in total) plus a
/// `ManifestLog`. Table inserts, updates and removals are applied to the level and appended to
/// the log.
pub fn ManifestType(comptime Table: type, comptime Storage: type) type {
    const Key = Table.Key;
    const compare_keys = Table.compare_keys;

    return struct {
        const Manifest = @This();

        pub const TableInfo = TableInfoType(Table);

        const Grid = GridType(Storage);
        const Callback = fn (*Manifest) void;

        /// Here, we use a structure with indexes over the segmented array for performance.
        const Level = ManifestLevelType(NodePool, Key, TableInfo, compare_keys, table_count_max);
        const KeyRange = Level.KeyRange;

        const ManifestLog = ManifestLogType(Storage, TableInfo);

        node_pool: *NodePool,

        levels: [constants.lsm_levels]Level,

        // TODO Set this at startup when reading in the manifest.
        // This should be the greatest TableInfo.snapshot_min/snapshot_max (if deleted) or
        // registered snapshot seen so far.
        snapshot_max: u64 = 1,

        manifest_log: ManifestLog,

        // At most one callback of each kind is pending at a time; null means "not running".
        open_callback: ?Callback = null,
        compact_callback: ?Callback = null,
        checkpoint_callback: ?Callback = null,

        /// Initializes every level and the manifest log.
        ///
        /// On error, everything initialized so far is released again.
        pub fn init(
            allocator: mem.Allocator,
            node_pool: *NodePool,
            grid: *Grid,
            tree_hash: u128,
        ) !Manifest {
            var levels: [constants.lsm_levels]Level = undefined;
            for (levels) |*level, i| {
                // If this level fails to initialize, unwind only the levels already built.
                errdefer for (levels[0..i]) |*built| built.deinit(allocator, node_pool);
                level.* = try Level.init(allocator);
            }
            errdefer for (levels) |*built| built.deinit(allocator, node_pool);

            var manifest_log = try ManifestLog.init(allocator, grid, tree_hash);
            errdefer manifest_log.deinit(allocator);

            return Manifest{
                .node_pool = node_pool,
                .levels = levels,
                .manifest_log = manifest_log,
            };
        }

        /// Releases every level, then the manifest log.
        pub fn deinit(manifest: *Manifest, allocator: mem.Allocator) void {
            for (manifest.levels) |*level| level.deinit(allocator, manifest.node_pool);

            manifest.manifest_log.deinit(allocator);
        }

        /// Starts opening the manifest log.
        ///
        /// `callback` is stored and later invoked by `manifest_log_open_callback`. An open must
        /// not already be pending.
        pub fn open(manifest: *Manifest, callback: Callback) void {
            assert(manifest.open_callback == null);
            manifest.open_callback = callback;

            manifest.manifest_log.open(manifest_log_open_event, manifest_log_open_callback);
        }

        /// Event handler handed to `manifest_log.open`: inserts `table` straight into
        /// `levels[level]`, without appending anything to the manifest log.
        fn manifest_log_open_event(
            manifest_log: *ManifestLog,
            level: u7,
            table: *const TableInfo,
        ) void {
            const manifest = @fieldParentPtr(Manifest, "manifest_log", manifest_log);
            assert(manifest.open_callback != null);

            assert(level < constants.lsm_levels);
            manifest.levels[level].insert_table(manifest.node_pool, table);
        }

        /// Completion handler handed to `manifest_log.open`: clears the pending open callback
        /// and then invokes it.
        fn manifest_log_open_callback(manifest_log: *ManifestLog) void {
            const manifest = @fieldParentPtr(Manifest, "manifest_log", manifest_log);
            assert(manifest.open_callback != null);

            // Clear the slot before calling out, so it is already null inside the callback.
            const on_open = manifest.open_callback.?;
            manifest.open_callback = null;
            on_open(manifest);
        }

        /// Inserts `table` into `level` and appends the insert to the manifest log.
        pub fn insert_table(
            manifest: *Manifest,
            level: u8,
            table: *const TableInfo,
        ) void {
            const target_level = &manifest.levels[level];
            target_level.insert_table(manifest.node_pool, table);

            // Append insert changes to the manifest log.
            manifest.manifest_log.insert(@intCast(u7, level), table);

            if (constants.verify) assert(target_level.contains(table));
        }

        /// Updates the snapshot_max on the provided table for the given level.
        /// The table provided is mutable to allow its snapshot_max to be updated.
        ///
        /// `snapshot` must not exceed the table's current `snapshot_max`.
        pub fn update_table(
            manifest: *Manifest,
            level: u8,
            snapshot: u64,
            table: *TableInfo,
        ) void {
            const target_level = &manifest.levels[level];

            assert(table.snapshot_max >= snapshot);
            target_level.set_snapshot_max(snapshot, table);
            assert(table.snapshot_max == snapshot);

            // Append update changes to the manifest log (the log records it as an insert).
            manifest.manifest_log.insert(@intCast(u7, level), table);
        }

        /// Removes from `level` every table that the level's `.invisible` iterator yields for
        /// `snapshot` within `[key_min, key_max]`, appending each removal to the manifest log.
        pub fn remove_invisible_tables(
            manifest: *Manifest,
            level: u8,
            snapshot: u64,
            key_min: Key,
            key_max: Key,
        ) void {
            assert(level < constants.lsm_levels);
            assert(compare_keys(key_min, key_max) != .gt);

            const snapshots = [_]u64{snapshot};
            const target_level = &manifest.levels[level];

            // Remove tables in descending order to avoid desynchronizing the iterator from
            // the ManifestLevel.
            var doomed = target_level.iterator(
                .invisible,
                &snapshots,
                .descending,
                KeyRange{ .key_min = key_min, .key_max = key_max },
            );

            while (doomed.next()) |table| {
                assert(table.invisible(&snapshots));
                assert(compare_keys(key_min, table.key_max) != .gt);
                assert(compare_keys(key_max, table.key_min) != .lt);

                // Append remove changes to the manifest log before the table leaves the level.
                manifest.manifest_log.remove(@intCast(u7, level), table);
                target_level.remove_table(manifest.node_pool, &snapshots, table);
            }

            if (constants.verify) manifest.assert_no_invisible_tables_at_level(level, snapshot);
        }

        /// Returns an iterator over tables that might contain `key` (but are not guaranteed to).
        pub fn lookup(manifest: *Manifest, snapshot: u64, key: Key) LookupIterator {
            return LookupIterator{
                .manifest = manifest,
                .snapshot = snapshot,
                .key = key,
            };
        }

        /// Walks the levels from `level` upward, yielding at most one table per level: the
        /// table visible to `snapshot` whose key range contains `key`.
        pub const LookupIterator = struct {
            manifest: *const Manifest,
            snapshot: u64,
            key: Key,
            level: u8 = 0,
            // Not read or written by `next()`.
            inner: ?Level.Iterator = null,

            /// Returns the matching table of the next level that has one, or null once all
            /// levels have been visited.
            pub fn next(it: *LookupIterator) ?*const TableInfo {
                while (it.level < constants.lsm_levels) : (it.level += 1) {
                    const current_level = &it.manifest.levels[it.level];

                    var candidates = current_level.iterator(
                        .visible,
                        @as(*const [1]u64, &it.snapshot),
                        .ascending,
                        KeyRange{ .key_min = it.key, .key_max = it.key },
                    );

                    // No match in this level: move on to the next one.
                    const table = candidates.next() orelse continue;

                    assert(table.visible(it.snapshot));
                    assert(compare_keys(it.key, table.key_min) != .lt);
                    assert(compare_keys(it.key, table.key_max) != .gt);
                    // A level holds at most one visible table covering any given key.
                    assert(candidates.next() == null);

                    // Returning skips the loop's continue expression, so advance by hand.
                    it.level += 1;
                    return table;
                }

                assert(it.level == constants.lsm_levels);
                return null;
            }
        };

        /// Asserts that no level holds more visible tables than `table_count_max_for_level`
        /// allows for it.
        pub fn assert_level_table_counts(manifest: *const Manifest) void {
            for (manifest.levels) |*manifest_level, index| {
                const limit = table_count_max_for_level(growth_factor, @intCast(u8, index));
                assert(manifest_level.table_count_visible <= limit);
            }
        }

        /// Asserts, for every level, that no table is invisible to `snapshot`.
        pub fn assert_no_invisible_tables(manifest: *const Manifest, snapshot: u64) void {
            for (manifest.levels) |_, index| {
                manifest.assert_no_invisible_tables_at_level(@intCast(u8, index), snapshot);
            }
        }

        /// Asserts that the `.invisible` iterator of `level` yields nothing for `snapshot`.
        fn assert_no_invisible_tables_at_level(
            manifest: *const Manifest,
            level: u8,
            snapshot: u64,
        ) void {
            const snapshots = [_]u64{snapshot};
            var leftovers = manifest.levels[level].iterator(
                .invisible,
                &snapshots,
                .ascending,
                null, // No KeyRange filter: check the whole level.
            );
            assert(leftovers.next() == null);
        }

        /// Returns the next table in the range, after `key_exclusive` if provided.
        ///
        /// * The table returned is visible to `snapshot`.
        /// * Without `key_exclusive`, this is the first table of `[key_min, key_max]` in
        ///   `direction`.
        /// * With `key_exclusive`, this is the first table lying strictly beyond that key in
        ///   `direction`.
        pub fn next_table(
            manifest: *const Manifest,
            level: u8,
            snapshot: u64,
            key_min: Key,
            key_max: Key,
            key_exclusive: ?Key,
            direction: Direction,
        ) ?*const TableInfo {
            assert(level < constants.lsm_levels);
            assert(compare_keys(key_min, key_max) != .gt);

            const snapshots = [_]u64{snapshot};

            const exclusive = key_exclusive orelse {
                return manifest.levels[level].iterator(
                    .visible,
                    &snapshots,
                    direction,
                    KeyRange{ .key_min = key_min, .key_max = key_max },
                ).next();
            };

            assert(compare_keys(exclusive, key_min) != .lt);
            assert(compare_keys(exclusive, key_max) != .gt);

            // Narrow the range on the side the iteration starts from.
            const key_min_exclusive = if (direction == .ascending) exclusive else key_min;
            const key_max_exclusive = if (direction == .descending) exclusive else key_max;
            assert(compare_keys(key_min_exclusive, key_max_exclusive) != .gt);

            var tables = manifest.levels[level].iterator(
                .visible,
                &snapshots,
                direction,
                KeyRange{ .key_min = key_min_exclusive, .key_max = key_max_exclusive },
            );

            while (tables.next()) |table| {
                assert(table.visible(snapshot));
                assert(compare_keys(table.key_min, table.key_max) != .gt);
                assert(compare_keys(table.key_max, key_min_exclusive) != .lt);
                assert(compare_keys(table.key_min, key_max_exclusive) != .gt);

                // The narrowed range is inclusive, so a table touching `exclusive` can still
                // be yielded; skip tables until one lies strictly past it.
                const beyond_exclusive = switch (direction) {
                    .ascending => compare_keys(table.key_min, exclusive) == .gt,
                    .descending => compare_keys(table.key_max, exclusive) == .lt,
                };
                if (beyond_exclusive) return table;
            }

            return null;
        }

        /// Returns the most optimal table for compaction from a level that is due for
        /// compaction: the table whose compaction range spans the fewest tables. On a tie,
        /// the first such table in ascending order wins.
        /// Returns null if the level is not due for compaction (table_count_visible < count_max).
        pub fn compaction_table(manifest: *const Manifest, level_a: u8) ?CompactionTableRange {
            // The last level is not compacted into another.
            assert(level_a < constants.lsm_levels - 1);

            const table_count_visible_max = table_count_max_for_level(growth_factor, level_a);
            assert(table_count_visible_max > 0);

            const manifest_level: *const Level = &manifest.levels[level_a];
            if (manifest_level.table_count_visible < table_count_visible_max) return null;
            // If even levels are compacted ahead of odd levels, then odd levels may burst.
            assert(manifest_level.table_count_visible <= table_count_visible_max + 1);

            const snapshots = [1]u64{snapshot_latest};
            var tables = manifest_level.iterator(
                .visible,
                &snapshots,
                .ascending,
                null, // All visible tables in the level therefore no KeyRange filter.
            );

            var optimal: ?CompactionTableRange = null;
            var iterations: usize = 0;
            while (tables.next()) |table| {
                iterations += 1;

                const range = manifest.compaction_range(level_a + 1, table.key_min, table.key_max);
                const improves = if (optimal) |best|
                    range.table_count < best.range.table_count
                else
                    true;
                if (improves) {
                    optimal = CompactionTableRange{
                        .table = table,
                        .range = range,
                    };
                }
                // If the table can be moved directly between levels then that is already optimal.
                if (optimal.?.range.table_count == 1) break;
            }
            assert(iterations > 0);
            assert(iterations == manifest_level.table_count_visible or
                optimal.?.range.table_count == 1);

            return optimal.?;
        }

        /// A level A table chosen for compaction, together with its compaction range.
        pub const CompactionTableRange = struct {
            table: *const TableInfo,
            range: CompactionRange,
        };

        pub const CompactionRange = struct {
            /// The total number of tables in the compaction across both levels, always at least 1.
            table_count: usize,
            /// The minimum key across both levels.
            key_min: Key,
            /// The maximum key across both levels.
            key_max: Key,
        };

        /// Returns the smallest visible range across level A and B that overlaps key_min/max.
        ///
        /// For example, for a table in level 2, count how many tables overlap in level 3, and
        /// determine the span of their entire key range, which may be broader or narrower.
        ///
        /// The range.table_count includes the input table from level A represented by key_min/max.
        /// Thus range.table_count=1 means that the table may be moved directly between levels.
        ///
        /// The range keys are guaranteed to encompass all the relevant level A and level B tables:
        ///   range.key_min = min(a.key_min, b.key_min)
        ///   range.key_max = max(a.key_max, b.key_max)
        ///
        /// This last invariant is critical to ensuring that tombstones are dropped correctly.
        pub fn compaction_range(
            manifest: *const Manifest,
            level_b: u8,
            key_min: Key,
            key_max: Key,
        ) CompactionRange {
            assert(level_b < constants.lsm_levels);
            assert(compare_keys(key_min, key_max) != .gt);

            // Start from the level A table alone; each overlapping level B table widens this.
            var range = CompactionRange{
                .table_count = 1,
                .key_min = key_min,
                .key_max = key_max,
            };

            const snapshots = [_]u64{snapshot_latest};
            var overlapping = manifest.levels[level_b].iterator(
                .visible,
                &snapshots,
                .ascending,
                KeyRange{ .key_min = range.key_min, .key_max = range.key_max },
            );

            while (overlapping.next()) |table| {
                assert(table.visible(snapshot_latest));
                assert(compare_keys(table.key_min, table.key_max) != .gt);
                assert(compare_keys(table.key_max, range.key_min) != .lt);
                assert(compare_keys(table.key_min, range.key_max) != .gt);

                // The first iterated table.key_min/max may overlap range.key_min/max entirely.
                if (compare_keys(table.key_min, range.key_min) == .lt) {
                    range.key_min = table.key_min;
                }

                // Thereafter, iterated tables may/may not extend the range in ascending order.
                if (compare_keys(table.key_max, range.key_max) == .gt) {
                    range.key_max = table.key_max;
                }

                range.table_count += 1;
            }

            assert(range.table_count > 0);
            assert(compare_keys(range.key_min, range.key_max) != .gt);
            assert(compare_keys(range.key_min, key_min) != .gt);
            assert(compare_keys(range.key_max, key_max) != .lt);

            return range;
        }

        /// If no subsequent levels have any overlap, then tombstones must be dropped.
        ///
        /// Returns false as soon as any level below `level_b` has a table, visible to
        /// `snapshot_latest`, that overlaps `range`; returns true otherwise.
        pub fn compaction_must_drop_tombstones(
            manifest: *const Manifest,
            level_b: u8,
            range: CompactionRange,
        ) bool {
            assert(level_b < constants.lsm_levels);
            assert(range.table_count > 0);
            assert(compare_keys(range.key_min, range.key_max) != .gt);

            const snapshots = [_]u64{snapshot_latest};

            var level_c: u8 = level_b + 1;
            while (level_c < constants.lsm_levels) : (level_c += 1) {
                var overlapping = manifest.levels[level_c].iterator(
                    .visible,
                    &snapshots,
                    .ascending,
                    KeyRange{ .key_min = range.key_min, .key_max = range.key_max },
                );
                if (overlapping.next() != null) {
                    // If the range is being compacted into the last level then this is
                    // unreachable, as the last level has no subsequent levels and must always
                    // drop tombstones.
                    assert(level_b != constants.lsm_levels - 1);
                    return false;
                }
            }

            assert(level_c == constants.lsm_levels);
            return true;
        }

        /// Forwards to `manifest_log.reserve()`. Neither a compact nor a checkpoint may be
        /// pending.
        pub fn reserve(manifest: *Manifest) void {
            assert(manifest.compact_callback == null);
            assert(manifest.checkpoint_callback == null);

            manifest.manifest_log.reserve();
        }

        /// Starts compacting the manifest log.
        ///
        /// `callback` is stored and later invoked by `manifest_log_compact_callback`. Neither
        /// a compact nor a checkpoint may already be pending.
        pub fn compact(manifest: *Manifest, callback: Callback) void {
            assert(manifest.compact_callback == null);
            assert(manifest.checkpoint_callback == null);
            manifest.compact_callback = callback;

            manifest.manifest_log.compact(manifest_log_compact_callback);
        }

        /// Completion handler handed to `manifest_log.compact`: clears the pending compact
        /// callback and then invokes it.
        fn manifest_log_compact_callback(manifest_log: *ManifestLog) void {
            const manifest = @fieldParentPtr(Manifest, "manifest_log", manifest_log);
            assert(manifest.compact_callback != null);
            assert(manifest.checkpoint_callback == null);

            const on_compact = manifest.compact_callback.?;
            manifest.compact_callback = null;
            on_compact(manifest);
        }

        /// Starts checkpointing the manifest log.
        ///
        /// `callback` is stored and later invoked by `manifest_log_checkpoint_callback`.
        /// Neither a compact nor a checkpoint may already be pending.
        pub fn checkpoint(manifest: *Manifest, callback: Callback) void {
            assert(manifest.compact_callback == null);
            assert(manifest.checkpoint_callback == null);
            manifest.checkpoint_callback = callback;

            manifest.manifest_log.checkpoint(manifest_log_checkpoint_callback);
        }

        /// Completion handler handed to `manifest_log.checkpoint`: clears the pending
        /// checkpoint callback and then invokes it.
        fn manifest_log_checkpoint_callback(manifest_log: *ManifestLog) void {
            const manifest = @fieldParentPtr(Manifest, "manifest_log", manifest_log);
            assert(manifest.compact_callback == null);
            assert(manifest.checkpoint_callback != null);

            const on_checkpoint = manifest.checkpoint_callback.?;
            manifest.checkpoint_callback = null;
            on_checkpoint(manifest);
        }

        /// For every level, checks the tables visible to `snapshot` in ascending order:
        /// each table's key range must be well formed and must start strictly after the
        /// previous table's range ends. Each table is also passed to `Table.verify`.
        pub fn verify(manifest: *Manifest, snapshot: u64) void {
            const snapshots = [_]u64{snapshot};

            for (manifest.levels) |*level| {
                var key_max_prev: ?Key = null;
                var table_infos = level.iterator(
                    .visible,
                    &snapshots,
                    .ascending,
                    null,
                );
                while (table_infos.next()) |table_info| {
                    if (key_max_prev) |previous_max| {
                        assert(compare_keys(previous_max, table_info.key_min) == .lt);
                    }
                    // We could have key_min == key_max if there is only one value.
                    assert(compare_keys(table_info.key_min, table_info.key_max) != .gt);
                    key_max_prev = table_info.key_max;

                    Table.verify(
                        Storage,
                        manifest.manifest_log.grid.superblock.storage,
                        table_info.address,
                        table_info.key_min,
                        table_info.key_max,
                    );
                }
            }
        }
    };
}
|