tigerbeetle-node 0.11.13 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. package/dist/bin/aarch64-linux-gnu/client.node +0 -0
  2. package/dist/bin/aarch64-linux-musl/client.node +0 -0
  3. package/dist/bin/aarch64-macos/client.node +0 -0
  4. package/dist/bin/x86_64-linux-gnu/client.node +0 -0
  5. package/dist/bin/x86_64-linux-musl/client.node +0 -0
  6. package/dist/bin/x86_64-macos/client.node +0 -0
  7. package/dist/index.js +33 -1
  8. package/dist/index.js.map +1 -1
  9. package/package-lock.json +66 -0
  10. package/package.json +6 -16
  11. package/src/index.ts +56 -1
  12. package/src/node.zig +9 -9
  13. package/dist/.client.node.sha256 +0 -1
  14. package/scripts/build_lib.sh +0 -61
  15. package/scripts/download_node_headers.sh +0 -32
  16. package/src/tigerbeetle/scripts/benchmark.bat +0 -55
  17. package/src/tigerbeetle/scripts/benchmark.sh +0 -66
  18. package/src/tigerbeetle/scripts/confirm_image.sh +0 -44
  19. package/src/tigerbeetle/scripts/fail_on_diff.sh +0 -9
  20. package/src/tigerbeetle/scripts/fuzz_loop.sh +0 -15
  21. package/src/tigerbeetle/scripts/fuzz_loop_hash_log.sh +0 -12
  22. package/src/tigerbeetle/scripts/fuzz_unique_errors.sh +0 -7
  23. package/src/tigerbeetle/scripts/install.bat +0 -7
  24. package/src/tigerbeetle/scripts/install.sh +0 -21
  25. package/src/tigerbeetle/scripts/install_zig.bat +0 -113
  26. package/src/tigerbeetle/scripts/install_zig.sh +0 -90
  27. package/src/tigerbeetle/scripts/lint.zig +0 -199
  28. package/src/tigerbeetle/scripts/pre-commit.sh +0 -9
  29. package/src/tigerbeetle/scripts/scripts/benchmark.bat +0 -55
  30. package/src/tigerbeetle/scripts/scripts/benchmark.sh +0 -66
  31. package/src/tigerbeetle/scripts/scripts/confirm_image.sh +0 -44
  32. package/src/tigerbeetle/scripts/scripts/fail_on_diff.sh +0 -9
  33. package/src/tigerbeetle/scripts/scripts/fuzz_loop.sh +0 -15
  34. package/src/tigerbeetle/scripts/scripts/fuzz_loop_hash_log.sh +0 -12
  35. package/src/tigerbeetle/scripts/scripts/fuzz_unique_errors.sh +0 -7
  36. package/src/tigerbeetle/scripts/scripts/install.bat +0 -7
  37. package/src/tigerbeetle/scripts/scripts/install.sh +0 -21
  38. package/src/tigerbeetle/scripts/scripts/install_zig.bat +0 -113
  39. package/src/tigerbeetle/scripts/scripts/install_zig.sh +0 -90
  40. package/src/tigerbeetle/scripts/scripts/lint.zig +0 -199
  41. package/src/tigerbeetle/scripts/scripts/pre-commit.sh +0 -9
  42. package/src/tigerbeetle/scripts/scripts/shellcheck.sh +0 -5
  43. package/src/tigerbeetle/scripts/scripts/tests_on_alpine.sh +0 -10
  44. package/src/tigerbeetle/scripts/scripts/tests_on_ubuntu.sh +0 -14
  45. package/src/tigerbeetle/scripts/scripts/upgrade_ubuntu_kernel.sh +0 -48
  46. package/src/tigerbeetle/scripts/scripts/validate_docs.sh +0 -23
  47. package/src/tigerbeetle/scripts/scripts/vr_state_enumerate +0 -46
  48. package/src/tigerbeetle/scripts/shellcheck.sh +0 -5
  49. package/src/tigerbeetle/scripts/tests_on_alpine.sh +0 -10
  50. package/src/tigerbeetle/scripts/tests_on_ubuntu.sh +0 -14
  51. package/src/tigerbeetle/scripts/upgrade_ubuntu_kernel.sh +0 -48
  52. package/src/tigerbeetle/scripts/validate_docs.sh +0 -23
  53. package/src/tigerbeetle/scripts/vr_state_enumerate +0 -46
  54. package/src/tigerbeetle/src/benchmark.zig +0 -336
  55. package/src/tigerbeetle/src/config.zig +0 -233
  56. package/src/tigerbeetle/src/constants.zig +0 -428
  57. package/src/tigerbeetle/src/ewah.zig +0 -286
  58. package/src/tigerbeetle/src/ewah_benchmark.zig +0 -120
  59. package/src/tigerbeetle/src/ewah_fuzz.zig +0 -130
  60. package/src/tigerbeetle/src/fifo.zig +0 -120
  61. package/src/tigerbeetle/src/io/benchmark.zig +0 -213
  62. package/src/tigerbeetle/src/io/darwin.zig +0 -814
  63. package/src/tigerbeetle/src/io/linux.zig +0 -1071
  64. package/src/tigerbeetle/src/io/test.zig +0 -643
  65. package/src/tigerbeetle/src/io/windows.zig +0 -1183
  66. package/src/tigerbeetle/src/io.zig +0 -34
  67. package/src/tigerbeetle/src/iops.zig +0 -107
  68. package/src/tigerbeetle/src/lsm/README.md +0 -308
  69. package/src/tigerbeetle/src/lsm/binary_search.zig +0 -341
  70. package/src/tigerbeetle/src/lsm/bloom_filter.zig +0 -125
  71. package/src/tigerbeetle/src/lsm/compaction.zig +0 -603
  72. package/src/tigerbeetle/src/lsm/composite_key.zig +0 -77
  73. package/src/tigerbeetle/src/lsm/direction.zig +0 -11
  74. package/src/tigerbeetle/src/lsm/eytzinger.zig +0 -587
  75. package/src/tigerbeetle/src/lsm/eytzinger_benchmark.zig +0 -330
  76. package/src/tigerbeetle/src/lsm/forest.zig +0 -205
  77. package/src/tigerbeetle/src/lsm/forest_fuzz.zig +0 -450
  78. package/src/tigerbeetle/src/lsm/grid.zig +0 -573
  79. package/src/tigerbeetle/src/lsm/groove.zig +0 -1036
  80. package/src/tigerbeetle/src/lsm/k_way_merge.zig +0 -474
  81. package/src/tigerbeetle/src/lsm/level_iterator.zig +0 -332
  82. package/src/tigerbeetle/src/lsm/manifest.zig +0 -617
  83. package/src/tigerbeetle/src/lsm/manifest_level.zig +0 -878
  84. package/src/tigerbeetle/src/lsm/manifest_log.zig +0 -789
  85. package/src/tigerbeetle/src/lsm/manifest_log_fuzz.zig +0 -691
  86. package/src/tigerbeetle/src/lsm/merge_iterator.zig +0 -106
  87. package/src/tigerbeetle/src/lsm/node_pool.zig +0 -235
  88. package/src/tigerbeetle/src/lsm/posted_groove.zig +0 -381
  89. package/src/tigerbeetle/src/lsm/segmented_array.zig +0 -1329
  90. package/src/tigerbeetle/src/lsm/segmented_array_benchmark.zig +0 -148
  91. package/src/tigerbeetle/src/lsm/segmented_array_fuzz.zig +0 -9
  92. package/src/tigerbeetle/src/lsm/set_associative_cache.zig +0 -850
  93. package/src/tigerbeetle/src/lsm/table.zig +0 -1009
  94. package/src/tigerbeetle/src/lsm/table_immutable.zig +0 -192
  95. package/src/tigerbeetle/src/lsm/table_iterator.zig +0 -340
  96. package/src/tigerbeetle/src/lsm/table_mutable.zig +0 -203
  97. package/src/tigerbeetle/src/lsm/test.zig +0 -439
  98. package/src/tigerbeetle/src/lsm/tree.zig +0 -1169
  99. package/src/tigerbeetle/src/lsm/tree_fuzz.zig +0 -479
  100. package/src/tigerbeetle/src/message_bus.zig +0 -1013
  101. package/src/tigerbeetle/src/message_pool.zig +0 -156
  102. package/src/tigerbeetle/src/ring_buffer.zig +0 -399
  103. package/src/tigerbeetle/src/simulator.zig +0 -580
  104. package/src/tigerbeetle/src/state_machine/auditor.zig +0 -578
  105. package/src/tigerbeetle/src/state_machine/workload.zig +0 -883
  106. package/src/tigerbeetle/src/state_machine.zig +0 -2099
  107. package/src/tigerbeetle/src/static_allocator.zig +0 -65
  108. package/src/tigerbeetle/src/stdx.zig +0 -171
  109. package/src/tigerbeetle/src/storage.zig +0 -393
  110. package/src/tigerbeetle/src/testing/cluster/message_bus.zig +0 -82
  111. package/src/tigerbeetle/src/testing/cluster/network.zig +0 -237
  112. package/src/tigerbeetle/src/testing/cluster/state_checker.zig +0 -169
  113. package/src/tigerbeetle/src/testing/cluster/storage_checker.zig +0 -202
  114. package/src/tigerbeetle/src/testing/cluster.zig +0 -444
  115. package/src/tigerbeetle/src/testing/fuzz.zig +0 -140
  116. package/src/tigerbeetle/src/testing/hash_log.zig +0 -66
  117. package/src/tigerbeetle/src/testing/id.zig +0 -99
  118. package/src/tigerbeetle/src/testing/packet_simulator.zig +0 -374
  119. package/src/tigerbeetle/src/testing/priority_queue.zig +0 -645
  120. package/src/tigerbeetle/src/testing/reply_sequence.zig +0 -139
  121. package/src/tigerbeetle/src/testing/state_machine.zig +0 -250
  122. package/src/tigerbeetle/src/testing/storage.zig +0 -757
  123. package/src/tigerbeetle/src/testing/table.zig +0 -247
  124. package/src/tigerbeetle/src/testing/time.zig +0 -84
  125. package/src/tigerbeetle/src/tigerbeetle.zig +0 -227
  126. package/src/tigerbeetle/src/time.zig +0 -112
  127. package/src/tigerbeetle/src/tracer.zig +0 -529
  128. package/src/tigerbeetle/src/unit_tests.zig +0 -40
  129. package/src/tigerbeetle/src/vopr.zig +0 -495
  130. package/src/tigerbeetle/src/vsr/README.md +0 -209
  131. package/src/tigerbeetle/src/vsr/client.zig +0 -544
  132. package/src/tigerbeetle/src/vsr/clock.zig +0 -855
  133. package/src/tigerbeetle/src/vsr/journal.zig +0 -2415
  134. package/src/tigerbeetle/src/vsr/journal_format_fuzz.zig +0 -111
  135. package/src/tigerbeetle/src/vsr/marzullo.zig +0 -309
  136. package/src/tigerbeetle/src/vsr/replica.zig +0 -6616
  137. package/src/tigerbeetle/src/vsr/replica_format.zig +0 -219
  138. package/src/tigerbeetle/src/vsr/superblock.zig +0 -1631
  139. package/src/tigerbeetle/src/vsr/superblock_client_table.zig +0 -256
  140. package/src/tigerbeetle/src/vsr/superblock_free_set.zig +0 -929
  141. package/src/tigerbeetle/src/vsr/superblock_free_set_fuzz.zig +0 -334
  142. package/src/tigerbeetle/src/vsr/superblock_fuzz.zig +0 -390
  143. package/src/tigerbeetle/src/vsr/superblock_manifest.zig +0 -615
  144. package/src/tigerbeetle/src/vsr/superblock_quorums.zig +0 -394
  145. package/src/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +0 -314
  146. package/src/tigerbeetle/src/vsr.zig +0 -1425
@@ -1,603 +0,0 @@
1
- //! Compaction moves or merges a table's values into the next level.
2
- //!
3
- //! Each Compaction is paced to run in one half-bar.
4
- //!
5
- //!
6
- //! Compaction overview:
7
- //!
8
- //! 1. Given:
9
- //!
10
- //! - levels A and B, where A+1=B
11
- //! - a single table in level A ("table A")
12
- //! - all tables from level B which intersect table A's key range ("tables B")
13
- //! (This can include anything between 0 tables and all of level B's tables.)
14
- //!
15
- //! 2. If table A's key range is disjoint from the keys in level B, move table A into level B.
16
- //! All done! (But if the key ranges intersect, jump to step 3).
17
- //!
18
- //! 3. Create an iterator from the sort-merge of table A and the concatenation of tables B.
19
- //! If the same key exists in level A and B, take A's and discard B's. †
20
- //!
21
- //! 4. Write the sort-merge iterator into a sequence of new tables on disk.
22
- //!
23
- //! 5. Update the input tables in the Manifest with their new `snapshot_max` so that they become
24
- //! invisible to subsequent read transactions.
25
- //!
26
- //! 6. Insert the new level-B tables into the Manifest.
27
- //!
28
- //! † When A's value is a tombstone, there is a special case for garbage collection. When either:
29
- //! * level B is the final level, or
30
- //! * A's key does not exist in B or any deeper level,
31
- //! then the tombstone is omitted from the compacted output (see: `compaction_must_drop_tombstones`).
32
- //!
33
- const std = @import("std");
34
- const mem = std.mem;
35
- const math = std.math;
36
- const assert = std.debug.assert;
37
-
38
- const log = std.log.scoped(.compaction);
39
- const tracer = @import("../tracer.zig");
40
-
41
- const constants = @import("../constants.zig");
42
-
43
- const GridType = @import("grid.zig").GridType;
44
- const ManifestType = @import("manifest.zig").ManifestType;
45
- const MergeIteratorType = @import("merge_iterator.zig").MergeIteratorType;
46
- const TableIteratorType = @import("table_iterator.zig").TableIteratorType;
47
- const LevelIteratorType = @import("level_iterator.zig").LevelIteratorType;
48
-
49
/// Returns the Compaction type specialised for `Table` and `Storage`.
/// `IteratorAType(Table, Storage)` yields the iterator used for the level-A input;
/// the level-B input is always read through `LevelIteratorType(Table, Storage)`.
pub fn CompactionType(
    comptime Table: type,
    comptime Storage: type,
    comptime IteratorAType: anytype,
) type {
    const tombstone = Table.tombstone;

    return struct {
        const Compaction = @This();

        const Grid = GridType(Storage);
        const BlockPtr = Grid.BlockPtr;
        const BlockPtrConst = Grid.BlockPtrConst;

        /// One output block (index, filter, or data) together with its grid write.
        /// `block` is only meaningful while `state` is `.writable` or `.writing`.
        const BlockWrite = struct {
            write: Grid.Write = undefined,
            block: *BlockPtr = undefined,
            state: BlockState = .building,
        };

        /// Lifecycle of a `BlockWrite`:
        /// - `.building`: the table builder may still be filling the block.
        /// - `.writable`: `cpu_merge()` finished the block; the next `compact_tick()` writes it.
        /// - `.writing`: `io_write_start()` submitted the write; the write callback returns
        ///   the state to `.building`.
        const BlockState = enum {
            building,
            writable,
            writing,
        };

        const Manifest = ManifestType(Table, Storage);
        const TableInfo = Manifest.TableInfo;

        const IteratorA = IteratorAType(Table, Storage);
        const IteratorB = LevelIteratorType(Table, Storage);

        const MergeIterator = MergeIteratorType(
            Table,
            IteratorA,
            IteratorB,
        );

        /// Invoked at the end of each `compact_tick()`, after that tick's IO has completed
        /// and the CPU merge step has run.
        pub const Callback = fn (it: *Compaction) void;

        /// - `.idle`: set by `init()` and `reset()`; `start()` may be called.
        /// - `.processing`: set by `start()`; `compact_tick()` may be called.
        /// - `.done`: set by `cpu_merge_finish()`; `reset()` may be called.
        const Status = enum {
            idle,
            processing,
            done,
        };

        /// Used only for debugging/tracing.
        name: [:0]const u8,

        grid: *Grid,
        grid_reservation: Grid.Reservation,
        range: Manifest.CompactionRange,

        /// `op_min` is the first op/beat of this compaction's half-bar.
        /// `op_min` is used as a snapshot — the compaction's input tables must be visible
        /// to `op_min`.
        ///
        /// After this compaction finishes:
        /// - `op_min + half_bar_beat_count - 1` will be the input tables' snapshot_max.
        /// - `op_min + half_bar_beat_count` will be the output tables' snapshot_min.
        op_min: u64,
        /// When true, `cpu_merge()` skips tombstone values instead of writing them out.
        drop_tombstones: bool,

        status: Status,
        /// Non-null only while a `compact_tick()` is in progress.
        callback: ?Callback = null,
        /// Count of IO operations started but not yet finished during the current tick.
        /// The CPU merge step runs when this returns to zero.
        io_pending: u32 = 0,

        iterator_a: IteratorA,
        iterator_b: IteratorB,

        merge_done: bool,
        /// Created lazily by `cpu_merge_start()` and cleared by `cpu_merge_finish()`.
        merge_iterator: ?MergeIterator,

        table_builder: Table.Builder,
        index: BlockWrite,
        filter: BlockWrite,
        data: BlockWrite,

        manifest: *Manifest,
        level_b: u8,
        /// Copy of the level-A input table passed to `start()`; null when `level_b` is 0.
        level_a_input: ?TableInfo,

        /// Number of output tables inserted into the manifest so far; asserted never to
        /// exceed `range.table_count`.
        tables_output_count: usize = 0,

        tracer_slot: ?tracer.SpanStart = null,

        /// Allocates both input iterators and the table builder.
        /// On failure, anything already allocated is released before the error is returned.
        /// The returned Compaction is `.idle`; fields marked "Assigned by start()" are undefined.
        pub fn init(allocator: mem.Allocator, name: [:0]const u8) !Compaction {
            var iterator_a = try IteratorA.init(allocator);
            errdefer iterator_a.deinit(allocator);

            var iterator_b = try IteratorB.init(allocator);
            errdefer iterator_b.deinit(allocator);

            var table_builder = try Table.Builder.init(allocator);
            errdefer table_builder.deinit(allocator);

            return Compaction{
                .name = name,

                // Assigned by start()
                .grid = undefined,
                .grid_reservation = undefined,
                .range = undefined,
                .op_min = undefined,
                .drop_tombstones = undefined,

                .status = .idle,
                .iterator_a = iterator_a,
                .iterator_b = iterator_b,

                .merge_done = false,
                .merge_iterator = null,

                .table_builder = table_builder,
                .index = .{},
                .filter = .{},
                .data = .{},

                // Assigned by start()
                .manifest = undefined,
                .level_b = undefined,
                .level_a_input = null,
            };
        }

        /// Releases the resources acquired by `init()`, in reverse order of acquisition.
        pub fn deinit(compaction: *Compaction, allocator: mem.Allocator) void {
            compaction.table_builder.deinit(allocator);

            compaction.iterator_b.deinit(allocator);
            compaction.iterator_a.deinit(allocator);
        }

        /// The compaction's input tables are:
        /// * table_a (which is null when level B is 0), and
        /// * any level-B tables visible to `op_min` within `range`.
        ///
        /// Requires an `.idle` compaction. Reserves grid blocks for the output tables, moves
        /// the compaction to `.processing`, and starts both input iterators.
        pub fn start(
            compaction: *Compaction,
            grid: *Grid,
            manifest: *Manifest,
            op_min: u64,
            range: Manifest.CompactionRange,
            table_a: ?*const TableInfo,
            level_b: u8,
            iterator_a_context: IteratorA.Context,
        ) void {
            assert(compaction.status == .idle);
            assert(compaction.callback == null);
            assert(compaction.io_pending == 0);
            assert(!compaction.merge_done and compaction.merge_iterator == null);
            assert(compaction.tracer_slot == null);

            // `op_min` must fall on a half-bar boundary.
            assert(op_min % @divExact(constants.lsm_batch_multiple, 2) == 0);
            assert(range.table_count > 0);
            if (table_a) |t| assert(t.visible(op_min));

            assert(level_b < constants.lsm_levels);
            assert((level_b == 0) == (table_a == null));

            // Levels may choose to drop tombstones if keys aren't included in the lower levels.
            // This invariant is always true for the last level as it doesn't have any lower ones.
            const drop_tombstones = manifest.compaction_must_drop_tombstones(level_b, range);
            assert(drop_tombstones or level_b < constants.lsm_levels - 1);

            // Fields omitted from this literal (`callback`, `io_pending`, `tables_output_count`,
            // `tracer_slot`) are reset to their declared defaults.
            compaction.* = .{
                .name = compaction.name,

                .grid = grid,
                // Reserve enough blocks to write our output tables in the worst case, where:
                // - no tombstones are dropped,
                // - no values are overwritten,
                // - and all tables are full.
                //
                // We must reserve before doing any async work so that the block acquisition order
                // is deterministic (relative to other concurrent compactions).
                // TODO The replica must stop accepting requests if it runs out of blocks/capacity,
                // rather than panicking here.
                // TODO(Compaction Pacing): Reserve smaller increments, at the start of each beat.
                // (And likewise release the reservation at the end of each beat, instead of at the
                // end of each half-bar).
                // TODO(Move Table) Don't reserve these when we just move the table to the next level.
                .grid_reservation = grid.reserve(range.table_count * Table.block_count_max).?,
                .range = range,
                .op_min = op_min,
                .drop_tombstones = drop_tombstones,

                .status = .processing,
                .iterator_a = compaction.iterator_a,
                .iterator_b = compaction.iterator_b,

                .merge_done = false,
                .merge_iterator = null,

                .table_builder = compaction.table_builder,
                .index = compaction.index,
                .filter = compaction.filter,
                .data = compaction.data,

                .manifest = manifest,
                .level_b = level_b,
                .level_a_input = if (table_a) |table| table.* else null,
            };

            assert(compaction.index.state == .building);
            assert(compaction.filter.state == .building);
            assert(compaction.data.state == .building);

            // TODO Implement manifest.move_table() optimization if there's only range.table_count == 1.
            // This would do update_tables + insert_tables inline without going through the iterators.

            const iterator_b_context = .{
                .grid = grid,
                .manifest = manifest,
                .level = level_b,
                .snapshot = op_min,
                .key_min = range.key_min,
                .key_max = range.key_max,
                .direction = .ascending,
                .table_info_callback = iterator_b_table_info_callback,
            };

            compaction.iterator_a.start(iterator_a_context, iterator_a_io_callback);
            compaction.iterator_b.start(iterator_b_context, iterator_b_io_callback);
        }

        /// IO completion for iterator A: recovers the owning Compaction from the embedded
        /// `iterator_a` field and retires one pending IO.
        fn iterator_a_io_callback(iterator_a: *IteratorA) void {
            const compaction = @fieldParentPtr(Compaction, "iterator_a", iterator_a);
            compaction.io_finish();
        }

        /// IO completion for iterator B: recovers the owning Compaction from the embedded
        /// `iterator_b` field and retires one pending IO.
        fn iterator_b_io_callback(iterator_b: *IteratorB) void {
            const compaction = @fieldParentPtr(Compaction, "iterator_b", iterator_b);
            compaction.io_finish();
        }

        /// Called by iterator B for each level-B input table it discovers.
        /// Sets the table's snapshot_max in the manifest and releases the table's data,
        /// filter, and index block addresses in the grid.
        fn iterator_b_table_info_callback(
            iterator_b: *IteratorB,
            table: *const TableInfo,
            index_block: BlockPtrConst,
        ) void {
            const compaction = @fieldParentPtr(Compaction, "iterator_b", iterator_b);
            assert(compaction.status == .processing);
            assert(compaction.callback != null);
            assert(!compaction.merge_done);
            assert(table.visible(compaction.op_min));

            // Tables discovered by iterator_b that are visible at the start of compaction.
            var table_copy = table.*;
            compaction.manifest.update_table(
                compaction.level_b,
                snapshot_max_for_table_input(compaction.op_min),
                &table_copy,
            );

            // Release the table's block addresses in the Grid as it will be made invisible.
            // This is safe; iterator_b makes a copy of the block before calling us.
            const grid = compaction.grid;
            for (Table.index_data_addresses_used(index_block)) |address| {
                grid.release(address);
            }
            for (Table.index_filter_addresses_used(index_block)) |address| {
                grid.release(address);
            }
            grid.release(Table.index_block_address(index_block));
        }

        /// Runs one tick of the compaction: ticks both input iterators and submits writes for
        /// any blocks finished by the previous tick. Once every IO started here has completed,
        /// `cpu_merge_start()` runs and then invokes `callback`.
        /// Requires `.processing` status with no tick already in progress.
        pub fn compact_tick(compaction: *Compaction, callback: Callback) void {
            assert(compaction.status == .processing);
            assert(compaction.callback == null);
            assert(compaction.io_pending == 0);
            assert(!compaction.merge_done);

            compaction.callback = callback;

            tracer.start(
                &compaction.tracer_slot,
                .{ .tree_compaction = .{ .compaction_name = compaction.name } },
                .{ .tree_compaction_tick = .{ .level_b = compaction.level_b } },
                @src(),
            );

            // Generate fake IO to make sure io_pending doesn't reach zero multiple times from
            // IO being completed inline down below.
            // The fake IO is immediately resolved and triggers the cpu_merge_start if all
            // IO completes inline or if no IO was started.
            compaction.io_start();
            defer compaction.io_finish();

            // Start reading blocks from the iterators to merge them.
            if (compaction.iterator_a.tick()) compaction.io_start();
            if (compaction.iterator_b.tick()) compaction.io_start();

            // Start writing blocks prepared by the merge iterator from a previous compact_tick().
            compaction.io_write_start(.data);
            compaction.io_write_start(.filter);
            compaction.io_write_start(.index);
        }

        /// Selects which of the compaction's `BlockWrite` fields an operation targets.
        /// The tag names match the field names so `@tagName` can be used for field lookup.
        const BlockWriteField = enum { data, filter, index };

        /// Submits the grid write for the selected block if it is `.writable`; otherwise does
        /// nothing. The write counts as one pending IO until its callback fires.
        fn io_write_start(compaction: *Compaction, comptime field: BlockWriteField) void {
            const write_callback = struct {
                fn callback(write: *Grid.Write) void {
                    const block_write = @fieldParentPtr(BlockWrite, "write", write);

                    assert(block_write.state == .writing);
                    block_write.state = .building;

                    if (constants.verify) {
                        // We've finished writing so the block should now be zeroed.
                        assert(mem.allEqual(u8, block_write.block.*, 0));
                    }
                    block_write.block = undefined;

                    // `field` is comptime-known, so the owning Compaction can be recovered
                    // from the address of the corresponding BlockWrite field.
                    const _compaction = @fieldParentPtr(Compaction, @tagName(field), block_write);
                    _compaction.io_finish();
                }
            }.callback;

            const block_write: *BlockWrite = &@field(compaction, @tagName(field));
            if (block_write.state == .writable) {
                block_write.state = .writing;

                compaction.io_start();
                compaction.grid.write_block(
                    write_callback,
                    &block_write.write,
                    block_write.block,
                    Table.block_address(block_write.block.*),
                );
            }
        }

        /// Records that one more IO operation is outstanding for the current tick.
        fn io_start(compaction: *Compaction) void {
            assert(compaction.status == .processing);
            assert(compaction.callback != null);
            assert(!compaction.merge_done);

            compaction.io_pending += 1;
        }

        /// Records that one IO operation completed; when none remain outstanding, proceeds
        /// to the CPU merge step.
        fn io_finish(compaction: *Compaction) void {
            assert(compaction.status == .processing);
            assert(compaction.callback != null);
            assert(compaction.io_pending > 0);
            assert(!compaction.merge_done);

            compaction.io_pending -= 1;
            if (compaction.io_pending == 0) compaction.cpu_merge_start();
        }

        /// Runs after all IO for the tick has completed. Creates the merge iterator on first
        /// use, then either merges more values (`cpu_merge`) or finalizes the compaction
        /// (`cpu_merge_finish`), and finally invokes the tick's callback.
        fn cpu_merge_start(compaction: *Compaction) void {
            assert(compaction.status == .processing);
            assert(compaction.callback != null);
            assert(compaction.io_pending == 0);
            assert(!compaction.merge_done);

            var tracer_slot: ?tracer.SpanStart = null;
            tracer.start(
                &tracer_slot,
                .{ .tree_compaction = .{ .compaction_name = compaction.name } },
                .{ .tree_compaction_merge = .{ .level_b = compaction.level_b } },
                @src(),
            );

            // Create the merge iterator only when we can peek() from the read iterators.
            // This happens after IO for the first reads complete.
            if (compaction.merge_iterator == null) {
                compaction.merge_iterator = MergeIterator.init(
                    &compaction.iterator_a,
                    &compaction.iterator_b,
                );
                assert(!compaction.merge_iterator.?.empty());
            }

            assert(compaction.data.state == .building);
            assert(compaction.filter.state == .building);
            assert(compaction.index.state == .building);

            if (!compaction.merge_iterator.?.empty()) {
                compaction.cpu_merge();
            } else {
                compaction.cpu_merge_finish();
            }

            // Close the merge span opened above, then the tick span opened in compact_tick().
            tracer.end(
                &tracer_slot,
                .{ .tree_compaction = .{ .compaction_name = compaction.name } },
                .{ .tree_compaction_merge = .{ .level_b = compaction.level_b } },
            );
            tracer.end(
                &compaction.tracer_slot,
                .{ .tree_compaction = .{ .compaction_name = compaction.name } },
                .{ .tree_compaction_tick = .{ .level_b = compaction.level_b } },
            );

            // TODO Implement pacing here by deciding if we should do another compact_tick()
            // instead of invoking the callback, using compaction.range.table_count as the heuristic.

            // Clear the stored callback before invoking it, so the compaction no longer
            // appears mid-tick while the callback runs.
            const callback = compaction.callback.?;
            compaction.callback = null;
            callback(compaction);
        }

        /// Pops merged values into the current data block, then finalizes whichever of the
        /// data, filter, and index blocks are complete and marks them `.writable` for the
        /// next `compact_tick()`. Finishing an index block also inserts the new table into
        /// the manifest at `level_b`.
        fn cpu_merge(compaction: *Compaction) void {
            // Ensure this is the result of a compact_tick() call that finished processing IO.
            assert(compaction.status == .processing);
            assert(compaction.callback != null);
            assert(compaction.io_pending == 0);
            assert(!compaction.merge_done);

            // Ensure there are values to merge and that it is safe to do so.
            const merge_iterator = &compaction.merge_iterator.?;
            assert(!merge_iterator.empty());
            assert(compaction.data.state == .building);
            assert(compaction.filter.state == .building);
            assert(compaction.index.state == .building);

            // Build up a data block with values merged from the read iterators.
            // This skips tombstone values if compaction was started with the intent to drop them.
            while (!compaction.table_builder.data_block_full()) {
                const value = merge_iterator.pop() orelse break;
                if (compaction.drop_tombstones and tombstone(&value)) continue;
                compaction.table_builder.data_block_append(&value);
            }

            // Finalize the data block if it's full or if it contains pending values when there's
            // no more left to merge.
            if (compaction.table_builder.data_block_full() or
                compaction.table_builder.filter_block_full() or
                compaction.table_builder.index_block_full() or
                (merge_iterator.empty() and !compaction.table_builder.data_block_empty()))
            {
                compaction.table_builder.data_block_finish(.{
                    .cluster = compaction.grid.superblock.working.cluster,
                    .address = compaction.grid.acquire(compaction.grid_reservation),
                });

                // Mark the finished data block as writable for the next compact_tick() call.
                compaction.data.block = &compaction.table_builder.data_block;
                assert(compaction.data.state == .building);
                compaction.data.state = .writable;
            }

            // Finalize the filter block if it's full or if it contains pending data blocks
            // when there's no more merged values to fill them.
            if (compaction.table_builder.filter_block_full() or
                compaction.table_builder.index_block_full() or
                (merge_iterator.empty() and !compaction.table_builder.filter_block_empty()))
            {
                compaction.table_builder.filter_block_finish(.{
                    .cluster = compaction.grid.superblock.working.cluster,
                    .address = compaction.grid.acquire(compaction.grid_reservation),
                });

                // Mark the finished filter block as writable for the next compact_tick() call.
                compaction.filter.block = &compaction.table_builder.filter_block;
                assert(compaction.filter.state == .building);
                compaction.filter.state = .writable;
            }

            // Finalize the index block if it's full or if it contains pending data blocks
            // when there's no more merged values to fill them.
            if (compaction.table_builder.index_block_full() or
                (merge_iterator.empty() and !compaction.table_builder.index_block_empty()))
            {
                const table = compaction.table_builder.index_block_finish(.{
                    .cluster = compaction.grid.superblock.working.cluster,
                    .address = compaction.grid.acquire(compaction.grid_reservation),
                    .snapshot_min = snapshot_min_for_table_output(compaction.op_min),
                    // TODO(Persistent Snapshots) set snapshot_max to the minimum snapshot_max of
                    // all the (original) input tables.
                });
                compaction.manifest.insert_table(compaction.level_b, &table);

                // Mark the finished index block as writable for the next compact_tick() call.
                compaction.index.block = &compaction.table_builder.index_block;
                assert(compaction.index.state == .building);
                compaction.index.state = .writable;

                compaction.tables_output_count += 1;
                assert(compaction.tables_output_count <= compaction.range.table_count);
            }
        }

        /// Runs once the merge iterator is empty and no block is awaiting a write.
        /// Sets the level-A input table's snapshot_max in the manifest (if there is one)
        /// and moves the compaction to `.done`.
        fn cpu_merge_finish(compaction: *Compaction) void {
            // Ensure this is the result of a compact_tick() call that finished processing IO.
            assert(compaction.status == .processing);
            assert(compaction.callback != null);
            assert(compaction.io_pending == 0);
            assert(!compaction.merge_done);

            // Ensure merging is truly finished.
            assert(compaction.merge_iterator.?.empty());
            assert(compaction.data.state == .building);
            assert(compaction.filter.state == .building);
            assert(compaction.index.state == .building);

            // Double check the iterators are finished as well.
            // `peek()` returning a value or `error.Drained` counts as "not empty";
            // only `error.Empty` counts as "empty".
            const stream_empty = struct {
                fn empty(it: anytype) bool {
                    _ = it.peek() catch |err| switch (err) {
                        error.Drained => {},
                        error.Empty => {
                            assert(it.buffered_all_values());
                            return true;
                        },
                    };
                    return false;
                }
            }.empty;
            assert(stream_empty(&compaction.iterator_a));
            assert(stream_empty(&compaction.iterator_b));

            // Mark the level_a table as invisible if it was provided;
            // it has been merged into level_b.
            // TODO: Release the grid blocks associated with level_a as well
            if (compaction.level_a_input) |*level_a_table| {
                const level_a = compaction.level_b - 1;
                const snapshot_max = snapshot_max_for_table_input(compaction.op_min);
                compaction.manifest.update_table(level_a, snapshot_max, level_a_table);
                assert(level_a_table.snapshot_max == snapshot_max);
            } else {
                assert(compaction.level_b == 0);
            }

            // Finally, mark Compaction as officially complete and ready to be reset().
            compaction.merge_iterator = null;
            compaction.merge_done = true;
            compaction.status = .done;
        }

        /// Returns a `.done` compaction to `.idle` and forfeits its grid reservation so that
        /// `start()` may be called again.
        pub fn reset(compaction: *Compaction) void {
            assert(compaction.status == .done);
            assert(compaction.callback == null);
            assert(compaction.io_pending == 0);
            assert(compaction.merge_done);
            assert(compaction.tracer_slot == null);

            // TODO(Beat Pacing) This should really be where the compaction callback is invoked,
            // but currently that can occur multiple times per beat.
            compaction.grid.forfeit(compaction.grid_reservation);

            compaction.status = .idle;
            compaction.merge_done = false;
        }
    };
}
594
-
595
/// Returns the snapshot_max to assign to a compaction's input tables: the last op of the
/// half-bar that begins at `op_min`. `op_min` must be aligned to a half-bar boundary.
fn snapshot_max_for_table_input(op_min: u64) u64 {
    const half_bar_beat_count = @divExact(constants.lsm_batch_multiple, 2);
    assert(op_min % half_bar_beat_count == 0);
    return op_min + half_bar_beat_count - 1;
}
599
-
600
/// Returns the snapshot_min to assign to a compaction's output tables: the first op after
/// the half-bar that begins at `op_min`. `op_min` must be aligned to a half-bar boundary.
fn snapshot_min_for_table_output(op_min: u64) u64 {
    const half_bar_beat_count = @divExact(constants.lsm_batch_multiple, 2);
    assert(op_min % half_bar_beat_count == 0);
    return op_min + half_bar_beat_count;
}
@@ -1,77 +0,0 @@
1
- const std = @import("std");
2
- const assert = std.debug.assert;
3
- const math = std.math;
4
-
5
/// Returns a packed key type that pairs a `Field` (u64 or u128) with a u64 timestamp.
/// The nested `Value` type has the same layout; the top bit of its timestamp marks the
/// value as a tombstone.
pub fn CompositeKey(comptime Field: type) type {
    assert(Field == u64 or Field == u128);

    return packed struct {
        const Self = @This();

        /// Top bit of `Value.timestamp`; set when the value is a tombstone.
        const tombstone_bit = 1 << 63;

        // If zeroed padding is needed after the timestamp field.
        const pad = Field == u128;
        const Padding = if (pad) u64 else u0;

        /// The largest representable key: every bit of `field` and `timestamp` is set.
        pub const sentinel_key: Self = .{
            .field = math.maxInt(Field),
            .timestamp = math.maxInt(u64),
        };

        pub const Value = packed struct {
            field: Field align(@alignOf(Field)),
            /// The most significant bit indicates if the value is a tombstone.
            timestamp: u64 align(@alignOf(u64)),
            padding: Padding = 0,

            comptime {
                assert(@sizeOf(Value) == @sizeOf(Field) * 2);
                assert(@alignOf(Value) == @alignOf(Field));
                assert(@sizeOf(Value) * 8 == @bitSizeOf(Value));
            }
        };

        field: Field align(@alignOf(Field)),
        /// The most significant bit must be unset as it is used to indicate a tombstone.
        timestamp: u64 align(@alignOf(u64)),
        padding: Padding = 0,

        comptime {
            assert(@sizeOf(Self) == @sizeOf(Field) * 2);
            assert(@alignOf(Self) == @alignOf(Field));
            assert(@sizeOf(Self) * 8 == @bitSizeOf(Self));
        }

        /// Orders keys by `field` first, then by `timestamp`.
        pub inline fn compare_keys(a: Self, b: Self) math.Order {
            const by_field = math.order(a.field, b.field);
            if (by_field != .eq) return by_field;
            return math.order(a.timestamp, b.timestamp);
        }

        /// Extracts the key from a value, clearing the tombstone bit of the timestamp.
        pub inline fn key_from_value(value: *const Value) Self {
            const timestamp_without_tombstone = value.timestamp & ~@as(u64, tombstone_bit);
            return .{
                .field = value.field,
                .timestamp = timestamp_without_tombstone,
            };
        }

        /// Returns true when the value's tombstone bit is set.
        pub inline fn tombstone(value: *const Value) bool {
            const flag = value.timestamp & tombstone_bit;
            return flag != 0;
        }

        /// Builds the tombstone value for `key`: same field, timestamp with the top bit set.
        pub inline fn tombstone_from_key(key: Self) Value {
            const timestamp_with_tombstone = key.timestamp | tombstone_bit;
            return .{
                .field = key.field,
                .timestamp = timestamp_with_tombstone,
            };
        }
    };
}
@@ -1,11 +0,0 @@
1
/// Traversal order.
pub const Direction = enum {
    ascending,
    descending,

    /// Returns the opposite direction.
    pub fn reverse(d: Direction) Direction {
        if (d == .ascending) return .descending;
        return .ascending;
    }
};