tigerbeetle-node 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/README.md +302 -101
  2. package/dist/index.d.ts +70 -72
  3. package/dist/index.js +70 -72
  4. package/dist/index.js.map +1 -1
  5. package/package.json +6 -6
  6. package/scripts/download_node_headers.sh +14 -7
  7. package/src/index.ts +6 -10
  8. package/src/node.zig +6 -3
  9. package/src/tigerbeetle/scripts/benchmark.sh +4 -4
  10. package/src/tigerbeetle/scripts/confirm_image.sh +44 -0
  11. package/src/tigerbeetle/scripts/fuzz_loop.sh +15 -0
  12. package/src/tigerbeetle/scripts/fuzz_unique_errors.sh +7 -0
  13. package/src/tigerbeetle/scripts/install.sh +19 -4
  14. package/src/tigerbeetle/scripts/install_zig.bat +5 -1
  15. package/src/tigerbeetle/scripts/install_zig.sh +24 -14
  16. package/src/tigerbeetle/scripts/pre-commit.sh +9 -0
  17. package/src/tigerbeetle/scripts/shellcheck.sh +5 -0
  18. package/src/tigerbeetle/scripts/tests_on_alpine.sh +10 -0
  19. package/src/tigerbeetle/scripts/tests_on_ubuntu.sh +14 -0
  20. package/src/tigerbeetle/src/benchmark.zig +4 -2
  21. package/src/tigerbeetle/src/benchmark_array_search.zig +3 -3
  22. package/src/tigerbeetle/src/c/tb_client/thread.zig +8 -9
  23. package/src/tigerbeetle/src/c/tb_client.h +100 -80
  24. package/src/tigerbeetle/src/c/tb_client.zig +4 -1
  25. package/src/tigerbeetle/src/cli.zig +1 -1
  26. package/src/tigerbeetle/src/config.zig +48 -16
  27. package/src/tigerbeetle/src/demo.zig +3 -1
  28. package/src/tigerbeetle/src/eytzinger_benchmark.zig +3 -3
  29. package/src/tigerbeetle/src/io/linux.zig +1 -1
  30. package/src/tigerbeetle/src/lsm/README.md +214 -0
  31. package/src/tigerbeetle/src/lsm/binary_search.zig +137 -10
  32. package/src/tigerbeetle/src/lsm/bloom_filter.zig +43 -0
  33. package/src/tigerbeetle/src/lsm/compaction.zig +352 -398
  34. package/src/tigerbeetle/src/lsm/composite_key.zig +2 -0
  35. package/src/tigerbeetle/src/lsm/eytzinger.zig +1 -1
  36. package/src/tigerbeetle/src/lsm/forest.zig +21 -447
  37. package/src/tigerbeetle/src/lsm/forest_fuzz.zig +412 -0
  38. package/src/tigerbeetle/src/lsm/grid.zig +145 -69
  39. package/src/tigerbeetle/src/lsm/groove.zig +196 -133
  40. package/src/tigerbeetle/src/lsm/k_way_merge.zig +40 -18
  41. package/src/tigerbeetle/src/lsm/level_iterator.zig +28 -9
  42. package/src/tigerbeetle/src/lsm/manifest.zig +81 -181
  43. package/src/tigerbeetle/src/lsm/manifest_level.zig +210 -454
  44. package/src/tigerbeetle/src/lsm/manifest_log.zig +77 -28
  45. package/src/tigerbeetle/src/lsm/posted_groove.zig +64 -76
  46. package/src/tigerbeetle/src/lsm/segmented_array.zig +561 -241
  47. package/src/tigerbeetle/src/lsm/segmented_array_benchmark.zig +148 -0
  48. package/src/tigerbeetle/src/lsm/segmented_array_fuzz.zig +9 -0
  49. package/src/tigerbeetle/src/lsm/set_associative_cache.zig +62 -12
  50. package/src/tigerbeetle/src/lsm/table.zig +83 -48
  51. package/src/tigerbeetle/src/lsm/table_immutable.zig +30 -23
  52. package/src/tigerbeetle/src/lsm/table_iterator.zig +25 -14
  53. package/src/tigerbeetle/src/lsm/table_mutable.zig +63 -12
  54. package/src/tigerbeetle/src/lsm/test.zig +49 -55
  55. package/src/tigerbeetle/src/lsm/tree.zig +407 -402
  56. package/src/tigerbeetle/src/lsm/tree_fuzz.zig +457 -0
  57. package/src/tigerbeetle/src/main.zig +28 -6
  58. package/src/tigerbeetle/src/message_bus.zig +2 -2
  59. package/src/tigerbeetle/src/message_pool.zig +14 -17
  60. package/src/tigerbeetle/src/simulator.zig +145 -112
  61. package/src/tigerbeetle/src/state_machine.zig +338 -228
  62. package/src/tigerbeetle/src/static_allocator.zig +65 -0
  63. package/src/tigerbeetle/src/storage.zig +3 -7
  64. package/src/tigerbeetle/src/test/accounting/auditor.zig +577 -0
  65. package/src/tigerbeetle/src/test/accounting/workload.zig +819 -0
  66. package/src/tigerbeetle/src/test/cluster.zig +18 -48
  67. package/src/tigerbeetle/src/test/conductor.zig +365 -0
  68. package/src/tigerbeetle/src/test/fuzz.zig +121 -0
  69. package/src/tigerbeetle/src/test/id.zig +89 -0
  70. package/src/tigerbeetle/src/test/priority_queue.zig +645 -0
  71. package/src/tigerbeetle/src/test/state_checker.zig +93 -69
  72. package/src/tigerbeetle/src/test/state_machine.zig +11 -35
  73. package/src/tigerbeetle/src/test/storage.zig +29 -8
  74. package/src/tigerbeetle/src/tigerbeetle.zig +14 -16
  75. package/src/tigerbeetle/src/unit_tests.zig +7 -0
  76. package/src/tigerbeetle/src/vopr.zig +494 -0
  77. package/src/tigerbeetle/src/vopr_hub/README.md +58 -0
  78. package/src/tigerbeetle/src/vopr_hub/SETUP.md +199 -0
  79. package/src/tigerbeetle/src/vopr_hub/go.mod +3 -0
  80. package/src/tigerbeetle/src/vopr_hub/main.go +1022 -0
  81. package/src/tigerbeetle/src/vopr_hub/scheduler/go.mod +3 -0
  82. package/src/tigerbeetle/src/vopr_hub/scheduler/main.go +403 -0
  83. package/src/tigerbeetle/src/vsr/client.zig +13 -0
  84. package/src/tigerbeetle/src/vsr/journal.zig +16 -13
  85. package/src/tigerbeetle/src/vsr/replica.zig +924 -491
  86. package/src/tigerbeetle/src/vsr/superblock.zig +55 -37
  87. package/src/tigerbeetle/src/vsr/superblock_client_table.zig +7 -10
  88. package/src/tigerbeetle/src/vsr/superblock_free_set.zig +2 -2
  89. package/src/tigerbeetle/src/vsr/superblock_manifest.zig +18 -3
  90. package/src/tigerbeetle/src/vsr.zig +75 -55
  91. package/src/tigerbeetle/scripts/vopr.bat +0 -48
  92. package/src/tigerbeetle/scripts/vopr.sh +0 -33
@@ -0,0 +1,214 @@
1
+ # Glossary
2
+
3
+ - _bar_/_measure_: `lsm_batch_multiple` beats; unit of incremental compaction.
4
+ - _beat_: `op % lsm_batch_multiple`; single step of an incremental compaction.
5
+ - _groove_: A collection of LSM trees, storing objects and their indices.
6
+ - _immutable table_: in-memory table; one per tree. Used to periodically flush the mutable table to
7
+ disk.
8
+ - _level_: Between `0` and `lsm_levels - 1` (usually `lsm_levels = 7`).
9
+ - _forest_: a collection of grooves.
10
+ - _manifest_: index of table and level metadata; one per tree.
11
+ - _mutable table_: in-memory table; one per tree. All tree updates are applied only to this table.
12
+ - _snapshot_: sequence number which selects the queryable partition of on-disk tables.
13
+
14
+ # Tree
15
+ ## Tables
16
+
17
+ A tree is a hierarchy of in-memory and on-disk tables. There are three categories of tables:
18
+
19
+ - The [mutable table](table_mutable.zig) is an in-memory table.
20
+ - Each tree has a single mutable table.
21
+ - All tree updates, inserts, and removes are applied to the mutable table.
22
+ - The mutable table's size is allocated to accommodate a full bar of updates.
23
+ - The [immutable table](table_immutable.zig) is an in-memory table.
24
+ - Each tree has a single immutable table.
25
+ - The mutable table's contents are periodically moved to the immutable table,
26
+ where they are stored while being flushed to level `0`.
27
+ - Level `0` … level `config.lsm_levels - 1` each contain an exponentially increasing number of
28
+ on-disk tables.
29
+ - Each tree has as many as `config.lsm_growth_factor ^ (level + 1)` tables per level.
30
+ (`config.lsm_growth_factor` is typically 8).
31
+ - Within a given level and snapshot, the tables' key ranges are [disjoint](manifest_level.zig).
32
+
33
+ ## Compaction
34
+
35
+ Tree compaction runs to the sound of music!
36
+
37
+ Compacting LSM trees involves merging and moving tables into the next levels as needed.
38
+ To avoid write amplification stalls and bound latency, compaction is done incrementally.
39
+
40
+ A full compaction phase is denoted as a bar or measure, using terms from music notation.
41
+ Each bar consists of `lsm_batch_multiple` beats or "compaction ticks" of work.
42
+ A compaction tick executes asynchronously immediately after every commit, with
43
+ `beat = commit.op % lsm_batch_multiple`.
44
+
45
+ A bar is split in half according to the "first" beat and "middle" beat.
46
+ The first half of the bar compacts even levels while the latter compacts odd levels.
47
+ Mutable table changes are sorted and compacted into the immutable table.
48
+ The immutable table is compacted into level 0 during the odd level half of the bar.
49
+
50
+ At any given point, there are at most `levels/2` compactions running concurrently.
51
+ The source level is denoted as `level_a` and the target level as `level_b`.
52
+ The last level in the LSM tree has no target level so it is never a source level.
53
+ Each compaction compacts a [single table](#table-selection) from `level_a` into all tables in
54
+ `level_b` which intersect the `level_a` table's key range.
55
+
56
+ Invariants:
57
+ * At the end of every beat, there is space in the mutable table for the next beat.
58
+ * The manifest is compacted at the end of every beat.
59
+ * The compactions' output tables are not [visible](#snapshots-and-compaction) until the compaction has finished.
60
+
61
+ 1. First half-bar, first beat ("first beat"):
62
+ * Assert no compactions are currently running.
63
+ * Allow the per-level table limits to overflow if needed (for example, if we may compact a table
64
+ from level `A` to level `B`, where level `B` is already full).
65
+ * Start compactions from even levels that have reached their table limit.
66
+
67
+ 2. First half-bar, last beat:
68
+ * Finish ticking any incomplete even-level compactions.
69
+ * Assert on callback completion that all compactions are complete.
70
+
71
+ 3. Second half-bar, first beat ("middle beat"):
72
+ * Assert no compactions are currently running.
73
+ * Start compactions from odd levels that have reached their table limit.
74
+ * Compact the immutable table if it contains any sorted values (it might be empty).
75
+
76
+ 4. Second half-bar, last beat:
77
+ * Finish ticking any incomplete odd-level and immutable table compactions.
78
+ * Assert on callback completion that all compactions are complete.
79
+ * Assert on callback completion that no level's table count overflows.
80
+ * Flush, clear, and sort mutable table values into immutable table for next bar.
81
+ * Remove input tables that are invisible to all current and persisted snapshots.
82
+
83
+ ### Compaction Selection Policy
84
+
85
+ Compaction targets the table from level `A` which overlaps the fewest tables of level `B`.
86
+
87
+ For example, in the following table (with `lsm_growth_factor=2`), each table is depicted as the range of keys it includes. The tables with uppercase letters would be chosen for compaction next.
88
+
89
+ ```
90
+ Level 0 A─────────────H l───────────────────────────z
91
+ Level 1 a───────e L─M o───────s u───────y
92
+ Level 2 b───d e─────h i───k l───n o─p q───s u─v w─────z
93
+ (Keys) a b c d e f g h i j k l m n o p q r s t u v w x y z
94
+ ```
95
+
96
+ Links:
97
+ - [`Manifest.compaction_table`](manifest.zig)
98
+ - [Constructing and Analyzing the LSM Compaction Design Space](http://vldb.org/pvldb/vol14/p2216-sarkar.pdf) describes the tradeoffs of various data movement policies. TigerBeetle implements the "least overlapping with parent" policy.
99
+ - [Option of Compaction Priority](https://rocksdb.org/blog/2016/01/29/compaction_pri.html)
100
+
101
+ ## Snapshots
102
+
103
+ Each table has a minimum and maximum integer snapshot (`snapshot_min` and `snapshot_max`).
104
+
105
+ Each query targets a particular snapshot. A table `T` is _visible_ to a snapshot `S` when
106
+
107
+ ```
108
+ T.snapshot_min ≤ S ≤ T.snapshot_max
109
+ ```
110
+
111
+ and is _invisible_ to the snapshot otherwise.
112
+
113
+ Compaction does not modify tables in place — it copies data. Snapshots control and distinguish
114
+ which copies are useful, and which can be deleted. Snapshots can also be persisted, enabling
115
+ queries against past states of the tree (unimplemented; future work).
116
+
117
+ ### Snapshots and Compaction
118
+
119
+ Consider the half-bar compaction beginning at op=`X` (`12`), with `lsm_batch_multiple=M` (`8`).
120
+ Each half-bar contains `N=M/2` (`4`) beats. The next half-bar begins at `Y=X+N` (`16`).
121
+
122
+ During the half-bar compaction `X` (op=`X…Y-1`; `12…15`), each commit prefetches from the snapshot
123
+ [equal to its own op](#current-snapshot). As shown, they continue to query the old (input) tables.
124
+
125
+ During the half-bar compaction `X`:
126
+ - `snapshot_max` of each input table is truncated to `Y-1` (`15`).
127
+ - `snapshot_min` of each output table is initialized to `Y` (`16`).
128
+
129
+ ```
130
+ 0 4 8 12 16 20 24 (op, snapshot)
131
+ ┼───┬───┼───┬───┼───┬───┼
132
+ ####
133
+ ····────────X────────···· (input tables, before compaction)
134
+ ····──────────── (input tables, after compaction)
135
+ Y────···· (output tables, after compaction)
136
+ ```
137
+
138
+ Beginning from the next op after the compaction (`Y`; `16`):
139
+ - The output tables of the above compaction `X` are visible.
140
+ - The input tables of the above compaction `X` are invisible.
141
+ - Therefore, it will lookup from the output tables, but ignore the input tables.
142
+ - Callers must not query from the output tables of `X` before the compaction half-bar has finished
143
+ (i.e. before the end of beat `Y-1` (`15`)), since those tables are incomplete.
144
+
145
+ At this point the input tables can be removed if they are invisible to all persistent snapshots.
146
+
147
+ ### Snapshot Queries
148
+
149
+ Each query targets a particular snapshot, either:
150
+ - the [current snapshot](#current-snapshot), or
151
+ - a [persisted snapshot](#persistent-snapshots).
152
+
153
+ #### Current Snapshot
154
+
155
+ Each tree tracks the highest snapshot safe to query from (`tree.lookup_snapshot_max`), to ensure that
156
+ an ongoing compaction's incomplete output tables are not visible. Queries targeting
157
+ `tree.lookup_snapshot_max` always read from the mutable and immutable tables — so each commit can
158
+ see all previous commits' updates.
159
+
160
+ During typical operation, the `lookup_snapshot_max` when prefetching op `S` is snapshot `S`.
161
+ The following chart depicts:
162
+ - `lookup_snapshot_max` (`$`)
163
+ - for each commit op (the left column)
164
+ - and a compaction that began at op `12` and completed at the end of op `15`.
165
+
166
+ ```
167
+ op 0 4 8 12 16 20 24 (op, snapshot)
168
+ ┼───┬───┼───┬───┼───┬───┼
169
+ 12 ····────────$───
170
+ 13 ····─────────$──
171
+ 14 ····──────────$─
172
+ 15 ····───────────$
173
+ 16 $────····
174
+ 17 ─$───····
175
+ 18 ──$──····
176
+ 19 ───$─····
177
+ ```
178
+
179
+ However, commits in the first measure following recovery from a checkpoint prefetch from a higher
180
+ snapshot to avoid querying tables that were deleted at the checkpoint.
181
+ See [`lookup_snapshot_max_for_checkpoint()`](tree.zig) for more detail.
182
+
183
+ #### Persistent Snapshots
184
+
185
+ TODO(Persistent Snapshots): Expand this section.
186
+
187
+ ### Snapshot Values
188
+
189
+ - The on-disk tables visible to a snapshot `B` do not contain the updates from the commit with op `B`.
190
+ - Rather, snapshot `B` is first visible to a prefetch from the commit with op `B`.
191
+
192
+ Consider the following diagram (`lsm_batch_multiple=8`):
193
+
194
+ ```
195
+ 0 4 8 12 16 20 24 28 (op, snapshot)
196
+ ┼───┬───┼───┬───┼───┬───┼───┬
197
+ ,,,,,,,,........
198
+ ↑A ↑B ↑C
199
+ ```
200
+
201
+ Compaction is driven by the commits of ops `B→C` (`16…23`). While these ops are being committed:
202
+ - Updates from ops `0→A` (`0…7`) are on-disk.
203
+ - Updates from ops `A→B` (`8…15`) are in the immutable table.
204
+ - These updates were moved to the immutable table from the mutable table at the end of op `B-1`
205
+ (`15`).
206
+ - These updates will exist in the immutable table until it is reset at the end of op `C-1` (`23`).
207
+ - Updates from ops `B→C` (`16…23`) are added to the mutable table (by the respective commit).
208
+ - `tree.lookup_snapshot_max` is `B` when committing op `B`.
209
+ - `tree.lookup_snapshot_max` is `x` when committing op `x` (for `x ∈ {16,17,…,23}`).
210
+
211
+ At the end of the last beat of the compaction bar (`23`):
212
+ - Updates from ops `0→B` (`0…15`) are on disk.
213
+ - Updates from ops `B→C` (`16…23`) are moved from the mutable table to the immutable table.
214
+ - `tree.lookup_snapshot_max` is `x` when committing op `x` (for `x ∈ {24,25,…}`).
@@ -2,14 +2,23 @@ const std = @import("std");
2
2
  const assert = std.debug.assert;
3
3
  const math = std.math;
4
4
 
5
+ pub const Config = struct {
6
+ verify: bool = false,
7
+ };
8
+
5
9
  // TODO Add prefeching when @prefetch is available: https://github.com/ziglang/zig/issues/3600.
6
10
  //
7
11
  // TODO The Zig self hosted compiler will implement inlining itself before passing the IR to llvm,
8
12
  // which should eliminate the current poor codegen of key_from_value/compare_keys.
9
13
 
10
- /// Returns the index of the key either exactly equal to the target key or, if there is no exact
11
- /// match, the next greatest key.
12
- /// Doesn't preform the extra key comparison to determine if the match is exact
14
+ /// Returns either the index of the first value equal to `key`,
15
+ /// or if there is no such value then the index where `key` would be inserted.
16
+ ///
17
+ /// In other words, return `i` such that both:
18
+ /// * key_from_value(values[i]) >= key or i == values.len
19
+ /// * key_from_value(values[i-1]) < key or i == 0
20
+ ///
21
+ /// Doesn't perform the extra key comparison to determine if the match is exact.
13
22
  pub fn binary_search_values_raw(
14
23
  comptime Key: type,
15
24
  comptime Value: type,
@@ -17,24 +26,57 @@ pub fn binary_search_values_raw(
17
26
  comptime compare_keys: fn (Key, Key) callconv(.Inline) math.Order,
18
27
  values: []const Value,
19
28
  key: Key,
29
+ comptime config: Config,
20
30
  ) u32 {
21
- assert(values.len > 0);
31
+ if (values.len == 0) return 0;
32
+
33
+ if (config.verify) {
34
+ // Input must be sorted by key.
35
+ for (values) |_, i| {
36
+ assert(i == 0 or
37
+ compare_keys(key_from_value(&values[i - 1]), key_from_value(&values[i])) != .gt);
38
+ }
39
+ }
22
40
 
23
41
  var offset: usize = 0;
24
42
  var length: usize = values.len;
25
43
  while (length > 1) {
44
+ if (config.verify) {
45
+ assert(offset == 0 or
46
+ compare_keys(key_from_value(&values[offset - 1]), key) != .gt);
47
+ assert(offset + length == values.len or
48
+ compare_keys(key_from_value(&values[offset + length]), key) != .lt);
49
+ }
50
+
26
51
  const half = length / 2;
27
52
  const mid = offset + half;
28
53
 
29
54
  // This trick seems to be what's needed to get llvm to emit branchless code for this,
30
- // a ternay-style if expression was generated as a jump here for whatever reason.
55
+ // a ternary-style if expression was generated as a jump here for whatever reason.
31
56
  const next_offsets = [_]usize{ offset, mid };
32
57
  offset = next_offsets[@boolToInt(compare_keys(key_from_value(&values[mid]), key) == .lt)];
33
58
 
34
59
  length -= half;
35
60
  }
36
61
 
37
- return @intCast(u32, offset + @boolToInt(compare_keys(key_from_value(&values[offset]), key) == .lt));
62
+ if (config.verify) {
63
+ assert(length == 1);
64
+ assert(offset == 0 or
65
+ compare_keys(key_from_value(&values[offset - 1]), key) != .gt);
66
+ assert(offset + length == values.len or
67
+ compare_keys(key_from_value(&values[offset + length]), key) != .lt);
68
+ }
69
+
70
+ offset += @boolToInt(compare_keys(key_from_value(&values[offset]), key) == .lt);
71
+
72
+ if (config.verify) {
73
+ assert(offset == 0 or
74
+ compare_keys(key_from_value(&values[offset - 1]), key) == .lt);
75
+ assert(offset == values.len or
76
+ compare_keys(key_from_value(&values[offset]), key) != .lt);
77
+ }
78
+
79
+ return @intCast(u32, offset);
38
80
  }
39
81
 
40
82
  pub inline fn binary_search_keys_raw(
@@ -42,6 +84,7 @@ pub inline fn binary_search_keys_raw(
42
84
  comptime compare_keys: fn (Key, Key) callconv(.Inline) math.Order,
43
85
  keys: []const Key,
44
86
  key: Key,
87
+ comptime config: Config,
45
88
  ) u32 {
46
89
  return binary_search_values_raw(
47
90
  Key,
@@ -54,6 +97,7 @@ pub inline fn binary_search_keys_raw(
54
97
  compare_keys,
55
98
  keys,
56
99
  key,
100
+ config,
57
101
  );
58
102
  }
59
103
 
@@ -69,8 +113,9 @@ pub inline fn binary_search_values(
69
113
  comptime compare_keys: fn (Key, Key) callconv(.Inline) math.Order,
70
114
  values: []const Value,
71
115
  key: Key,
116
+ comptime config: Config,
72
117
  ) BinarySearchResult {
73
- const index = binary_search_values_raw(Key, Value, key_from_value, compare_keys, values, key);
118
+ const index = binary_search_values_raw(Key, Value, key_from_value, compare_keys, values, key, config);
74
119
  return .{
75
120
  .index = index,
76
121
  .exact = index < values.len and compare_keys(key_from_value(&values[index]), key) == .eq,
@@ -82,8 +127,9 @@ pub inline fn binary_search_keys(
82
127
  comptime compare_keys: fn (Key, Key) callconv(.Inline) math.Order,
83
128
  keys: []const Key,
84
129
  key: Key,
130
+ comptime config: Config,
85
131
  ) BinarySearchResult {
86
- const index = binary_search_keys_raw(Key, compare_keys, keys, key);
132
+ const index = binary_search_keys_raw(Key, compare_keys, keys, key, config);
87
133
  return .{
88
134
  .index = index,
89
135
  .exact = index < keys.len and compare_keys(keys[index], key) == .eq,
@@ -91,6 +137,8 @@ pub inline fn binary_search_keys(
91
137
  }
92
138
 
93
139
  const test_binary_search = struct {
140
+ const fuzz = @import("../test/fuzz.zig");
141
+
94
142
  const log = false;
95
143
 
96
144
  const gpa = std.testing.allocator;
@@ -99,6 +147,10 @@ const test_binary_search = struct {
99
147
  return math.order(a, b);
100
148
  }
101
149
 
150
+ fn less_than_key(_: void, a: u32, b: u32) bool {
151
+ return a < b;
152
+ }
153
+
102
154
  fn exhaustive_search(keys_count: u32) !void {
103
155
  const keys = try gpa.alloc(u32, keys_count);
104
156
  defer gpa.free(keys);
@@ -131,6 +183,7 @@ const test_binary_search = struct {
131
183
  compare_keys,
132
184
  keys,
133
185
  target_key,
186
+ .{ .verify = true },
134
187
  );
135
188
 
136
189
  if (log) std.debug.print("expected: {}, actual: {}\n", .{ expect, actual });
@@ -159,11 +212,50 @@ const test_binary_search = struct {
159
212
  compare_keys,
160
213
  keys,
161
214
  target_key,
215
+ .{ .verify = true },
162
216
  );
163
217
  try std.testing.expectEqual(expect.index, actual.index);
164
218
  try std.testing.expectEqual(expect.exact, actual.exact);
165
219
  }
166
220
  }
221
+
222
+ fn random_search(random: std.rand.Random, iter: usize) !void {
223
+ const keys_count = @minimum(
224
+ @as(usize, 1E6),
225
+ fuzz.random_int_exponential(random, usize, iter),
226
+ );
227
+
228
+ const keys = try std.testing.allocator.alloc(u32, keys_count);
229
+ defer std.testing.allocator.free(keys);
230
+
231
+ for (keys) |*key| key.* = fuzz.random_int_exponential(random, u32, 100);
232
+ std.sort.sort(u32, keys, {}, less_than_key);
233
+ const target_key = fuzz.random_int_exponential(random, u32, 100);
234
+
235
+ var expect: BinarySearchResult = .{ .index = 0, .exact = false };
236
+ for (keys) |key, i| {
237
+ switch (compare_keys(key, target_key)) {
238
+ .lt => expect.index = @intCast(u32, i) + 1,
239
+ .eq => {
240
+ expect.exact = true;
241
+ break;
242
+ },
243
+ .gt => break,
244
+ }
245
+ }
246
+
247
+ const actual = binary_search_keys(
248
+ u32,
249
+ compare_keys,
250
+ keys,
251
+ target_key,
252
+ .{ .verify = true },
253
+ );
254
+
255
+ if (log) std.debug.print("expected: {}, actual: {}\n", .{ expect, actual });
256
+ try std.testing.expectEqual(expect.index, actual.index);
257
+ try std.testing.expectEqual(expect.exact, actual.exact);
258
+ }
167
259
  };
168
260
 
169
261
  // TODO test search on empty slice
@@ -178,11 +270,38 @@ test "binary search: exhaustive" {
178
270
  test "binary search: explicit" {
179
271
  if (test_binary_search.log) std.debug.print("\n", .{});
180
272
  try test_binary_search.explicit_search(
181
- &[_]u32{ 0, 3, 5, 8, 9, 11 },
182
- &[_]u32{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 },
273
+ &[_]u32{},
274
+ &[_]u32{0},
275
+ &[_]BinarySearchResult{
276
+ .{ .index = 0, .exact = false },
277
+ },
278
+ );
279
+ try test_binary_search.explicit_search(
280
+ &[_]u32{1},
281
+ &[_]u32{ 0, 1, 2 },
183
282
  &[_]BinarySearchResult{
283
+ .{ .index = 0, .exact = false },
184
284
  .{ .index = 0, .exact = true },
185
285
  .{ .index = 1, .exact = false },
286
+ },
287
+ );
288
+ try test_binary_search.explicit_search(
289
+ &[_]u32{ 1, 3 },
290
+ &[_]u32{ 0, 1, 2, 3, 4 },
291
+ &[_]BinarySearchResult{
292
+ .{ .index = 0, .exact = false },
293
+ .{ .index = 0, .exact = true },
294
+ .{ .index = 1, .exact = false },
295
+ .{ .index = 1, .exact = true },
296
+ .{ .index = 2, .exact = false },
297
+ },
298
+ );
299
+ try test_binary_search.explicit_search(
300
+ &[_]u32{ 1, 3, 5, 8, 9, 11 },
301
+ &[_]u32{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 },
302
+ &[_]BinarySearchResult{
303
+ .{ .index = 0, .exact = false },
304
+ .{ .index = 0, .exact = true },
186
305
  .{ .index = 1, .exact = false },
187
306
  .{ .index = 1, .exact = true },
188
307
  .{ .index = 2, .exact = false },
@@ -212,3 +331,11 @@ test "binary search: duplicates" {
212
331
  },
213
332
  );
214
333
  }
334
+
335
+ test "binary search: random" {
336
+ var rng = std.rand.DefaultPrng.init(42);
337
+ var i: usize = 0;
338
+ while (i < 2048) : (i += 1) {
339
+ try test_binary_search.random_search(rng.random(), i);
340
+ }
341
+ }
@@ -80,3 +80,46 @@ inline fn block_index(hash: u32, size: usize) u32 {
80
80
  test {
81
81
  _ = std.testing.refAllDecls(@This());
82
82
  }
83
+
84
+ const test_bloom_filter = struct {
85
+ const fuzz = @import("../test/fuzz.zig");
86
+ const block_size = @import("../config.zig").block_size;
87
+
88
+ fn random_keys(random: std.rand.Random, iter: usize) !void {
89
+ const keys_count = @minimum(
90
+ @as(usize, 1E6),
91
+ fuzz.random_int_exponential(random, usize, iter),
92
+ );
93
+
94
+ const keys = try std.testing.allocator.alloc(u32, keys_count);
95
+ defer std.testing.allocator.free(keys);
96
+
97
+ for (keys) |*key| key.* = random.int(u32);
98
+
99
+ // `block_size` is currently the only size bloom_filter that we use.
100
+ const filter = try std.testing.allocator.alloc(u8, block_size);
101
+ std.mem.set(u8, filter, 0);
102
+ defer std.testing.allocator.free(filter);
103
+
104
+ for (keys) |key| {
105
+ add(Fingerprint.create(std.mem.asBytes(&key)), filter);
106
+ }
107
+ for (keys) |key| {
108
+ try std.testing.expect(may_contain(Fingerprint.create(std.mem.asBytes(&key)), filter));
109
+ }
110
+
111
+ // TODO Test the false positive rate:
112
+ // * Calculate the expected false positive rate
113
+ // * Test with a large number of random keys.
114
+ // * Use Chernoff bound or similar to determine a reasonable test cutoff.
115
+ }
116
+ };
117
+
118
+ test "bloom filter: random" {
119
+ var rng = std.rand.DefaultPrng.init(42);
120
+ const iterations_max: usize = (1 << 12);
121
+ var iterations: usize = 0;
122
+ while (iterations < iterations_max) : (iterations += 1) {
123
+ try test_bloom_filter.random_keys(rng.random(), iterations);
124
+ }
125
+ }