tigerbeetle 0.0.36 → 0.0.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/tb_client/extconf.rb +13 -13
- data/ext/tb_client/tigerbeetle/LICENSE +177 -0
- data/ext/tb_client/tigerbeetle/build.zig +2327 -0
- data/ext/tb_client/tigerbeetle/src/aof.zig +1000 -0
- data/ext/tb_client/tigerbeetle/src/build_multiversion.zig +808 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/protocol.zig +1283 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/spec.zig +1704 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp/types.zig +341 -0
- data/ext/tb_client/tigerbeetle/src/cdc/amqp.zig +1450 -0
- data/ext/tb_client/tigerbeetle/src/cdc/runner.zig +1659 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/samples/main.c +406 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/context.zig +1092 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/echo_client.zig +286 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/packet.zig +158 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal.zig +229 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal_fuzz.zig +110 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_exports.zig +281 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header.zig +312 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header_test.zig +138 -0
- data/ext/tb_client/tigerbeetle/src/clients/c/test.zig +466 -0
- data/ext/tb_client/tigerbeetle/src/clients/docs_samples.zig +157 -0
- data/ext/tb_client/tigerbeetle/src/clients/docs_types.zig +90 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/ci.zig +203 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/docs.zig +79 -0
- data/ext/tb_client/tigerbeetle/src/clients/dotnet/dotnet_bindings.zig +542 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/ci.zig +109 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/docs.zig +86 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/go_bindings.zig +370 -0
- data/ext/tb_client/tigerbeetle/src/clients/go/pkg/native/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/ci.zig +167 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/docs.zig +126 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/java_bindings.zig +996 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/client.zig +748 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni.zig +3238 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_tests.zig +1718 -0
- data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_thread_cleaner.zig +190 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/ci.zig +104 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/docs.zig +75 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/node.zig +522 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/node_bindings.zig +267 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/src/c.zig +3 -0
- data/ext/tb_client/tigerbeetle/src/clients/node/src/translate.zig +379 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/ci.zig +131 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/docs.zig +63 -0
- data/ext/tb_client/tigerbeetle/src/clients/python/python_bindings.zig +588 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/assets/tb_client.h +386 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/ci.zig +73 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/docs.zig +106 -0
- data/ext/tb_client/tigerbeetle/src/clients/rust/rust_bindings.zig +305 -0
- data/ext/tb_client/tigerbeetle/src/config.zig +296 -0
- data/ext/tb_client/tigerbeetle/src/constants.zig +790 -0
- data/ext/tb_client/tigerbeetle/src/copyhound.zig +202 -0
- data/ext/tb_client/tigerbeetle/src/counting_allocator.zig +72 -0
- data/ext/tb_client/tigerbeetle/src/direction.zig +120 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/build.zig +158 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/content.zig +156 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/docs.zig +252 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/file_checker.zig +313 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/html.zig +87 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/page_writer.zig +63 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/redirects.zig +47 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/search_index_writer.zig +28 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/service_worker_writer.zig +61 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/single_page_writer.zig +169 -0
- data/ext/tb_client/tigerbeetle/src/docs_website/src/website.zig +46 -0
- data/ext/tb_client/tigerbeetle/src/ewah.zig +445 -0
- data/ext/tb_client/tigerbeetle/src/ewah_benchmark.zig +128 -0
- data/ext/tb_client/tigerbeetle/src/ewah_fuzz.zig +171 -0
- data/ext/tb_client/tigerbeetle/src/fuzz_tests.zig +179 -0
- data/ext/tb_client/tigerbeetle/src/integration_tests.zig +662 -0
- data/ext/tb_client/tigerbeetle/src/io/common.zig +155 -0
- data/ext/tb_client/tigerbeetle/src/io/darwin.zig +1093 -0
- data/ext/tb_client/tigerbeetle/src/io/linux.zig +1880 -0
- data/ext/tb_client/tigerbeetle/src/io/test.zig +1005 -0
- data/ext/tb_client/tigerbeetle/src/io/windows.zig +1598 -0
- data/ext/tb_client/tigerbeetle/src/io.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/iops.zig +134 -0
- data/ext/tb_client/tigerbeetle/src/list.zig +236 -0
- data/ext/tb_client/tigerbeetle/src/lsm/binary_search.zig +848 -0
- data/ext/tb_client/tigerbeetle/src/lsm/binary_search_benchmark.zig +179 -0
- data/ext/tb_client/tigerbeetle/src/lsm/cache_map.zig +424 -0
- data/ext/tb_client/tigerbeetle/src/lsm/cache_map_fuzz.zig +420 -0
- data/ext/tb_client/tigerbeetle/src/lsm/compaction.zig +2117 -0
- data/ext/tb_client/tigerbeetle/src/lsm/composite_key.zig +182 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest.zig +1119 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest_fuzz.zig +1102 -0
- data/ext/tb_client/tigerbeetle/src/lsm/forest_table_iterator.zig +200 -0
- data/ext/tb_client/tigerbeetle/src/lsm/groove.zig +1495 -0
- data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge.zig +739 -0
- data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge_benchmark.zig +166 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest.zig +754 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_level.zig +1294 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_level_fuzz.zig +510 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_log.zig +1263 -0
- data/ext/tb_client/tigerbeetle/src/lsm/manifest_log_fuzz.zig +628 -0
- data/ext/tb_client/tigerbeetle/src/lsm/node_pool.zig +247 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_buffer.zig +116 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_builder.zig +543 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_fuzz.zig +938 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_lookup.zig +293 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_merge.zig +359 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_range.zig +99 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_state.zig +17 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scan_tree.zig +962 -0
- data/ext/tb_client/tigerbeetle/src/lsm/schema.zig +617 -0
- data/ext/tb_client/tigerbeetle/src/lsm/scratch_memory.zig +84 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array.zig +1500 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_benchmark.zig +149 -0
- data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_fuzz.zig +7 -0
- data/ext/tb_client/tigerbeetle/src/lsm/set_associative_cache.zig +865 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table.zig +607 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table_memory.zig +843 -0
- data/ext/tb_client/tigerbeetle/src/lsm/table_value_iterator.zig +90 -0
- data/ext/tb_client/tigerbeetle/src/lsm/timestamp_range.zig +40 -0
- data/ext/tb_client/tigerbeetle/src/lsm/tree.zig +630 -0
- data/ext/tb_client/tigerbeetle/src/lsm/tree_fuzz.zig +933 -0
- data/ext/tb_client/tigerbeetle/src/lsm/zig_zag_merge.zig +534 -0
- data/ext/tb_client/tigerbeetle/src/message_buffer.zig +469 -0
- data/ext/tb_client/tigerbeetle/src/message_bus.zig +1214 -0
- data/ext/tb_client/tigerbeetle/src/message_bus_fuzz.zig +936 -0
- data/ext/tb_client/tigerbeetle/src/message_pool.zig +343 -0
- data/ext/tb_client/tigerbeetle/src/multiversion.zig +2195 -0
- data/ext/tb_client/tigerbeetle/src/queue.zig +390 -0
- data/ext/tb_client/tigerbeetle/src/repl/completion.zig +201 -0
- data/ext/tb_client/tigerbeetle/src/repl/parser.zig +1356 -0
- data/ext/tb_client/tigerbeetle/src/repl/terminal.zig +496 -0
- data/ext/tb_client/tigerbeetle/src/repl.zig +1034 -0
- data/ext/tb_client/tigerbeetle/src/scripts/amqp.zig +973 -0
- data/ext/tb_client/tigerbeetle/src/scripts/cfo.zig +1866 -0
- data/ext/tb_client/tigerbeetle/src/scripts/changelog.zig +304 -0
- data/ext/tb_client/tigerbeetle/src/scripts/ci.zig +227 -0
- data/ext/tb_client/tigerbeetle/src/scripts/client_readmes.zig +658 -0
- data/ext/tb_client/tigerbeetle/src/scripts/devhub.zig +466 -0
- data/ext/tb_client/tigerbeetle/src/scripts/release.zig +1058 -0
- data/ext/tb_client/tigerbeetle/src/scripts.zig +105 -0
- data/ext/tb_client/tigerbeetle/src/shell.zig +1195 -0
- data/ext/tb_client/tigerbeetle/src/stack.zig +260 -0
- data/ext/tb_client/tigerbeetle/src/state_machine/auditor.zig +911 -0
- data/ext/tb_client/tigerbeetle/src/state_machine/workload.zig +2079 -0
- data/ext/tb_client/tigerbeetle/src/state_machine.zig +4872 -0
- data/ext/tb_client/tigerbeetle/src/state_machine_fuzz.zig +288 -0
- data/ext/tb_client/tigerbeetle/src/state_machine_tests.zig +3128 -0
- data/ext/tb_client/tigerbeetle/src/static_allocator.zig +82 -0
- data/ext/tb_client/tigerbeetle/src/stdx/bit_set.zig +157 -0
- data/ext/tb_client/tigerbeetle/src/stdx/bounded_array.zig +292 -0
- data/ext/tb_client/tigerbeetle/src/stdx/debug.zig +65 -0
- data/ext/tb_client/tigerbeetle/src/stdx/flags.zig +1414 -0
- data/ext/tb_client/tigerbeetle/src/stdx/mlock.zig +92 -0
- data/ext/tb_client/tigerbeetle/src/stdx/prng.zig +677 -0
- data/ext/tb_client/tigerbeetle/src/stdx/radix.zig +336 -0
- data/ext/tb_client/tigerbeetle/src/stdx/ring_buffer.zig +511 -0
- data/ext/tb_client/tigerbeetle/src/stdx/sort_test.zig +112 -0
- data/ext/tb_client/tigerbeetle/src/stdx/stdx.zig +1160 -0
- data/ext/tb_client/tigerbeetle/src/stdx/testing/low_level_hash_vectors.zig +142 -0
- data/ext/tb_client/tigerbeetle/src/stdx/testing/snaptest.zig +361 -0
- data/ext/tb_client/tigerbeetle/src/stdx/time_units.zig +275 -0
- data/ext/tb_client/tigerbeetle/src/stdx/unshare.zig +295 -0
- data/ext/tb_client/tigerbeetle/src/stdx/vendored/aegis.zig +436 -0
- data/ext/tb_client/tigerbeetle/src/stdx/windows.zig +48 -0
- data/ext/tb_client/tigerbeetle/src/stdx/zipfian.zig +402 -0
- data/ext/tb_client/tigerbeetle/src/storage.zig +489 -0
- data/ext/tb_client/tigerbeetle/src/storage_fuzz.zig +180 -0
- data/ext/tb_client/tigerbeetle/src/testing/bench.zig +146 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/grid_checker.zig +53 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/journal_checker.zig +61 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/manifest_checker.zig +76 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/message_bus.zig +110 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/network.zig +412 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/state_checker.zig +331 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster/storage_checker.zig +458 -0
- data/ext/tb_client/tigerbeetle/src/testing/cluster.zig +1198 -0
- data/ext/tb_client/tigerbeetle/src/testing/exhaustigen.zig +128 -0
- data/ext/tb_client/tigerbeetle/src/testing/fixtures.zig +181 -0
- data/ext/tb_client/tigerbeetle/src/testing/fuzz.zig +144 -0
- data/ext/tb_client/tigerbeetle/src/testing/id.zig +97 -0
- data/ext/tb_client/tigerbeetle/src/testing/io.zig +317 -0
- data/ext/tb_client/tigerbeetle/src/testing/marks.zig +126 -0
- data/ext/tb_client/tigerbeetle/src/testing/packet_simulator.zig +533 -0
- data/ext/tb_client/tigerbeetle/src/testing/reply_sequence.zig +154 -0
- data/ext/tb_client/tigerbeetle/src/testing/state_machine.zig +389 -0
- data/ext/tb_client/tigerbeetle/src/testing/storage.zig +1247 -0
- data/ext/tb_client/tigerbeetle/src/testing/table.zig +249 -0
- data/ext/tb_client/tigerbeetle/src/testing/time.zig +98 -0
- data/ext/tb_client/tigerbeetle/src/testing/tmp_tigerbeetle.zig +212 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/constants.zig +26 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/faulty_network.zig +580 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/java_driver/ci.zig +39 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/logged_process.zig +214 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/rust_driver/ci.zig +34 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/supervisor.zig +766 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/workload.zig +543 -0
- data/ext/tb_client/tigerbeetle/src/testing/vortex/zig_driver.zig +181 -0
- data/ext/tb_client/tigerbeetle/src/tidy.zig +1448 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_driver.zig +227 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_load.zig +1069 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/cli.zig +1422 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect.zig +1658 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect_integrity.zig +518 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/libtb_client.zig +36 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle/main.zig +646 -0
- data/ext/tb_client/tigerbeetle/src/tigerbeetle.zig +958 -0
- data/ext/tb_client/tigerbeetle/src/time.zig +236 -0
- data/ext/tb_client/tigerbeetle/src/trace/event.zig +745 -0
- data/ext/tb_client/tigerbeetle/src/trace/statsd.zig +462 -0
- data/ext/tb_client/tigerbeetle/src/trace.zig +556 -0
- data/ext/tb_client/tigerbeetle/src/unit_tests.zig +321 -0
- data/ext/tb_client/tigerbeetle/src/vopr.zig +1785 -0
- data/ext/tb_client/tigerbeetle/src/vortex.zig +101 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checkpoint_trailer.zig +473 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checksum.zig +208 -0
- data/ext/tb_client/tigerbeetle/src/vsr/checksum_benchmark.zig +43 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client.zig +768 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client_replies.zig +532 -0
- data/ext/tb_client/tigerbeetle/src/vsr/client_sessions.zig +338 -0
- data/ext/tb_client/tigerbeetle/src/vsr/clock.zig +1019 -0
- data/ext/tb_client/tigerbeetle/src/vsr/fault_detector.zig +279 -0
- data/ext/tb_client/tigerbeetle/src/vsr/free_set.zig +1381 -0
- data/ext/tb_client/tigerbeetle/src/vsr/free_set_fuzz.zig +315 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid.zig +1460 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid_blocks_missing.zig +757 -0
- data/ext/tb_client/tigerbeetle/src/vsr/grid_scrubber.zig +797 -0
- data/ext/tb_client/tigerbeetle/src/vsr/journal.zig +2586 -0
- data/ext/tb_client/tigerbeetle/src/vsr/marzullo.zig +308 -0
- data/ext/tb_client/tigerbeetle/src/vsr/message_header.zig +1777 -0
- data/ext/tb_client/tigerbeetle/src/vsr/multi_batch.zig +715 -0
- data/ext/tb_client/tigerbeetle/src/vsr/multi_batch_fuzz.zig +185 -0
- data/ext/tb_client/tigerbeetle/src/vsr/repair_budget.zig +333 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica.zig +12355 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_format.zig +416 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_reformat.zig +165 -0
- data/ext/tb_client/tigerbeetle/src/vsr/replica_test.zig +2928 -0
- data/ext/tb_client/tigerbeetle/src/vsr/routing.zig +1075 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock.zig +1603 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_fuzz.zig +484 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums.zig +405 -0
- data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +355 -0
- data/ext/tb_client/tigerbeetle/src/vsr/sync.zig +29 -0
- data/ext/tb_client/tigerbeetle/src/vsr.zig +1727 -0
- data/lib/tb_client/shared_lib.rb +12 -5
- data/lib/tigerbeetle/platforms.rb +9 -0
- data/lib/tigerbeetle/version.rb +2 -2
- data/tigerbeetle.gemspec +22 -5
- metadata +242 -3
- data/ext/tb_client/pkg.tar.gz +0 -0
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
//! Zipfian-distributed random number generation.
|
|
2
|
+
//!
|
|
3
|
+
//! In the Zipfian distribution a small percentage of candidate
|
|
4
|
+
//! items have a high probability of being selected, while most items
|
|
5
|
+
//! have a very low probability of being selected.
|
|
6
|
+
//! It is commonly understood to model the "80-20" Pareto principle,
|
|
7
|
+
//! and to be a discrete version of the Pareto distribution,
|
|
8
|
+
//! and terminology related to both is often used interchangeably.
|
|
9
|
+
//!
|
|
10
|
+
//! Zipfian numbers follow an inverse power law, where the 1st item
|
|
11
|
+
//! is selected with high probability, and subsequent items
|
|
12
|
+
//! quickly fall off in probability. The rate of the fall off
|
|
13
|
+
//! is tunable by the _skew_, also called `s`, or `theta`,
|
|
14
|
+
//! depending on the source.
|
|
15
|
+
//!
|
|
16
|
+
//! Reference:
|
|
17
|
+
//!
|
|
18
|
+
//! - https://en.wikipedia.org/wiki/Zipf's_law#Formal_definition
|
|
19
|
+
//!
|
|
20
|
+
//! Note that it is not actually possible to select a value for
|
|
21
|
+
//! theta that literally follows the "80-20" rule for arbitrary set sizes;
|
|
22
|
+
//! the proportion of items that cumulatively make up 80% probability will
|
|
23
|
+
//! change as the set grows.
|
|
24
|
+
//! A zipfian generator that can adaptively follow the 80-20 rule is left for future work.
|
|
25
|
+
//!
|
|
26
|
+
//! In practice these probabilities often need to be spread across e.g. a
|
|
27
|
+
//! table's keyspace, which involves some kind of mapping step from index to index.
|
|
28
|
+
//! Because that mapping is non-trivial to optimize, it is also provided here.
|
|
29
|
+
//!
|
|
30
|
+
//! The algorithm here is based on
|
|
31
|
+
//! "Quickly Generating Billion-Record Synthetic Databases", Jim Gray et al, SIGMOD 1994.
|
|
32
|
+
//! Per the paper it is adapted from Knuth vol 3.
|
|
33
|
+
//! This is also the algorithm used by YCSB's ZipfianGenerator.java.
|
|
34
|
+
//! Note that the code listing in the paper contains obvious errors,
|
|
35
|
+
//! corrected here and in YCSB.
|
|
36
|
+
//!
|
|
37
|
+
//! There are two generators here,
|
|
38
|
+
//! both of which generate random keys from 0 to a specified maximum.
|
|
39
|
+
//! In the basic `ZipfianGenerator`, key 0 has the highest probability,
|
|
40
|
+
//! 1 the next highest, etc.
|
|
41
|
+
//! The `ZipfianShuffled` generator instead spreads the distribution out
|
|
42
|
+
//! across the key space as if it were a shuffled deck.
|
|
43
|
+
//!
|
|
44
|
+
//! The `ZipfianGenerator` allows the key space to grow,
|
|
45
|
+
//! but the `ZipfianShuffled` does not - maintaining the illusion of a shuffled
|
|
46
|
+
//! deck while growing the keyspace involves tradeoffs in the quality
|
|
47
|
+
//! of the distribution. A previous revision of `ZipfianShuffled` _was_ growable,
|
|
48
|
+
//! at the cost of not preserving a true Zipfian distribution for the long tail
|
|
49
|
+
//! of unlikely items. Dig that out of commit history if it's ever needed.
|
|
50
|
+
//!
|
|
51
|
+
//! Both should pass a 2-sample Kolmogorov–Smirnov test.
|
|
52
|
+
|
|
53
|
+
const std = @import("std");
|
|
54
|
+
const stdx = @import("stdx.zig");
|
|
55
|
+
const assert = std.debug.assert;
|
|
56
|
+
const Random = std.Random;
|
|
57
|
+
const math = std.math;
|
|
58
|
+
const Snap = stdx.Snap;
|
|
59
|
+
const module_path = "src/stdx";
|
|
60
|
+
const snap = Snap.snap_fn(module_path);
|
|
61
|
+
|
|
62
|
+
/// The default "skew" of the distribution.
|
|
63
|
+
const theta_default = 0.99; // per YCSB
|
|
64
|
+
|
|
65
|
+
/// Generates Zipfian-distributed numbers from 0 to a specified maximum.
///
/// Many internal variables here are the same as in the paper, which I think
/// should reduce confusion if this subject needs to be revisited; the external
/// interface is intended to be more understandable to the user and follow TigerStyle.
pub const ZipfianGenerator = struct {
    /// The "skew" of the distribution. `init_theta` asserts it is positive and not 1.
    theta: f64,

    /// The number of items in the set.
    n: u64,
    /// The Riemann zeta function calculated up to `n`,
    /// aka the "generalized harmonic number" of order `theta` for `n`.
    /// This is a pre-calculated factor in the probability of any particular item
    /// being selected.
    /// It is expensive to calculate for large but useful values of `n`,
    /// but can be calculated incrementally as `n` grows.
    zetan: f64,

    /// Create a generator from `[0, items)` with `theta` equal to 0.99.
    pub fn init(items: u64) ZipfianGenerator {
        return ZipfianGenerator.init_theta(items, theta_default);
    }

    /// Create a generator from `[0, items)` with given `theta`.
    ///
    /// `theta` is the "skew" and is usually specified to be greater than 0 and less than 1,
    /// with YCSB using 0.99, though values greater than 1 also seem to generate reasonable
    /// distributions. `theta = 1` isn't allowed since it does not behave reasonably.
    ///
    /// `items` may be 0 (the set can be grown later with `grow`), but `next` asserts
    /// that the set is non-empty.
    pub fn init_theta(items: u64, theta: f64) ZipfianGenerator {
        assert(theta > 0.0);
        assert(theta != 1.0);
        return ZipfianGenerator{
            .theta = theta,
            .n = items,
            .zetan = zeta(items, theta),
        };
    }

    /// Draw the next index; the result is always in `[0, n)`.
    ///
    /// Note that the variables in this function are mostly named
    /// as in the reference paper and do not follow TigerStyle.
    pub fn next(self: *const ZipfianGenerator, prng: *stdx.PRNG) u64 {
        assert(self.n > 0);

        // Math voodoo, copied from the paper,
        // which doesn't explain it, but claims it is from Knuth volume 3.

        // NB: These depend only on zetan and could be cached for a minor speedup.
        const n_float: f64 = @floatFromInt(self.n);
        const alpha = 1.0 / (1.0 - self.theta);
        const eta = (1.0 - math.pow(
            f64,
            2.0 / n_float,
            1.0 - self.theta,
        )) /
            (1.0 - zeta(2, self.theta) / self.zetan);

        const u = random_f64(prng);
        const uz = u * self.zetan;

        // The two most likely items are decided directly from the cumulative weights
        // 1 and 1 + 0.5^theta.
        if (uz < 1.0) {
            return 0;
        }

        if (uz < 1.0 + math.pow(f64, 0.5, self.theta)) {
            return 1;
        }

        const index: u64 = @intFromFloat(
            n_float * math.pow(f64, (eta * u) - eta + 1.0, alpha),
        );

        // Floating-point rounding can make the power term evaluate to exactly 1.0 when `u`
        // is extremely close to 1, which would yield `n`, one past the documented
        // `[0, items)` range. Clamp so that callers can always index with the result.
        return @min(index, self.n - 1);
    }

    /// Grow the size of the random set by `new_items`,
    /// extending `zetan` incrementally instead of recomputing it from scratch.
    pub fn grow(self: *ZipfianGenerator, new_items: u64) void {
        const items = self.n + new_items;
        const zetan_new = zeta_incremental(self.n, new_items, self.zetan, self.theta);
        self.* = .{
            .theta = self.theta,
            .n = items,
            .zetan = zetan_new,
        };
    }
};
|
|
148
|
+
|
|
149
|
+
/// Partial sum of the Riemann zeta series: the sum of `(1 / i)^theta` for `i` in `1..=n`,
/// aka the "generalized harmonic number" of order `theta` for `n`.
/// Returns 0 when `n` is 0.
fn zeta(n: u64, theta: f64) f64 {
    var sum: f64 = 0.0;
    var term: u64 = 0;
    while (term < n) {
        term += 1;
        const reciprocal = 1.0 / @as(f64, @floatFromInt(term));
        sum += math.pow(f64, reciprocal, theta);
    }
    return sum;
}
|
|
159
|
+
|
|
160
|
+
/// Extends a previously computed zeta partial sum.
///
/// Given `zetan_previous`, the sum over terms `1..=n_previous`, adds the terms
/// `n_previous + 1 ..= n_previous + n_additional` and returns the new total.
/// Returns `zetan_previous` unchanged when `n_additional` is 0.
fn zeta_incremental(
    n_previous: u64,
    n_additional: u64,
    zetan_previous: f64,
    theta: f64,
) f64 {
    const term_last = n_previous + n_additional;
    var sum = zetan_previous;
    var term = n_previous;
    while (term < term_last) {
        term += 1;
        const reciprocal = 1.0 / @as(f64, @floatFromInt(term));
        sum += math.pow(f64, reciprocal, theta);
    }
    return sum;
}
|
|
175
|
+
|
|
176
|
+
/// Generates Zipfian-distributed numbers from 0 to maximum,
/// but the probabilities of each number are "shuffled",
/// not clustered around 0.
///
/// This is used to simulate typical data access patterns in
/// some keyspace, where a few keys are hot and most are cold.
///
/// This behaves as if it maintains a shuffled mapping
/// from every index to another index. Internally, it is implemented
/// with a bijective "hash" function (modular-multiplication permutation)
///     f(i) = (a * i) mod N
/// with gcd(a, N) = 1, so every original (Zipfian) index i
/// maps to a unique "shuffled" index without collisions.
/// Note that f(0) = 0 for every `a`, so index 0 always maps to itself.
/// Refer to PR #3070 for further details: https://github.com/tigerbeetle/tigerbeetle/pull/3070
pub const ZipfianShuffled = struct {
    /// The underlying (unshuffled) Zipfian generator; `gen.n` is the modulus N.
    gen: ZipfianGenerator,
    /// The multiplier of the permutation, chosen coprime to `gen.n`.
    a: u64,

    /// Create a shuffled generator over `[0, items)` with the default `theta`.
    pub fn init(items: u64, prng: *stdx.PRNG) ZipfianShuffled {
        return ZipfianShuffled.init_theta(items, theta_default, prng);
    }

    /// Create a shuffled generator over `[0, items)` with the given `theta`.
    /// `prng` is used to pick the multiplier `a`.
    pub fn init_theta(items: u64, theta: f64, prng: *stdx.PRNG) ZipfianShuffled {
        var zipf = ZipfianShuffled{
            .gen = ZipfianGenerator.init_theta(0, theta),
            .a = 0, // Placeholder: the real multiplier is picked in choose_shuffle_function.
        };

        zipf.choose_shuffle_function(items, prng);

        return zipf;
    }

    /// Map an unshuffled index to its shuffled position: `(zipf_standard * a) mod n`.
    fn transform(self: *const ZipfianShuffled, zipf_standard: u64) u64 {
        // Both factors can be nearly as large as `n`, so the product does not fit in a u64
        // once `n` exceeds 2^32. Multiply in u128; the remainder is below `n` and therefore
        // always fits back into a u64.
        const product = @as(u128, zipf_standard) * @as(u128, self.a);
        return @intCast(product % self.gen.n);
    }

    /// Draw the next Zipfian index and return its shuffled position.
    pub fn next(self: *const ZipfianShuffled, prng: *stdx.PRNG) u64 {
        const zipf_standard = self.gen.next(prng);
        const zipf_shuffled = self.transform(zipf_standard);
        return zipf_shuffled;
    }

    /// Grow the underlying generator by `new_items` and pick a fresh multiplier `a`
    /// for the new set size. Does nothing when `new_items` is 0.
    fn choose_shuffle_function(self: *ZipfianShuffled, new_items: u64, prng: *stdx.PRNG) void {
        if (new_items == 0) {
            return;
        }

        const old_n = self.gen.n;
        const new_n = old_n + new_items;

        self.gen.grow(new_items);

        assert(self.gen.n == new_n);

        // We try to find an `a` so that it satisfies gcd(a,N) == 1.
        // This allows us to generate a permutation with (a*zipf_standard) mod N.
        // This permutation maps one index to another without holes, i.e. is bijective.
        self.a = random_coprime(prng, self.gen.n);
    }

    /// Return a random number in `[1, n]` that is coprime to `n`.
    /// Panics if none is found within the attempt bound.
    fn random_coprime(prng: *stdx.PRNG, n: u64) u64 {
        // The bound is arbitrary but should be large enough to find a number that satisfies
        // the requirement (see https://en.wikipedia.org/wiki/Euler%27s_totient_function).
        for (0..100_000) |_| {
            const a = prng.range_inclusive(u64, 1, n);
            if (std.math.gcd(a, n) == 1) {
                return a;
            }
        } else {
            @panic("Did not find a random coprime (probabilistic)");
        }
    }
};
|
|
250
|
+
|
|
251
|
+
/// Draws an `f64` from `prng` by wrapping it in the `std.Random` interface.
///
/// stdx.PRNG deliberately has no float support, to ensure determinism. Floats are
/// acceptable for benchmarking purposes though, so this falls back to the std implementation.
fn random_f64(prng: *stdx.PRNG) f64 {
    const random = std.Random.init(prng, stdx.PRNG.fill);
    return random.float(f64);
}
|
|
256
|
+
|
|
257
|
+
// Checks that extending a zeta partial sum with `zeta_incremental` gives exactly the same
// value as computing the whole sum with `zeta` in one go.
test "zeta_incremental" {
    const Case = struct {
        n_start: u64,
        n_incremental: u64,
        theta: f64,
    };
    const cases = [_]Case{
        .{ .n_start = 0, .n_incremental = 10, .theta = 0.99 },
        .{ .n_start = 0, .n_incremental = 10, .theta = 1.01 },
        .{ .n_start = 100, .n_incremental = 100, .theta = 0.99 },
    };

    for (cases) |case| {
        const n = case.n_start + case.n_incremental;
        const zeta_expected = zeta(n, case.theta);
        const zeta_actual_start = zeta(case.n_start, case.theta);
        const zeta_actual = zeta_incremental(
            case.n_start,
            case.n_incremental,
            zeta_actual_start,
            case.theta,
        );
        // Exact equality is intended: both paths add the same terms in the same order.
        // expectEqual reports both values on failure instead of aborting the test runner.
        try std.testing.expectEqual(zeta_expected, zeta_actual);
    }
}
|
|
294
|
+
|
|
295
|
+
// Testing that the grow function correctly calculates zeta incrementally:
// a generator created with `i` items and one created with 1 item and grown by `i - 1`
// must produce the same first value from identically seeded PRNGs.
test "zipfian-grow" {
    // Need to try multiple times to ensure they don't both coincidentally
    // pick the likely 0 value.
    var i: u64 = 10;
    while (i < 100) : (i += 1) {
        const expected = brk: {
            var prng = stdx.PRNG.from_seed(0);
            var zipf = ZipfianGenerator.init_theta(i, 0.9);
            break :brk zipf.next(&prng);
        };
        const actual = brk: {
            var prng = stdx.PRNG.from_seed(0);
            var zipf = ZipfianGenerator.init_theta(1, 0.9);
            zipf.grow(i - 1);
            break :brk zipf.next(&prng);
        };
        // expectEqual reports both values on failure instead of aborting the test runner.
        try std.testing.expectEqual(expected, actual);
    }
}
|
|
315
|
+
|
|
316
|
+
// Test that ctors are all doing the same thing: for each set size, every constructor
// must end up with the same `n` and the same `zetan`.
test "zipfian-ctors" {
    var prng = stdx.PRNG.from_seed(0);

    for ([_]u64{ 0, 1, 10, 999 }) |i| {
        {
            // Default theta, via both the shorthand and the explicit constructors.
            const zipf1 = ZipfianGenerator.init(i);
            const zipf2 = ZipfianGenerator.init_theta(i, theta_default);
            const szipf1 = ZipfianShuffled.init(i, &prng);
            const szipf2 = ZipfianShuffled.init_theta(i, theta_default, &prng);

            try std.testing.expectEqual(zipf1.n, zipf2.n);
            try std.testing.expectEqual(zipf1.n, szipf1.gen.n);
            try std.testing.expectEqual(zipf1.n, szipf2.gen.n);

            try std.testing.expectEqual(zipf1.zetan, zipf2.zetan);
            try std.testing.expectEqual(zipf1.zetan, szipf1.gen.zetan);
            try std.testing.expectEqual(zipf1.zetan, szipf2.gen.zetan);
        }

        {
            // Non-default theta.
            const zipf1 = ZipfianGenerator.init_theta(i, 0.89);
            const szipf1 = ZipfianShuffled.init_theta(i, 0.89, &prng);

            try std.testing.expectEqual(zipf1.n, szipf1.gen.n);
            try std.testing.expectEqual(zipf1.zetan, szipf1.gen.zetan);
        }
    }
}
|
|
345
|
+
|
|
346
|
+
// Snapshot of the histogram produced by 1000 draws from a 10-item `ZipfianGenerator`
// seeded with 42.
test "zipfian-distribution" {
    const key_count = 10;
    const sample_count = 1000;

    var prng = stdx.PRNG.from_seed(42);
    const generator = ZipfianGenerator.init(key_count);

    var histogram: [key_count]u32 = @splat(0);

    var sample: u32 = 0;
    while (sample < sample_count) : (sample += 1) {
        const key = generator.next(&prng);
        histogram[key] += 1;
    }

    try snap(@src(),
        \\{ 333, 170, 125, 90, 59, 61, 43, 47, 38, 34 }
    ).diff_fmt("{d}", .{histogram});
}
|
|
363
|
+
|
|
364
|
+
// Snapshot of the histogram produced by 1000 draws from a 10-item `ZipfianShuffled`
// seeded with 42; the same PRNG first picks the shuffle multiplier, then drives the draws.
test "shuffled-zipfian-distribution" {
    const key_count = 10;
    const sample_count = 1000;

    var prng = stdx.PRNG.from_seed(42);
    const generator = ZipfianShuffled.init(key_count, &prng);

    var histogram: [key_count]u32 = @splat(0);

    var sample: u32 = 0;
    while (sample < sample_count) : (sample += 1) {
        const key = generator.next(&prng);
        histogram[key] += 1;
    }

    try snap(@src(),
        \\{ 333, 34, 38, 47, 43, 61, 60, 89, 125, 170 }
    ).diff_fmt("{d}", .{histogram});
}
|
|
381
|
+
|
|
382
|
+
// Non-statistical smoke test of the shuffled mapping.
// For every set size in [1, 100) it checks that `transform` sends the indexes
// [0, items) to pairwise distinct targets, i.e. that the mapping is a permutation.
// The standard zipf generator is tested separately; only the mapping is tested here.
test "zipfian-shuffled" {
    const items_limit = 100;
    var prng = stdx.PRNG.from_seed(0);
    const gpa = std.testing.allocator;
    const seen = try gpa.alloc(bool, items_limit);
    defer gpa.free(seen);

    var items: usize = 1;
    while (items < items_limit) : (items += 1) {
        @memset(seen, false);
        const shuffled = ZipfianShuffled.init(items, &prng);

        var index: usize = 0;
        while (index < items) : (index += 1) {
            const target = shuffled.transform(index);
            // A target that was already seen would mean two indexes collide.
            try std.testing.expect(!seen[target]);
            seen[target] = true;
        }
    }
}
|