tigerbeetle 0.0.36 → 0.0.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/tb_client/extconf.rb +13 -13
  4. data/ext/tb_client/tigerbeetle/LICENSE +177 -0
  5. data/ext/tb_client/tigerbeetle/build.zig +2327 -0
  6. data/ext/tb_client/tigerbeetle/src/aof.zig +1000 -0
  7. data/ext/tb_client/tigerbeetle/src/build_multiversion.zig +808 -0
  8. data/ext/tb_client/tigerbeetle/src/cdc/amqp/protocol.zig +1283 -0
  9. data/ext/tb_client/tigerbeetle/src/cdc/amqp/spec.zig +1704 -0
  10. data/ext/tb_client/tigerbeetle/src/cdc/amqp/types.zig +341 -0
  11. data/ext/tb_client/tigerbeetle/src/cdc/amqp.zig +1450 -0
  12. data/ext/tb_client/tigerbeetle/src/cdc/runner.zig +1659 -0
  13. data/ext/tb_client/tigerbeetle/src/clients/c/samples/main.c +406 -0
  14. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/context.zig +1092 -0
  15. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/echo_client.zig +286 -0
  16. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/packet.zig +158 -0
  17. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal.zig +229 -0
  18. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal_fuzz.zig +110 -0
  19. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.h +386 -0
  20. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.zig +34 -0
  21. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_exports.zig +281 -0
  22. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header.zig +312 -0
  23. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header_test.zig +138 -0
  24. data/ext/tb_client/tigerbeetle/src/clients/c/test.zig +466 -0
  25. data/ext/tb_client/tigerbeetle/src/clients/docs_samples.zig +157 -0
  26. data/ext/tb_client/tigerbeetle/src/clients/docs_types.zig +90 -0
  27. data/ext/tb_client/tigerbeetle/src/clients/dotnet/ci.zig +203 -0
  28. data/ext/tb_client/tigerbeetle/src/clients/dotnet/docs.zig +79 -0
  29. data/ext/tb_client/tigerbeetle/src/clients/dotnet/dotnet_bindings.zig +542 -0
  30. data/ext/tb_client/tigerbeetle/src/clients/go/ci.zig +109 -0
  31. data/ext/tb_client/tigerbeetle/src/clients/go/docs.zig +86 -0
  32. data/ext/tb_client/tigerbeetle/src/clients/go/go_bindings.zig +370 -0
  33. data/ext/tb_client/tigerbeetle/src/clients/go/pkg/native/tb_client.h +386 -0
  34. data/ext/tb_client/tigerbeetle/src/clients/java/ci.zig +167 -0
  35. data/ext/tb_client/tigerbeetle/src/clients/java/docs.zig +126 -0
  36. data/ext/tb_client/tigerbeetle/src/clients/java/java_bindings.zig +996 -0
  37. data/ext/tb_client/tigerbeetle/src/clients/java/src/client.zig +748 -0
  38. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni.zig +3238 -0
  39. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_tests.zig +1718 -0
  40. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_thread_cleaner.zig +190 -0
  41. data/ext/tb_client/tigerbeetle/src/clients/node/ci.zig +104 -0
  42. data/ext/tb_client/tigerbeetle/src/clients/node/docs.zig +75 -0
  43. data/ext/tb_client/tigerbeetle/src/clients/node/node.zig +522 -0
  44. data/ext/tb_client/tigerbeetle/src/clients/node/node_bindings.zig +267 -0
  45. data/ext/tb_client/tigerbeetle/src/clients/node/src/c.zig +3 -0
  46. data/ext/tb_client/tigerbeetle/src/clients/node/src/translate.zig +379 -0
  47. data/ext/tb_client/tigerbeetle/src/clients/python/ci.zig +131 -0
  48. data/ext/tb_client/tigerbeetle/src/clients/python/docs.zig +63 -0
  49. data/ext/tb_client/tigerbeetle/src/clients/python/python_bindings.zig +588 -0
  50. data/ext/tb_client/tigerbeetle/src/clients/rust/assets/tb_client.h +386 -0
  51. data/ext/tb_client/tigerbeetle/src/clients/rust/ci.zig +73 -0
  52. data/ext/tb_client/tigerbeetle/src/clients/rust/docs.zig +106 -0
  53. data/ext/tb_client/tigerbeetle/src/clients/rust/rust_bindings.zig +305 -0
  54. data/ext/tb_client/tigerbeetle/src/config.zig +296 -0
  55. data/ext/tb_client/tigerbeetle/src/constants.zig +790 -0
  56. data/ext/tb_client/tigerbeetle/src/copyhound.zig +202 -0
  57. data/ext/tb_client/tigerbeetle/src/counting_allocator.zig +72 -0
  58. data/ext/tb_client/tigerbeetle/src/direction.zig +120 -0
  59. data/ext/tb_client/tigerbeetle/src/docs_website/build.zig +158 -0
  60. data/ext/tb_client/tigerbeetle/src/docs_website/src/content.zig +156 -0
  61. data/ext/tb_client/tigerbeetle/src/docs_website/src/docs.zig +252 -0
  62. data/ext/tb_client/tigerbeetle/src/docs_website/src/file_checker.zig +313 -0
  63. data/ext/tb_client/tigerbeetle/src/docs_website/src/html.zig +87 -0
  64. data/ext/tb_client/tigerbeetle/src/docs_website/src/page_writer.zig +63 -0
  65. data/ext/tb_client/tigerbeetle/src/docs_website/src/redirects.zig +47 -0
  66. data/ext/tb_client/tigerbeetle/src/docs_website/src/search_index_writer.zig +28 -0
  67. data/ext/tb_client/tigerbeetle/src/docs_website/src/service_worker_writer.zig +61 -0
  68. data/ext/tb_client/tigerbeetle/src/docs_website/src/single_page_writer.zig +169 -0
  69. data/ext/tb_client/tigerbeetle/src/docs_website/src/website.zig +46 -0
  70. data/ext/tb_client/tigerbeetle/src/ewah.zig +445 -0
  71. data/ext/tb_client/tigerbeetle/src/ewah_benchmark.zig +128 -0
  72. data/ext/tb_client/tigerbeetle/src/ewah_fuzz.zig +171 -0
  73. data/ext/tb_client/tigerbeetle/src/fuzz_tests.zig +179 -0
  74. data/ext/tb_client/tigerbeetle/src/integration_tests.zig +662 -0
  75. data/ext/tb_client/tigerbeetle/src/io/common.zig +155 -0
  76. data/ext/tb_client/tigerbeetle/src/io/darwin.zig +1093 -0
  77. data/ext/tb_client/tigerbeetle/src/io/linux.zig +1880 -0
  78. data/ext/tb_client/tigerbeetle/src/io/test.zig +1005 -0
  79. data/ext/tb_client/tigerbeetle/src/io/windows.zig +1598 -0
  80. data/ext/tb_client/tigerbeetle/src/io.zig +34 -0
  81. data/ext/tb_client/tigerbeetle/src/iops.zig +134 -0
  82. data/ext/tb_client/tigerbeetle/src/list.zig +236 -0
  83. data/ext/tb_client/tigerbeetle/src/lsm/binary_search.zig +848 -0
  84. data/ext/tb_client/tigerbeetle/src/lsm/binary_search_benchmark.zig +179 -0
  85. data/ext/tb_client/tigerbeetle/src/lsm/cache_map.zig +424 -0
  86. data/ext/tb_client/tigerbeetle/src/lsm/cache_map_fuzz.zig +420 -0
  87. data/ext/tb_client/tigerbeetle/src/lsm/compaction.zig +2117 -0
  88. data/ext/tb_client/tigerbeetle/src/lsm/composite_key.zig +182 -0
  89. data/ext/tb_client/tigerbeetle/src/lsm/forest.zig +1119 -0
  90. data/ext/tb_client/tigerbeetle/src/lsm/forest_fuzz.zig +1102 -0
  91. data/ext/tb_client/tigerbeetle/src/lsm/forest_table_iterator.zig +200 -0
  92. data/ext/tb_client/tigerbeetle/src/lsm/groove.zig +1495 -0
  93. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge.zig +739 -0
  94. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge_benchmark.zig +166 -0
  95. data/ext/tb_client/tigerbeetle/src/lsm/manifest.zig +754 -0
  96. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level.zig +1294 -0
  97. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level_fuzz.zig +510 -0
  98. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log.zig +1263 -0
  99. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log_fuzz.zig +628 -0
  100. data/ext/tb_client/tigerbeetle/src/lsm/node_pool.zig +247 -0
  101. data/ext/tb_client/tigerbeetle/src/lsm/scan_buffer.zig +116 -0
  102. data/ext/tb_client/tigerbeetle/src/lsm/scan_builder.zig +543 -0
  103. data/ext/tb_client/tigerbeetle/src/lsm/scan_fuzz.zig +938 -0
  104. data/ext/tb_client/tigerbeetle/src/lsm/scan_lookup.zig +293 -0
  105. data/ext/tb_client/tigerbeetle/src/lsm/scan_merge.zig +359 -0
  106. data/ext/tb_client/tigerbeetle/src/lsm/scan_range.zig +99 -0
  107. data/ext/tb_client/tigerbeetle/src/lsm/scan_state.zig +17 -0
  108. data/ext/tb_client/tigerbeetle/src/lsm/scan_tree.zig +962 -0
  109. data/ext/tb_client/tigerbeetle/src/lsm/schema.zig +617 -0
  110. data/ext/tb_client/tigerbeetle/src/lsm/scratch_memory.zig +84 -0
  111. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array.zig +1500 -0
  112. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_benchmark.zig +149 -0
  113. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_fuzz.zig +7 -0
  114. data/ext/tb_client/tigerbeetle/src/lsm/set_associative_cache.zig +865 -0
  115. data/ext/tb_client/tigerbeetle/src/lsm/table.zig +607 -0
  116. data/ext/tb_client/tigerbeetle/src/lsm/table_memory.zig +843 -0
  117. data/ext/tb_client/tigerbeetle/src/lsm/table_value_iterator.zig +90 -0
  118. data/ext/tb_client/tigerbeetle/src/lsm/timestamp_range.zig +40 -0
  119. data/ext/tb_client/tigerbeetle/src/lsm/tree.zig +630 -0
  120. data/ext/tb_client/tigerbeetle/src/lsm/tree_fuzz.zig +933 -0
  121. data/ext/tb_client/tigerbeetle/src/lsm/zig_zag_merge.zig +534 -0
  122. data/ext/tb_client/tigerbeetle/src/message_buffer.zig +469 -0
  123. data/ext/tb_client/tigerbeetle/src/message_bus.zig +1214 -0
  124. data/ext/tb_client/tigerbeetle/src/message_bus_fuzz.zig +936 -0
  125. data/ext/tb_client/tigerbeetle/src/message_pool.zig +343 -0
  126. data/ext/tb_client/tigerbeetle/src/multiversion.zig +2195 -0
  127. data/ext/tb_client/tigerbeetle/src/queue.zig +390 -0
  128. data/ext/tb_client/tigerbeetle/src/repl/completion.zig +201 -0
  129. data/ext/tb_client/tigerbeetle/src/repl/parser.zig +1356 -0
  130. data/ext/tb_client/tigerbeetle/src/repl/terminal.zig +496 -0
  131. data/ext/tb_client/tigerbeetle/src/repl.zig +1034 -0
  132. data/ext/tb_client/tigerbeetle/src/scripts/amqp.zig +973 -0
  133. data/ext/tb_client/tigerbeetle/src/scripts/cfo.zig +1866 -0
  134. data/ext/tb_client/tigerbeetle/src/scripts/changelog.zig +304 -0
  135. data/ext/tb_client/tigerbeetle/src/scripts/ci.zig +227 -0
  136. data/ext/tb_client/tigerbeetle/src/scripts/client_readmes.zig +658 -0
  137. data/ext/tb_client/tigerbeetle/src/scripts/devhub.zig +466 -0
  138. data/ext/tb_client/tigerbeetle/src/scripts/release.zig +1058 -0
  139. data/ext/tb_client/tigerbeetle/src/scripts.zig +105 -0
  140. data/ext/tb_client/tigerbeetle/src/shell.zig +1195 -0
  141. data/ext/tb_client/tigerbeetle/src/stack.zig +260 -0
  142. data/ext/tb_client/tigerbeetle/src/state_machine/auditor.zig +911 -0
  143. data/ext/tb_client/tigerbeetle/src/state_machine/workload.zig +2079 -0
  144. data/ext/tb_client/tigerbeetle/src/state_machine.zig +4872 -0
  145. data/ext/tb_client/tigerbeetle/src/state_machine_fuzz.zig +288 -0
  146. data/ext/tb_client/tigerbeetle/src/state_machine_tests.zig +3128 -0
  147. data/ext/tb_client/tigerbeetle/src/static_allocator.zig +82 -0
  148. data/ext/tb_client/tigerbeetle/src/stdx/bit_set.zig +157 -0
  149. data/ext/tb_client/tigerbeetle/src/stdx/bounded_array.zig +292 -0
  150. data/ext/tb_client/tigerbeetle/src/stdx/debug.zig +65 -0
  151. data/ext/tb_client/tigerbeetle/src/stdx/flags.zig +1414 -0
  152. data/ext/tb_client/tigerbeetle/src/stdx/mlock.zig +92 -0
  153. data/ext/tb_client/tigerbeetle/src/stdx/prng.zig +677 -0
  154. data/ext/tb_client/tigerbeetle/src/stdx/radix.zig +336 -0
  155. data/ext/tb_client/tigerbeetle/src/stdx/ring_buffer.zig +511 -0
  156. data/ext/tb_client/tigerbeetle/src/stdx/sort_test.zig +112 -0
  157. data/ext/tb_client/tigerbeetle/src/stdx/stdx.zig +1160 -0
  158. data/ext/tb_client/tigerbeetle/src/stdx/testing/low_level_hash_vectors.zig +142 -0
  159. data/ext/tb_client/tigerbeetle/src/stdx/testing/snaptest.zig +361 -0
  160. data/ext/tb_client/tigerbeetle/src/stdx/time_units.zig +275 -0
  161. data/ext/tb_client/tigerbeetle/src/stdx/unshare.zig +295 -0
  162. data/ext/tb_client/tigerbeetle/src/stdx/vendored/aegis.zig +436 -0
  163. data/ext/tb_client/tigerbeetle/src/stdx/windows.zig +48 -0
  164. data/ext/tb_client/tigerbeetle/src/stdx/zipfian.zig +402 -0
  165. data/ext/tb_client/tigerbeetle/src/storage.zig +489 -0
  166. data/ext/tb_client/tigerbeetle/src/storage_fuzz.zig +180 -0
  167. data/ext/tb_client/tigerbeetle/src/testing/bench.zig +146 -0
  168. data/ext/tb_client/tigerbeetle/src/testing/cluster/grid_checker.zig +53 -0
  169. data/ext/tb_client/tigerbeetle/src/testing/cluster/journal_checker.zig +61 -0
  170. data/ext/tb_client/tigerbeetle/src/testing/cluster/manifest_checker.zig +76 -0
  171. data/ext/tb_client/tigerbeetle/src/testing/cluster/message_bus.zig +110 -0
  172. data/ext/tb_client/tigerbeetle/src/testing/cluster/network.zig +412 -0
  173. data/ext/tb_client/tigerbeetle/src/testing/cluster/state_checker.zig +331 -0
  174. data/ext/tb_client/tigerbeetle/src/testing/cluster/storage_checker.zig +458 -0
  175. data/ext/tb_client/tigerbeetle/src/testing/cluster.zig +1198 -0
  176. data/ext/tb_client/tigerbeetle/src/testing/exhaustigen.zig +128 -0
  177. data/ext/tb_client/tigerbeetle/src/testing/fixtures.zig +181 -0
  178. data/ext/tb_client/tigerbeetle/src/testing/fuzz.zig +144 -0
  179. data/ext/tb_client/tigerbeetle/src/testing/id.zig +97 -0
  180. data/ext/tb_client/tigerbeetle/src/testing/io.zig +317 -0
  181. data/ext/tb_client/tigerbeetle/src/testing/marks.zig +126 -0
  182. data/ext/tb_client/tigerbeetle/src/testing/packet_simulator.zig +533 -0
  183. data/ext/tb_client/tigerbeetle/src/testing/reply_sequence.zig +154 -0
  184. data/ext/tb_client/tigerbeetle/src/testing/state_machine.zig +389 -0
  185. data/ext/tb_client/tigerbeetle/src/testing/storage.zig +1247 -0
  186. data/ext/tb_client/tigerbeetle/src/testing/table.zig +249 -0
  187. data/ext/tb_client/tigerbeetle/src/testing/time.zig +98 -0
  188. data/ext/tb_client/tigerbeetle/src/testing/tmp_tigerbeetle.zig +212 -0
  189. data/ext/tb_client/tigerbeetle/src/testing/vortex/constants.zig +26 -0
  190. data/ext/tb_client/tigerbeetle/src/testing/vortex/faulty_network.zig +580 -0
  191. data/ext/tb_client/tigerbeetle/src/testing/vortex/java_driver/ci.zig +39 -0
  192. data/ext/tb_client/tigerbeetle/src/testing/vortex/logged_process.zig +214 -0
  193. data/ext/tb_client/tigerbeetle/src/testing/vortex/rust_driver/ci.zig +34 -0
  194. data/ext/tb_client/tigerbeetle/src/testing/vortex/supervisor.zig +766 -0
  195. data/ext/tb_client/tigerbeetle/src/testing/vortex/workload.zig +543 -0
  196. data/ext/tb_client/tigerbeetle/src/testing/vortex/zig_driver.zig +181 -0
  197. data/ext/tb_client/tigerbeetle/src/tidy.zig +1448 -0
  198. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_driver.zig +227 -0
  199. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_load.zig +1069 -0
  200. data/ext/tb_client/tigerbeetle/src/tigerbeetle/cli.zig +1422 -0
  201. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect.zig +1658 -0
  202. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect_integrity.zig +518 -0
  203. data/ext/tb_client/tigerbeetle/src/tigerbeetle/libtb_client.zig +36 -0
  204. data/ext/tb_client/tigerbeetle/src/tigerbeetle/main.zig +646 -0
  205. data/ext/tb_client/tigerbeetle/src/tigerbeetle.zig +958 -0
  206. data/ext/tb_client/tigerbeetle/src/time.zig +236 -0
  207. data/ext/tb_client/tigerbeetle/src/trace/event.zig +745 -0
  208. data/ext/tb_client/tigerbeetle/src/trace/statsd.zig +462 -0
  209. data/ext/tb_client/tigerbeetle/src/trace.zig +556 -0
  210. data/ext/tb_client/tigerbeetle/src/unit_tests.zig +321 -0
  211. data/ext/tb_client/tigerbeetle/src/vopr.zig +1785 -0
  212. data/ext/tb_client/tigerbeetle/src/vortex.zig +101 -0
  213. data/ext/tb_client/tigerbeetle/src/vsr/checkpoint_trailer.zig +473 -0
  214. data/ext/tb_client/tigerbeetle/src/vsr/checksum.zig +208 -0
  215. data/ext/tb_client/tigerbeetle/src/vsr/checksum_benchmark.zig +43 -0
  216. data/ext/tb_client/tigerbeetle/src/vsr/client.zig +768 -0
  217. data/ext/tb_client/tigerbeetle/src/vsr/client_replies.zig +532 -0
  218. data/ext/tb_client/tigerbeetle/src/vsr/client_sessions.zig +338 -0
  219. data/ext/tb_client/tigerbeetle/src/vsr/clock.zig +1019 -0
  220. data/ext/tb_client/tigerbeetle/src/vsr/fault_detector.zig +279 -0
  221. data/ext/tb_client/tigerbeetle/src/vsr/free_set.zig +1381 -0
  222. data/ext/tb_client/tigerbeetle/src/vsr/free_set_fuzz.zig +315 -0
  223. data/ext/tb_client/tigerbeetle/src/vsr/grid.zig +1460 -0
  224. data/ext/tb_client/tigerbeetle/src/vsr/grid_blocks_missing.zig +757 -0
  225. data/ext/tb_client/tigerbeetle/src/vsr/grid_scrubber.zig +797 -0
  226. data/ext/tb_client/tigerbeetle/src/vsr/journal.zig +2586 -0
  227. data/ext/tb_client/tigerbeetle/src/vsr/marzullo.zig +308 -0
  228. data/ext/tb_client/tigerbeetle/src/vsr/message_header.zig +1777 -0
  229. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch.zig +715 -0
  230. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch_fuzz.zig +185 -0
  231. data/ext/tb_client/tigerbeetle/src/vsr/repair_budget.zig +333 -0
  232. data/ext/tb_client/tigerbeetle/src/vsr/replica.zig +12355 -0
  233. data/ext/tb_client/tigerbeetle/src/vsr/replica_format.zig +416 -0
  234. data/ext/tb_client/tigerbeetle/src/vsr/replica_reformat.zig +165 -0
  235. data/ext/tb_client/tigerbeetle/src/vsr/replica_test.zig +2928 -0
  236. data/ext/tb_client/tigerbeetle/src/vsr/routing.zig +1075 -0
  237. data/ext/tb_client/tigerbeetle/src/vsr/superblock.zig +1603 -0
  238. data/ext/tb_client/tigerbeetle/src/vsr/superblock_fuzz.zig +484 -0
  239. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums.zig +405 -0
  240. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +355 -0
  241. data/ext/tb_client/tigerbeetle/src/vsr/sync.zig +29 -0
  242. data/ext/tb_client/tigerbeetle/src/vsr.zig +1727 -0
  243. data/lib/tb_client/shared_lib.rb +12 -5
  244. data/lib/tigerbeetle/platforms.rb +9 -0
  245. data/lib/tigerbeetle/version.rb +2 -2
  246. data/tigerbeetle.gemspec +22 -5
  247. metadata +242 -3
  248. data/ext/tb_client/pkg.tar.gz +0 -0
@@ -0,0 +1,402 @@
1
+ //! Zipfian-distributed random number generation.
2
+ //!
3
+ //! In the Zipfian distribution a small percentage of candidate
4
+ //! items have a high probability of being selected, while most items
5
+ //! have a very low probability of being selected.
6
+ //! It is commonly understood to model the "80-20" Pareto principle,
7
+ //! and to be a discrete version of the Pareto distribution,
8
+ //! and terminology related to both is often used interchangeably.
9
+ //!
10
+ //! Zipfian numbers follow an inverse power law, where the 1st item
11
+ //! is selected with high probability, and subsequent items
12
+ //! quickly fall off in probability. The rate of the fall off
13
+ //! is tunable by the _skew_, also called `s`, or `theta`,
14
+ //! depending on the source.
15
+ //!
16
+ //! Reference:
17
+ //!
18
+ //! - https://en.wikipedia.org/wiki/Zipf's_law#Formal_definition
19
+ //!
20
+ //! Note that it is not actually possible to select a value for
21
+ //! theta that literally follows the "80-20" rule for arbitrary set sizes;
22
+ //! the proportion of items that cumulatively make up 80% probability will
23
+ //! change as the set grows.
24
+ //! A zipfian generator that can adaptively follow the 80-20 rule is left for future work.
25
+ //!
26
+ //! In practice these probabilities often need to be spread across e.g. a
27
+ //! table's keyspace, which involves some kind of mapping step from index to index.
28
+ //! Because that mapping is non-trivial to optimize, it is also provided here.
29
+ //!
30
+ //! The algorithm here is based on
31
+ //! "Quickly Generating Billion-Record Synthetic Databases", Jim Gray et al, SIGMOD 1994.
32
+ //! Per the paper it is adapted from Knuth vol 3.
33
+ //! This is also the algorithm used by YCSB's ZipfianGenerator.java.
34
+ //! Note that the code listing in the paper contains obvious errors,
35
+ //! corrected here and in YCSB.
36
+ //!
37
+ //! There are two generators here,
38
+ //! both of which generate random keys from 0 to a specified maximum.
39
+ //! In the basic `ZipfianGenerator`, key 0 has the highest probability,
40
+ //! 1 the next highest, etc.
41
+ //! The `ZipfianShuffled` generator instead spreads the distribution out
42
+ //! across the key space as if it were a shuffled deck.
43
+ //!
44
+ //! The `ZipfianGenerator` allows the key space to grow,
45
+ //! but the `ZipfianShuffled` does not - maintaining the illusion of a shuffled
46
+ //! deck while growing the keyspace involves tradeoffs in the quality
47
+ //! of the distribution. A previous revision of `ZipfianShuffled` _was_ growable,
48
+ //! at the cost of not preserving a true Zipfian distribution for the long tail
49
+ //! of unlikely items. Dig that out of commit history if it's ever needed.
50
+ //!
51
+ //! Both should pass a 2-sample Kolmogorov–Smirnov test.
52
+
53
+ const std = @import("std");
54
+ const stdx = @import("stdx.zig");
55
+ const assert = std.debug.assert;
56
+ const Random = std.Random;
57
+ const math = std.math;
58
+ const Snap = stdx.Snap;
59
+ const module_path = "src/stdx";
60
+ const snap = Snap.snap_fn(module_path);
61
+
62
+ /// The default "skew" (`theta`) of the distribution, used by the `init` constructors.
63
+ const theta_default = 0.99; // per YCSB
64
+
65
+ /// Generates Zipfian-distributed numbers from 0 to a specified maximum.
66
+ ///
67
+ /// Many internal variables here are named the same as in the paper, which I think
68
+ /// should reduce confusion if this subject needs to be revisited; the external
69
+ /// interface is intended to be more understandable to the user and follow TigerStyle.
70
+ pub const ZipfianGenerator = struct {
71
+ theta: f64,
72
+
73
+ /// The number of items in the set.
74
+ n: u64,
75
+ /// The Riemann zeta function calculated up to `n`,
76
+ /// aka the "generalized harmonic number" of order `theta` for `n`.
77
+ /// This is a pre-calculated factor in the probability of any particular item
78
+ /// being selected.
79
+ /// It is expensive to calculate for large but useful values of `n`,
80
+ /// but can be calculated incrementally as `n` grows.
81
+ zetan: f64,
82
+
83
+ /// Create a generator from `[0, items)` with `theta` equal to `theta_default` (0.99).
84
+ pub fn init(items: u64) ZipfianGenerator {
85
+ return ZipfianGenerator.init_theta(items, theta_default);
86
+ }
87
+
88
+ /// Create a generator from `[0, items)` with given `theta`.
89
+ ///
90
+ /// `theta` is the "skew" and is usually specified to be greater than 0 and less than 1,
91
+ /// with YCSB using 0.99, though values greater than 1 also seem to generate reasonable
92
+ /// distributions. `theta = 1` isn't allowed since it does not behave reasonably.
93
+ pub fn init_theta(items: u64, theta: f64) ZipfianGenerator {
94
+ assert(theta > 0.0);
95
+ assert(theta != 1.0);
96
+ return ZipfianGenerator{
97
+ .theta = theta,
98
+ .n = items,
99
+ .zetan = zeta(items, theta),
100
+ };
101
+ }
102
+
103
+ /// Note that the variables in this function are mostly named
104
+ /// as in the reference paper and do not follow TigerStyle.
105
+ pub fn next(self: *const ZipfianGenerator, prng: *stdx.PRNG) u64 {
106
+ assert(self.n > 0);
107
+
108
+ // Math voodoo, copied from the paper,
109
+ // which doesn't explain it, but claims it is from Knuth volume 3.
110
+
111
+ // NB: These depend only on theta, n, and zetan, so they could be cached for a minor speedup.
112
+ const alpha = 1.0 / (1.0 - self.theta);
113
+ const eta = (1.0 - math.pow(
114
+ f64,
115
+ 2.0 / @as(f64, @floatFromInt(self.n)),
116
+ 1.0 - self.theta,
117
+ )) /
118
+ (1.0 - zeta(2.0, self.theta) / self.zetan);
119
+
120
+ const u = random_f64(prng);
121
+ const uz = u * self.zetan;
122
+
123
+ if (uz < 1.0) {
124
+ return 0;
125
+ }
126
+
127
+ if (uz < 1.0 + math.pow(f64, 0.5, self.theta)) {
128
+ return 1;
129
+ }
130
+
131
+ return @as(u64, @intFromFloat(
132
+ @as(f64, @floatFromInt(self.n)) *
133
+ math.pow(f64, (eta * u) - eta + 1.0, alpha),
134
+ ));
135
+ }
136
+
137
+ /// Grow the size of the random set by `new_items`, extending `zetan` incrementally.
138
+ pub fn grow(self: *ZipfianGenerator, new_items: u64) void {
139
+ const items = self.n + new_items;
140
+ const zetan_new = zeta_incremental(self.n, new_items, self.zetan, self.theta);
141
+ self.* = .{
142
+ .theta = self.theta,
143
+ .n = items,
144
+ .zetan = zetan_new,
145
+ };
146
+ }
147
+ };
148
+
149
+ /// The Riemann zeta function up to `n`, i.e. the sum of `(1 / i)^theta` for `i` in `[1, n]`,
150
+ /// aka the "generalized harmonic number" of order `theta` for `n`.
151
+ fn zeta(n: u64, theta: f64) f64 {
152
+ var i: u64 = 1;
153
+ var zeta_sum: f64 = 0.0; // Stays 0.0 when n == 0, since the loop body never runs.
154
+ while (i <= n) : (i += 1) {
155
+ zeta_sum += math.pow(f64, 1.0 / @as(f64, @floatFromInt(i)), theta);
156
+ }
157
+ return zeta_sum;
158
+ }
159
+
160
+ /// Incremental calculation of zeta: adds the terms for `n_previous + 1 ..= n_new` to `zetan_previous`.
161
+ fn zeta_incremental(
162
+ n_previous: u64,
163
+ n_additional: u64,
164
+ zetan_previous: f64,
165
+ theta: f64,
166
+ ) f64 {
167
+ const n_new = n_previous + n_additional;
168
+ var i = n_previous + 1; // First term not yet included in `zetan_previous`.
169
+ var zeta_sum = zetan_previous;
170
+ while (i <= n_new) : (i += 1) {
171
+ zeta_sum += math.pow(f64, 1.0 / @as(f64, @floatFromInt(i)), theta);
172
+ }
173
+ return zeta_sum;
174
+ }
175
+
176
+ /// Generates Zipfian-distributed numbers from 0 to maximum,
177
+ /// but the probabilities of each number are "shuffled",
178
+ /// not clustered around 0.
179
+ ///
180
+ /// This is used to simulate typical data access patterns in
181
+ /// some keyspace, where a few keys are hot and most are cold.
182
+ ///
183
+ /// This behaves as if it maintains a shuffled mapping
184
+ /// from every index to a different index. Internally, it is implemented
185
+ /// with a bijective "hash" function (modular‑multiplication permutation)
186
+ /// f(i) = (a * i) mod N
187
+ /// with gcd(a, N) = 1, so every original (Zipfian) index i
188
+ /// maps to a unique “shuffled” index without collisions.
189
+ /// Refer to PR #3070 for further details: https://github.com/tigerbeetle/tigerbeetle/pull/3070
190
+ pub const ZipfianShuffled = struct {
191
+ gen: ZipfianGenerator,
192
+ a: u64,
193
+
194
+ pub fn init(items: u64, prng: *stdx.PRNG) ZipfianShuffled {
195
+ return ZipfianShuffled.init_theta(items, theta_default, prng);
196
+ }
197
+
198
+ pub fn init_theta(items: u64, theta: f64, prng: *stdx.PRNG) ZipfianShuffled {
199
+ var zipf = ZipfianShuffled{
200
+ .gen = ZipfianGenerator.init_theta(0, theta),
201
+ .a = 0, // The real `a` is chosen in `choose_shuffle_function` (left at 0 if `items == 0`).
202
+ };
203
+
204
+ zipf.choose_shuffle_function(items, prng);
205
+
206
+ return zipf;
207
+ }
208
+
209
+ fn transform(self: *const ZipfianShuffled, zipf_standard: u64) u64 {
210
+ return @intCast((@as(u128, zipf_standard) * self.a) % self.gen.n); // u128: product can't overflow.
211
+ }
212
+
213
+ pub fn next(self: *const ZipfianShuffled, prng: *stdx.PRNG) u64 {
214
+ const zipf_standard = self.gen.next(prng);
215
+ const zipf_shuffled = self.transform(zipf_standard);
216
+ return zipf_shuffled;
217
+ }
218
+
219
+ fn choose_shuffle_function(self: *ZipfianShuffled, new_items: u64, prng: *stdx.PRNG) void {
220
+ if (new_items == 0) {
221
+ return;
222
+ }
223
+
224
+ const old_n = self.gen.n;
225
+ const new_n = old_n + new_items;
226
+
227
+ self.gen.grow(new_items);
228
+
229
+ assert(self.gen.n == new_n);
230
+
231
+ // We try to find an `a` so that it satisfies gcd(a,N) == 1.
232
+ // This allows us to generate a permutation with (a*zipf_standard) mod N.
233
+ // This permutation maps one index to another without holes, i.e. is bijective.
234
+ self.a = random_coprime(prng, self.gen.n);
235
+ }
236
+
237
+ fn random_coprime(prng: *stdx.PRNG, n: u64) u64 {
238
+ // The bound is arbitrary but should be large enough to find a number that satisfies
239
+ // the requirement (see https://en.wikipedia.org/wiki/Euler%27s_totient_function).
240
+ for (0..100_000) |_| {
241
+ const a = prng.range_inclusive(u64, 1, n);
242
+ if (std.math.gcd(a, n) == 1) {
243
+ return a;
244
+ }
245
+ } else {
246
+ @panic("Did not find a random coprime (probabilistic)");
247
+ }
248
+ }
249
+ };
250
+
251
+ /// stdx.PRNG intentionally doesn't support generating floats, to ensure determinism. For
252
+ /// benchmarking purposes, using floats is OK though, so we fall back to std implementation here.
253
+ fn random_f64(prng: *stdx.PRNG) f64 {
254
+ return std.Random.init(prng, stdx.PRNG.fill).float(f64); // `std.Random` built over `prng` with `stdx.PRNG.fill`.
255
+ }
256
+
257
+ test "zeta_incremental" {
258
+ const Case = struct {
259
+ n_start: u64,
260
+ n_incremental: u64,
261
+ theta: f64,
262
+ };
263
+ const cases = [_]Case{
264
+ .{
265
+ .n_start = 0,
266
+ .n_incremental = 10,
267
+ .theta = 0.99,
268
+ },
269
+ .{
270
+ .n_start = 0,
271
+ .n_incremental = 10,
272
+ .theta = 1.01,
273
+ },
274
+ .{
275
+ .n_start = 100,
276
+ .n_incremental = 100,
277
+ .theta = 0.99,
278
+ },
279
+ };
280
+
281
+ for (cases) |case| {
282
+ const n = case.n_start + case.n_incremental;
283
+ const zeta_expected = zeta(n, case.theta); // Computed in one pass over `[1, n]`.
284
+ const zeta_actual_start = zeta(case.n_start, case.theta);
285
+ const zeta_actual = zeta_incremental(
286
+ case.n_start,
287
+ case.n_incremental,
288
+ zeta_actual_start,
289
+ case.theta,
290
+ );
291
+ assert(zeta_expected == zeta_actual); // Exact: both paths add the same terms in the same order.
292
+ }
293
+ }
294
+
295
+ // Testing that the grow function correctly calculates zeta incrementally.
296
+ test "zipfian-grow" {
297
+ // Need to try multiple times to ensure they don't both coincidentally
298
+ // pick the likely 0 value.
299
+ var i: u64 = 10;
300
+ while (i < 100) : (i += 1) {
301
+ const expected = brk: {
302
+ var prng = stdx.PRNG.from_seed(0);
303
+ var zipf = ZipfianGenerator.init_theta(i, 0.9);
304
+ break :brk zipf.next(&prng);
305
+ };
306
+ const actual = brk: {
307
+ var prng = stdx.PRNG.from_seed(0); // Same seed as `expected`, so both see the same random draw.
308
+ var zipf = ZipfianGenerator.init_theta(1, 0.9);
309
+ zipf.grow(i - 1); // 1 + (i - 1) == i items, matching `expected`.
310
+ break :brk zipf.next(&prng);
311
+ };
312
+ assert(expected == actual);
313
+ }
314
+ }
315
+
316
+ // Test that all constructors produce the same `n` and `zetan` for the same inputs.
317
+ test "zipfian-ctors" {
318
+ var prng = stdx.PRNG.from_seed(0);
319
+
320
+ for ([_]u64{ 0, 1, 10, 999 }) |i| {
321
+ {
322
+ const zipf1 = ZipfianGenerator.init(i);
323
+ const zipf2 = ZipfianGenerator.init_theta(i, theta_default);
324
+ const szipf1 = ZipfianShuffled.init(i, &prng);
325
+ const szipf2 = ZipfianShuffled.init_theta(i, theta_default, &prng);
326
+
327
+ assert(zipf1.n == zipf2.n);
328
+ assert(zipf1.n == szipf1.gen.n);
329
+ assert(zipf1.n == szipf2.gen.n);
330
+
331
+ assert(zipf1.zetan == zipf2.zetan);
332
+ assert(zipf1.zetan == szipf1.gen.zetan);
333
+ assert(zipf1.zetan == szipf2.gen.zetan);
334
+ }
335
+
336
+ { // Same comparison with a non-default theta.
337
+ const zipf1 = ZipfianGenerator.init_theta(i, 0.89);
338
+ const szipf1 = ZipfianShuffled.init_theta(i, 0.89, &prng);
339
+
340
+ assert(zipf1.n == szipf1.gen.n);
341
+ assert(zipf1.zetan == szipf1.gen.zetan);
342
+ }
343
+ }
344
+ }
345
+
346
+ test "zipfian-distribution" {
347
+ const max_number = 10;
348
+
349
+ var prng = stdx.PRNG.from_seed(42);
350
+ const zipf = ZipfianGenerator.init(max_number);
351
+
352
+ var distribution: [max_number]u32 = @splat(0);
353
+
354
+ for (0..1000) |_| { // Draw 1000 samples and tally how often each key comes up.
355
+ const n = zipf.next(&prng);
356
+ distribution[n] += 1;
357
+ }
358
+
359
+ try snap(@src(),
360
+ \\{ 333, 170, 125, 90, 59, 61, 43, 47, 38, 34 }
361
+ ).diff_fmt("{d}", .{distribution});
362
+ }
363
+
364
+ test "shuffled-zipfian-distribution" {
365
+ const max_number = 10;
366
+
367
+ var prng = stdx.PRNG.from_seed(42);
368
+ const zipf_shuffled = ZipfianShuffled.init(max_number, &prng); // Also draws the multiplier `a` from `prng`.
369
+
370
+ var distribution: [max_number]u32 = @splat(0);
371
+
372
+ for (0..1000) |_| { // Draw 1000 samples and tally how often each shuffled key comes up.
373
+ const n = zipf_shuffled.next(&prng);
374
+ distribution[n] += 1;
375
+ }
376
+
377
+ try snap(@src(),
378
+ \\{ 333, 34, 38, 47, 43, 61, 60, 89, 125, 170 }
379
+ ).diff_fmt("{d}", .{distribution});
380
+ }
381
+
382
+ // Non-statistical smoke test of the shuffled generator's index mapping.
383
+ // For every set size in `[1, max)`, each input index must map to a distinct output index.
384
+ // The standard zipf generator is tested above; here we check that `transform` is collision-free.
385
+ test "zipfian-shuffled" {
386
+ const max = 100;
387
+ var prng = stdx.PRNG.from_seed(0);
388
+ const allocator = std.testing.allocator;
389
+ var found = try allocator.alloc(bool, max);
390
+ defer allocator.free(found);
391
+
392
+ for (1..max) |items| {
393
+ @memset(found, false);
394
+ var zipf = ZipfianShuffled.init(items, &prng);
395
+
396
+ for (0..items) |i| {
397
+ const zipf_shuffled = zipf.transform(i);
398
+ try std.testing.expect(!found[zipf_shuffled]); // A repeat would mean two inputs collided.
399
+ found[zipf_shuffled] = true;
400
+ }
401
+ }
402
+ }