tigerbeetle 0.0.34 → 0.0.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/ext/tb_client/extconf.rb +13 -13
  4. data/ext/tb_client/tigerbeetle/LICENSE +177 -0
  5. data/ext/tb_client/tigerbeetle/build.zig +2327 -0
  6. data/ext/tb_client/tigerbeetle/src/aof.zig +1000 -0
  7. data/ext/tb_client/tigerbeetle/src/build_multiversion.zig +808 -0
  8. data/ext/tb_client/tigerbeetle/src/cdc/amqp/protocol.zig +1283 -0
  9. data/ext/tb_client/tigerbeetle/src/cdc/amqp/spec.zig +1704 -0
  10. data/ext/tb_client/tigerbeetle/src/cdc/amqp/types.zig +341 -0
  11. data/ext/tb_client/tigerbeetle/src/cdc/amqp.zig +1450 -0
  12. data/ext/tb_client/tigerbeetle/src/cdc/runner.zig +1659 -0
  13. data/ext/tb_client/tigerbeetle/src/clients/c/samples/main.c +406 -0
  14. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/context.zig +1084 -0
  15. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/echo_client.zig +286 -0
  16. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/packet.zig +158 -0
  17. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal.zig +229 -0
  18. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client/signal_fuzz.zig +110 -0
  19. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.h +386 -0
  20. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client.zig +34 -0
  21. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_exports.zig +281 -0
  22. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header.zig +312 -0
  23. data/ext/tb_client/tigerbeetle/src/clients/c/tb_client_header_test.zig +138 -0
  24. data/ext/tb_client/tigerbeetle/src/clients/c/test.zig +466 -0
  25. data/ext/tb_client/tigerbeetle/src/clients/docs_samples.zig +157 -0
  26. data/ext/tb_client/tigerbeetle/src/clients/docs_types.zig +90 -0
  27. data/ext/tb_client/tigerbeetle/src/clients/dotnet/ci.zig +203 -0
  28. data/ext/tb_client/tigerbeetle/src/clients/dotnet/docs.zig +79 -0
  29. data/ext/tb_client/tigerbeetle/src/clients/dotnet/dotnet_bindings.zig +542 -0
  30. data/ext/tb_client/tigerbeetle/src/clients/go/ci.zig +109 -0
  31. data/ext/tb_client/tigerbeetle/src/clients/go/docs.zig +86 -0
  32. data/ext/tb_client/tigerbeetle/src/clients/go/go_bindings.zig +370 -0
  33. data/ext/tb_client/tigerbeetle/src/clients/go/pkg/native/tb_client.h +386 -0
  34. data/ext/tb_client/tigerbeetle/src/clients/java/ci.zig +167 -0
  35. data/ext/tb_client/tigerbeetle/src/clients/java/docs.zig +126 -0
  36. data/ext/tb_client/tigerbeetle/src/clients/java/java_bindings.zig +996 -0
  37. data/ext/tb_client/tigerbeetle/src/clients/java/src/client.zig +748 -0
  38. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni.zig +3238 -0
  39. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_tests.zig +1718 -0
  40. data/ext/tb_client/tigerbeetle/src/clients/java/src/jni_thread_cleaner.zig +190 -0
  41. data/ext/tb_client/tigerbeetle/src/clients/node/ci.zig +104 -0
  42. data/ext/tb_client/tigerbeetle/src/clients/node/docs.zig +75 -0
  43. data/ext/tb_client/tigerbeetle/src/clients/node/node.zig +522 -0
  44. data/ext/tb_client/tigerbeetle/src/clients/node/node_bindings.zig +267 -0
  45. data/ext/tb_client/tigerbeetle/src/clients/node/src/c.zig +3 -0
  46. data/ext/tb_client/tigerbeetle/src/clients/node/src/translate.zig +379 -0
  47. data/ext/tb_client/tigerbeetle/src/clients/python/ci.zig +131 -0
  48. data/ext/tb_client/tigerbeetle/src/clients/python/docs.zig +63 -0
  49. data/ext/tb_client/tigerbeetle/src/clients/python/python_bindings.zig +588 -0
  50. data/ext/tb_client/tigerbeetle/src/clients/rust/assets/tb_client.h +386 -0
  51. data/ext/tb_client/tigerbeetle/src/clients/rust/ci.zig +73 -0
  52. data/ext/tb_client/tigerbeetle/src/clients/rust/docs.zig +106 -0
  53. data/ext/tb_client/tigerbeetle/src/clients/rust/rust_bindings.zig +305 -0
  54. data/ext/tb_client/tigerbeetle/src/config.zig +296 -0
  55. data/ext/tb_client/tigerbeetle/src/constants.zig +790 -0
  56. data/ext/tb_client/tigerbeetle/src/copyhound.zig +202 -0
  57. data/ext/tb_client/tigerbeetle/src/counting_allocator.zig +72 -0
  58. data/ext/tb_client/tigerbeetle/src/direction.zig +11 -0
  59. data/ext/tb_client/tigerbeetle/src/docs_website/build.zig +158 -0
  60. data/ext/tb_client/tigerbeetle/src/docs_website/src/content.zig +156 -0
  61. data/ext/tb_client/tigerbeetle/src/docs_website/src/docs.zig +252 -0
  62. data/ext/tb_client/tigerbeetle/src/docs_website/src/file_checker.zig +313 -0
  63. data/ext/tb_client/tigerbeetle/src/docs_website/src/html.zig +87 -0
  64. data/ext/tb_client/tigerbeetle/src/docs_website/src/page_writer.zig +63 -0
  65. data/ext/tb_client/tigerbeetle/src/docs_website/src/redirects.zig +47 -0
  66. data/ext/tb_client/tigerbeetle/src/docs_website/src/search_index_writer.zig +28 -0
  67. data/ext/tb_client/tigerbeetle/src/docs_website/src/service_worker_writer.zig +61 -0
  68. data/ext/tb_client/tigerbeetle/src/docs_website/src/single_page_writer.zig +169 -0
  69. data/ext/tb_client/tigerbeetle/src/docs_website/src/website.zig +46 -0
  70. data/ext/tb_client/tigerbeetle/src/ewah.zig +445 -0
  71. data/ext/tb_client/tigerbeetle/src/ewah_benchmark.zig +128 -0
  72. data/ext/tb_client/tigerbeetle/src/ewah_fuzz.zig +171 -0
  73. data/ext/tb_client/tigerbeetle/src/fuzz_tests.zig +179 -0
  74. data/ext/tb_client/tigerbeetle/src/integration_tests.zig +662 -0
  75. data/ext/tb_client/tigerbeetle/src/io/common.zig +155 -0
  76. data/ext/tb_client/tigerbeetle/src/io/darwin.zig +1093 -0
  77. data/ext/tb_client/tigerbeetle/src/io/linux.zig +1880 -0
  78. data/ext/tb_client/tigerbeetle/src/io/test.zig +1005 -0
  79. data/ext/tb_client/tigerbeetle/src/io/windows.zig +1598 -0
  80. data/ext/tb_client/tigerbeetle/src/io.zig +34 -0
  81. data/ext/tb_client/tigerbeetle/src/iops.zig +134 -0
  82. data/ext/tb_client/tigerbeetle/src/list.zig +236 -0
  83. data/ext/tb_client/tigerbeetle/src/lsm/binary_search.zig +848 -0
  84. data/ext/tb_client/tigerbeetle/src/lsm/binary_search_benchmark.zig +179 -0
  85. data/ext/tb_client/tigerbeetle/src/lsm/cache_map.zig +424 -0
  86. data/ext/tb_client/tigerbeetle/src/lsm/cache_map_fuzz.zig +420 -0
  87. data/ext/tb_client/tigerbeetle/src/lsm/compaction.zig +2117 -0
  88. data/ext/tb_client/tigerbeetle/src/lsm/composite_key.zig +182 -0
  89. data/ext/tb_client/tigerbeetle/src/lsm/forest.zig +1119 -0
  90. data/ext/tb_client/tigerbeetle/src/lsm/forest_fuzz.zig +1102 -0
  91. data/ext/tb_client/tigerbeetle/src/lsm/forest_table_iterator.zig +200 -0
  92. data/ext/tb_client/tigerbeetle/src/lsm/groove.zig +1495 -0
  93. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge.zig +739 -0
  94. data/ext/tb_client/tigerbeetle/src/lsm/k_way_merge_benchmark.zig +166 -0
  95. data/ext/tb_client/tigerbeetle/src/lsm/manifest.zig +754 -0
  96. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level.zig +1294 -0
  97. data/ext/tb_client/tigerbeetle/src/lsm/manifest_level_fuzz.zig +510 -0
  98. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log.zig +1263 -0
  99. data/ext/tb_client/tigerbeetle/src/lsm/manifest_log_fuzz.zig +628 -0
  100. data/ext/tb_client/tigerbeetle/src/lsm/node_pool.zig +247 -0
  101. data/ext/tb_client/tigerbeetle/src/lsm/scan_buffer.zig +116 -0
  102. data/ext/tb_client/tigerbeetle/src/lsm/scan_builder.zig +543 -0
  103. data/ext/tb_client/tigerbeetle/src/lsm/scan_fuzz.zig +938 -0
  104. data/ext/tb_client/tigerbeetle/src/lsm/scan_lookup.zig +293 -0
  105. data/ext/tb_client/tigerbeetle/src/lsm/scan_merge.zig +362 -0
  106. data/ext/tb_client/tigerbeetle/src/lsm/scan_range.zig +99 -0
  107. data/ext/tb_client/tigerbeetle/src/lsm/scan_state.zig +17 -0
  108. data/ext/tb_client/tigerbeetle/src/lsm/scan_tree.zig +1036 -0
  109. data/ext/tb_client/tigerbeetle/src/lsm/schema.zig +617 -0
  110. data/ext/tb_client/tigerbeetle/src/lsm/scratch_memory.zig +84 -0
  111. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array.zig +1500 -0
  112. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_benchmark.zig +149 -0
  113. data/ext/tb_client/tigerbeetle/src/lsm/segmented_array_fuzz.zig +7 -0
  114. data/ext/tb_client/tigerbeetle/src/lsm/set_associative_cache.zig +865 -0
  115. data/ext/tb_client/tigerbeetle/src/lsm/table.zig +607 -0
  116. data/ext/tb_client/tigerbeetle/src/lsm/table_memory.zig +843 -0
  117. data/ext/tb_client/tigerbeetle/src/lsm/table_value_iterator.zig +105 -0
  118. data/ext/tb_client/tigerbeetle/src/lsm/timestamp_range.zig +40 -0
  119. data/ext/tb_client/tigerbeetle/src/lsm/tree.zig +630 -0
  120. data/ext/tb_client/tigerbeetle/src/lsm/tree_fuzz.zig +933 -0
  121. data/ext/tb_client/tigerbeetle/src/lsm/zig_zag_merge.zig +557 -0
  122. data/ext/tb_client/tigerbeetle/src/message_buffer.zig +469 -0
  123. data/ext/tb_client/tigerbeetle/src/message_bus.zig +1214 -0
  124. data/ext/tb_client/tigerbeetle/src/message_bus_fuzz.zig +936 -0
  125. data/ext/tb_client/tigerbeetle/src/message_pool.zig +343 -0
  126. data/ext/tb_client/tigerbeetle/src/multiversion.zig +2195 -0
  127. data/ext/tb_client/tigerbeetle/src/queue.zig +390 -0
  128. data/ext/tb_client/tigerbeetle/src/repl/completion.zig +201 -0
  129. data/ext/tb_client/tigerbeetle/src/repl/parser.zig +1356 -0
  130. data/ext/tb_client/tigerbeetle/src/repl/terminal.zig +496 -0
  131. data/ext/tb_client/tigerbeetle/src/repl.zig +1034 -0
  132. data/ext/tb_client/tigerbeetle/src/scripts/amqp.zig +973 -0
  133. data/ext/tb_client/tigerbeetle/src/scripts/cfo.zig +1866 -0
  134. data/ext/tb_client/tigerbeetle/src/scripts/changelog.zig +304 -0
  135. data/ext/tb_client/tigerbeetle/src/scripts/ci.zig +227 -0
  136. data/ext/tb_client/tigerbeetle/src/scripts/client_readmes.zig +658 -0
  137. data/ext/tb_client/tigerbeetle/src/scripts/devhub.zig +466 -0
  138. data/ext/tb_client/tigerbeetle/src/scripts/release.zig +1058 -0
  139. data/ext/tb_client/tigerbeetle/src/scripts.zig +105 -0
  140. data/ext/tb_client/tigerbeetle/src/shell.zig +1195 -0
  141. data/ext/tb_client/tigerbeetle/src/stack.zig +260 -0
  142. data/ext/tb_client/tigerbeetle/src/state_machine/auditor.zig +911 -0
  143. data/ext/tb_client/tigerbeetle/src/state_machine/workload.zig +2079 -0
  144. data/ext/tb_client/tigerbeetle/src/state_machine.zig +4872 -0
  145. data/ext/tb_client/tigerbeetle/src/state_machine_fuzz.zig +288 -0
  146. data/ext/tb_client/tigerbeetle/src/state_machine_tests.zig +3128 -0
  147. data/ext/tb_client/tigerbeetle/src/static_allocator.zig +82 -0
  148. data/ext/tb_client/tigerbeetle/src/stdx/bit_set.zig +157 -0
  149. data/ext/tb_client/tigerbeetle/src/stdx/bounded_array.zig +292 -0
  150. data/ext/tb_client/tigerbeetle/src/stdx/debug.zig +65 -0
  151. data/ext/tb_client/tigerbeetle/src/stdx/flags.zig +1414 -0
  152. data/ext/tb_client/tigerbeetle/src/stdx/mlock.zig +92 -0
  153. data/ext/tb_client/tigerbeetle/src/stdx/prng.zig +677 -0
  154. data/ext/tb_client/tigerbeetle/src/stdx/radix.zig +336 -0
  155. data/ext/tb_client/tigerbeetle/src/stdx/ring_buffer.zig +511 -0
  156. data/ext/tb_client/tigerbeetle/src/stdx/sort_test.zig +112 -0
  157. data/ext/tb_client/tigerbeetle/src/stdx/stdx.zig +1160 -0
  158. data/ext/tb_client/tigerbeetle/src/stdx/testing/low_level_hash_vectors.zig +142 -0
  159. data/ext/tb_client/tigerbeetle/src/stdx/testing/snaptest.zig +361 -0
  160. data/ext/tb_client/tigerbeetle/src/stdx/time_units.zig +275 -0
  161. data/ext/tb_client/tigerbeetle/src/stdx/unshare.zig +295 -0
  162. data/ext/tb_client/tigerbeetle/src/stdx/vendored/aegis.zig +436 -0
  163. data/ext/tb_client/tigerbeetle/src/stdx/windows.zig +48 -0
  164. data/ext/tb_client/tigerbeetle/src/stdx/zipfian.zig +402 -0
  165. data/ext/tb_client/tigerbeetle/src/storage.zig +489 -0
  166. data/ext/tb_client/tigerbeetle/src/storage_fuzz.zig +180 -0
  167. data/ext/tb_client/tigerbeetle/src/testing/bench.zig +146 -0
  168. data/ext/tb_client/tigerbeetle/src/testing/cluster/grid_checker.zig +53 -0
  169. data/ext/tb_client/tigerbeetle/src/testing/cluster/journal_checker.zig +61 -0
  170. data/ext/tb_client/tigerbeetle/src/testing/cluster/manifest_checker.zig +76 -0
  171. data/ext/tb_client/tigerbeetle/src/testing/cluster/message_bus.zig +110 -0
  172. data/ext/tb_client/tigerbeetle/src/testing/cluster/network.zig +412 -0
  173. data/ext/tb_client/tigerbeetle/src/testing/cluster/state_checker.zig +331 -0
  174. data/ext/tb_client/tigerbeetle/src/testing/cluster/storage_checker.zig +458 -0
  175. data/ext/tb_client/tigerbeetle/src/testing/cluster.zig +1198 -0
  176. data/ext/tb_client/tigerbeetle/src/testing/exhaustigen.zig +128 -0
  177. data/ext/tb_client/tigerbeetle/src/testing/fixtures.zig +181 -0
  178. data/ext/tb_client/tigerbeetle/src/testing/fuzz.zig +144 -0
  179. data/ext/tb_client/tigerbeetle/src/testing/id.zig +97 -0
  180. data/ext/tb_client/tigerbeetle/src/testing/io.zig +317 -0
  181. data/ext/tb_client/tigerbeetle/src/testing/marks.zig +126 -0
  182. data/ext/tb_client/tigerbeetle/src/testing/packet_simulator.zig +533 -0
  183. data/ext/tb_client/tigerbeetle/src/testing/reply_sequence.zig +154 -0
  184. data/ext/tb_client/tigerbeetle/src/testing/state_machine.zig +389 -0
  185. data/ext/tb_client/tigerbeetle/src/testing/storage.zig +1247 -0
  186. data/ext/tb_client/tigerbeetle/src/testing/table.zig +249 -0
  187. data/ext/tb_client/tigerbeetle/src/testing/time.zig +98 -0
  188. data/ext/tb_client/tigerbeetle/src/testing/tmp_tigerbeetle.zig +212 -0
  189. data/ext/tb_client/tigerbeetle/src/testing/vortex/constants.zig +26 -0
  190. data/ext/tb_client/tigerbeetle/src/testing/vortex/faulty_network.zig +580 -0
  191. data/ext/tb_client/tigerbeetle/src/testing/vortex/java_driver/ci.zig +39 -0
  192. data/ext/tb_client/tigerbeetle/src/testing/vortex/logged_process.zig +214 -0
  193. data/ext/tb_client/tigerbeetle/src/testing/vortex/rust_driver/ci.zig +34 -0
  194. data/ext/tb_client/tigerbeetle/src/testing/vortex/supervisor.zig +766 -0
  195. data/ext/tb_client/tigerbeetle/src/testing/vortex/workload.zig +543 -0
  196. data/ext/tb_client/tigerbeetle/src/testing/vortex/zig_driver.zig +181 -0
  197. data/ext/tb_client/tigerbeetle/src/tidy.zig +1448 -0
  198. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_driver.zig +227 -0
  199. data/ext/tb_client/tigerbeetle/src/tigerbeetle/benchmark_load.zig +1069 -0
  200. data/ext/tb_client/tigerbeetle/src/tigerbeetle/cli.zig +1422 -0
  201. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect.zig +1658 -0
  202. data/ext/tb_client/tigerbeetle/src/tigerbeetle/inspect_integrity.zig +518 -0
  203. data/ext/tb_client/tigerbeetle/src/tigerbeetle/libtb_client.zig +36 -0
  204. data/ext/tb_client/tigerbeetle/src/tigerbeetle/main.zig +646 -0
  205. data/ext/tb_client/tigerbeetle/src/tigerbeetle.zig +958 -0
  206. data/ext/tb_client/tigerbeetle/src/time.zig +236 -0
  207. data/ext/tb_client/tigerbeetle/src/trace/event.zig +745 -0
  208. data/ext/tb_client/tigerbeetle/src/trace/statsd.zig +462 -0
  209. data/ext/tb_client/tigerbeetle/src/trace.zig +556 -0
  210. data/ext/tb_client/tigerbeetle/src/unit_tests.zig +321 -0
  211. data/ext/tb_client/tigerbeetle/src/vopr.zig +1785 -0
  212. data/ext/tb_client/tigerbeetle/src/vortex.zig +101 -0
  213. data/ext/tb_client/tigerbeetle/src/vsr/checkpoint_trailer.zig +473 -0
  214. data/ext/tb_client/tigerbeetle/src/vsr/checksum.zig +208 -0
  215. data/ext/tb_client/tigerbeetle/src/vsr/checksum_benchmark.zig +43 -0
  216. data/ext/tb_client/tigerbeetle/src/vsr/client.zig +768 -0
  217. data/ext/tb_client/tigerbeetle/src/vsr/client_replies.zig +532 -0
  218. data/ext/tb_client/tigerbeetle/src/vsr/client_sessions.zig +338 -0
  219. data/ext/tb_client/tigerbeetle/src/vsr/clock.zig +1019 -0
  220. data/ext/tb_client/tigerbeetle/src/vsr/fault_detector.zig +279 -0
  221. data/ext/tb_client/tigerbeetle/src/vsr/free_set.zig +1381 -0
  222. data/ext/tb_client/tigerbeetle/src/vsr/free_set_fuzz.zig +315 -0
  223. data/ext/tb_client/tigerbeetle/src/vsr/grid.zig +1460 -0
  224. data/ext/tb_client/tigerbeetle/src/vsr/grid_blocks_missing.zig +757 -0
  225. data/ext/tb_client/tigerbeetle/src/vsr/grid_scrubber.zig +797 -0
  226. data/ext/tb_client/tigerbeetle/src/vsr/journal.zig +2586 -0
  227. data/ext/tb_client/tigerbeetle/src/vsr/marzullo.zig +308 -0
  228. data/ext/tb_client/tigerbeetle/src/vsr/message_header.zig +1777 -0
  229. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch.zig +715 -0
  230. data/ext/tb_client/tigerbeetle/src/vsr/multi_batch_fuzz.zig +185 -0
  231. data/ext/tb_client/tigerbeetle/src/vsr/repair_budget.zig +333 -0
  232. data/ext/tb_client/tigerbeetle/src/vsr/replica.zig +12355 -0
  233. data/ext/tb_client/tigerbeetle/src/vsr/replica_format.zig +416 -0
  234. data/ext/tb_client/tigerbeetle/src/vsr/replica_reformat.zig +165 -0
  235. data/ext/tb_client/tigerbeetle/src/vsr/replica_test.zig +2910 -0
  236. data/ext/tb_client/tigerbeetle/src/vsr/routing.zig +1075 -0
  237. data/ext/tb_client/tigerbeetle/src/vsr/superblock.zig +1603 -0
  238. data/ext/tb_client/tigerbeetle/src/vsr/superblock_fuzz.zig +484 -0
  239. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums.zig +405 -0
  240. data/ext/tb_client/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +355 -0
  241. data/ext/tb_client/tigerbeetle/src/vsr/sync.zig +29 -0
  242. data/ext/tb_client/tigerbeetle/src/vsr.zig +1727 -0
  243. data/lib/tb_client/shared_lib.rb +12 -5
  244. data/lib/tigerbeetle/client.rb +1 -1
  245. data/lib/tigerbeetle/platforms.rb +9 -0
  246. data/lib/tigerbeetle/version.rb +2 -2
  247. data/tigerbeetle.gemspec +22 -5
  248. metadata +242 -3
  249. data/ext/tb_client/pkg.tar.gz +0 -0
@@ -0,0 +1,445 @@
1
+ const std = @import("std");
2
+ const assert = std.debug.assert;
3
+ const math = std.math;
4
+ const mem = std.mem;
5
+ const stdx = @import("stdx");
6
+ const div_ceil = stdx.div_ceil;
7
+ const disjoint_slices = stdx.disjoint_slices;
8
+ const maybe = stdx.maybe;
9
+
10
+ const constants = @import("constants.zig");
11
+
12
+ /// Encode or decode a bitset using Daniel Lemire's EWAH codec.
13
+ /// ("Histogram-Aware Sorting for Enhanced Word-Aligned Compression in Bitmap Indexes")
14
+ ///
15
+ /// EWAH uses only two types of words, where the first type is a 64-bit verbatim ("literal") word.
16
+ /// The second type of word is a marker word:
17
+ /// * The first bit indicates which uniform word will follow.
18
+ /// * The next 31 bits are used to store the number of uniform words.
19
+ /// * The last 32 bits are used to store the number of literal words following the uniform words.
20
+ /// EWAH bitmaps begin with a marker word. A 'marker' looks like (assuming a 64-bit word):
21
+ ///
22
+ /// [uniform_bit:u1][uniform_word_count:u31(LE)][literal_word_count:u32(LE)]
23
+ ///
24
+ /// and is immediately followed by `literal_word_count` 64-bit literals.
25
+ /// When decoding a marker, the uniform words precede the literal words.
26
+ ///
27
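+ /// This encoding requires a little-endian architecture. The layout above assumes 64-bit words; other unsigned word sizes split the marker the same way (1 bit, bits/2 - 1 bits, bits/2 bits).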
28
+ pub fn ewah(comptime Word: type) type {
29
+ const word_bits = @bitSizeOf(Word);
30
+
31
+ return struct {
32
+ const marker_uniform_word_count_max = (1 << ((word_bits / 2) - 1)) - 1;
33
+ const marker_literal_word_count_max = (1 << (word_bits / 2)) - 1;
34
+
35
+ pub const MarkerUniformCount = std.meta.Int(.unsigned, word_bits / 2 - 1); // Word=u64 → u31
36
+ pub const MarkerLiteralCount = std.meta.Int(.unsigned, word_bits / 2); // Word=u64 → u32
37
+
38
+ pub const Marker = packed struct(Word) {
39
+ // Whether the uniform word is all 0s or all 1s.
40
+ uniform_bit: u1,
41
+ // 31-bit number of uniform words following the marker.
42
+ uniform_word_count: MarkerUniformCount,
43
+ // 32-bit number of literal words following the uniform words.
44
+ literal_word_count: MarkerLiteralCount,
45
+ };
46
+
47
+ comptime {
48
+ assert(@import("builtin").target.cpu.arch.endian() == std.builtin.Endian.little);
49
+ assert(@typeInfo(Word).int.signedness == .unsigned);
50
+ assert(word_bits % 8 == 0); // A multiple of a byte, so that words can be cast to bytes.
51
+ assert(@bitSizeOf(Marker) == word_bits);
52
+ assert(@sizeOf(Marker) == @sizeOf(Word));
53
+
54
+ assert(@bitSizeOf(MarkerUniformCount) % 2 == 1);
55
+ assert(math.maxInt(MarkerUniformCount) == marker_uniform_word_count_max);
56
+
57
+ assert(@bitSizeOf(MarkerLiteralCount) % 2 == 0);
58
+ assert(math.maxInt(MarkerLiteralCount) == marker_literal_word_count_max);
59
+ }
60
+
61
+ inline fn marker_word(mark: Marker) Word {
62
+ return @bitCast(mark);
63
+ }
64
+
65
+ pub const Decoder = struct {
66
+ /// The number of bytes of the source buffer (the encoded data) that still need to be
67
+ /// processed.
68
+ source_size_remaining: usize,
69
+ target_words: []Word,
70
+ target_index: usize = 0,
71
+ source_literal_words: usize = 0,
72
+
73
+ /// Returns the number of *words* written to `target_words` by this invocation.
74
+ // TODO Refactor to return an error when `source_chunk` is invalid,
75
+ // so that we can test invalid encodings.
76
+ pub fn decode_chunk(
77
+ decoder: *Decoder,
78
+ source_chunk: []align(@alignOf(Word)) const u8,
79
+ ) usize {
80
+ assert(source_chunk.len % @sizeOf(Word) == 0);
81
+
82
+ decoder.source_size_remaining -= source_chunk.len;
83
+
84
+ const source_words = mem.bytesAsSlice(Word, source_chunk);
85
+ const target_words = decoder.target_words;
86
+ assert(disjoint_slices(u8, Word, source_chunk, target_words));
87
+
88
+ var source_index: usize = 0;
89
+ var target_index: usize = decoder.target_index;
90
+ defer decoder.target_index = target_index;
91
+
92
+ if (decoder.source_literal_words > 0) {
93
+ const literal_word_count_chunk =
94
+ @min(decoder.source_literal_words, source_words.len);
95
+
96
+ stdx.copy_disjoint(
97
+ .exact,
98
+ Word,
99
+ target_words[target_index..][0..literal_word_count_chunk],
100
+ source_words[source_index..][0..literal_word_count_chunk],
101
+ );
102
+ source_index += literal_word_count_chunk;
103
+ target_index += literal_word_count_chunk;
104
+ decoder.source_literal_words -= literal_word_count_chunk;
105
+ }
106
+
107
+ while (source_index < source_words.len) {
108
+ assert(decoder.source_literal_words == 0);
109
+
110
+ const marker: *const Marker = @ptrCast(&source_words[source_index]);
111
+ source_index += 1;
112
+ @memset(
113
+ target_words[target_index..][0..marker.uniform_word_count],
114
+ if (marker.uniform_bit == 1) ~@as(Word, 0) else 0,
115
+ );
116
+ target_index += marker.uniform_word_count;
117
+
118
+ const literal_word_count_chunk =
119
+ @min(marker.literal_word_count, source_words.len - source_index);
120
+ stdx.copy_disjoint(
121
+ .exact,
122
+ Word,
123
+ target_words[target_index..][0..literal_word_count_chunk],
124
+ source_words[source_index..][0..literal_word_count_chunk],
125
+ );
126
+ source_index += literal_word_count_chunk;
127
+ target_index += literal_word_count_chunk;
128
+ decoder.source_literal_words =
129
+ marker.literal_word_count - literal_word_count_chunk;
130
+ }
131
+ assert(source_index <= source_words.len);
132
+ assert(target_index <= target_words.len);
133
+
134
+ return target_index - decoder.target_index;
135
+ }
136
+
137
+ pub fn done(decoder: *const Decoder) bool {
138
+ assert(decoder.target_index <= decoder.target_words.len);
139
+
140
+ if (decoder.source_size_remaining == 0) {
141
+ assert(decoder.source_literal_words == 0);
142
+ return true;
143
+ } else {
144
+ maybe(decoder.source_literal_words == 0);
145
+ return false;
146
+ }
147
+ }
148
+ };
149
+
150
+ pub fn decode_chunks(target_words: []Word, source_size: usize) Decoder {
151
+ return .{
152
+ .target_words = target_words,
153
+ .source_size_remaining = source_size,
154
+ };
155
+ }
156
+
157
+ // (This is a helper for testing only.)
158
+ /// Decodes the compressed bitset in `source` into `target_words`.
159
+ /// Returns the number of *words* written to `target_words`.
160
+ pub fn decode_all(source: []align(@alignOf(Word)) const u8, target_words: []Word) usize {
161
+ comptime assert(constants.verify);
162
+ assert(source.len % @sizeOf(Word) == 0);
163
+ assert(disjoint_slices(u8, Word, source, target_words));
164
+
165
+ var decoder = decode_chunks(target_words, source.len);
166
+ return decoder.decode_chunk(source);
167
+ }
168
+
169
+ pub const Encoder = struct {
170
+ source_words: []const Word,
171
+ source_index: usize = 0,
172
+ /// The number of literals left over from the previous `encode_chunk()` call that still need to
173
+ /// be copied.
174
+ literal_word_count: usize = 0,
175
+
176
+ trailing_zero_runs_count: usize = 0,
177
+
178
+ /// Returns the number of bytes written to `target_chunk` by this invocation.
179
+ pub fn encode_chunk(encoder: *Encoder, target_chunk: []align(@alignOf(Word)) u8) usize {
180
+ const source_words = encoder.source_words;
181
+ assert(disjoint_slices(Word, u8, source_words, target_chunk));
182
+ assert(encoder.source_index <= encoder.source_words.len);
183
+ assert(encoder.literal_word_count <= encoder.source_words.len);
184
+
185
+ const target_words = mem.bytesAsSlice(Word, target_chunk);
186
+ @memset(target_words, 0);
187
+
188
+ var target_index: usize = 0;
189
+ var source_index: usize = encoder.source_index;
190
+
191
+ if (encoder.literal_word_count > 0) {
192
+ maybe(encoder.source_index == 0);
193
+
194
+ const literal_word_count_chunk =
195
+ @min(encoder.literal_word_count, target_words.len);
196
+
197
+ stdx.copy_disjoint(
198
+ .exact,
199
+ Word,
200
+ target_words[target_index..][0..literal_word_count_chunk],
201
+ source_words[source_index..][0..literal_word_count_chunk],
202
+ );
203
+
204
+ source_index += literal_word_count_chunk;
205
+ target_index += literal_word_count_chunk;
206
+ encoder.literal_word_count -= literal_word_count_chunk;
207
+ }
208
+
209
+ while (source_index < source_words.len and target_index < target_words.len) {
210
+ assert(encoder.literal_word_count == 0);
211
+
212
+ const word = source_words[source_index];
213
+
214
+ const uniform_word_count = count: {
215
+ if (is_literal(word)) break :count 0;
216
+ // Measure run length.
217
+ const uniform_max = @min(
218
+ source_words.len - source_index,
219
+ marker_uniform_word_count_max,
220
+ );
221
+ for (source_words[source_index..][0..uniform_max], 0..) |w, i| {
222
+ if (w != word) break :count i;
223
+ }
224
+ break :count uniform_max;
225
+ };
226
+ source_index += uniform_word_count;
227
+ // For consistent encoding, set the run/uniform bit to 0 when there is no run.
228
+ const uniform_bit: u1 =
229
+ if (uniform_word_count == 0) 0 else @intCast(word & 1);
230
+
231
+ const literal_word_count = count: {
232
+ // Count sequential literals that immediately follow the run.
233
+ const literals_max = @min(
234
+ source_words.len - source_index,
235
+ marker_literal_word_count_max,
236
+ );
237
+ for (source_words[source_index..][0..literals_max], 0..) |w, i| {
238
+ if (!is_literal(w)) break :count i;
239
+ }
240
+ break :count literals_max;
241
+ };
242
+
243
+ target_words[target_index] = marker_word(.{
244
+ .uniform_bit = uniform_bit,
245
+ .uniform_word_count = @intCast(uniform_word_count),
246
+ .literal_word_count = @intCast(literal_word_count),
247
+ });
248
+ target_index += 1;
249
+
250
+ const literal_word_count_chunk =
251
+ @min(literal_word_count, target_words.len - target_index);
252
+ stdx.copy_disjoint(
253
+ .exact,
254
+ Word,
255
+ target_words[target_index..][0..literal_word_count_chunk],
256
+ source_words[source_index..][0..literal_word_count_chunk],
257
+ );
258
+ source_index += literal_word_count_chunk;
259
+ target_index += literal_word_count_chunk;
260
+
261
+ encoder.literal_word_count = literal_word_count - literal_word_count_chunk;
262
+
263
+ if (uniform_bit == 0 and literal_word_count == 0) {
264
+ assert(uniform_word_count > 0);
265
+ encoder.trailing_zero_runs_count += 1;
266
+ } else {
267
+ encoder.trailing_zero_runs_count = 0;
268
+ }
269
+ }
270
+ assert(source_index <= source_words.len);
271
+
272
+ encoder.source_index = source_index;
273
+ return target_index * @sizeOf(Word);
274
+ }
275
+
276
+ pub fn done(encoder: *const Encoder) bool {
277
+ assert(encoder.source_index <= encoder.source_words.len);
278
+ return encoder.source_index == encoder.source_words.len;
279
+ }
280
+ };
281
+
282
+ pub fn encode_chunks(source_words: []const Word) Encoder {
283
+ return .{ .source_words = source_words };
284
+ }
285
+
286
+ // (This is a helper for testing only.)
287
+ /// Returns the number of bytes written to `target`.
288
+ pub fn encode_all(source_words: []const Word, target: []align(@alignOf(Word)) u8) usize {
289
+ comptime assert(constants.verify);
290
+ assert(target.len == encode_size_max(source_words.len));
291
+ assert(disjoint_slices(Word, u8, source_words, target));
292
+
293
+ var encoder = encode_chunks(source_words);
294
+ defer assert(encoder.done());
295
+
296
+ return encoder.encode_chunk(target);
297
+ }
298
+
299
+ /// Returns the maximum number of bytes required to encode `word_count` words.
300
+ /// Assumes (pessimistically) that every word will be encoded as a literal.
301
+ pub fn encode_size_max(word_count: usize) usize {
302
+ const marker_count = div_ceil(word_count, marker_literal_word_count_max);
303
+ return marker_count * @sizeOf(Marker) + word_count * @sizeOf(Word);
304
+ }
305
+
306
+ inline fn is_literal(word: Word) bool {
307
+ return word != 0 and word != ~@as(Word, 0);
308
+ }
309
+ };
310
+ }
311
+
312
+ test "ewah encode→decode cycle" {
313
+ const fuzz = @import("./ewah_fuzz.zig");
314
+ var prng = stdx.PRNG.from_seed_testing();
315
+
316
+ inline for (.{ u8, u16, u32, u64, usize }) |Word| {
317
+ const Context = fuzz.ContextType(Word);
318
+ for ([_]usize{ 1, 2, 4, 5, 8, 16, 17, 32 }) |chunk_count| {
319
+ var decoded: [4096]Word = undefined;
320
+ const fuzz_options: Context.TestOptions = .{
321
+ .encode_chunk_words_count = @divFloor(decoded.len, chunk_count),
322
+ .decode_chunk_words_count = @divFloor(decoded.len, chunk_count),
323
+ };
324
+
325
+ @memset(&decoded, 0);
326
+ try fuzz.fuzz_encode_decode(Word, std.testing.allocator, &decoded, fuzz_options);
327
+
328
+ @memset(&decoded, std.math.maxInt(Word));
329
+ try fuzz.fuzz_encode_decode(Word, std.testing.allocator, &decoded, fuzz_options);
330
+
331
+ prng.fill(std.mem.asBytes(&decoded));
332
+ try fuzz.fuzz_encode_decode(Word, std.testing.allocator, &decoded, fuzz_options);
333
+ }
334
+ }
335
+ }
336
+
337
+ test "ewah Word=u8" {
338
+ try test_decode_with_word(u8);
339
+
340
+ const codec = ewah(u8);
341
+ for (0..math.maxInt(codec.MarkerUniformCount) + 1) |uniform_word_count| {
342
+ try test_decode(u8, &.{
343
+ codec.marker_word(.{
344
+ .uniform_bit = 0,
345
+ .uniform_word_count = @intCast(uniform_word_count),
346
+ .literal_word_count = 3,
347
+ }),
348
+ 12,
349
+ 34,
350
+ 56,
351
+ });
352
+ }
353
+
354
+ try std.testing.expectEqual(codec.encode_size_max(0), 0);
355
+ try std.testing.expectEqual(codec.encode_all(&.{}, &.{}), 0);
356
+ }
357
+
358
+ test "ewah Word=u16" {
359
+ try test_decode_with_word(u16);
360
+ }
361
+
362
+ // decode → encode → decode
363
+ fn test_decode_with_word(comptime Word: type) !void {
364
+ const codec = ewah(Word);
365
+
366
+ // No set bits.
367
+ try test_decode(Word, &.{});
368
+ // Alternating runs, no literals.
369
+ try test_decode(Word, &.{
370
+ codec.marker_word(.{ .uniform_bit = 0, .uniform_word_count = 2, .literal_word_count = 0 }),
371
+ codec.marker_word(.{ .uniform_bit = 1, .uniform_word_count = 3, .literal_word_count = 0 }),
372
+ codec.marker_word(.{ .uniform_bit = 0, .uniform_word_count = 4, .literal_word_count = 0 }),
373
+ });
374
+ // Alternating runs, with literals.
375
+ try test_decode(Word, &.{
376
+ codec.marker_word(.{ .uniform_bit = 0, .uniform_word_count = 2, .literal_word_count = 1 }),
377
+ 12,
378
+ codec.marker_word(.{ .uniform_bit = 1, .uniform_word_count = 3, .literal_word_count = 1 }),
379
+ 34,
380
+ codec.marker_word(.{ .uniform_bit = 0, .uniform_word_count = 4, .literal_word_count = 1 }),
381
+ 56,
382
+ });
383
+ // Consecutive run marker overflow.
384
+ try test_decode(Word, &.{
385
+ codec.marker_word(.{
386
+ .uniform_bit = 0,
387
+ .uniform_word_count = math.maxInt(codec.MarkerUniformCount),
388
+ .literal_word_count = 0,
389
+ }),
390
+ codec.marker_word(.{ .uniform_bit = 0, .uniform_word_count = 2, .literal_word_count = 0 }),
391
+ });
392
+
393
+ var encoding = std.ArrayList(Word).init(std.testing.allocator);
394
+ defer encoding.deinit();
395
+
396
+ {
397
+ // Consecutive literal marker overflow.
398
+ try encoding.append(codec.marker_word(.{
399
+ .uniform_bit = 0,
400
+ .uniform_word_count = 0,
401
+ .literal_word_count = math.maxInt(codec.MarkerLiteralCount),
402
+ }));
403
+ var i: Word = 0;
404
+ while (i < math.maxInt(codec.MarkerLiteralCount)) : (i += 1) try encoding.append(i + 1);
405
+ try encoding.append(codec.marker_word(.{
406
+ .uniform_bit = 0,
407
+ .uniform_word_count = 0,
408
+ .literal_word_count = 2,
409
+ }));
410
+ try encoding.append(i + 2);
411
+ try encoding.append(i + 3);
412
+ try test_decode(Word, encoding.items);
413
+ encoding.items.len = 0;
414
+ }
415
+ }
416
+
417
+ fn test_decode(comptime Word: type, encoded_expect_words: []const Word) !void {
418
+ const encoded_expect = mem.sliceAsBytes(encoded_expect_words);
419
+ const codec = ewah(Word);
420
+ const decoded_expect_data = try std.testing.allocator.alloc(Word, 4 * math.maxInt(Word));
421
+ defer std.testing.allocator.free(decoded_expect_data);
422
+
423
+ const decoded_expect_length = codec.decode_all(encoded_expect, decoded_expect_data);
424
+ const decoded_expect = decoded_expect_data[0..decoded_expect_length];
425
+ const encoded_actual = try std.testing.allocator.alignedAlloc(
426
+ u8,
427
+ @alignOf(Word),
428
+ codec.encode_size_max(decoded_expect.len),
429
+ );
430
+ defer std.testing.allocator.free(encoded_actual);
431
+
432
+ const encoded_actual_length = codec.encode_all(decoded_expect, encoded_actual);
433
+ try std.testing.expectEqual(encoded_expect.len, encoded_actual_length);
434
+ try std.testing.expectEqualSlices(u8, encoded_expect, encoded_actual[0..encoded_actual_length]);
435
+
436
+ const encoded_size_max = codec.encode_size_max(decoded_expect.len);
437
+ try std.testing.expect(encoded_expect.len <= encoded_size_max);
438
+
439
+ const decoded_actual = try std.testing.allocator.alloc(Word, decoded_expect.len);
440
+ defer std.testing.allocator.free(decoded_actual);
441
+
442
+ const decoded_actual_length = codec.decode_all(encoded_actual, decoded_actual);
443
+ try std.testing.expectEqual(decoded_expect.len, decoded_actual_length);
444
+ try std.testing.expectEqualSlices(Word, decoded_expect, decoded_actual);
445
+ }
@@ -0,0 +1,128 @@
1
+ const std = @import("std");
2
+ const assert = std.debug.assert;
3
+ const ewah = @import("ewah.zig").ewah(usize);
4
+ const stdx = @import("stdx");
5
+
6
+ const log = std.log;
7
+
8
+ const BitSetConfig = struct { // Parameters for the random bitsets built by make_bitset.
9
+ words: usize, // Number of usize words allocated for each generated bitset.
10
+ run_length_e: usize, // Run lengths are drawn with upper bound (2 * run_length_e) - 1.
11
+ literals_length_e: usize, // Literal-sequence lengths use bound (2 * literals_length_e) - 1.
12
+ };
13
+
14
+ // Bump these up if you want to use this as a real benchmark rather than as a test.
15
+ const samples = 10; // Bitsets generated, encoded and decoded per config.
16
+ const repeats: usize = 1_000; // Timed encode (and decode) calls per bitset.
17
+
18
+ // Explanation of fields:
19
+ // - "n": Number of randomly generate bitsets to test.
20
+ // - "words": The length of the decoded bitset, in u64s.
21
+ // - "run_length_e": The expected length of a run, ignoring truncation due to reaching the end of
22
+ // the bitset.
23
+ // - "literals_length_e": Expected length of a sequence of literals.
24
+ const configs = [_]BitSetConfig{ // The benchmark test runs once per entry, in this order.
25
+ // Primarily runs: the run bound grows while the literal bound stays fixed.
26
+ .{ .words = 640, .run_length_e = 10, .literals_length_e = 10 },
27
+ .{ .words = 640, .run_length_e = 100, .literals_length_e = 10 },
28
+ .{ .words = 640, .run_length_e = 200, .literals_length_e = 10 },
29
+ // Primarily literals: minimal run bound, large literal bound.
30
+ .{ .words = 640, .run_length_e = 1, .literals_length_e = 100 },
31
+ };
32
+
33
+ var prng = stdx.PRNG.from_seed(42); // File-level generator used by make_bitset; constant seed.
34
+
35
// For each entry in `configs`: generate `samples` bitsets, time `repeats` encodes and then
// `repeats` decodes of each one, check the round trip, and log per-call times and the ratio of
// uncompressed to compressed bytes.
test "benchmark: ewah" {
    for (configs) |config| {
        // Everything allocated for one config is released together when the arena is torn down.
        var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
        defer arena.deinit();
        const allocator = arena.allocator();

        var bitsets: [samples][]usize = undefined;
        var bitsets_encoded: [samples][]align(@alignOf(usize)) u8 = undefined;
        var bitsets_decoded: [samples][]usize = undefined;
        var bitset_lengths: [samples]usize = undefined;

        // Build every input and both output buffers before any timer is started.
        for (&bitsets, &bitsets_encoded, &bitsets_decoded) |*bitset, *encoded, *decoded| {
            bitset.* = try make_bitset(allocator, config);
            encoded.* = try allocator.alignedAlloc(
                u8,
                @alignOf(usize),
                ewah.encode_size_max(bitset.*.len),
            );
            decoded.* = try allocator.alloc(usize, config.words);
        }

        // Benchmark encoding.
        var encode_timer = try std.time.Timer.start();
        for (bitsets, bitsets_encoded, &bitset_lengths) |bitset, encoded, *encoded_length| {
            var size: usize = undefined;
            for (0..repeats) |_| {
                size = ewah.encode_all(bitset, encoded);
            }
            // Remember the encoded length so that decoding sees only the bytes produced.
            encoded_length.* = size;
        }
        const encode_time = encode_timer.read() / samples / repeats;

        // Benchmark decoding.
        var decode_timer = try std.time.Timer.start();
        for (bitsets_encoded, bitset_lengths, bitsets_decoded) |encoded, encoded_length, decoded| {
            const bitset_encoded = encoded[0..encoded_length];
            for (0..repeats) |_| {
                _ = ewah.decode_all(bitset_encoded, decoded);
            }
        }
        const decode_time = decode_timer.read() / samples / repeats;

        // The decoded words must reproduce the generated bitset exactly.
        for (bitsets, bitsets_decoded) |bitset, decoded| {
            assert(std.mem.eql(usize, bitset, decoded));
        }

        // Compute compression ratio.
        var total_uncompressed: f64 = 0.0;
        var total_compressed: f64 = 0.0;
        for (bitsets, bitset_lengths) |bitset, encoded_length| {
            total_uncompressed += @as(f64, @floatFromInt(bitset.len * @sizeOf(usize)));
            total_compressed += @as(f64, @floatFromInt(encoded_length));
        }

        log.info(
            \\Words={:_>3} E(Run)={:_>3} E(Literal)={:_>3} EncTime={:_>6}ns DecTime={:_>6}ns Ratio={d:_>6.2}
        , .{
            config.words,
            config.run_length_e,
            config.literals_length_e,
            encode_time,
            decode_time,
            total_uncompressed / total_compressed,
        });
    }
}
107
+
108
/// Allocates `config.words` words from `allocator` and fills them with alternating segments:
/// a uniform run (every word all-zero or all-one), then a sequence of literal words taken from
/// a counter that starts at 1. Segment lengths are drawn from the file-level `prng` with upper
/// bounds `(2 * config.run_length_e) - 1` and `(2 * config.literals_length_e) - 1`; a segment
/// that would pass the end of the bitset is truncated.
///
/// The caller owns the returned slice. Returns an error if the allocation fails.
fn make_bitset(allocator: std.mem.Allocator, config: BitSetConfig) ![]usize {
    const words = try allocator.alloc(usize, config.words);
    var w: usize = 0;
    var literal: usize = 1;
    // Deliberately no `: (w += 1)` here. After the two segments below, `w` already indexes the
    // first unwritten word; stepping it again would skip that word and hand back a bitset with
    // uninitialized entries. A pass whose two draws are both zero writes nothing and draws again.
    while (w < words.len) {
        const run_length = prng.int_inclusive(usize, (2 * config.run_length_e) - 1);
        const literals_length = prng.int_inclusive(usize, (2 * config.literals_length_e) - 1);
        const run_bit = prng.boolean();

        // Uniform run.
        const run_word: usize = if (run_bit) std.math.maxInt(usize) else 0;
        const run_end = @min(w + run_length, words.len);
        @memset(words[w..run_end], run_word);
        w = run_end;

        // Literal words; the counter keeps increasing across segments.
        const literals_end = @min(w + literals_length, words.len);
        while (w < literals_end) : (w += 1) {
            words[w] = literal;
            literal += 1;
        }
    }
    return words;
}