vibe_zstd 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. checksums.yaml +7 -0
  2. data/.standard.yml +3 -0
  3. data/CHANGELOG.md +22 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +978 -0
  6. data/Rakefile +20 -0
  7. data/benchmark/README.md +198 -0
  8. data/benchmark/compression_levels.rb +99 -0
  9. data/benchmark/context_reuse.rb +174 -0
  10. data/benchmark/decompression_speed_by_level.rb +65 -0
  11. data/benchmark/dictionary_training.rb +182 -0
  12. data/benchmark/dictionary_usage.rb +121 -0
  13. data/benchmark/for_readme.rb +157 -0
  14. data/benchmark/generate_fixture.rb +82 -0
  15. data/benchmark/helpers.rb +237 -0
  16. data/benchmark/multithreading.rb +105 -0
  17. data/benchmark/run_all.rb +150 -0
  18. data/benchmark/streaming.rb +154 -0
  19. data/ext/vibe_zstd/Makefile +270 -0
  20. data/ext/vibe_zstd/cctx.c +565 -0
  21. data/ext/vibe_zstd/dctx.c +493 -0
  22. data/ext/vibe_zstd/dict.c +587 -0
  23. data/ext/vibe_zstd/extconf.rb +52 -0
  24. data/ext/vibe_zstd/frames.c +132 -0
  25. data/ext/vibe_zstd/libzstd/LICENSE +30 -0
  26. data/ext/vibe_zstd/libzstd/common/allocations.h +55 -0
  27. data/ext/vibe_zstd/libzstd/common/bits.h +205 -0
  28. data/ext/vibe_zstd/libzstd/common/bitstream.h +454 -0
  29. data/ext/vibe_zstd/libzstd/common/compiler.h +464 -0
  30. data/ext/vibe_zstd/libzstd/common/cpu.h +249 -0
  31. data/ext/vibe_zstd/libzstd/common/debug.c +30 -0
  32. data/ext/vibe_zstd/libzstd/common/debug.h +107 -0
  33. data/ext/vibe_zstd/libzstd/common/entropy_common.c +340 -0
  34. data/ext/vibe_zstd/libzstd/common/error_private.c +64 -0
  35. data/ext/vibe_zstd/libzstd/common/error_private.h +158 -0
  36. data/ext/vibe_zstd/libzstd/common/fse.h +625 -0
  37. data/ext/vibe_zstd/libzstd/common/fse_decompress.c +315 -0
  38. data/ext/vibe_zstd/libzstd/common/huf.h +277 -0
  39. data/ext/vibe_zstd/libzstd/common/mem.h +422 -0
  40. data/ext/vibe_zstd/libzstd/common/pool.c +371 -0
  41. data/ext/vibe_zstd/libzstd/common/pool.h +81 -0
  42. data/ext/vibe_zstd/libzstd/common/portability_macros.h +171 -0
  43. data/ext/vibe_zstd/libzstd/common/threading.c +182 -0
  44. data/ext/vibe_zstd/libzstd/common/threading.h +142 -0
  45. data/ext/vibe_zstd/libzstd/common/xxhash.c +18 -0
  46. data/ext/vibe_zstd/libzstd/common/xxhash.h +7094 -0
  47. data/ext/vibe_zstd/libzstd/common/zstd_common.c +48 -0
  48. data/ext/vibe_zstd/libzstd/common/zstd_deps.h +123 -0
  49. data/ext/vibe_zstd/libzstd/common/zstd_internal.h +324 -0
  50. data/ext/vibe_zstd/libzstd/common/zstd_trace.h +156 -0
  51. data/ext/vibe_zstd/libzstd/compress/clevels.h +134 -0
  52. data/ext/vibe_zstd/libzstd/compress/fse_compress.c +625 -0
  53. data/ext/vibe_zstd/libzstd/compress/hist.c +191 -0
  54. data/ext/vibe_zstd/libzstd/compress/hist.h +82 -0
  55. data/ext/vibe_zstd/libzstd/compress/huf_compress.c +1464 -0
  56. data/ext/vibe_zstd/libzstd/compress/zstd_compress.c +7843 -0
  57. data/ext/vibe_zstd/libzstd/compress/zstd_compress_internal.h +1636 -0
  58. data/ext/vibe_zstd/libzstd/compress/zstd_compress_literals.c +235 -0
  59. data/ext/vibe_zstd/libzstd/compress/zstd_compress_literals.h +39 -0
  60. data/ext/vibe_zstd/libzstd/compress/zstd_compress_sequences.c +442 -0
  61. data/ext/vibe_zstd/libzstd/compress/zstd_compress_sequences.h +55 -0
  62. data/ext/vibe_zstd/libzstd/compress/zstd_compress_superblock.c +688 -0
  63. data/ext/vibe_zstd/libzstd/compress/zstd_compress_superblock.h +32 -0
  64. data/ext/vibe_zstd/libzstd/compress/zstd_cwksp.h +765 -0
  65. data/ext/vibe_zstd/libzstd/compress/zstd_double_fast.c +778 -0
  66. data/ext/vibe_zstd/libzstd/compress/zstd_double_fast.h +42 -0
  67. data/ext/vibe_zstd/libzstd/compress/zstd_fast.c +985 -0
  68. data/ext/vibe_zstd/libzstd/compress/zstd_fast.h +30 -0
  69. data/ext/vibe_zstd/libzstd/compress/zstd_lazy.c +2199 -0
  70. data/ext/vibe_zstd/libzstd/compress/zstd_lazy.h +193 -0
  71. data/ext/vibe_zstd/libzstd/compress/zstd_ldm.c +745 -0
  72. data/ext/vibe_zstd/libzstd/compress/zstd_ldm.h +109 -0
  73. data/ext/vibe_zstd/libzstd/compress/zstd_ldm_geartab.h +106 -0
  74. data/ext/vibe_zstd/libzstd/compress/zstd_opt.c +1580 -0
  75. data/ext/vibe_zstd/libzstd/compress/zstd_opt.h +72 -0
  76. data/ext/vibe_zstd/libzstd/compress/zstd_preSplit.c +238 -0
  77. data/ext/vibe_zstd/libzstd/compress/zstd_preSplit.h +33 -0
  78. data/ext/vibe_zstd/libzstd/compress/zstdmt_compress.c +1923 -0
  79. data/ext/vibe_zstd/libzstd/compress/zstdmt_compress.h +102 -0
  80. data/ext/vibe_zstd/libzstd/decompress/huf_decompress.c +1944 -0
  81. data/ext/vibe_zstd/libzstd/decompress/huf_decompress_amd64.S +602 -0
  82. data/ext/vibe_zstd/libzstd/decompress/zstd_ddict.c +244 -0
  83. data/ext/vibe_zstd/libzstd/decompress/zstd_ddict.h +44 -0
  84. data/ext/vibe_zstd/libzstd/decompress/zstd_decompress.c +2410 -0
  85. data/ext/vibe_zstd/libzstd/decompress/zstd_decompress_block.c +2209 -0
  86. data/ext/vibe_zstd/libzstd/decompress/zstd_decompress_block.h +73 -0
  87. data/ext/vibe_zstd/libzstd/decompress/zstd_decompress_internal.h +240 -0
  88. data/ext/vibe_zstd/libzstd/deprecated/zbuff.h +214 -0
  89. data/ext/vibe_zstd/libzstd/deprecated/zbuff_common.c +26 -0
  90. data/ext/vibe_zstd/libzstd/deprecated/zbuff_compress.c +167 -0
  91. data/ext/vibe_zstd/libzstd/deprecated/zbuff_decompress.c +77 -0
  92. data/ext/vibe_zstd/libzstd/dictBuilder/cover.c +1302 -0
  93. data/ext/vibe_zstd/libzstd/dictBuilder/cover.h +152 -0
  94. data/ext/vibe_zstd/libzstd/dictBuilder/divsufsort.c +1913 -0
  95. data/ext/vibe_zstd/libzstd/dictBuilder/divsufsort.h +57 -0
  96. data/ext/vibe_zstd/libzstd/dictBuilder/fastcover.c +766 -0
  97. data/ext/vibe_zstd/libzstd/dictBuilder/zdict.c +1133 -0
  98. data/ext/vibe_zstd/libzstd/zdict.h +481 -0
  99. data/ext/vibe_zstd/libzstd/zstd.h +3198 -0
  100. data/ext/vibe_zstd/libzstd/zstd_errors.h +107 -0
  101. data/ext/vibe_zstd/streaming.c +410 -0
  102. data/ext/vibe_zstd/vibe_zstd.c +293 -0
  103. data/ext/vibe_zstd/vibe_zstd.h +56 -0
  104. data/ext/vibe_zstd/vibe_zstd_internal.h +27 -0
  105. data/lib/vibe_zstd/constants.rb +67 -0
  106. data/lib/vibe_zstd/version.rb +5 -0
  107. data/lib/vibe_zstd.rb +255 -0
  108. data/sig/vibe_zstd.rbs +76 -0
  109. metadata +179 -0
# frozen_string_literal: true

# Rake entry point for the vibe_zstd gem: wires up gem packaging, the
# Minitest suite, StandardRB linting, and the native extension build.

require "bundler/gem_tasks"
require "minitest/test_task"

# Defines the `test` task backed by Minitest.
Minitest::TestTask.create

require "standard/rake"
require "rake/extensiontask"

# Packaging the gem implies compiling the C extension first.
task build: :compile

GEMSPEC = Gem::Specification.load("vibe_zstd.gemspec")

# Compile ext/vibe_zstd and place the built shared object under lib/vibe_zstd.
Rake::ExtensionTask.new("vibe_zstd", GEMSPEC) do |extension|
  extension.lib_dir = "lib/vibe_zstd"
end

# Default workflow: clean, rebuild the extension, run tests, then lint.
task default: %i[clobber compile test standard]
@@ -0,0 +1,198 @@
1
+ # VibeZstd Benchmarks
2
+
3
+ Comprehensive benchmark suite for vibe_zstd, demonstrating performance characteristics and best practices.
4
+
5
+ ## Prerequisites
6
+
7
+ Install benchmark dependencies:
8
+
9
+ ```bash
10
+ bundle install
11
+ ```
12
+
13
+ Generate the test dictionary fixture (one-time setup):
14
+
15
+ ```bash
16
+ ruby benchmark/generate_fixture.rb
17
+ ```
18
+
19
+ ## Running Benchmarks
20
+
21
+ ### Run all benchmarks
22
+
23
+ ```bash
24
+ ruby benchmark/run_all.rb
25
+ ```
26
+
27
+ ### Run specific benchmarks
28
+
29
+ ```bash
30
+ # List available benchmarks
31
+ ruby benchmark/run_all.rb --list
32
+
33
+ # Run specific benchmark
34
+ ruby benchmark/run_all.rb --benchmark "context reuse"
35
+ ruby benchmark/run_all.rb --benchmark "dictionary"
36
+
37
+ # Run multiple benchmarks
38
+ ruby benchmark/run_all.rb --benchmark "context" --benchmark "streaming"
39
+ ```
40
+
41
+ ### Run individual benchmarks
42
+
43
+ ```bash
44
+ ruby benchmark/context_reuse.rb
45
+ ruby benchmark/dictionary_usage.rb
46
+ ruby benchmark/compression_levels.rb
47
+ ruby benchmark/streaming.rb
48
+ ruby benchmark/multithreading.rb
49
+ ruby benchmark/dictionary_training.rb
50
+ ```
51
+
52
+ ## Benchmark Descriptions
53
+
54
+ ### 1. Context Reuse (`context_reuse.rb`)
55
+
56
+ **What it tests:** Performance difference between reusing compression/decompression contexts vs creating new ones for each operation.
57
+
58
+ **Key findings:**
59
+ - Reusing contexts is **3-5x faster** than creating new ones
60
+ - Saves **significant memory** (avoiding repeated allocations)
61
+ - Always reuse contexts when performing multiple operations
62
+
63
+ **When to reuse:**
64
+ - āœ“ Processing multiple files in a loop
65
+ - āœ“ Compressing/decompressing multiple messages
66
+ - āœ“ Any scenario with > 1 operation
67
+
68
+ ### 2. Dictionary Usage (`dictionary_usage.rb`)
69
+
70
+ **What it tests:** Compression ratio and speed improvements when using trained dictionaries.
71
+
72
+ **Key findings:**
73
+ - Dictionaries provide **40-70% better compression** for small, similar data
74
+ - Especially effective for JSON, logs, and repeated patterns
75
+ - Small memory overhead for dictionary storage
76
+
77
+ **When to use dictionaries:**
78
+ - āœ“ Small messages (< 10KB each) with similar structure
79
+ - āœ“ JSON API responses
80
+ - āœ“ Log messages
81
+ - āœ“ When compression ratio matters more than speed
82
+ - āœ— Large files (> 1MB each)
83
+ - āœ— Highly variable data
84
+
85
+ ### 3. Compression Levels (`compression_levels.rb`)
86
+
87
+ **What it tests:** Trade-off between compression speed and compression ratio across different levels.
88
+
89
+ **Key findings:**
90
+ - **Level -1:** Ultra-fast, 3-5x faster than level 1
91
+ - **Level 1-3:** Fast compression, good for high-throughput
92
+ - **Level 3 (default):** Best balance of speed/ratio
93
+ - **Level 9-15:** Better compression, slower speed
94
+ - **Level 16-22:** Maximum compression, very slow, high memory
95
+
96
+ **Recommendations:**
97
+ - Real-time compression: Level -1 to 1
98
+ - General use: Level 3 (default)
99
+ - Archival: Level 9-15
100
+ - Maximum compression: Level 19-22
101
+
102
+ ### 4. Streaming (`streaming.rb`)
103
+
104
+ **What it tests:** Streaming API vs one-shot compression for different use cases.
105
+
106
+ **Key findings:**
107
+ - One-shot is **simpler** but requires all data in memory
108
+ - Streaming provides **constant memory usage** regardless of file size
109
+ - Streaming is essential for **large files** (> 1MB)
110
+ - Chunk size affects performance (8KB chunks perform well)
111
+
112
+ **When to use streaming:**
113
+ - āœ“ Large files (> 1MB)
114
+ - āœ“ Memory-constrained environments
115
+ - āœ“ Network streams, incremental data
116
+ - āœ“ Processing data on-the-fly
117
+
118
+ **When to use one-shot:**
119
+ - āœ“ Small data (< 1MB)
120
+ - āœ“ Data already in memory
121
+ - āœ“ Simplicity is priority
122
+
123
+ ### 5. Multi-threading (`multithreading.rb`)
124
+
125
+ **What it tests:** Performance impact of using multiple worker threads for compression.
126
+
127
+ **Key findings:**
128
+ - Multi-threading provides **1.5-3x speedup** for large data
129
+ - Only helps with data **> 256KB** (overhead for smaller)
130
+ - Optimal: **2-4 workers** (diminishing returns after)
131
+ - More workers = higher memory usage
132
+
133
+ **Typical speedups:**
134
+ - 2 workers: 1.5-1.8x faster
135
+ - 4 workers: 2.0-2.5x faster
136
+ - 8 workers: 2.2-3.0x faster
137
+
138
+ **When to use:**
139
+ - āœ“ Large files (> 256KB)
140
+ - āœ“ High-throughput scenarios
141
+ - āœ“ When CPU is available
142
+ - āœ— Small data (< 256KB)
143
+ - āœ— Memory-constrained environments
144
+
145
+ ### 6. Dictionary Training (`dictionary_training.rb`)
146
+
147
+ **What it tests:** Comparison of dictionary training algorithms and dictionary sizes.
148
+
149
+ **Key findings:**
150
+ - **train_dict:** Fastest training, good quality
151
+ - **train_dict_cover:** Best compression ratios, slower (2-10x)
152
+ - **train_dict_fast_cover:** Balanced speed/quality
153
+ - Larger dictionaries = better compression (diminishing returns > 64KB)
154
+
155
+ **Recommendations:**
156
+ - Quick iteration: `train_dict`
157
+ - Production dictionaries: `train_dict_cover`
158
+ - Balanced: `train_dict_fast_cover` with `accel: 5`
159
+ - Dictionary size: 16KB-64KB for small messages
160
+
161
+ ## Benchmark Results
162
+
163
+ Run the benchmarks on your system to see platform-specific results. The benchmarks will generate markdown-formatted tables that you can include in documentation.
164
+
165
+ ## Adding New Benchmarks
166
+
167
+ 1. Create a new file in `benchmark/` (e.g., `my_benchmark.rb`)
168
+ 2. Use the helper utilities from `helpers.rb`:
169
+ - `BenchmarkHelpers.run_comparison` - Main benchmark runner
170
+ - `DataGenerator.*` - Generate test data
171
+ - `Formatter.*` - Format output
172
+ - `Memory.*` - Memory estimation
173
+ 3. Add your benchmark to `run_all.rb`
174
+
175
+ Example:
176
+
177
+ ```ruby
178
+ #!/usr/bin/env ruby
179
+ require_relative "helpers"
180
+ include BenchmarkHelpers
181
+
182
+ BenchmarkHelpers.run_comparison(title: "My Benchmark") do |results|
183
+ # Your benchmark code here
184
+ results << BenchmarkResult.new(
185
+ name: "Test case",
186
+ iterations_per_sec: 1000,
187
+ memory_bytes: 1024
188
+ )
189
+ end
190
+ ```
191
+
192
+ ## Contributing
193
+
194
+ Benchmark improvements and additions are welcome! Please ensure:
195
+ - Use realistic test data
196
+ - Include memory measurements
197
+ - Provide clear recommendations
198
+ - Format output for README inclusion
#!/usr/bin/env ruby
# frozen_string_literal: true

require_relative "helpers"

# Benchmark: Compression Levels Speed vs Ratio
# Demonstrates the trade-off between compression speed and compression ratio
# across several representative payload types, then prints a summary table.

BenchmarkHelpers.run_comparison(title: "Compression Levels: Speed vs Ratio") do |results|
  # Representative payloads: structured JSON, line-oriented logs, mixed bytes.
  corpora = {
    "JSON data" => DataGenerator.json_data(count: 100),
    "Log data" => DataGenerator.log_data(count: 100),
    "Mixed data" => DataGenerator.mixed_data(size: 10_000)
  }

  # Negative (ultra-fast), low, default, medium, high, and maximum levels.
  levels_to_test = [-1, 1, 3, 9, 15, 19, 22]

  puts "Testing compression levels: #{levels_to_test.join(", ")}\n"
  puts "Min level: #{VibeZstd.min_compression_level}"
  puts "Max level: #{VibeZstd.max_compression_level}"
  puts "Default level: #{VibeZstd.default_compression_level}\n\n"

  corpora.each do |data_name, payload|
    Formatter.section("Data type: #{data_name} (#{Formatter.format_bytes(payload.bytesize)})")

    # One table row per level for this payload.
    rows = levels_to_test.map do |level|
      compressor = VibeZstd::CCtx.new
      decompressor = VibeZstd::DCtx.new

      # Time 100 compressions, keeping the last frame for decompression.
      frame = nil
      compress_elapsed = Benchmark.measure do
        100.times { frame = compressor.compress(payload, level: level) }
      end.real

      # Time 100 decompressions of that frame.
      decompress_elapsed = Benchmark.measure do
        100.times { decompressor.decompress(frame) }
      end.real

      ratio = payload.bytesize.to_f / frame.bytesize
      memory = Memory.estimate_cctx(level)

      print "."

      {
        "Level" => level,
        "Compressed" => Formatter.format_bytes(frame.bytesize),
        "Ratio" => Formatter.format_ratio(ratio),
        "Compress" => "#{(100 / compress_elapsed).round(0)} ops/s",
        "Decompress" => "#{(100 / decompress_elapsed).round(0)} ops/s",
        "Memory" => Formatter.format_bytes(memory)
      }
    end

    puts "\n"
    Formatter.table(rows)
  end

  # Summary on a single larger mixed payload for the shared results table.
  Formatter.section("Performance Summary (Mixed Data)")
  summary_data = DataGenerator.mixed_data(size: 50_000)

  levels_to_test.each do |level|
    compressor = VibeZstd::CCtx.new

    frame = nil
    elapsed = Benchmark.measure do
      10.times { frame = compressor.compress(summary_data, level: level) }
    end.real

    results << BenchmarkResult.new(
      :name => "Level #{level}",
      :iterations_per_sec => 10 / elapsed,
      :compression_ratio => summary_data.bytesize.to_f / frame.bytesize,
      :memory_bytes => Memory.estimate_cctx(level),
      "Compressed" => Formatter.format_bytes(frame.bytesize)
    )
  end
end

puts "\nšŸ’” Level Recommendations:"
puts " Level -1: Ultra-fast, use for real-time compression (3-5x faster than level 1)"
puts " Level 1-3: Fast compression, good for high-throughput scenarios"
puts " Level 3: Default, balanced speed/ratio (recommended for most use cases)"
puts " Level 5-9: Better compression, moderate speed cost"
puts " Level 10-15: High compression, slower (good for archival)"
puts " Level 16-22: Maximum compression, very slow (archival, one-time compression)"
puts "\n šŸ’¾ Memory usage increases significantly at higher levels (15+)"
#!/usr/bin/env ruby
# frozen_string_literal: true

require_relative "helpers"

# Benchmark: Context Reuse vs New Context Creation
# Quantifies how much faster it is to reuse CCtx/DCtx instances (with and
# without the various reset directives) than to allocate fresh ones per call.

BenchmarkHelpers.run_comparison(title: "Context Reuse Performance Comparison") do |results|
  # Same benchmark across three payload sizes to show the benefit is consistent.
  test_cases = {
    "Small (1KB)" => DataGenerator.json_data(count: 5),
    "Medium (10KB)" => DataGenerator.json_data(count: 50),
    "Large (100KB)" => DataGenerator.json_data(count: 500)
  }

  test_cases.each do |size_label, test_data|
    Formatter.section("Data size: #{size_label} (#{Formatter.format_bytes(test_data.bytesize)})")

    # Smaller payloads get more iterations to keep timing noise down.
    iterations =
      case test_data.bytesize
      when 0..5000 then 10000
      when 5001..50000 then 5000
      else 2000
      end

    puts "Running #{Formatter.format_number(iterations)} iterations...\n"

    # Benchmark 1: fresh CCtx/DCtx pair for every round-trip (inefficient).
    print " New context per operation: "
    new_context_time = Benchmark.measure do
      iterations.times do
        compressor = VibeZstd::CCtx.new
        decompressor = VibeZstd::DCtx.new
        decompressor.decompress(compressor.compress(test_data))
      end
    end
    new_context_ops_per_sec = iterations / new_context_time.real
    puts "#{new_context_time.real.round(3)}s (#{Formatter.format_number(new_context_ops_per_sec.to_i)} ops/sec)"

    # Shared measurement: one context pair for all iterations, optionally
    # resetting both contexts with `directive` after each round-trip.
    measure_reused = lambda do |directive = nil|
      Benchmark.measure do
        compressor = VibeZstd::CCtx.new
        decompressor = VibeZstd::DCtx.new

        iterations.times do
          decompressor.decompress(compressor.compress(test_data))
          next unless directive

          compressor.reset(directive)
          decompressor.reset(directive)
        end
      end
    end

    # Benchmark 2: reused contexts, no reset between operations (efficient).
    print " Reused context (no reset): "
    reused_time = measure_reused.call
    reused_ops_per_sec = iterations / reused_time.real
    puts "#{reused_time.real.round(3)}s (#{Formatter.format_number(reused_ops_per_sec.to_i)} ops/sec)"

    speedup = reused_ops_per_sec / new_context_ops_per_sec
    puts " → Speedup: #{Formatter.format_ratio(speedup)}"

    # Benchmarks 3-5: reused contexts plus each reset directive in turn.
    reset_directives = {
      session: VibeZstd::ResetDirective::SESSION,
      parameters: VibeZstd::ResetDirective::PARAMETERS,
      both: VibeZstd::ResetDirective::BOTH
    }
    reset_stats = {}

    reset_directives.each do |variant, directive|
      print " Reused + reset(:#{variant}): "
      elapsed = measure_reused.call(directive)
      ops = iterations / elapsed.real
      ratio = ops / new_context_ops_per_sec
      reset_stats[variant] = {time: elapsed, ops: ops, speedup: ratio}

      puts "#{elapsed.real.round(3)}s (#{Formatter.format_number(ops.to_i)} ops/sec)"
      # The final variant ends the section with a blank line, as before.
      terminator = (variant == :both) ? "\n\n" : ""
      puts " → Speedup: #{Formatter.format_ratio(ratio)}#{terminator}"
    end

    # Collect results for this data size (same row order as the output above).
    results << BenchmarkResult.new(
      :name => "#{size_label} - New ctx",
      :iterations_per_sec => new_context_ops_per_sec,
      "Data size" => Formatter.format_bytes(test_data.bytesize),
      "Time (#{iterations})" => "#{new_context_time.real.round(3)}s"
    )

    results << BenchmarkResult.new(
      :name => "#{size_label} - Reused (no reset)",
      :iterations_per_sec => reused_ops_per_sec,
      "Data size" => Formatter.format_bytes(test_data.bytesize),
      "Time (#{iterations})" => "#{reused_time.real.round(3)}s",
      "Speedup" => "#{speedup.round(2)}x"
    )

    reset_stats.each do |variant, stat|
      results << BenchmarkResult.new(
        :name => "#{size_label} - Reused + reset(:#{variant})",
        :iterations_per_sec => stat[:ops],
        "Data size" => Formatter.format_bytes(test_data.bytesize),
        "Time (#{iterations})" => "#{stat[:time].real.round(3)}s",
        "Speedup" => "#{stat[:speedup].round(2)}x"
      )
    end
  end

  # Rough memory cost of one context pair, to illustrate allocation savings.
  cctx_memory = Memory.estimate_cctx(3)
  dctx_memory = Memory.estimate_dctx
  pair_memory = cctx_memory + dctx_memory

  puts "\nšŸ“Š Memory Usage Analysis:"
  puts " CCtx (level 3): #{Formatter.format_bytes(cctx_memory)}"
  puts " DCtx: #{Formatter.format_bytes(dctx_memory)}"
  puts " Total per context pair: #{Formatter.format_bytes(pair_memory)}"
  puts "\n For 10,000 operations:"
  puts " New contexts: #{Formatter.format_bytes(pair_memory * 10000)} allocated"
  puts " Reused contexts: #{Formatter.format_bytes(pair_memory)} allocated"
  puts " Memory saved: #{Formatter.format_bytes(pair_memory * 9999)} (#{((9999.0 / 10000) * 100).round(1)}%)"
end

puts "\nšŸ’” Recommendation:"
puts " Always reuse CCtx/DCtx instances when performing multiple operations."
puts " This provides #{((1000 * (Memory.estimate_cctx(3) + Memory.estimate_dctx) / 1024.0 / 1024)).round(1)}MB memory savings for 1000 operations!"
#!/usr/bin/env ruby
# frozen_string_literal: true

require_relative "helpers"

# Benchmark: Does compression level affect decompression speed?
# Hypothesis: decompression throughput is independent of the level used to
# produce the frame; only the compression ratio should change.

BenchmarkHelpers.run_comparison(title: "Decompression Speed by Compression Level") do |results|
  # A dataset large enough to yield meaningful per-iteration timings.
  test_data = DataGenerator.json_data(count: 500) # ~100KB

  puts "Test data size: #{Formatter.format_bytes(test_data.bytesize)}\n\n"

  # Compression levels under test.
  levels = [-1, 1, 3, 9, 19]

  # Pre-compress once per level so only decompression is timed below.
  puts "Compressing data at different levels...\n"
  compressed_by_level = levels.each_with_object({}) do |level, frames|
    frame = VibeZstd::CCtx.new.compress(test_data, level: level)
    frames[level] = frame

    ratio = test_data.bytesize.to_f / frame.bytesize
    puts " Level #{level.to_s.rjust(3)}: #{Formatter.format_bytes(frame.bytesize)} (#{ratio.round(2)}x ratio)"
  end

  puts "\nDecompressing 5,000 times at each level...\n"
  iterations = 5000

  levels.each do |level|
    frame = compressed_by_level[level]

    print " Level #{level.to_s.rjust(3)}: "

    # Time repeated decompression of this level's frame.
    elapsed = Benchmark.measure do
      decompressor = VibeZstd::DCtx.new
      iterations.times { decompressor.decompress(frame) }
    end.real

    ops_per_sec = iterations / elapsed
    throughput_mb = (test_data.bytesize * iterations / elapsed) / (1024.0 * 1024.0)

    puts "#{elapsed.round(3)}s (#{Formatter.format_number(ops_per_sec.to_i)} ops/sec, #{throughput_mb.round(1)} MB/s)"

    results << BenchmarkResult.new(
      :name => "Level #{level}",
      :iterations_per_sec => ops_per_sec,
      "Compressed Size" => Formatter.format_bytes(frame.bytesize),
      "Ratio" => "#{(test_data.bytesize.to_f / frame.bytesize).round(2)}x",
      "Throughput" => "#{throughput_mb.round(1)} MB/s"
    )
  end
end

puts "\nšŸ’” Key Insight:"
puts " Decompression speed is essentially constant regardless of compression level!"
puts " Higher levels = better compression ratio, but same decompression speed."
puts " This means you can use high compression levels for storage without impacting read performance."