vibe_zstd 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. checksums.yaml +7 -0
  2. data/.standard.yml +3 -0
  3. data/CHANGELOG.md +22 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +978 -0
  6. data/Rakefile +20 -0
  7. data/benchmark/README.md +198 -0
  8. data/benchmark/compression_levels.rb +99 -0
  9. data/benchmark/context_reuse.rb +174 -0
  10. data/benchmark/decompression_speed_by_level.rb +65 -0
  11. data/benchmark/dictionary_training.rb +182 -0
  12. data/benchmark/dictionary_usage.rb +121 -0
  13. data/benchmark/for_readme.rb +157 -0
  14. data/benchmark/generate_fixture.rb +82 -0
  15. data/benchmark/helpers.rb +237 -0
  16. data/benchmark/multithreading.rb +105 -0
  17. data/benchmark/run_all.rb +150 -0
  18. data/benchmark/streaming.rb +154 -0
  19. data/ext/vibe_zstd/Makefile +270 -0
  20. data/ext/vibe_zstd/cctx.c +565 -0
  21. data/ext/vibe_zstd/dctx.c +493 -0
  22. data/ext/vibe_zstd/dict.c +587 -0
  23. data/ext/vibe_zstd/extconf.rb +52 -0
  24. data/ext/vibe_zstd/frames.c +132 -0
  25. data/ext/vibe_zstd/libzstd/LICENSE +30 -0
  26. data/ext/vibe_zstd/libzstd/common/allocations.h +55 -0
  27. data/ext/vibe_zstd/libzstd/common/bits.h +205 -0
  28. data/ext/vibe_zstd/libzstd/common/bitstream.h +454 -0
  29. data/ext/vibe_zstd/libzstd/common/compiler.h +464 -0
  30. data/ext/vibe_zstd/libzstd/common/cpu.h +249 -0
  31. data/ext/vibe_zstd/libzstd/common/debug.c +30 -0
  32. data/ext/vibe_zstd/libzstd/common/debug.h +107 -0
  33. data/ext/vibe_zstd/libzstd/common/entropy_common.c +340 -0
  34. data/ext/vibe_zstd/libzstd/common/error_private.c +64 -0
  35. data/ext/vibe_zstd/libzstd/common/error_private.h +158 -0
  36. data/ext/vibe_zstd/libzstd/common/fse.h +625 -0
  37. data/ext/vibe_zstd/libzstd/common/fse_decompress.c +315 -0
  38. data/ext/vibe_zstd/libzstd/common/huf.h +277 -0
  39. data/ext/vibe_zstd/libzstd/common/mem.h +422 -0
  40. data/ext/vibe_zstd/libzstd/common/pool.c +371 -0
  41. data/ext/vibe_zstd/libzstd/common/pool.h +81 -0
  42. data/ext/vibe_zstd/libzstd/common/portability_macros.h +171 -0
  43. data/ext/vibe_zstd/libzstd/common/threading.c +182 -0
  44. data/ext/vibe_zstd/libzstd/common/threading.h +142 -0
  45. data/ext/vibe_zstd/libzstd/common/xxhash.c +18 -0
  46. data/ext/vibe_zstd/libzstd/common/xxhash.h +7094 -0
  47. data/ext/vibe_zstd/libzstd/common/zstd_common.c +48 -0
  48. data/ext/vibe_zstd/libzstd/common/zstd_deps.h +123 -0
  49. data/ext/vibe_zstd/libzstd/common/zstd_internal.h +324 -0
  50. data/ext/vibe_zstd/libzstd/common/zstd_trace.h +156 -0
  51. data/ext/vibe_zstd/libzstd/compress/clevels.h +134 -0
  52. data/ext/vibe_zstd/libzstd/compress/fse_compress.c +625 -0
  53. data/ext/vibe_zstd/libzstd/compress/hist.c +191 -0
  54. data/ext/vibe_zstd/libzstd/compress/hist.h +82 -0
  55. data/ext/vibe_zstd/libzstd/compress/huf_compress.c +1464 -0
  56. data/ext/vibe_zstd/libzstd/compress/zstd_compress.c +7843 -0
  57. data/ext/vibe_zstd/libzstd/compress/zstd_compress_internal.h +1636 -0
  58. data/ext/vibe_zstd/libzstd/compress/zstd_compress_literals.c +235 -0
  59. data/ext/vibe_zstd/libzstd/compress/zstd_compress_literals.h +39 -0
  60. data/ext/vibe_zstd/libzstd/compress/zstd_compress_sequences.c +442 -0
  61. data/ext/vibe_zstd/libzstd/compress/zstd_compress_sequences.h +55 -0
  62. data/ext/vibe_zstd/libzstd/compress/zstd_compress_superblock.c +688 -0
  63. data/ext/vibe_zstd/libzstd/compress/zstd_compress_superblock.h +32 -0
  64. data/ext/vibe_zstd/libzstd/compress/zstd_cwksp.h +765 -0
  65. data/ext/vibe_zstd/libzstd/compress/zstd_double_fast.c +778 -0
  66. data/ext/vibe_zstd/libzstd/compress/zstd_double_fast.h +42 -0
  67. data/ext/vibe_zstd/libzstd/compress/zstd_fast.c +985 -0
  68. data/ext/vibe_zstd/libzstd/compress/zstd_fast.h +30 -0
  69. data/ext/vibe_zstd/libzstd/compress/zstd_lazy.c +2199 -0
  70. data/ext/vibe_zstd/libzstd/compress/zstd_lazy.h +193 -0
  71. data/ext/vibe_zstd/libzstd/compress/zstd_ldm.c +745 -0
  72. data/ext/vibe_zstd/libzstd/compress/zstd_ldm.h +109 -0
  73. data/ext/vibe_zstd/libzstd/compress/zstd_ldm_geartab.h +106 -0
  74. data/ext/vibe_zstd/libzstd/compress/zstd_opt.c +1580 -0
  75. data/ext/vibe_zstd/libzstd/compress/zstd_opt.h +72 -0
  76. data/ext/vibe_zstd/libzstd/compress/zstd_preSplit.c +238 -0
  77. data/ext/vibe_zstd/libzstd/compress/zstd_preSplit.h +33 -0
  78. data/ext/vibe_zstd/libzstd/compress/zstdmt_compress.c +1923 -0
  79. data/ext/vibe_zstd/libzstd/compress/zstdmt_compress.h +102 -0
  80. data/ext/vibe_zstd/libzstd/decompress/huf_decompress.c +1944 -0
  81. data/ext/vibe_zstd/libzstd/decompress/huf_decompress_amd64.S +602 -0
  82. data/ext/vibe_zstd/libzstd/decompress/zstd_ddict.c +244 -0
  83. data/ext/vibe_zstd/libzstd/decompress/zstd_ddict.h +44 -0
  84. data/ext/vibe_zstd/libzstd/decompress/zstd_decompress.c +2410 -0
  85. data/ext/vibe_zstd/libzstd/decompress/zstd_decompress_block.c +2209 -0
  86. data/ext/vibe_zstd/libzstd/decompress/zstd_decompress_block.h +73 -0
  87. data/ext/vibe_zstd/libzstd/decompress/zstd_decompress_internal.h +240 -0
  88. data/ext/vibe_zstd/libzstd/deprecated/zbuff.h +214 -0
  89. data/ext/vibe_zstd/libzstd/deprecated/zbuff_common.c +26 -0
  90. data/ext/vibe_zstd/libzstd/deprecated/zbuff_compress.c +167 -0
  91. data/ext/vibe_zstd/libzstd/deprecated/zbuff_decompress.c +77 -0
  92. data/ext/vibe_zstd/libzstd/dictBuilder/cover.c +1302 -0
  93. data/ext/vibe_zstd/libzstd/dictBuilder/cover.h +152 -0
  94. data/ext/vibe_zstd/libzstd/dictBuilder/divsufsort.c +1913 -0
  95. data/ext/vibe_zstd/libzstd/dictBuilder/divsufsort.h +57 -0
  96. data/ext/vibe_zstd/libzstd/dictBuilder/fastcover.c +766 -0
  97. data/ext/vibe_zstd/libzstd/dictBuilder/zdict.c +1133 -0
  98. data/ext/vibe_zstd/libzstd/zdict.h +481 -0
  99. data/ext/vibe_zstd/libzstd/zstd.h +3198 -0
  100. data/ext/vibe_zstd/libzstd/zstd_errors.h +107 -0
  101. data/ext/vibe_zstd/streaming.c +410 -0
  102. data/ext/vibe_zstd/vibe_zstd.c +293 -0
  103. data/ext/vibe_zstd/vibe_zstd.h +56 -0
  104. data/ext/vibe_zstd/vibe_zstd_internal.h +27 -0
  105. data/lib/vibe_zstd/constants.rb +67 -0
  106. data/lib/vibe_zstd/version.rb +5 -0
  107. data/lib/vibe_zstd.rb +255 -0
  108. data/sig/vibe_zstd.rbs +76 -0
  109. metadata +179 -0
@@ -0,0 +1,182 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require_relative "helpers"
5
+
6
+ # Benchmark: Dictionary Training Algorithms
7
+ # Compares train_dict, train_dict_cover, and train_dict_fast_cover
8
+
9
+ BenchmarkHelpers.run_comparison(title: "Dictionary Training Algorithm Comparison") do |results|
10
+ # Generate training samples
11
+ puts "Generating training samples..."
12
+ samples = 200.times.map do |i|
13
+ {
14
+ id: i,
15
+ name: "User #{i}",
16
+ email: "user#{i}@example.com",
17
+ created_at: Time.now.to_i - rand(100000),
18
+ status: %w[active pending inactive verified][rand(4)],
19
+ preferences: {
20
+ theme: %w[light dark auto][rand(3)],
21
+ notifications: rand(2) == 1
22
+ }
23
+ }.to_json
24
+ end
25
+
26
+ puts "Training samples: #{samples.size}"
27
+ puts "Average sample size: #{Formatter.format_bytes(samples.map(&:bytesize).sum / samples.size)}"
28
+ puts "Total training data: #{Formatter.format_bytes(samples.map(&:bytesize).sum)}\n\n"
29
+
30
+ dict_size = 16 * 1024 # 16KB
31
+
32
+ # Benchmark 1: train_dict (default algorithm - fast)
33
+ Formatter.section("Testing: train_dict (default/fast algorithm)")
34
+ dict_fast = nil
35
+ fast_time = Benchmark.measure do
36
+ dict_fast = VibeZstd.train_dict(samples, max_dict_size: dict_size)
37
+ end
38
+
39
+ puts "Training time: #{fast_time.real.round(3)}s"
40
+ puts "Dictionary size: #{Formatter.format_bytes(dict_fast.bytesize)}"
41
+ puts "Dictionary ID: #{VibeZstd.get_dict_id(dict_fast)}"
42
+
43
+ # Test compression effectiveness
44
+ test_sample = samples[100]
45
+ cdict_fast = VibeZstd::CDict.new(dict_fast)
46
+ compressed_fast = VibeZstd.compress(test_sample, dict: cdict_fast)
47
+ ratio_fast = test_sample.bytesize.to_f / compressed_fast.bytesize
48
+
49
+ results << BenchmarkResult.new(
50
+ :name => "train_dict",
51
+ :compression_ratio => ratio_fast,
52
+ :memory_bytes => dict_fast.bytesize,
53
+ "Training time" => "#{fast_time.real.round(3)}s",
54
+ "Compressed" => Formatter.format_bytes(compressed_fast.bytesize)
55
+ )
56
+
57
+ # Benchmark 2: train_dict_cover (COVER algorithm - better quality)
58
+ Formatter.section("Testing: train_dict_cover (COVER algorithm)")
59
+ dict_cover = nil
60
+ cover_time = Benchmark.measure do
61
+ dict_cover = VibeZstd.train_dict_cover(
62
+ samples,
63
+ max_dict_size: dict_size,
64
+ k: 200, # Segment size
65
+ d: 6 # Dmer size
66
+ )
67
+ end
68
+
69
+ puts "Training time: #{cover_time.real.round(3)}s"
70
+ puts "Dictionary size: #{Formatter.format_bytes(dict_cover.bytesize)}"
71
+ puts "Dictionary ID: #{VibeZstd.get_dict_id(dict_cover)}"
72
+
73
+ cdict_cover = VibeZstd::CDict.new(dict_cover)
74
+ compressed_cover = VibeZstd.compress(test_sample, dict: cdict_cover)
75
+ ratio_cover = test_sample.bytesize.to_f / compressed_cover.bytesize
76
+
77
+ results << BenchmarkResult.new(
78
+ :name => "train_dict_cover",
79
+ :compression_ratio => ratio_cover,
80
+ :memory_bytes => dict_cover.bytesize,
81
+ "Training time" => "#{cover_time.real.round(3)}s",
82
+ "Compressed" => Formatter.format_bytes(compressed_cover.bytesize)
83
+ )
84
+
85
+ # Benchmark 3: train_dict_fast_cover (fast COVER - balanced)
86
+ Formatter.section("Testing: train_dict_fast_cover (fast COVER)")
87
+ dict_fast_cover = nil
88
+ fast_cover_time = Benchmark.measure do
89
+ dict_fast_cover = VibeZstd.train_dict_fast_cover(
90
+ samples,
91
+ max_dict_size: dict_size,
92
+ k: 200,
93
+ d: 6,
94
+ f: 20, # Frequency array size
95
+ accel: 5 # Acceleration (1-10, higher = faster)
96
+ )
97
+ end
98
+
99
+ puts "Training time: #{fast_cover_time.real.round(3)}s"
100
+ puts "Dictionary size: #{Formatter.format_bytes(dict_fast_cover.bytesize)}"
101
+ puts "Dictionary ID: #{VibeZstd.get_dict_id(dict_fast_cover)}"
102
+
103
+ cdict_fast_cover = VibeZstd::CDict.new(dict_fast_cover)
104
+ compressed_fast_cover = VibeZstd.compress(test_sample, dict: cdict_fast_cover)
105
+ ratio_fast_cover = test_sample.bytesize.to_f / compressed_fast_cover.bytesize
106
+
107
+ results << BenchmarkResult.new(
108
+ :name => "train_dict_fast_cover",
109
+ :compression_ratio => ratio_fast_cover,
110
+ :memory_bytes => dict_fast_cover.bytesize,
111
+ "Training time" => "#{fast_cover_time.real.round(3)}s",
112
+ "Compressed" => Formatter.format_bytes(compressed_fast_cover.bytesize)
113
+ )
114
+
115
+ # Compare compression across multiple samples
116
+ puts "\n"
117
+ Formatter.section("Compression effectiveness across test samples")
118
+
119
+ test_samples = (101..110).map { |i| samples[i] }
120
+
121
+ [:fast, :cover, :fast_cover].each do |dict_type|
122
+ dict_data = case dict_type
123
+ when :fast then dict_fast
124
+ when :cover then dict_cover
125
+ when :fast_cover then dict_fast_cover
126
+ end
127
+
128
+ cdict = VibeZstd::CDict.new(dict_data)
129
+ total_original = 0
130
+ total_compressed = 0
131
+
132
+ test_samples.each do |sample|
133
+ total_original += sample.bytesize
134
+ compressed = VibeZstd.compress(sample, dict: cdict)
135
+ total_compressed += compressed.bytesize
136
+ end
137
+
138
+ avg_ratio = total_original.to_f / total_compressed
139
+ puts " #{dict_type}: #{Formatter.format_ratio(avg_ratio)} average ratio"
140
+ end
141
+
142
+ # Test dictionary sizes
143
+ puts "\n"
144
+ Formatter.section("Dictionary size impact")
145
+
146
+ sizes = [4096, 8192, 16384, 32768]
147
+ size_results = []
148
+
149
+ sizes.each do |size|
150
+ dict = VibeZstd.train_dict(samples, max_dict_size: size)
151
+ cdict = VibeZstd::CDict.new(dict)
152
+ compressed = VibeZstd.compress(test_sample, dict: cdict)
153
+ ratio = test_sample.bytesize.to_f / compressed.bytesize
154
+
155
+ size_results << {
156
+ "Dict Size" => Formatter.format_bytes(size),
157
+ "Actual Size" => Formatter.format_bytes(dict.bytesize),
158
+ "Ratio" => Formatter.format_ratio(ratio),
159
+ "Compressed" => Formatter.format_bytes(compressed.bytesize)
160
+ }
161
+ end
162
+
163
+ Formatter.table(size_results)
164
+ end
165
+
166
+ puts "\n💡 Dictionary Training Recommendations:"
167
+ puts " train_dict (default):"
168
+ puts " ✓ Fastest training"
169
+ puts " ✓ Good enough for most use cases"
170
+ puts " ✓ Use when training time matters"
171
+ puts "\n train_dict_cover:"
172
+ puts " ✓ Best compression ratios"
173
+ puts " ✓ Slower training (2-10x slower)"
174
+ puts " ✓ Use for production dictionaries"
175
+ puts "\n train_dict_fast_cover:"
176
+ puts " ✓ Balanced speed/quality"
177
+ puts " ✓ Configurable with accel parameter"
178
+ puts " ✓ Good default for most users"
179
+ puts "\n Dictionary size:"
180
+ puts " - Larger = better compression (diminishing returns > 64KB)"
181
+ puts " - Typical: 16KB-64KB for small messages"
182
+ puts " - Memory overhead: ~2x dictionary size in memory"
@@ -0,0 +1,121 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require_relative "helpers"
5
+
6
+ # Benchmark: Dictionary Usage Performance
7
+ # Demonstrates compression ratio and speed improvements when using trained dictionaries
8
+
9
+ BenchmarkHelpers.run_comparison(title: "Dictionary Usage Performance Comparison") do |results|
10
+ # Load the trained dictionary
11
+ dict_path = File.join(__dir__, "..", "test", "fixtures", "sample.dict")
12
+ unless File.exist?(dict_path)
13
+ puts "⚠️ Dictionary fixture not found. Run: ruby benchmark/generate_fixture.rb"
14
+ exit 1
15
+ end
16
+
17
+ dict_data = File.binread(dict_path)
18
+ cdict = VibeZstd::CDict.new(dict_data)
19
+ ddict = VibeZstd::DDict.new(dict_data)
20
+
21
+ puts "Dictionary size: #{Formatter.format_bytes(dict_data.bytesize)}"
22
+ puts "Dictionary ID: #{VibeZstd.get_dict_id(dict_data)}\n\n"
23
+
24
+ # Generate test data (similar to training data for best dictionary performance)
25
+ test_samples = 100.times.map do |i|
26
+ {
27
+ id: i + 1000,
28
+ name: "User #{i + 1000}",
29
+ email: "user#{i + 1000}@example.com",
30
+ created_at: Time.now.to_i,
31
+ status: %w[active pending inactive verified][rand(4)],
32
+ preferences: {
33
+ theme: %w[light dark auto][rand(3)],
34
+ notifications: rand(2) == 1,
35
+ language: %w[en es fr de][rand(4)]
36
+ },
37
+ metadata: {
38
+ login_count: rand(1000),
39
+ last_ip: "10.0.#{rand(255)}.#{rand(255)}",
40
+ session_duration: rand(3600)
41
+ }
42
+ }.to_json
43
+ end
44
+
45
+ # Benchmark without dictionary
46
+ Formatter.section("Testing: Compression without dictionary")
47
+ cctx = VibeZstd::CCtx.new
48
+ dctx = VibeZstd::DCtx.new
49
+
50
+ compressed_sizes_no_dict = []
51
+ no_dict_time = Benchmark.measure do
52
+ test_samples.each do |sample|
53
+ compressed = cctx.compress(sample)
54
+ compressed_sizes_no_dict << compressed.bytesize
55
+ dctx.decompress(compressed)
56
+ end
57
+ end
58
+
59
+ no_dict_ops_per_sec = test_samples.size / no_dict_time.real
60
+ avg_compressed_no_dict = compressed_sizes_no_dict.sum / compressed_sizes_no_dict.size.to_f
61
+ puts "Completed #{test_samples.size} operations in #{no_dict_time.real.round(3)}s"
62
+
63
+ # Benchmark with dictionary
64
+ Formatter.section("Testing: Compression with dictionary")
65
+
66
+ compressed_sizes_with_dict = []
67
+ with_dict_time = Benchmark.measure do
68
+ test_samples.each do |sample|
69
+ compressed = cctx.compress(sample, dict: cdict)
70
+ compressed_sizes_with_dict << compressed.bytesize
71
+ dctx.decompress(compressed, dict: ddict)
72
+ end
73
+ end
74
+
75
+ with_dict_ops_per_sec = test_samples.size / with_dict_time.real
76
+ avg_compressed_with_dict = compressed_sizes_with_dict.sum / compressed_sizes_with_dict.size.to_f
77
+ puts "Completed #{test_samples.size} operations in #{with_dict_time.real.round(3)}s"
78
+
79
+ # Calculate compression ratios
80
+ avg_original_size = test_samples.map(&:bytesize).sum / test_samples.size.to_f
81
+ compression_ratio_no_dict = avg_original_size / avg_compressed_no_dict
82
+ compression_ratio_with_dict = avg_original_size / avg_compressed_with_dict
83
+
84
+ # Memory estimates
85
+ cdict_memory = Memory.estimate_cdict(dict_data.bytesize)
86
+ ddict_memory = Memory.estimate_ddict(dict_data.bytesize)
87
+ dict_memory_overhead = cdict_memory + ddict_memory
88
+
89
+ # Collect results
90
+ results << BenchmarkResult.new(
91
+ :name => "Without dictionary",
92
+ :iterations_per_sec => no_dict_ops_per_sec,
93
+ :compression_ratio => compression_ratio_no_dict,
94
+ :memory_bytes => Memory.estimate_cctx + Memory.estimate_dctx,
95
+ "Avg compressed size" => Formatter.format_bytes(avg_compressed_no_dict.to_i)
96
+ )
97
+
98
+ results << BenchmarkResult.new(
99
+ :name => "With dictionary",
100
+ :iterations_per_sec => with_dict_ops_per_sec,
101
+ :compression_ratio => compression_ratio_with_dict,
102
+ :memory_bytes => Memory.estimate_cctx + Memory.estimate_dctx + dict_memory_overhead,
103
+ "Avg compressed size" => Formatter.format_bytes(avg_compressed_with_dict.to_i)
104
+ )
105
+
106
+ puts "\n📊 Detailed Statistics:"
107
+ puts " Average original size: #{Formatter.format_bytes(avg_original_size.to_i)}"
108
+ puts " Average compressed (no dict): #{Formatter.format_bytes(avg_compressed_no_dict.to_i)}"
109
+ puts " Average compressed (with dict): #{Formatter.format_bytes(avg_compressed_with_dict.to_i)}"
110
+ puts " Compression improvement: #{((avg_compressed_no_dict - avg_compressed_with_dict) / avg_compressed_no_dict * 100).round(1)}% smaller"
111
+ puts "\n💾 Memory Overhead:"
112
+ puts " Dictionary in memory: #{Formatter.format_bytes(dict_memory_overhead)}"
113
+ puts " Dictionary on disk: #{Formatter.format_bytes(dict_data.bytesize)}"
114
+ end
115
+
116
+ puts "\n💡 When to use dictionaries:"
117
+ puts " ✓ Small, similar data (JSON, logs, etc.)"
118
+ puts " ✓ Many small messages with repeated patterns"
119
+ puts " ✓ When compression ratio is more important than speed"
120
+ puts " ✗ Large files (> 1MB each)"
121
+ puts " ✗ Highly variable data with no patterns"
@@ -0,0 +1,157 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Quick benchmark script to generate results for README
5
+
6
+ require_relative "helpers"
7
+
8
+ puts "# Performance Benchmarks\n\n"
9
+ puts "Results from Ruby #{RUBY_VERSION} on #{RUBY_PLATFORM}, Zstd #{VibeZstd.version_string}\n\n"
10
+
11
+ # 1. Context Reuse
12
+ puts "## Context Reuse Performance\n\n"
13
+ puts "Reusing compression/decompression contexts vs creating new ones (5000 iterations each):\n\n"
14
+
15
+ # Test with different data sizes
16
+ test_cases = {
17
+ "1KB" => DataGenerator.json_data(count: 5),
18
+ "10KB" => DataGenerator.json_data(count: 50),
19
+ "100KB" => DataGenerator.json_data(count: 500)
20
+ }
21
+
22
+ puts "| Data Size | New Context | Reused Context | Speedup |"
23
+ puts "|-----------|-------------|----------------|---------|"
24
+
25
+ test_cases.each do |size_label, test_data|
26
+ iterations = 5000
27
+
28
+ new_ctx_time = Benchmark.measure do
29
+ iterations.times do
30
+ cctx = VibeZstd::CCtx.new
31
+ dctx = VibeZstd::DCtx.new
32
+ compressed = cctx.compress(test_data)
33
+ dctx.decompress(compressed)
34
+ end
35
+ end
36
+
37
+ reused_time = Benchmark.measure do
38
+ cctx = VibeZstd::CCtx.new
39
+ dctx = VibeZstd::DCtx.new
40
+ iterations.times do
41
+ compressed = cctx.compress(test_data)
42
+ dctx.decompress(compressed)
43
+ end
44
+ end
45
+
46
+ new_ops = iterations / new_ctx_time.real
47
+ reused_ops = iterations / reused_time.real
48
+ speedup = reused_ops / new_ops
49
+
50
+ puts "| #{size_label} | #{Formatter.format_number(new_ops.to_i)} ops/s | #{Formatter.format_number(reused_ops.to_i)} ops/s | #{speedup.round(2)}x |"
51
+ end
52
+
53
+ cctx_mem = Memory.estimate_cctx(3)
54
+ dctx_mem = Memory.estimate_dctx
55
+ total_mem = cctx_mem + dctx_mem
56
+
57
+ puts "\n**Memory savings:** Reusing contexts saves #{Formatter.format_bytes(total_mem * 4999)} for 5000 operations (99.98% reduction)\n"
58
+ puts "**Recommendation:** Always reuse CCtx/DCtx instances for multiple operations.\n\n"
59
+
60
+ # 2. Dictionary Performance
61
+ puts "## Dictionary Compression\n\n"
62
+ puts "Compression with vs without trained dictionaries (100 JSON samples):\n\n"
63
+
64
+ dict_path = File.join(__dir__, "..", "test", "fixtures", "sample.dict")
65
+ dict_data = File.binread(dict_path)
66
+ cdict = VibeZstd::CDict.new(dict_data)
67
+ VibeZstd::DDict.new(dict_data)
68
+
69
+ test_sample = {
70
+ id: 999,
71
+ name: "Test User",
72
+ email: "test@example.com",
73
+ status: "active",
74
+ preferences: {theme: "dark", notifications: true}
75
+ }.to_json
76
+
77
+ compressed_no_dict = VibeZstd.compress(test_sample)
78
+ compressed_with_dict = VibeZstd.compress(test_sample, dict: cdict)
79
+
80
+ ratio_no_dict = test_sample.bytesize.to_f / compressed_no_dict.bytesize
81
+ ratio_with_dict = test_sample.bytesize.to_f / compressed_with_dict.bytesize
82
+
83
+ puts "| Method | Compressed Size | Ratio | Improvement |"
84
+ puts "|--------|----------------|-------|-------------|"
85
+ puts "| Without dictionary | #{compressed_no_dict.bytesize}B | #{ratio_no_dict.round(2)}x | - |"
86
+ puts "| With dictionary (16KB) | #{compressed_with_dict.bytesize}B | #{ratio_with_dict.round(2)}x | #{((compressed_no_dict.bytesize - compressed_with_dict.bytesize).to_f / compressed_no_dict.bytesize * 100).round(1)}% smaller |"
87
+ puts "\nOriginal size: #{test_sample.bytesize} bytes\n\n"
88
+
89
+ # 3. Compression Levels
90
+ puts "## Compression Levels\n\n"
91
+ puts "Speed vs compression ratio trade-offs:\n\n"
92
+
93
+ large_data = DataGenerator.mixed_data(size: 50_000)
94
+ levels = [-1, 1, 3, 9, 19]
95
+
96
+ puts "| Level | Ratio | Speed (ops/sec) | Memory | Use Case |"
97
+ puts "|-------|-------|-----------------|--------|----------|"
98
+
99
+ levels.each do |level|
100
+ cctx = VibeZstd::CCtx.new
101
+
102
+ compressed = nil
103
+ time = Benchmark.measure do
104
+ 10.times { compressed = cctx.compress(large_data, level: level) }
105
+ end
106
+
107
+ ops_per_sec = 10 / time.real
108
+ ratio = large_data.bytesize.to_f / compressed.bytesize
109
+ memory = Memory.estimate_cctx(level)
110
+
111
+ use_case = case level
112
+ when -1 then "Ultra-fast, real-time"
113
+ when 1 then "Fast, high-throughput"
114
+ when 3 then "Balanced (default)"
115
+ when 9 then "Better compression"
116
+ when 19 then "Maximum compression"
117
+ end
118
+
119
+ puts "| #{level} | #{ratio.round(2)}x | #{Formatter.format_number(ops_per_sec.to_i)} | #{Formatter.format_bytes(memory)} | #{use_case} |"
120
+ end
121
+
122
+ puts "\n"
123
+
124
+ # 4. Multi-threading
125
+ puts "## Multi-threading Performance\n\n"
126
+ puts "Compression speedup with multiple workers (500KB data):\n\n"
127
+
128
+ mt_data = DataGenerator.mixed_data(size: 500_000)
129
+
130
+ puts "| Workers | Throughput | Speedup | Efficiency |"
131
+ puts "|---------|------------|---------|------------|"
132
+
133
+ baseline_throughput = nil
134
+
135
+ [0, 2, 4].each do |workers|
136
+ cctx = VibeZstd::CCtx.new
137
+ cctx.nb_workers = workers if workers > 0
138
+
139
+ cctx.compress(mt_data) # warmup
140
+
141
+ time = Benchmark.measure do
142
+ 5.times { cctx.compress(mt_data) }
143
+ end
144
+
145
+ throughput = (mt_data.bytesize * 5 / time.real)
146
+
147
+ if workers == 0
148
+ baseline_throughput = throughput
149
+ puts "| #{workers} (single) | #{Formatter.format_bytes(throughput.to_i)}/s | 1.0x | 100% |"
150
+ else
151
+ speedup = throughput / baseline_throughput
152
+ efficiency = (speedup / workers * 100).round(0)
153
+ puts "| #{workers} | #{Formatter.format_bytes(throughput.to_i)}/s | #{speedup.round(2)}x | #{efficiency}% |"
154
+ end
155
+ end
156
+
157
+ puts "\n**Note:** Multi-threading is most effective for data > 256KB. Diminishing returns after 4 workers.\n"
@@ -0,0 +1,82 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Script to generate a trained dictionary fixture for testing and benchmarking
5
+
6
+ $LOAD_PATH.unshift File.expand_path("../lib", __dir__)
7
+ require "vibe_zstd"
8
+ require "json"
9
+ require "fileutils"
10
+
11
+ # Create fixtures directory
12
+ FIXTURES_DIR = File.join(__dir__, "..", "test", "fixtures")
13
+ FileUtils.mkdir_p(FIXTURES_DIR)
14
+
15
+ # Generate training samples (JSON-like data similar to web application data)
16
+ puts "Generating training samples..."
17
+ samples = 200.times.map do |i|
18
+ {
19
+ id: i,
20
+ name: "User #{i}",
21
+ email: "user#{i}@example.com",
22
+ created_at: Time.now.to_i - rand(100000),
23
+ status: %w[active pending inactive verified][rand(4)],
24
+ preferences: {
25
+ theme: %w[light dark auto][rand(3)],
26
+ notifications: rand(2) == 1,
27
+ language: %w[en es fr de][rand(4)]
28
+ },
29
+ metadata: {
30
+ login_count: rand(1000),
31
+ last_ip: "192.168.#{rand(255)}.#{rand(255)}",
32
+ user_agent: "Mozilla/5.0 (compatible; Bot/1.0)",
33
+ session_duration: rand(3600)
34
+ },
35
+ tags: rand(5).times.map { |t| "tag#{t}" }
36
+ }.to_json
37
+ end
38
+
39
+ puts "Training dictionary with #{samples.size} samples..."
40
+ dict_data = VibeZstd.train_dict(samples, max_dict_size: 16 * 1024) # 16KB dictionary
41
+
42
+ # Save the dictionary
43
+ dict_path = File.join(FIXTURES_DIR, "sample.dict")
44
+ File.binwrite(dict_path, dict_data)
45
+
46
+ puts "\nDictionary saved to: #{dict_path}"
47
+ puts "Dictionary size: #{dict_data.bytesize} bytes"
48
+ puts "Dictionary ID: #{VibeZstd.get_dict_id(dict_data)}"
49
+
50
+ # Test the dictionary
51
+ puts "\nTesting dictionary effectiveness..."
52
+ test_data = {
53
+ id: 999,
54
+ name: "Test User",
55
+ email: "test@example.com",
56
+ status: "active",
57
+ preferences: {theme: "dark", notifications: true, language: "en"},
58
+ metadata: {login_count: 42, last_ip: "192.168.1.1"}
59
+ }.to_json
60
+
61
+ # Compress without dictionary
62
+ compressed_no_dict = VibeZstd.compress(test_data)
63
+
64
+ # Compress with dictionary
65
+ cdict = VibeZstd::CDict.new(dict_data)
66
+ compressed_with_dict = VibeZstd.compress(test_data, dict: cdict)
67
+
68
+ puts "\nCompression comparison:"
69
+ puts " Original size: #{test_data.bytesize} bytes"
70
+ puts " Without dictionary: #{compressed_no_dict.bytesize} bytes (#{(test_data.bytesize.to_f / compressed_no_dict.bytesize).round(2)}x)"
71
+ puts " With dictionary: #{compressed_with_dict.bytesize} bytes (#{(test_data.bytesize.to_f / compressed_with_dict.bytesize).round(2)}x)"
72
+ puts " Dictionary improvement: #{((compressed_no_dict.bytesize - compressed_with_dict.bytesize).to_f / compressed_no_dict.bytesize * 100).round(1)}% smaller"
73
+
74
+ # Verify decompression works
75
+ ddict = VibeZstd::DDict.new(dict_data)
76
+ decompressed = VibeZstd.decompress(compressed_with_dict, dict: ddict)
77
+ if decompressed == test_data
78
+ puts "\n✓ Dictionary verification successful!"
79
+ else
80
+ puts "\n✗ Dictionary verification FAILED!"
81
+ exit 1
82
+ end