vibe_zstd 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. checksums.yaml +7 -0
  2. data/.standard.yml +3 -0
  3. data/CHANGELOG.md +22 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +978 -0
  6. data/Rakefile +20 -0
  7. data/benchmark/README.md +198 -0
  8. data/benchmark/compression_levels.rb +99 -0
  9. data/benchmark/context_reuse.rb +174 -0
  10. data/benchmark/decompression_speed_by_level.rb +65 -0
  11. data/benchmark/dictionary_training.rb +182 -0
  12. data/benchmark/dictionary_usage.rb +121 -0
  13. data/benchmark/for_readme.rb +157 -0
  14. data/benchmark/generate_fixture.rb +82 -0
  15. data/benchmark/helpers.rb +237 -0
  16. data/benchmark/multithreading.rb +105 -0
  17. data/benchmark/run_all.rb +150 -0
  18. data/benchmark/streaming.rb +154 -0
  19. data/ext/vibe_zstd/Makefile +270 -0
  20. data/ext/vibe_zstd/cctx.c +565 -0
  21. data/ext/vibe_zstd/dctx.c +493 -0
  22. data/ext/vibe_zstd/dict.c +587 -0
  23. data/ext/vibe_zstd/extconf.rb +52 -0
  24. data/ext/vibe_zstd/frames.c +132 -0
  25. data/ext/vibe_zstd/libzstd/LICENSE +30 -0
  26. data/ext/vibe_zstd/libzstd/common/allocations.h +55 -0
  27. data/ext/vibe_zstd/libzstd/common/bits.h +205 -0
  28. data/ext/vibe_zstd/libzstd/common/bitstream.h +454 -0
  29. data/ext/vibe_zstd/libzstd/common/compiler.h +464 -0
  30. data/ext/vibe_zstd/libzstd/common/cpu.h +249 -0
  31. data/ext/vibe_zstd/libzstd/common/debug.c +30 -0
  32. data/ext/vibe_zstd/libzstd/common/debug.h +107 -0
  33. data/ext/vibe_zstd/libzstd/common/entropy_common.c +340 -0
  34. data/ext/vibe_zstd/libzstd/common/error_private.c +64 -0
  35. data/ext/vibe_zstd/libzstd/common/error_private.h +158 -0
  36. data/ext/vibe_zstd/libzstd/common/fse.h +625 -0
  37. data/ext/vibe_zstd/libzstd/common/fse_decompress.c +315 -0
  38. data/ext/vibe_zstd/libzstd/common/huf.h +277 -0
  39. data/ext/vibe_zstd/libzstd/common/mem.h +422 -0
  40. data/ext/vibe_zstd/libzstd/common/pool.c +371 -0
  41. data/ext/vibe_zstd/libzstd/common/pool.h +81 -0
  42. data/ext/vibe_zstd/libzstd/common/portability_macros.h +171 -0
  43. data/ext/vibe_zstd/libzstd/common/threading.c +182 -0
  44. data/ext/vibe_zstd/libzstd/common/threading.h +142 -0
  45. data/ext/vibe_zstd/libzstd/common/xxhash.c +18 -0
  46. data/ext/vibe_zstd/libzstd/common/xxhash.h +7094 -0
  47. data/ext/vibe_zstd/libzstd/common/zstd_common.c +48 -0
  48. data/ext/vibe_zstd/libzstd/common/zstd_deps.h +123 -0
  49. data/ext/vibe_zstd/libzstd/common/zstd_internal.h +324 -0
  50. data/ext/vibe_zstd/libzstd/common/zstd_trace.h +156 -0
  51. data/ext/vibe_zstd/libzstd/compress/clevels.h +134 -0
  52. data/ext/vibe_zstd/libzstd/compress/fse_compress.c +625 -0
  53. data/ext/vibe_zstd/libzstd/compress/hist.c +191 -0
  54. data/ext/vibe_zstd/libzstd/compress/hist.h +82 -0
  55. data/ext/vibe_zstd/libzstd/compress/huf_compress.c +1464 -0
  56. data/ext/vibe_zstd/libzstd/compress/zstd_compress.c +7843 -0
  57. data/ext/vibe_zstd/libzstd/compress/zstd_compress_internal.h +1636 -0
  58. data/ext/vibe_zstd/libzstd/compress/zstd_compress_literals.c +235 -0
  59. data/ext/vibe_zstd/libzstd/compress/zstd_compress_literals.h +39 -0
  60. data/ext/vibe_zstd/libzstd/compress/zstd_compress_sequences.c +442 -0
  61. data/ext/vibe_zstd/libzstd/compress/zstd_compress_sequences.h +55 -0
  62. data/ext/vibe_zstd/libzstd/compress/zstd_compress_superblock.c +688 -0
  63. data/ext/vibe_zstd/libzstd/compress/zstd_compress_superblock.h +32 -0
  64. data/ext/vibe_zstd/libzstd/compress/zstd_cwksp.h +765 -0
  65. data/ext/vibe_zstd/libzstd/compress/zstd_double_fast.c +778 -0
  66. data/ext/vibe_zstd/libzstd/compress/zstd_double_fast.h +42 -0
  67. data/ext/vibe_zstd/libzstd/compress/zstd_fast.c +985 -0
  68. data/ext/vibe_zstd/libzstd/compress/zstd_fast.h +30 -0
  69. data/ext/vibe_zstd/libzstd/compress/zstd_lazy.c +2199 -0
  70. data/ext/vibe_zstd/libzstd/compress/zstd_lazy.h +193 -0
  71. data/ext/vibe_zstd/libzstd/compress/zstd_ldm.c +745 -0
  72. data/ext/vibe_zstd/libzstd/compress/zstd_ldm.h +109 -0
  73. data/ext/vibe_zstd/libzstd/compress/zstd_ldm_geartab.h +106 -0
  74. data/ext/vibe_zstd/libzstd/compress/zstd_opt.c +1580 -0
  75. data/ext/vibe_zstd/libzstd/compress/zstd_opt.h +72 -0
  76. data/ext/vibe_zstd/libzstd/compress/zstd_preSplit.c +238 -0
  77. data/ext/vibe_zstd/libzstd/compress/zstd_preSplit.h +33 -0
  78. data/ext/vibe_zstd/libzstd/compress/zstdmt_compress.c +1923 -0
  79. data/ext/vibe_zstd/libzstd/compress/zstdmt_compress.h +102 -0
  80. data/ext/vibe_zstd/libzstd/decompress/huf_decompress.c +1944 -0
  81. data/ext/vibe_zstd/libzstd/decompress/huf_decompress_amd64.S +602 -0
  82. data/ext/vibe_zstd/libzstd/decompress/zstd_ddict.c +244 -0
  83. data/ext/vibe_zstd/libzstd/decompress/zstd_ddict.h +44 -0
  84. data/ext/vibe_zstd/libzstd/decompress/zstd_decompress.c +2410 -0
  85. data/ext/vibe_zstd/libzstd/decompress/zstd_decompress_block.c +2209 -0
  86. data/ext/vibe_zstd/libzstd/decompress/zstd_decompress_block.h +73 -0
  87. data/ext/vibe_zstd/libzstd/decompress/zstd_decompress_internal.h +240 -0
  88. data/ext/vibe_zstd/libzstd/deprecated/zbuff.h +214 -0
  89. data/ext/vibe_zstd/libzstd/deprecated/zbuff_common.c +26 -0
  90. data/ext/vibe_zstd/libzstd/deprecated/zbuff_compress.c +167 -0
  91. data/ext/vibe_zstd/libzstd/deprecated/zbuff_decompress.c +77 -0
  92. data/ext/vibe_zstd/libzstd/dictBuilder/cover.c +1302 -0
  93. data/ext/vibe_zstd/libzstd/dictBuilder/cover.h +152 -0
  94. data/ext/vibe_zstd/libzstd/dictBuilder/divsufsort.c +1913 -0
  95. data/ext/vibe_zstd/libzstd/dictBuilder/divsufsort.h +57 -0
  96. data/ext/vibe_zstd/libzstd/dictBuilder/fastcover.c +766 -0
  97. data/ext/vibe_zstd/libzstd/dictBuilder/zdict.c +1133 -0
  98. data/ext/vibe_zstd/libzstd/zdict.h +481 -0
  99. data/ext/vibe_zstd/libzstd/zstd.h +3198 -0
  100. data/ext/vibe_zstd/libzstd/zstd_errors.h +107 -0
  101. data/ext/vibe_zstd/streaming.c +410 -0
  102. data/ext/vibe_zstd/vibe_zstd.c +293 -0
  103. data/ext/vibe_zstd/vibe_zstd.h +56 -0
  104. data/ext/vibe_zstd/vibe_zstd_internal.h +27 -0
  105. data/lib/vibe_zstd/constants.rb +67 -0
  106. data/lib/vibe_zstd/version.rb +5 -0
  107. data/lib/vibe_zstd.rb +255 -0
  108. data/sig/vibe_zstd.rbs +76 -0
  109. metadata +179 -0
data/benchmark/helpers.rb
@@ -0,0 +1,237 @@
+ # frozen_string_literal: true
+
+ $LOAD_PATH.unshift File.expand_path("../lib", __dir__)
+ require "vibe_zstd"
+ require "json"
+ require "benchmark" # Built-in Ruby module
+
+ # Try to load optional dependencies (gracefully handle missing gems)
+ HAS_BENCHMARK_DEPS = begin
+   # Don't use bundler/setup - it's too strict. Just try to require the gems.
+   require "benchmark/ips"
+   require "terminal-table"
+   true
+ rescue LoadError
+   puts "āš ļø Optional benchmark dependencies not installed."
+   puts " Run: bundle install"
+   puts " Continuing with basic benchmark output...\n\n"
+   false
+ end
+
+ module BenchmarkHelpers
+   # Data generators for realistic test scenarios
+   module DataGenerator
+     # Generate JSON-like data (common in web apps)
+     def self.json_data(count: 100)
+       records = count.times.map do |i|
+         {
+           id: i,
+           name: "User #{i}",
+           email: "user#{i}@example.com",
+           created_at: Time.now.to_i - rand(100000),
+           status: %w[active pending inactive][rand(3)],
+           metadata: {
+             login_count: rand(1000),
+             last_ip: "192.168.1.#{rand(255)}"
+           }
+         }
+       end
+       records.map(&:to_json).join("\n")
+     end
+
+     # Generate log-like data (common in logging scenarios)
+     def self.log_data(count: 100)
+       levels = %w[INFO WARN ERROR DEBUG]
+       messages = [
+         "Request processed successfully",
+         "Database query took %dms",
+         "Cache miss for key: user_%d",
+         "Connection established to %s",
+         "Background job completed"
+       ]
+
+       count.times.map do |i|
+         level = levels[rand(levels.length)]
+         message = messages[rand(messages.length)]
+         message = format(message, rand(1000)) if message.include?("%")
+         timestamp = Time.now.to_i - rand(100000)
+         "[#{timestamp}] #{level}: #{message}"
+       end.join("\n")
+     end
+
+     # Generate data with repeated patterns (good for LDM testing)
+     def self.repeated_pattern_data(pattern_size: 1000, repetitions: 10, unique_middle: 500)
+       pattern = "A" * pattern_size
+       middle = "B" * unique_middle
+       (pattern * repetitions) + middle + (pattern * repetitions)
+     end
+
+     # Generate random data (worst case for compression)
+     def self.random_data(size: 10_000)
+       size.times.map { rand(256).chr }.join
+     end
+
+     # Generate highly compressible data
+     def self.compressible_data(size: 10_000)
+       "a" * size
+     end
+
+     # Mixed realistic data
+     def self.mixed_data(size: 10_000)
+       json_data(count: size / 100) + "\n" + log_data(count: size / 100)
+     end
+   end
+
+   # Formatting helpers for nice output
+   module Formatter
+     def self.header(title)
+       puts "\n#{"=" * 80}"
+       puts title.center(80)
+       puts "#{"=" * 80}\n\n"
+     end
+
+     def self.section(title)
+       puts "\n#{title}"
+       puts "-" * title.length
+     end
+
+     def self.table(data, title: nil)
+       puts "\n## #{title}\n" if title
+
+       # Create markdown table
+       if data.is_a?(Array) && data.first.is_a?(Hash)
+         headers = data.first.keys
+         rows = data.map(&:values)
+
+         if defined?(Terminal::Table)
+           table = Terminal::Table.new(headings: headers, rows: rows)
+           puts table
+           puts
+         end
+
+         # Print markdown version (always, for README)
+         puts "Markdown table (for README):" if defined?(Terminal::Table)
+         puts "| #{headers.join(" | ")} |"
+         puts "| #{headers.map { "---" }.join(" | ")} |"
+         rows.each do |row|
+           puts "| #{row.join(" | ")} |"
+         end
+       end
+       puts
+     end
+
+     def self.markdown_table(data, title: nil)
+       puts "\n## #{title}\n" if title
+
+       if data.is_a?(Array) && data.first.is_a?(Hash)
+         headers = data.first.keys
+         rows = data.map(&:values)
+
+         puts "| #{headers.join(" | ")} |"
+         puts "| #{headers.map { "---" }.join(" | ")} |"
+         rows.each do |row|
+           puts "| #{row.join(" | ")} |"
+         end
+       end
+       puts
+     end
+
+     def self.format_bytes(bytes)
+       if bytes < 1024
+         "#{bytes}B"
+       elsif bytes < 1024 * 1024
+         "#{(bytes / 1024.0).round(1)}KB"
+       else
+         "#{(bytes / (1024.0 * 1024)).round(2)}MB"
+       end
+     end
+
+     def self.format_number(num)
+       num.to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
+     end
+
+     def self.format_ratio(ratio)
+       "#{ratio.round(2)}x"
+     end
+   end
+
+   # Memory tracking utilities
+   module Memory
+     def self.estimate_cctx(level = 3)
+       VibeZstd::CCtx.estimate_memory(level)
+     end
+
+     def self.estimate_dctx
+       VibeZstd::DCtx.estimate_memory
+     end
+
+     def self.estimate_cdict(dict_size, level = 3)
+       VibeZstd::CDict.estimate_memory(dict_size, level)
+     end
+
+     def self.estimate_ddict(dict_size)
+       VibeZstd::DDict.estimate_memory(dict_size)
+     end
+
+     def self.current_memory_usage
+       # Try to get actual memory usage (platform-specific)
+       if RUBY_PLATFORM.match?(/darwin/)
+         # macOS
+         `ps -o rss= -p #{Process.pid}`.to_i * 1024
+       elsif RUBY_PLATFORM.match?(/linux/)
+         # Linux
+         File.read("/proc/#{Process.pid}/statm").split[1].to_i * 4096
+       else
+         # Fallback
+         0
+       end
+     end
+   end
+
+   # Benchmark result tracking
+   class BenchmarkResult
+     attr_reader :name, :iterations_per_sec, :memory_bytes, :compression_ratio, :extra_data
+
+     def initialize(name:, iterations_per_sec: nil, memory_bytes: nil, compression_ratio: nil, **extra_data)
+       @name = name
+       @iterations_per_sec = iterations_per_sec
+       @memory_bytes = memory_bytes
+       @compression_ratio = compression_ratio
+       @extra_data = extra_data
+     end
+
+     def to_h
+       h = {"Method" => name}
+       h["Speed"] = "#{Formatter.format_number(iterations_per_sec.to_i)} ops/sec" if iterations_per_sec
+       h["Memory"] = Formatter.format_bytes(memory_bytes) if memory_bytes
+       h["Ratio"] = Formatter.format_ratio(compression_ratio) if compression_ratio
+       h.merge!(extra_data.transform_values { |v| v.is_a?(Numeric) ? Formatter.format_number(v.to_i) : v.to_s })
+       h
+     end
+   end
+
+   # Helper to run a benchmark and collect results
+   def self.run_comparison(title:, &block)
+     Formatter.header(title)
+     results = []
+
+     yield results
+
+     # Display results
+     if results.any?
+       Formatter.table(results.map(&:to_h), title: "Results")
+
+       # Calculate speedups if we have iterations_per_sec
+       if results.all? { |r| r.iterations_per_sec }
+         baseline = results.first.iterations_per_sec
+         puts "\nRelative Performance:"
+         results.each do |result|
+           speedup = result.iterations_per_sec / baseline
+           puts " #{result.name}: #{Formatter.format_ratio(speedup)}"
+         end
+       end
+     end
+
+     results
+   end
+ end
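A rough sketch of how a driver script is expected to use the harness above, mirroring the pattern of the benchmark files further down. It assumes a script placed next to helpers.rb, and it qualifies the helper constants with BenchmarkHelpers:: since the module is not included at the top level; the payload size, iteration count, and result fields are illustrative.

# Hypothetical driver script (e.g. benchmark/example.rb); everything outside
# the BenchmarkHelpers::/VibeZstd calls shown in this diff is illustrative.
require_relative "helpers"

data = BenchmarkHelpers::DataGenerator.json_data(count: 50)

BenchmarkHelpers.run_comparison(title: "Example: one-shot compression") do |results|
  time = Benchmark.measure { 20.times { VibeZstd.compress(data) } }

  results << BenchmarkHelpers::BenchmarkResult.new(
    name: "VibeZstd.compress",
    iterations_per_sec: 20 / time.real,
    compression_ratio: data.bytesize.to_f / VibeZstd.compress(data).bytesize,
    memory_bytes: BenchmarkHelpers::Memory.estimate_cctx(3)
  )
end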
data/benchmark/multithreading.rb
@@ -0,0 +1,105 @@
+ #!/usr/bin/env ruby
+ # frozen_string_literal: true
+
+ require_relative "helpers"
+
+ # Benchmark: Multi-threaded Compression Performance
+ # Demonstrates the performance impact of using multiple worker threads
+
+ BenchmarkHelpers.run_comparison(title: "Multi-threaded Compression Performance") do |results|
+   # Generate large test data (multi-threading only helps with larger data)
+   large_data = DataGenerator.mixed_data(size: 500_000)
+   puts "Test data size: #{Formatter.format_bytes(large_data.bytesize)}"
+   puts "CPU cores available: #{begin
+     `sysctl -n hw.ncpu`.strip
+   rescue
+     "unknown"
+   end}\n\n"
+
+   # Test with different worker counts
+   worker_counts = [0, 1, 2, 4, 8]
+
+   worker_counts.each do |workers|
+     Formatter.section("Testing: #{workers} worker#{(workers == 1) ? "" : "s"} #{(workers == 0) ? "(single-threaded)" : ""}")
+
+     cctx = VibeZstd::CCtx.new
+     cctx.nb_workers = workers if workers > 0
+
+     # Warm up
+     cctx.compress(large_data)
+
+     # Benchmark compression
+     compressed = nil
+     time = Benchmark.measure do
+       10.times { compressed = cctx.compress(large_data) }
+     end
+
+     ops_per_sec = 10 / time.real
+     compression_ratio = large_data.bytesize.to_f / compressed.bytesize
+     memory = Memory.estimate_cctx(3)
+
+     puts "Completed 10 iterations in #{time.real.round(3)}s"
+     puts "Throughput: #{Formatter.format_bytes((large_data.bytesize * 10 / time.real).to_i)}/sec"
+
+     results << BenchmarkResult.new(
+       :name => "#{workers} worker#{"s" unless workers == 1}",
+       :iterations_per_sec => ops_per_sec,
+       :compression_ratio => compression_ratio,
+       :memory_bytes => memory,
+       "Throughput" => "#{Formatter.format_bytes((large_data.bytesize * 10 / time.real).to_i)}/s"
+     )
+   end
+
+   puts "\nšŸ“Š Multi-threading Analysis:"
+
+   # Calculate speedup vs single-threaded
+   baseline = results[0].iterations_per_sec
+   results.each do |result|
+     speedup = result.iterations_per_sec / baseline
+     efficiency = begin
+       (speedup / result.name.split.first.to_i * 100).round(1)
+     rescue
+       100.0
+     end
+     puts " #{result.name}: #{Formatter.format_ratio(speedup)} speedup" +
+       ((result.name != "0 workers") ? " (#{efficiency}% efficient)" : "")
+   end
+
+   # Test with job_size parameter
+   puts "\n"
+   Formatter.section("Testing: Multi-threading with different job sizes")
+
+   job_sizes = [256 * 1024, 512 * 1024, 1024 * 1024] # 256KB, 512KB, 1MB
+   job_results = []
+
+   job_sizes.each do |job_size|
+     cctx = VibeZstd::CCtx.new
+     cctx.nb_workers = 4
+     cctx.job_size = job_size
+
+     time = Benchmark.measure do
+       5.times { cctx.compress(large_data) }
+     end
+
+     ops_per_sec = 5 / time.real
+     puts " Job size #{Formatter.format_bytes(job_size)}: #{time.real.round(3)}s for 5 iterations"
+
+     job_results << {
+       "Job Size" => Formatter.format_bytes(job_size),
+       "Time (5 ops)" => "#{time.real.round(3)}s",
+       "Ops/sec" => ops_per_sec.round(1)
+     }
+   end
+
+   Formatter.table(job_results)
+ end
+
+ puts "\nšŸ’” Multi-threading Recommendations:"
+ puts " āœ“ Use for data > 256KB (overhead not worth it for smaller data)"
+ puts " āœ“ Optimal workers: 2-4 for most use cases (diminishing returns after)"
+ puts " āœ“ More workers = higher memory usage"
+ puts " āœ“ job_size affects compression ratio vs parallelism tradeoff"
+ puts "\n Typical speedups:"
+ puts " - 2 workers: 1.5-1.8x faster"
+ puts " - 4 workers: 2.0-2.5x faster"
+ puts " - 8 workers: 2.2-3.0x faster (diminishing returns)"
data/benchmark/run_all.rb
@@ -0,0 +1,150 @@
+ #!/usr/bin/env ruby
+ # frozen_string_literal: true
+
+ # Run all benchmarks and generate a comprehensive report
+
+ require_relative "helpers"
+
+ Formatter.header("VibeZstd Comprehensive Benchmark Suite")
+
+ puts "Ruby version: #{RUBY_VERSION}"
+ puts "Platform: #{RUBY_PLATFORM}"
+ puts "Zstd version: #{VibeZstd.version_string}"
+ puts "Date: #{Time.now.strftime("%Y-%m-%d %H:%M:%S")}"
+ puts
+
+ # Check for fixture
+ fixture_path = File.join(__dir__, "..", "test", "fixtures", "sample.dict")
+ unless File.exist?(fixture_path)
+   puts "āš ļø Dictionary fixture not found. Generating..."
+   require_relative "generate_fixture"
+   puts
+ end
+
+ # List of benchmarks to run
+ benchmarks = [
+   {
+     name: "Context Reuse",
+     file: "context_reuse.rb",
+     description: "Compare reusing contexts vs creating new ones"
+   },
+   {
+     name: "Dictionary Usage",
+     file: "dictionary_usage.rb",
+     description: "Compare compression with and without dictionaries"
+   },
+   {
+     name: "Compression Levels",
+     file: "compression_levels.rb",
+     description: "Speed vs compression ratio across levels"
+   },
+   {
+     name: "Streaming",
+     file: "streaming.rb",
+     description: "Streaming API vs one-shot compression"
+   },
+   {
+     name: "Multi-threading",
+     file: "multithreading.rb",
+     description: "Performance with different worker counts"
+   },
+   {
+     name: "Dictionary Training",
+     file: "dictionary_training.rb",
+     description: "Compare dictionary training algorithms"
+   }
+ ]
+
+ # Option parsing
+ require "optparse"
+
+ options = {
+   list: false,
+   benchmarks: []
+ }
+
+ OptionParser.new do |opts|
+   opts.banner = "Usage: ruby run_all.rb [options]"
+
+   opts.on("-l", "--list", "List available benchmarks") do
+     options[:list] = true
+   end
+
+   opts.on("-b", "--benchmark NAME", "Run specific benchmark (can specify multiple times)") do |name|
+     options[:benchmarks] << name
+   end
+
+   opts.on("-h", "--help", "Show this help") do
+     puts opts
+     exit
+   end
+ end.parse!
+
+ # List benchmarks if requested
+ if options[:list]
+   puts "Available benchmarks:\n\n"
+   benchmarks.each_with_index do |bench, i|
+     puts "#{i + 1}. #{bench[:name]}"
+     puts " File: #{bench[:file]}"
+     puts " Description: #{bench[:description]}"
+     puts
+   end
+   exit
+ end
+
+ # Filter benchmarks if specific ones requested
+ benchmarks_to_run = if options[:benchmarks].any?
+   benchmarks.select do |bench|
+     options[:benchmarks].any? { |name| bench[:name].downcase.include?(name.downcase) }
+   end
+ else
+   benchmarks
+ end
+
+ if benchmarks_to_run.empty?
+   puts "No benchmarks match your criteria. Use --list to see available benchmarks."
+   exit 1
+ end
+
+ # Run benchmarks
+ puts "Running #{benchmarks_to_run.size} benchmark(s)...\n\n"
+
+ results = []
+ start_time = Time.now
+
+ benchmarks_to_run.each_with_index do |bench, i|
+   puts "\n" + "=" * 80
+   puts "Benchmark #{i + 1}/#{benchmarks_to_run.size}: #{bench[:name]}"
+   puts "=" * 80
+   puts
+
+   begin
+     load File.join(__dir__, bench[:file])
+     results << {name: bench[:name], status: "āœ“ Completed"}
+   rescue => e
+     puts "\nāŒ Error running #{bench[:name]}: #{e.message}"
+     puts e.backtrace.first(5)
+     results << {name: bench[:name], status: "āœ— Failed: #{e.message}"}
+   end
+
+   # Add separator between benchmarks
+   puts "\n" + "-" * 80 + "\n" if i < benchmarks_to_run.size - 1
+ end
+
+ end_time = Time.now
+ duration = end_time - start_time
+
+ # Summary
+ puts "\n\n"
+ Formatter.header("Benchmark Suite Summary")
+
+ puts "Total time: #{duration.round(2)}s\n\n"
+
+ results.each do |result|
+   puts " #{result[:status].ljust(20)} #{result[:name]}"
+ end
+
+ puts "\n"
+ puts "=" * 80
+ puts "Benchmark suite completed!"
+ puts "=" * 80
data/benchmark/streaming.rb
@@ -0,0 +1,154 @@
+ #!/usr/bin/env ruby
+ # frozen_string_literal: true
+
+ require_relative "helpers"
+ require "stringio"
+ require "tempfile"
+
+ # Benchmark: Streaming vs One-Shot Compression
+ # Compares streaming API vs convenience methods for different use cases
+
+ BenchmarkHelpers.run_comparison(title: "Streaming vs One-Shot Compression") do |results|
+   # Generate test data
+   large_data = DataGenerator.mixed_data(size: 100_000)
+   puts "Test data size: #{Formatter.format_bytes(large_data.bytesize)}\n\n"
+
+   # Benchmark 1: One-shot compression (simple, all-in-memory)
+   Formatter.section("Testing: One-shot compression")
+   oneshot_time = Benchmark.measure do
+     10.times do
+       compressed = VibeZstd.compress(large_data)
+       VibeZstd.decompress(compressed)
+     end
+   end
+
+   oneshot_ops_per_sec = 10 / oneshot_time.real
+   oneshot_compressed = VibeZstd.compress(large_data)
+   puts "Completed 10 iterations in #{oneshot_time.real.round(3)}s"
+   puts "Compressed size: #{Formatter.format_bytes(oneshot_compressed.bytesize)}"
+
+   # Benchmark 2: Streaming compression (more control, constant memory)
+   Formatter.section("Testing: Streaming compression")
+   streaming_time = Benchmark.measure do
+     10.times do
+       # Compress
+       compressed_io = StringIO.new
+       writer = VibeZstd::Compress::Writer.new(compressed_io, level: 3)
+
+       # Write in chunks
+       large_data.scan(/.{1,1000}/m).each { |chunk| writer.write(chunk) }
+       writer.finish
+
+       # Decompress
+       compressed_io.rewind
+       reader = VibeZstd::Decompress::Reader.new(compressed_io)
+       decompressed = +""
+       while (chunk = reader.read)
+         decompressed << chunk
+       end
+     end
+   end
+
+   streaming_ops_per_sec = 10 / streaming_time.real
+   puts "Completed 10 iterations in #{streaming_time.real.round(3)}s"
+
+   # Benchmark 3: Streaming with larger chunks (optimization)
+   Formatter.section("Testing: Streaming with optimized chunk size")
+   optimized_streaming_time = Benchmark.measure do
+     10.times do
+       # Compress
+       compressed_io = StringIO.new
+       writer = VibeZstd::Compress::Writer.new(compressed_io, level: 3)
+
+       # Write in larger chunks
+       large_data.scan(/.{1,8192}/m).each { |chunk| writer.write(chunk) }
+       writer.finish
+
+       # Decompress with optimized chunk size
+       compressed_io.rewind
+       reader = VibeZstd::Decompress::Reader.new(compressed_io, initial_chunk_size: 8192)
+       decompressed = +""
+       while (chunk = reader.read)
+         decompressed << chunk
+       end
+     end
+   end
+
+   optimized_streaming_ops_per_sec = 10 / optimized_streaming_time.real
+   puts "Completed 10 iterations in #{optimized_streaming_time.real.round(3)}s"
+
+   # File-based streaming benchmark
+   Formatter.section("Testing: Streaming to file")
+   file_streaming_time = Benchmark.measure do
+     10.times do
+       Tempfile.create(["benchmark", ".zst"]) do |tmpfile|
+         # Compress to file
+         writer = VibeZstd::Compress::Writer.new(tmpfile, level: 3)
+         large_data.scan(/.{1,8192}/m).each { |chunk| writer.write(chunk) }
+         writer.finish
+
+         # Decompress from file
+         tmpfile.rewind
+         reader = VibeZstd::Decompress::Reader.new(tmpfile)
+         decompressed = +""
+         while (chunk = reader.read)
+           decompressed << chunk
+         end
+       end
+     end
+   end
+
+   file_streaming_ops_per_sec = 10 / file_streaming_time.real
+   puts "Completed 10 iterations in #{file_streaming_time.real.round(3)}s"
+
+   # Memory estimates (one-shot needs to buffer everything)
+   cctx_memory = Memory.estimate_cctx(3)
+   dctx_memory = Memory.estimate_dctx
+   oneshot_memory = cctx_memory + dctx_memory + large_data.bytesize + oneshot_compressed.bytesize
+   streaming_memory = cctx_memory + dctx_memory + 8192 # Only chunk size in memory
+
+   # Collect results
+   results << BenchmarkResult.new(
+     :name => "One-shot",
+     :iterations_per_sec => oneshot_ops_per_sec,
+     :memory_bytes => oneshot_memory,
+     "Use case" => "Small data, simplicity"
+   )
+
+   results << BenchmarkResult.new(
+     :name => "Streaming (1KB chunks)",
+     :iterations_per_sec => streaming_ops_per_sec,
+     :memory_bytes => streaming_memory,
+     "Use case" => "Large files, low memory"
+   )
+
+   results << BenchmarkResult.new(
+     :name => "Streaming (8KB chunks)",
+     :iterations_per_sec => optimized_streaming_ops_per_sec,
+     :memory_bytes => streaming_memory,
+     "Use case" => "Balanced performance"
+   )
+
+   results << BenchmarkResult.new(
+     :name => "File streaming",
+     :iterations_per_sec => file_streaming_ops_per_sec,
+     :memory_bytes => streaming_memory,
+     "Use case" => "Large files, disk I/O"
+   )
+
+   puts "\nšŸ’¾ Memory Comparison:"
+   puts " One-shot: #{Formatter.format_bytes(oneshot_memory)} (entire data in memory)"
+   puts " Streaming: #{Formatter.format_bytes(streaming_memory)} (only chunks in memory)"
+   puts " Memory savings: #{Formatter.format_bytes(oneshot_memory - streaming_memory)} (#{((oneshot_memory - streaming_memory).to_f / oneshot_memory * 100).round(1)}%)"
+ end
+
+ puts "\nšŸ’” When to use each approach:"
+ puts " One-shot compression (VibeZstd.compress):"
+ puts " āœ“ Small data (< 1MB)"
+ puts " āœ“ Data already in memory"
+ puts " āœ“ Simplicity is priority"
+ puts "\n Streaming compression (Writer/Reader):"
+ puts " āœ“ Large files (> 1MB)"
+ puts " āœ“ Memory-constrained environments"
+ puts " āœ“ Incremental data (network streams, logs)"
+ puts " āœ“ Need to process data on-the-fly"