serialbench 0.1.1 → 0.1.2

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/benchmark.yml +13 -5
  3. data/.github/workflows/docker.yml +35 -9
  4. data/.github/workflows/rake.yml +15 -0
  5. data/Gemfile +2 -1
  6. data/README.adoc +267 -1129
  7. data/Rakefile +0 -55
  8. data/config/benchmarks/full.yml +29 -0
  9. data/config/benchmarks/short.yml +26 -0
  10. data/config/environments/asdf-ruby-3.2.yml +8 -0
  11. data/config/environments/asdf-ruby-3.3.yml +8 -0
  12. data/config/environments/docker-ruby-3.0.yml +9 -0
  13. data/config/environments/docker-ruby-3.1.yml +9 -0
  14. data/config/environments/docker-ruby-3.2.yml +9 -0
  15. data/config/environments/docker-ruby-3.3.yml +9 -0
  16. data/config/environments/docker-ruby-3.4.yml +9 -0
  17. data/docker/Dockerfile.alpine +33 -0
  18. data/docker/{Dockerfile.benchmark → Dockerfile.ubuntu} +4 -3
  19. data/docker/README.md +2 -2
  20. data/exe/serialbench +1 -1
  21. data/lib/serialbench/benchmark_runner.rb +261 -423
  22. data/lib/serialbench/cli/base_cli.rb +51 -0
  23. data/lib/serialbench/cli/benchmark_cli.rb +380 -0
  24. data/lib/serialbench/cli/environment_cli.rb +181 -0
  25. data/lib/serialbench/cli/resultset_cli.rb +215 -0
  26. data/lib/serialbench/cli/ruby_build_cli.rb +238 -0
  27. data/lib/serialbench/cli.rb +58 -601
  28. data/lib/serialbench/config_manager.rb +140 -0
  29. data/lib/serialbench/models/benchmark_config.rb +63 -0
  30. data/lib/serialbench/models/benchmark_result.rb +45 -0
  31. data/lib/serialbench/models/environment_config.rb +71 -0
  32. data/lib/serialbench/models/platform.rb +59 -0
  33. data/lib/serialbench/models/result.rb +53 -0
  34. data/lib/serialbench/models/result_set.rb +71 -0
  35. data/lib/serialbench/models/result_store.rb +108 -0
  36. data/lib/serialbench/models.rb +54 -0
  37. data/lib/serialbench/ruby_build_manager.rb +153 -0
  38. data/lib/serialbench/runners/asdf_runner.rb +296 -0
  39. data/lib/serialbench/runners/base.rb +32 -0
  40. data/lib/serialbench/runners/docker_runner.rb +142 -0
  41. data/lib/serialbench/serializers/base_serializer.rb +8 -16
  42. data/lib/serialbench/serializers/json/base_json_serializer.rb +4 -4
  43. data/lib/serialbench/serializers/json/json_serializer.rb +0 -2
  44. data/lib/serialbench/serializers/json/oj_serializer.rb +0 -2
  45. data/lib/serialbench/serializers/json/yajl_serializer.rb +0 -2
  46. data/lib/serialbench/serializers/toml/base_toml_serializer.rb +5 -3
  47. data/lib/serialbench/serializers/toml/toml_rb_serializer.rb +0 -2
  48. data/lib/serialbench/serializers/toml/tomlib_serializer.rb +0 -2
  49. data/lib/serialbench/serializers/toml/tomlrb_serializer.rb +56 -0
  50. data/lib/serialbench/serializers/xml/base_xml_serializer.rb +4 -9
  51. data/lib/serialbench/serializers/xml/libxml_serializer.rb +0 -2
  52. data/lib/serialbench/serializers/xml/nokogiri_serializer.rb +0 -2
  53. data/lib/serialbench/serializers/xml/oga_serializer.rb +0 -2
  54. data/lib/serialbench/serializers/xml/ox_serializer.rb +0 -2
  55. data/lib/serialbench/serializers/xml/rexml_serializer.rb +0 -2
  56. data/lib/serialbench/serializers/yaml/base_yaml_serializer.rb +5 -1
  57. data/lib/serialbench/serializers/yaml/syck_serializer.rb +59 -22
  58. data/lib/serialbench/serializers.rb +23 -6
  59. data/lib/serialbench/site_generator.rb +105 -0
  60. data/lib/serialbench/templates/assets/css/benchmark_report.css +535 -0
  61. data/lib/serialbench/templates/assets/css/format_based.css +526 -0
  62. data/lib/serialbench/templates/assets/css/themes.css +588 -0
  63. data/lib/serialbench/templates/assets/js/chart_helpers.js +381 -0
  64. data/lib/serialbench/templates/assets/js/dashboard.js +796 -0
  65. data/lib/serialbench/templates/assets/js/navigation.js +142 -0
  66. data/lib/serialbench/templates/base.liquid +49 -0
  67. data/lib/serialbench/templates/format_based.liquid +279 -0
  68. data/lib/serialbench/templates/partials/chart_section.liquid +4 -0
  69. data/lib/serialbench/version.rb +1 -1
  70. data/lib/serialbench.rb +2 -31
  71. data/serialbench.gemspec +4 -1
  72. metadata +86 -16
  73. data/config/ci.yml +0 -22
  74. data/config/full.yml +0 -30
  75. data/docker/run-benchmarks.sh +0 -356
  76. data/lib/serialbench/chart_generator.rb +0 -821
  77. data/lib/serialbench/result_formatter.rb +0 -182
  78. data/lib/serialbench/result_merger.rb +0 -1201
  79. data/lib/serialbench/serializers/xml/base_parser.rb +0 -69
  80. data/lib/serialbench/serializers/xml/libxml_parser.rb +0 -98
  81. data/lib/serialbench/serializers/xml/nokogiri_parser.rb +0 -111
  82. data/lib/serialbench/serializers/xml/oga_parser.rb +0 -85
  83. data/lib/serialbench/serializers/xml/ox_parser.rb +0 -64
  84. data/lib/serialbench/serializers/xml/rexml_parser.rb +0 -129
data/lib/serialbench/benchmark_runner.rb

@@ -3,6 +3,7 @@
  require 'benchmark'
  require 'benchmark/ips'
  require_relative 'serializers'
+ require_relative 'models/benchmark_result'

  begin
  require 'memory_profiler'
@@ -12,16 +13,14 @@ end

  module Serialbench
  class BenchmarkRunner
- attr_reader :serializers, :test_data, :results, :formats
-
- def initialize(formats: FORMATS, iterations: nil, warmup: nil, **options)
- @formats = Array(formats)
- @options = options
- @options[:iterations] = iterations if iterations
- @options[:warmup] = warmup if warmup
- @serializers = load_available_serializers
+ attr_reader :environment_config, :benchmark_config, :serializers, :test_data, :results
+
+ def initialize(benchmark_config:, environment_config:)
+ @environment_config = environment_config
+ @benchmark_config = benchmark_config
+ @serializers = Serializers.available
  @test_data = {}
- @results = {}
+ @results = []
  load_test_data
  end

@@ -29,201 +28,113 @@ module Serialbench
  puts 'Serialbench - Running comprehensive serialization performance tests'
  puts '=' * 70
  puts "Available serializers: #{@serializers.map(&:name).join(', ')}"
- puts "Test formats: #{@formats.join(', ')}"
+ puts "Test formats: #{@benchmark_config.formats.join(', ')}"
  puts "Test data sizes: #{@test_data.keys.join(', ')}"
  puts

- @results = {
- environment: collect_environment_info,
+ Models::BenchmarkResult.new(
+ serializers: Serializers.information,
  parsing: run_parsing_benchmarks,
  generation: run_generation_benchmarks,
- memory_usage: run_memory_benchmarks
- }
-
- # Add streaming benchmarks if any serializers support it
- streaming_serializers = @serializers.select(&:supports_streaming?)
- @results[:streaming] = run_streaming_benchmarks if streaming_serializers.any?
-
- @results
- end
-
- def environment_info
- collect_environment_info
+ memory_usage: run_memory_benchmarks,
+ streaming: run_streaming_benchmarks
+ )
  end

  def run_parsing_benchmarks
- puts 'Running parsing benchmarks...'
- results = {}
-
- @test_data.each do |size, format_data|
- puts " Testing #{size} files..."
- results[size] = {}
-
- format_data.each do |format, data|
- next unless @formats.include?(format)
-
- results[size][format] = {}
- iterations = get_iterations_for_size(size)
-
- serializers_for_format(format).each do |serializer|
- next unless serializer.available?
-
- begin
- # Warmup
- 3.times { serializer.parse(data) }
-
- # Benchmark
- time = Benchmark.realtime do
- iterations.times { serializer.parse(data) }
- end
-
- results[size][format][serializer.name] = {
- time_per_iterations: time,
- time_per_iteration: time / iterations.to_f,
- iterations_per_second: iterations.to_f / time,
- iterations_count: iterations
- }
-
- puts " #{format}/#{serializer.name}: #{(time / iterations.to_f * 1000).round(2)}ms per parse"
- rescue StandardError => e
- puts " #{format}/#{serializer.name}: ERROR - #{e.message}"
- results[size][format][serializer.name] = { error: e.message }
- end
- end
- end
+ run_benchmark_type('parsing', 'parse') do |serializer, data|
+ serializer.parse(data)
  end
-
- results
  end

  def run_generation_benchmarks
- puts "\nRunning generation benchmarks..."
- results = {}
-
- @test_data.each do |size, format_data|
- puts " Testing #{size} files..."
- results[size] = {}
-
- format_data.each do |format, data|
- next unless @formats.include?(format)
-
- results[size][format] = {}
- iterations = get_iterations_for_size(size)
+ run_benchmark_type('generation', 'generation') do |serializer, data|
+ document = serializer.parse(data)
+ serializer.generate(document)
+ end
+ end

- serializers_for_format(format).each do |serializer|
- next unless serializer.available?
+ def run_streaming_benchmarks
+ run_benchmark_type('streaming', 'stream parse') do |serializer, data|
+ serializer.stream_parse(data) { |event, data| }
+ end
+ end

- begin
- # Parse document first to get object for generation
- document = serializer.parse(data)
-
- # Warmup
- 3.times { serializer.generate(document) }
-
- # Benchmark
- time = Benchmark.realtime do
- iterations.times { serializer.generate(document) }
- end
-
- results[size][format][serializer.name] = {
- time_per_iterations: time,
- time_per_iteration: time / iterations.to_f,
- iterations_per_second: iterations.to_f / time,
- iterations_count: iterations
- }
+ def run_memory_benchmarks
+ puts "\nRunning memory usage benchmarks..."
+ return [] unless defined?(::MemoryProfiler)

- puts " #{format}/#{serializer.name}: #{(time / iterations.to_f * 1000).round(2)}ms per generation"
- rescue StandardError => e
- puts " #{format}/#{serializer.name}: ERROR - #{e.message}"
- results[size][format][serializer.name] = { error: e.message }
- end
- end
+ run_benchmark_iteration('memory') do |serializer, format, size, data|
+ # Memory profiling for parsing
+ report = ::MemoryProfiler.report do
+ 10.times { serializer.parse(data) }
  end
- end

- results
+ result = Models::MemoryPerformance.new(
+ adapter: serializer.name,
+ format: format,
+ data_size: size,
+ total_allocated: report.total_allocated,
+ total_retained: report.total_retained,
+ allocated_memory: report.total_allocated_memsize,
+ retained_memory: report.total_retained_memsize
+ )
+
+ puts " #{format}/#{serializer.name}: #{(report.total_allocated_memsize / 1024.0 / 1024.0).round(2)}MB allocated"
+ result
+ end
  end

- def run_streaming_benchmarks
- puts "\nRunning streaming benchmarks..."
- results = {}
-
- @test_data.each do |size, format_data|
- puts " Testing #{size} files..."
- results[size] = {}
-
- format_data.each do |format, data|
- next unless @formats.include?(format)
+ private

- results[size][format] = {}
- iterations = get_iterations_for_size(size)
+ def run_benchmark_type(type_name, operation_name, &block)
+ puts "#{type_name == 'parsing' ? '' : "\n"}Running #{type_name} benchmarks..."

- serializers_for_format(format).select(&:supports_streaming?).each do |serializer|
- next unless serializer.available?
+ run_benchmark_iteration(type_name) do |serializer, format, size, data|
+ iterations = get_iterations_for_size(size)

- begin
- # Warmup
- 3.times { serializer.stream_parse(data) { |event, data| } }
-
- # Benchmark
- time = Benchmark.realtime do
- iterations.times { serializer.stream_parse(data) { |event, data| } }
- end
-
- results[size][format][serializer.name] = {
- time_per_iterations: time,
- time_per_iteration: time / iterations.to_f,
- iterations_per_second: iterations.to_f / time,
- iterations_count: iterations
- }
+ # Warmup
+ 3.times { block.call(serializer, data) }

- puts " #{format}/#{serializer.name}: #{(time / iterations.to_f * 1000).round(2)}ms per stream parse"
- rescue StandardError => e
- puts " #{format}/#{serializer.name}: ERROR - #{e.message}"
- results[size][format][serializer.name] = { error: e.message }
- end
- end
+ # Benchmark
+ time = Benchmark.realtime do
+ iterations.times { block.call(serializer, data) }
  end
- end

- results
+ result = Models::IterationPerformance.new(
+ adapter: serializer.name,
+ format: format,
+ data_size: size,
+ time_per_iterations: time,
+ time_per_iteration: time / iterations.to_f,
+ iterations_per_second: iterations.to_f / time,
+ iterations_count: iterations
+ )
+
+ puts " #{result.format}/#{result.adapter}: #{(result.time_per_iteration * 1000).round(2)}ms per #{operation_name}"
+ result
+ end
  end

- def run_memory_benchmarks
- puts "\nRunning memory usage benchmarks..."
- results = {}
-
- return results unless defined?(::MemoryProfiler)
+ def run_benchmark_iteration(type_name)
+ results = []

  @test_data.each do |size, format_data|
  puts " Testing #{size} files..."
- results[size] = {}

  format_data.each do |format, data|
- next unless @formats.include?(format)
+ next unless @benchmark_config.formats.include?(format)

- results[size][format] = {}
+ serializers = get_serializers_for_benchmark_type(type_name, format)

- serializers_for_format(format).each do |serializer|
+ serializers.each do |serializer|
  next unless serializer.available?

  begin
- # Memory profiling for parsing
- report = ::MemoryProfiler.report do
- 10.times { serializer.parse(data) }
- end
-
- results[size][format][serializer.name] = {
- total_allocated: report.total_allocated,
- total_retained: report.total_retained,
- allocated_memory: report.total_allocated_memsize,
- retained_memory: report.total_retained_memsize
- }
-
- puts " #{format}/#{serializer.name}: #{(report.total_allocated_memsize / 1024.0 / 1024.0).round(2)}MB allocated"
+ result = yield(serializer, format, size, data)
+ results << result if result
  rescue StandardError => e
  puts " #{format}/#{serializer.name}: ERROR - #{e.message}"
- results[size][format][serializer.name] = { error: e.message }
  end
  end
  end
@@ -232,105 +143,160 @@ module Serialbench
  results
  end

- def serializers_for_format(format)
- @serializers.select { |s| s.format == format.to_sym }
- end
-
- def all_serializers
- @serializers
- end
-
- private
+ def get_serializers_for_benchmark_type(type_name, format)
+ serializers = Serializers.for_format(format)

- def get_iterations_for_size(size)
- case size
- when :small
- 20
- when :medium
- 5
- when :large
- 2
+ case type_name
+ when 'generation'
+ serializers.select(&:supports_generation?)
+ when 'streaming'
+ serializers.select(&:supports_streaming?)
  else
- 10
+ serializers
  end
  end

- def load_available_serializers
- Serializers.available.map(&:new)
+ def get_iterations_for_size(size)
+ @benchmark_config.iterations.send(size.to_s)
  end

  def load_test_data
- # Load test data for each format
- @test_data = {
- small: {},
- medium: {},
- large: {}
- }
+ # Determine which data sizes to load based on configuration
+ data_sizes = @benchmark_config.data_sizes

- # Generate data for each format
- @formats.each do |format|
- case format
- when :xml
- @test_data[:small][:xml] = generate_small_xml
- @test_data[:medium][:xml] = generate_medium_xml
- @test_data[:large][:xml] = generate_large_xml
- when :json
- @test_data[:small][:json] = generate_small_json
- @test_data[:medium][:json] = generate_medium_json
- @test_data[:large][:json] = generate_large_json
- when :yaml
- @test_data[:small][:yaml] = generate_small_yaml
- @test_data[:medium][:yaml] = generate_medium_yaml
- @test_data[:large][:yaml] = generate_large_yaml
- when :toml
- @test_data[:small][:toml] = generate_small_toml
- @test_data[:medium][:toml] = generate_medium_toml
- @test_data[:large][:toml] = generate_large_toml
+ # Initialize test data structure
+ @test_data = {}
+ data_sizes.each { |size| @test_data[size] = {} }
+
+ # Generate data for each format and size
+ @benchmark_config.formats.each do |format|
+ data_sizes.each do |size|
+ @test_data[size][format] = generate_test_data(format, size)
  end
  end

  # Try to load real test files if they exist
- %w[small medium large].each do |size|
- @formats.each do |format|
+ data_sizes.each do |size|
+ @benchmark_config.formats.each do |format|
  file_path = "test_data/#{size}.#{format}"
- @test_data[size.to_sym][format] = File.read(file_path) if File.exist?(file_path)
+ @test_data[size][format] = File.read(file_path) if File.exist?(file_path)
  end
  end
  end

+ def generate_test_data(format, size)
+ method_name = "generate_#{size}_#{format}"
+ send(method_name)
+ end
+
+ # Shared data structure generators
+ def small_test_data_structure
+ {
+ config: {
+ database: {
+ host: 'localhost',
+ port: 5432,
+ name: 'myapp',
+ user: 'admin',
+ password: 'secret'
+ },
+ cache: {
+ enabled: true,
+ ttl: 3600
+ }
+ }
+ }
+ end
+
+ def medium_test_data_structure
+ {
+ users: (1..1000).map do |i|
+ {
+ id: i,
+ name: "User #{i}",
+ email: "user#{i}@example.com",
+ created_at: "2023-01-#{(i % 28) + 1}T10:00:00Z",
+ profile: {
+ age: 20 + (i % 50),
+ city: "City #{i % 100}",
+ preferences: {
+ theme: i.even? ? 'dark' : 'light',
+ notifications: i % 3 == 0
+ }
+ }
+ }
+ end
+ }
+ end
+
+ def large_test_data_structure
+ {
+ dataset: {
+ header: {
+ created: '2023-01-01T00:00:00Z',
+ count: 10_000,
+ format: 'data'
+ },
+ records: (1..10_000).map do |i|
+ {
+ id: i,
+ timestamp: "2023-01-01T#{format('%02d', i % 24)}:#{format('%02d', i % 60)}:#{format('%02d', i % 60)}Z",
+ data: {
+ field1: "Value #{i}",
+ field2: i * 2,
+ field3: i % 100 == 0 ? 'special' : 'normal',
+ nested: [
+ "Item #{i}-1",
+ "Item #{i}-2",
+ "Item #{i}-3"
+ ]
+ },
+ metadata: {
+ source: 'generator',
+ version: '1.0',
+ checksum: i.to_s(16)
+ }
+ }
+ end
+ }
+ }
+ end
+
  # XML test data generators
  def generate_small_xml
+ data = small_test_data_structure
  <<~XML
  <?xml version="1.0" encoding="UTF-8"?>
  <config>
  <database>
- <host>localhost</host>
- <port>5432</port>
- <name>myapp</name>
- <user>admin</user>
- <password>secret</password>
+ <host>#{data[:config][:database][:host]}</host>
+ <port>#{data[:config][:database][:port]}</port>
+ <name>#{data[:config][:database][:name]}</name>
+ <user>#{data[:config][:database][:user]}</user>
+ <password>#{data[:config][:database][:password]}</password>
  </database>
  <cache>
- <enabled>true</enabled>
- <ttl>3600</ttl>
+ <enabled>#{data[:config][:cache][:enabled]}</enabled>
+ <ttl>#{data[:config][:cache][:ttl]}</ttl>
  </cache>
  </config>
  XML
  end

  def generate_medium_xml
- users = (1..1000).map do |i|
+ data = medium_test_data_structure
+ users = data[:users].map do |user|
  <<~USER
- <user id="#{i}">
- <name>User #{i}</name>
- <email>user#{i}@example.com</email>
- <created_at>2023-01-#{(i % 28) + 1}T10:00:00Z</created_at>
+ <user id="#{user[:id]}">
+ <name>#{user[:name]}</name>
+ <email>#{user[:email]}</email>
+ <created_at>#{user[:created_at]}</created_at>
  <profile>
- <age>#{20 + (i % 50)}</age>
- <city>City #{i % 100}</city>
+ <age>#{user[:profile][:age]}</age>
+ <city>#{user[:profile][:city]}</city>
  <preferences>
- <theme>#{i.even? ? 'dark' : 'light'}</theme>
- <notifications>#{i % 3 == 0 ? 'true' : 'false'}</notifications>
+ <theme>#{user[:profile][:preferences][:theme]}</theme>
+ <notifications>#{user[:profile][:preferences][:notifications]}</notifications>
  </preferences>
  </profile>
  </user>
@@ -346,26 +312,26 @@ module Serialbench
  end

  def generate_large_xml
- records = (1..10_000).map do |i|
+ data = large_test_data_structure
+ records = data[:dataset][:records].map do |record|
+ nested_items = record[:data][:nested].map { |item| " <item>#{item}</item>" }.join("\n")
  <<~RECORD
- <record id="#{i}">
- <timestamp>2023-01-01T#{format('%02d', i % 24)}:#{format('%02d', i % 60)}:#{format('%02d', i % 60)}Z</timestamp>
- <data>
- <field1>Value #{i}</field1>
- <field2>#{i * 2}</field2>
- <field3>#{i % 100 == 0 ? 'special' : 'normal'}</field3>
- <nested>
- <item>Item #{i}-1</item>
- <item>Item #{i}-2</item>
- <item>Item #{i}-3</item>
- </nested>
- </data>
- <metadata>
- <source>generator</source>
- <version>1.0</version>
- <checksum>#{i.to_s(16)}</checksum>
- </metadata>
- </record>
+ <record id="#{record[:id]}">
+ <timestamp>#{record[:timestamp]}</timestamp>
+ <data>
+ <field1>#{record[:data][:field1]}</field1>
+ <field2>#{record[:data][:field2]}</field2>
+ <field3>#{record[:data][:field3]}</field3>
+ <nested>
+ #{nested_items}
+ </nested>
+ </data>
+ <metadata>
+ <source>#{record[:metadata][:source]}</source>
+ <version>#{record[:metadata][:version]}</version>
+ <checksum>#{record[:metadata][:checksum]}</checksum>
+ </metadata>
+ </record>
  RECORD
  end.join

@@ -373,8 +339,8 @@ module Serialbench
  <?xml version="1.0" encoding="UTF-8"?>
  <dataset>
  <header>
- <created>2023-01-01T00:00:00Z</created>
- <count>10000</count>
+ <created>#{data[:dataset][:header][:created]}</created>
+ <count>#{data[:dataset][:header][:count]}</count>
  <format>xml</format>
  </header>
  <records>
@@ -386,215 +352,96 @@ module Serialbench

  # JSON test data generators
  def generate_small_json
- require 'json'
- JSON.generate({
- config: {
- database: {
- host: 'localhost',
- port: 5432,
- name: 'myapp',
- user: 'admin',
- password: 'secret'
- },
- cache: {
- enabled: true,
- ttl: 3600
- }
- }
- })
+ JSON.generate(small_test_data_structure)
  end

  def generate_medium_json
- require 'json'
- users = (1..1000).map do |i|
- {
- id: i,
- name: "User #{i}",
- email: "user#{i}@example.com",
- created_at: "2023-01-#{(i % 28) + 1}T10:00:00Z",
- profile: {
- age: 20 + (i % 50),
- city: "City #{i % 100}",
- preferences: {
- theme: i.even? ? 'dark' : 'light',
- notifications: i % 3 == 0
- }
- }
- }
- end
-
- JSON.generate({ users: users })
+ JSON.generate(medium_test_data_structure)
  end

  def generate_large_json
- require 'json'
- records = (1..10_000).map do |i|
- {
- id: i,
- timestamp: "2023-01-01T#{format('%02d', i % 24)}:#{format('%02d', i % 60)}:#{format('%02d', i % 60)}Z",
- data: {
- field1: "Value #{i}",
- field2: i * 2,
- field3: i % 100 == 0 ? 'special' : 'normal',
- nested: [
- "Item #{i}-1",
- "Item #{i}-2",
- "Item #{i}-3"
- ]
- },
- metadata: {
- source: 'generator',
- version: '1.0',
- checksum: i.to_s(16)
- }
- }
- end
-
- JSON.generate({
- dataset: {
- header: {
- created: '2023-01-01T00:00:00Z',
- count: 10_000,
- format: 'json'
- },
- records: records
- }
- })
+ data = large_test_data_structure
+ data[:dataset][:header][:format] = 'json'
+ JSON.generate(data)
  end

  # YAML test data generators
  def generate_small_yaml
- require 'yaml'
- {
- config: {
- database: {
- host: 'localhost',
- port: 5432,
- name: 'myapp',
- user: 'admin',
- password: 'secret'
- },
- cache: {
- enabled: true,
- ttl: 3600
- }
- }
- }.to_yaml
+ small_test_data_structure.to_yaml
  end

  def generate_medium_yaml
- require 'yaml'
- users = (1..1000).map do |i|
- {
- id: i,
- name: "User #{i}",
- email: "user#{i}@example.com",
- created_at: "2023-01-#{(i % 28) + 1}T10:00:00Z",
- profile: {
- age: 20 + (i % 50),
- city: "City #{i % 100}",
- preferences: {
- theme: i.even? ? 'dark' : 'light',
- notifications: i % 3 == 0
- }
- }
- }
- end
-
- { users: users }.to_yaml
+ medium_test_data_structure.to_yaml
  end

  def generate_large_yaml
- require 'yaml'
- records = (1..10_000).map do |i|
- {
- id: i,
- timestamp: "2023-01-01T#{format('%02d', i % 24)}:#{format('%02d', i % 60)}:#{format('%02d', i % 60)}Z",
- data: {
- field1: "Value #{i}",
- field2: i * 2,
- field3: i % 100 == 0 ? 'special' : 'normal',
- nested: [
- "Item #{i}-1",
- "Item #{i}-2",
- "Item #{i}-3"
- ]
- },
- metadata: {
- source: 'generator',
- version: '1.0',
- checksum: i.to_s(16)
- }
- }
- end
-
- {
- dataset: {
- header: {
- created: '2023-01-01T00:00:00Z',
- count: 10_000,
- format: 'yaml'
- },
- records: records
- }
- }.to_yaml
+ data = large_test_data_structure
+ data[:dataset][:header][:format] = 'yaml'
+ data.to_yaml
  end

  # TOML test data generators
  def generate_small_toml
+ data = small_test_data_structure
  <<~TOML
  [config]

  [config.database]
- host = "localhost"
- port = 5432
- name = "myapp"
- user = "admin"
- password = "secret"
+ host = "#{data[:config][:database][:host]}"
+ port = #{data[:config][:database][:port]}
+ name = "#{data[:config][:database][:name]}"
+ user = "#{data[:config][:database][:user]}"
+ password = "#{data[:config][:database][:password]}"

  [config.cache]
- enabled = true
- ttl = 3600
+ enabled = #{data[:config][:cache][:enabled]}
+ ttl = #{data[:config][:cache][:ttl]}
  TOML
  end

  def generate_medium_toml
- (1..100).map do |i| # Smaller for TOML due to verbosity
+ data = medium_test_data_structure
+ # Use smaller dataset for TOML due to verbosity
+ users = data[:users].first(100)
+ users.map do |user|
  <<~USER
  [[users]]
- id = #{i}
- name = "User #{i}"
- email = "user#{i}@example.com"
- created_at = "2023-01-#{(i % 28) + 1}T10:00:00Z"
+ id = #{user[:id]}
+ name = "#{user[:name]}"
+ email = "#{user[:email]}"
+ created_at = "#{user[:created_at]}"

  [users.profile]
- age = #{20 + (i % 50)}
- city = "City #{i % 100}"
+ age = #{user[:profile][:age]}
+ city = "#{user[:profile][:city]}"

  [users.profile.preferences]
- theme = "#{i.even? ? 'dark' : 'light'}"
- notifications = #{i % 3 == 0}
+ theme = "#{user[:profile][:preferences][:theme]}"
+ notifications = #{user[:profile][:preferences][:notifications]}
  USER
  end.join("\n")
  end

  def generate_large_toml
- records_toml = (1..1000).map do |i| # Smaller for TOML due to verbosity
+ data = large_test_data_structure
+ # Use smaller dataset for TOML due to verbosity
+ records = data[:dataset][:records].first(1000)
+ records_toml = records.map do |record|
  <<~RECORD
  [[dataset.records]]
- id = #{i}
- timestamp = "2023-01-01T#{format('%02d', i % 24)}:#{format('%02d', i % 60)}:#{format('%02d', i % 60)}Z"
+ id = #{record[:id]}
+ timestamp = "#{record[:timestamp]}"

  [dataset.records.data]
- field1 = "Value #{i}"
- field2 = #{i * 2}
- field3 = "#{i % 100 == 0 ? 'special' : 'normal'}"
- nested = ["Item #{i}-1", "Item #{i}-2", "Item #{i}-3"]
+ field1 = "#{record[:data][:field1]}"
+ field2 = #{record[:data][:field2]}
+ field3 = "#{record[:data][:field3]}"
+ nested = #{record[:data][:nested].inspect}

  [dataset.records.metadata]
- source = "generator"
- version = "1.0"
- checksum = "#{i.to_s(16)}"
+ source = "#{record[:metadata][:source]}"
+ version = "#{record[:metadata][:version]}"
+ checksum = "#{record[:metadata][:checksum]}"
  RECORD
  end.join("\n")

@@ -602,21 +449,12 @@ module Serialbench
  [dataset]

  [dataset.header]
- created = "2023-01-01T00:00:00Z"
- count = 1000
+ created = "#{data[:dataset][:header][:created]}"
+ count = #{records.length}
  format = "toml"

  #{records_toml}
  TOML
  end
-
- def collect_environment_info
- {
- ruby_version: RUBY_VERSION,
- ruby_platform: RUBY_PLATFORM,
- serializer_versions: @serializers.map { |s| [s.name, s.version] }.to_h,
- timestamp: Time.now.iso8601
- }
- end
  end
  end
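
Usage note: the hunks above change BenchmarkRunner's constructor from keyword options (formats:, iterations:, warmup:) to two required config objects. A minimal sketch of driving the refactored runner follows; the OpenStruct stand-ins are assumptions for illustration only (the gem's real BenchmarkConfig and EnvironmentConfig models added under lib/serialbench/models/ are not shown in this diff), and they expose only the accessors the runner itself calls above: formats, data_sizes, and the per-size iteration counts.

  require 'json'
  require 'yaml'
  require 'ostruct'
  require 'serialbench/benchmark_runner'

  # Hypothetical stand-in configs; the real ones live in Serialbench::Models.
  benchmark_config = OpenStruct.new(
    formats: [:json, :yaml],                                    # read via @benchmark_config.formats
    data_sizes: [:small, :medium],                              # read via @benchmark_config.data_sizes
    iterations: OpenStruct.new(small: 20, medium: 5, large: 2)  # read via iterations.send(size.to_s)
  )
  environment_config = OpenStruct.new(name: 'local')            # shape not visible in this diff

  runner = Serialbench::BenchmarkRunner.new(
    benchmark_config: benchmark_config,
    environment_config: environment_config
  )
  # Constructing the runner generates the in-memory test data. The top-level run
  # method (whose body builds Models::BenchmarkResult in the first large hunk) is
  # not named in this diff, so it is not invoked here.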