serialbench 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/benchmark.yml +181 -30
  3. data/.github/workflows/ci.yml +3 -3
  4. data/.github/workflows/docker.yml +272 -0
  5. data/.github/workflows/rake.yml +15 -0
  6. data/.github/workflows/release.yml +25 -0
  7. data/Gemfile +6 -30
  8. data/README.adoc +381 -415
  9. data/Rakefile +0 -55
  10. data/config/benchmarks/full.yml +29 -0
  11. data/config/benchmarks/short.yml +26 -0
  12. data/config/environments/asdf-ruby-3.2.yml +8 -0
  13. data/config/environments/asdf-ruby-3.3.yml +8 -0
  14. data/config/environments/docker-ruby-3.0.yml +9 -0
  15. data/config/environments/docker-ruby-3.1.yml +9 -0
  16. data/config/environments/docker-ruby-3.2.yml +9 -0
  17. data/config/environments/docker-ruby-3.3.yml +9 -0
  18. data/config/environments/docker-ruby-3.4.yml +9 -0
  19. data/docker/Dockerfile.alpine +33 -0
  20. data/docker/Dockerfile.ubuntu +32 -0
  21. data/docker/README.md +214 -0
  22. data/exe/serialbench +1 -1
  23. data/lib/serialbench/benchmark_runner.rb +270 -350
  24. data/lib/serialbench/cli/base_cli.rb +51 -0
  25. data/lib/serialbench/cli/benchmark_cli.rb +380 -0
  26. data/lib/serialbench/cli/environment_cli.rb +181 -0
  27. data/lib/serialbench/cli/resultset_cli.rb +215 -0
  28. data/lib/serialbench/cli/ruby_build_cli.rb +238 -0
  29. data/lib/serialbench/cli.rb +59 -410
  30. data/lib/serialbench/config_manager.rb +140 -0
  31. data/lib/serialbench/models/benchmark_config.rb +63 -0
  32. data/lib/serialbench/models/benchmark_result.rb +45 -0
  33. data/lib/serialbench/models/environment_config.rb +71 -0
  34. data/lib/serialbench/models/platform.rb +59 -0
  35. data/lib/serialbench/models/result.rb +53 -0
  36. data/lib/serialbench/models/result_set.rb +71 -0
  37. data/lib/serialbench/models/result_store.rb +108 -0
  38. data/lib/serialbench/models.rb +54 -0
  39. data/lib/serialbench/ruby_build_manager.rb +153 -0
  40. data/lib/serialbench/runners/asdf_runner.rb +296 -0
  41. data/lib/serialbench/runners/base.rb +32 -0
  42. data/lib/serialbench/runners/docker_runner.rb +142 -0
  43. data/lib/serialbench/serializers/base_serializer.rb +8 -16
  44. data/lib/serialbench/serializers/json/base_json_serializer.rb +4 -4
  45. data/lib/serialbench/serializers/json/json_serializer.rb +0 -2
  46. data/lib/serialbench/serializers/json/oj_serializer.rb +0 -2
  47. data/lib/serialbench/serializers/json/rapidjson_serializer.rb +50 -0
  48. data/lib/serialbench/serializers/json/yajl_serializer.rb +6 -4
  49. data/lib/serialbench/serializers/toml/base_toml_serializer.rb +5 -3
  50. data/lib/serialbench/serializers/toml/toml_rb_serializer.rb +0 -2
  51. data/lib/serialbench/serializers/toml/tomlib_serializer.rb +0 -2
  52. data/lib/serialbench/serializers/toml/tomlrb_serializer.rb +56 -0
  53. data/lib/serialbench/serializers/xml/base_xml_serializer.rb +4 -9
  54. data/lib/serialbench/serializers/xml/libxml_serializer.rb +0 -2
  55. data/lib/serialbench/serializers/xml/nokogiri_serializer.rb +21 -5
  56. data/lib/serialbench/serializers/xml/oga_serializer.rb +0 -2
  57. data/lib/serialbench/serializers/xml/ox_serializer.rb +0 -2
  58. data/lib/serialbench/serializers/xml/rexml_serializer.rb +32 -4
  59. data/lib/serialbench/serializers/yaml/base_yaml_serializer.rb +59 -0
  60. data/lib/serialbench/serializers/yaml/psych_serializer.rb +54 -0
  61. data/lib/serialbench/serializers/yaml/syck_serializer.rb +102 -0
  62. data/lib/serialbench/serializers.rb +34 -6
  63. data/lib/serialbench/site_generator.rb +105 -0
  64. data/lib/serialbench/templates/assets/css/benchmark_report.css +535 -0
  65. data/lib/serialbench/templates/assets/css/format_based.css +526 -0
  66. data/lib/serialbench/templates/assets/css/themes.css +588 -0
  67. data/lib/serialbench/templates/assets/js/chart_helpers.js +381 -0
  68. data/lib/serialbench/templates/assets/js/dashboard.js +796 -0
  69. data/lib/serialbench/templates/assets/js/navigation.js +142 -0
  70. data/lib/serialbench/templates/base.liquid +49 -0
  71. data/lib/serialbench/templates/format_based.liquid +279 -0
  72. data/lib/serialbench/templates/partials/chart_section.liquid +4 -0
  73. data/lib/serialbench/version.rb +1 -1
  74. data/lib/serialbench.rb +2 -31
  75. data/serialbench.gemspec +28 -17
  76. metadata +192 -55
  77. data/lib/serialbench/chart_generator.rb +0 -821
  78. data/lib/serialbench/result_formatter.rb +0 -182
  79. data/lib/serialbench/result_merger.rb +0 -1201
  80. data/lib/serialbench/serializers/xml/base_parser.rb +0 -69
  81. data/lib/serialbench/serializers/xml/libxml_parser.rb +0 -98
  82. data/lib/serialbench/serializers/xml/nokogiri_parser.rb +0 -111
  83. data/lib/serialbench/serializers/xml/oga_parser.rb +0 -85
  84. data/lib/serialbench/serializers/xml/ox_parser.rb +0 -64
  85. data/lib/serialbench/serializers/xml/rexml_parser.rb +0 -129
@@ -3,6 +3,7 @@
3
3
  require 'benchmark'
4
4
  require 'benchmark/ips'
5
5
  require_relative 'serializers'
6
+ require_relative 'models/benchmark_result'
6
7
 
7
8
  begin
8
9
  require 'memory_profiler'
@@ -12,16 +13,14 @@ end
12
13
 
13
14
  module Serialbench
14
15
  class BenchmarkRunner
15
- attr_reader :serializers, :test_data, :results, :formats
16
-
17
- def initialize(formats: FORMATS, iterations: nil, warmup: nil, **options)
18
- @formats = Array(formats)
19
- @options = options
20
- @options[:iterations] = iterations if iterations
21
- @options[:warmup] = warmup if warmup
22
- @serializers = load_available_serializers
16
+ attr_reader :environment_config, :benchmark_config, :serializers, :test_data, :results
17
+
18
+ def initialize(benchmark_config:, environment_config:)
19
+ @environment_config = environment_config
20
+ @benchmark_config = benchmark_config
21
+ @serializers = Serializers.available
23
22
  @test_data = {}
24
- @results = {}
23
+ @results = []
25
24
  load_test_data
26
25
  end
27
26
 
@@ -29,201 +28,113 @@ module Serialbench
29
28
  puts 'Serialbench - Running comprehensive serialization performance tests'
30
29
  puts '=' * 70
31
30
  puts "Available serializers: #{@serializers.map(&:name).join(', ')}"
32
- puts "Test formats: #{@formats.join(', ')}"
31
+ puts "Test formats: #{@benchmark_config.formats.join(', ')}"
33
32
  puts "Test data sizes: #{@test_data.keys.join(', ')}"
34
33
  puts
35
34
 
36
- @results = {
37
- environment: collect_environment_info,
35
+ Models::BenchmarkResult.new(
36
+ serializers: Serializers.information,
38
37
  parsing: run_parsing_benchmarks,
39
38
  generation: run_generation_benchmarks,
40
- memory_usage: run_memory_benchmarks
41
- }
42
-
43
- # Add streaming benchmarks if any serializers support it
44
- streaming_serializers = @serializers.select(&:supports_streaming?)
45
- @results[:streaming] = run_streaming_benchmarks if streaming_serializers.any?
46
-
47
- @results
48
- end
49
-
50
- def environment_info
51
- collect_environment_info
39
+ memory_usage: run_memory_benchmarks,
40
+ streaming: run_streaming_benchmarks
41
+ )
52
42
  end
53
43
 
54
44
  def run_parsing_benchmarks
55
- puts 'Running parsing benchmarks...'
56
- results = {}
57
-
58
- @test_data.each do |size, format_data|
59
- puts " Testing #{size} files..."
60
- results[size] = {}
61
-
62
- format_data.each do |format, data|
63
- next unless @formats.include?(format)
64
-
65
- results[size][format] = {}
66
- iterations = get_iterations_for_size(size)
67
-
68
- serializers_for_format(format).each do |serializer|
69
- next unless serializer.available?
70
-
71
- begin
72
- # Warmup
73
- 3.times { serializer.parse(data) }
74
-
75
- # Benchmark
76
- time = Benchmark.realtime do
77
- iterations.times { serializer.parse(data) }
78
- end
79
-
80
- results[size][format][serializer.name] = {
81
- time_per_iterations: time,
82
- time_per_iteration: time / iterations.to_f,
83
- iterations_per_second: iterations.to_f / time,
84
- iterations_count: iterations
85
- }
86
-
87
- puts " #{format}/#{serializer.name}: #{(time / iterations.to_f * 1000).round(2)}ms per parse"
88
- rescue StandardError => e
89
- puts " #{format}/#{serializer.name}: ERROR - #{e.message}"
90
- results[size][format][serializer.name] = { error: e.message }
91
- end
92
- end
93
- end
45
+ run_benchmark_type('parsing', 'parse') do |serializer, data|
46
+ serializer.parse(data)
94
47
  end
95
-
96
- results
97
48
  end
98
49
 
99
50
  def run_generation_benchmarks
100
- puts "\nRunning generation benchmarks..."
101
- results = {}
102
-
103
- @test_data.each do |size, format_data|
104
- puts " Testing #{size} files..."
105
- results[size] = {}
106
-
107
- format_data.each do |format, data|
108
- next unless @formats.include?(format)
109
-
110
- results[size][format] = {}
111
- iterations = get_iterations_for_size(size)
51
+ run_benchmark_type('generation', 'generation') do |serializer, data|
52
+ document = serializer.parse(data)
53
+ serializer.generate(document)
54
+ end
55
+ end
112
56
 
113
- serializers_for_format(format).each do |serializer|
114
- next unless serializer.available?
57
+ def run_streaming_benchmarks
58
+ run_benchmark_type('streaming', 'stream parse') do |serializer, data|
59
+ serializer.stream_parse(data) { |event, data| }
60
+ end
61
+ end
115
62
 
116
- begin
117
- # Parse document first to get object for generation
118
- document = serializer.parse(data)
119
-
120
- # Warmup
121
- 3.times { serializer.generate(document) }
122
-
123
- # Benchmark
124
- time = Benchmark.realtime do
125
- iterations.times { serializer.generate(document) }
126
- end
127
-
128
- results[size][format][serializer.name] = {
129
- time_per_iterations: time,
130
- time_per_iteration: time / iterations.to_f,
131
- iterations_per_second: iterations.to_f / time,
132
- iterations_count: iterations
133
- }
63
+ def run_memory_benchmarks
64
+ puts "\nRunning memory usage benchmarks..."
65
+ return [] unless defined?(::MemoryProfiler)
134
66
 
135
- puts " #{format}/#{serializer.name}: #{(time / iterations.to_f * 1000).round(2)}ms per generation"
136
- rescue StandardError => e
137
- puts " #{format}/#{serializer.name}: ERROR - #{e.message}"
138
- results[size][format][serializer.name] = { error: e.message }
139
- end
140
- end
67
+ run_benchmark_iteration('memory') do |serializer, format, size, data|
68
+ # Memory profiling for parsing
69
+ report = ::MemoryProfiler.report do
70
+ 10.times { serializer.parse(data) }
141
71
  end
142
- end
143
72
 
144
- results
73
+ result = Models::MemoryPerformance.new(
74
+ adapter: serializer.name,
75
+ format: format,
76
+ data_size: size,
77
+ total_allocated: report.total_allocated,
78
+ total_retained: report.total_retained,
79
+ allocated_memory: report.total_allocated_memsize,
80
+ retained_memory: report.total_retained_memsize
81
+ )
82
+
83
+ puts " #{format}/#{serializer.name}: #{(report.total_allocated_memsize / 1024.0 / 1024.0).round(2)}MB allocated"
84
+ result
85
+ end
145
86
  end
146
87
 
147
- def run_streaming_benchmarks
148
- puts "\nRunning streaming benchmarks..."
149
- results = {}
150
-
151
- @test_data.each do |size, format_data|
152
- puts " Testing #{size} files..."
153
- results[size] = {}
88
+ private
154
89
 
155
- format_data.each do |format, data|
156
- next unless @formats.include?(format)
90
+ def run_benchmark_type(type_name, operation_name, &block)
91
+ puts "#{type_name == 'parsing' ? '' : "\n"}Running #{type_name} benchmarks..."
157
92
 
158
- results[size][format] = {}
159
- iterations = get_iterations_for_size(size)
93
+ run_benchmark_iteration(type_name) do |serializer, format, size, data|
94
+ iterations = get_iterations_for_size(size)
160
95
 
161
- serializers_for_format(format).select(&:supports_streaming?).each do |serializer|
162
- next unless serializer.available?
96
+ # Warmup
97
+ 3.times { block.call(serializer, data) }
163
98
 
164
- begin
165
- # Warmup
166
- 3.times { serializer.stream_parse(data) { |event, data| } }
167
-
168
- # Benchmark
169
- time = Benchmark.realtime do
170
- iterations.times { serializer.stream_parse(data) { |event, data| } }
171
- end
172
-
173
- results[size][format][serializer.name] = {
174
- time_per_iterations: time,
175
- time_per_iteration: time / iterations.to_f,
176
- iterations_per_second: iterations.to_f / time,
177
- iterations_count: iterations
178
- }
179
-
180
- puts " #{format}/#{serializer.name}: #{(time / iterations.to_f * 1000).round(2)}ms per stream parse"
181
- rescue StandardError => e
182
- puts " #{format}/#{serializer.name}: ERROR - #{e.message}"
183
- results[size][format][serializer.name] = { error: e.message }
184
- end
185
- end
99
+ # Benchmark
100
+ time = Benchmark.realtime do
101
+ iterations.times { block.call(serializer, data) }
186
102
  end
187
- end
188
103
 
189
- results
104
+ result = Models::IterationPerformance.new(
105
+ adapter: serializer.name,
106
+ format: format,
107
+ data_size: size,
108
+ time_per_iterations: time,
109
+ time_per_iteration: time / iterations.to_f,
110
+ iterations_per_second: iterations.to_f / time,
111
+ iterations_count: iterations
112
+ )
113
+
114
+ puts " #{result.format}/#{result.adapter}: #{(result.time_per_iteration * 1000).round(2)}ms per #{operation_name}"
115
+ result
116
+ end
190
117
  end
191
118
 
192
- def run_memory_benchmarks
193
- puts "\nRunning memory usage benchmarks..."
194
- results = {}
195
-
196
- return results unless defined?(::MemoryProfiler)
119
+ def run_benchmark_iteration(type_name)
120
+ results = []
197
121
 
198
122
  @test_data.each do |size, format_data|
199
123
  puts " Testing #{size} files..."
200
- results[size] = {}
201
124
 
202
125
  format_data.each do |format, data|
203
- next unless @formats.include?(format)
126
+ next unless @benchmark_config.formats.include?(format)
204
127
 
205
- results[size][format] = {}
128
+ serializers = get_serializers_for_benchmark_type(type_name, format)
206
129
 
207
- serializers_for_format(format).each do |serializer|
130
+ serializers.each do |serializer|
208
131
  next unless serializer.available?
209
132
 
210
133
  begin
211
- # Memory profiling for parsing
212
- report = ::MemoryProfiler.report do
213
- 10.times { serializer.parse(data) }
214
- end
215
-
216
- results[size][format][serializer.name] = {
217
- total_allocated: report.total_allocated,
218
- total_retained: report.total_retained,
219
- allocated_memory: report.total_allocated_memsize,
220
- retained_memory: report.total_retained_memsize
221
- }
222
-
223
- puts " #{format}/#{serializer.name}: #{(report.total_allocated_memsize / 1024.0 / 1024.0).round(2)}MB allocated"
134
+ result = yield(serializer, format, size, data)
135
+ results << result if result
224
136
  rescue StandardError => e
225
137
  puts " #{format}/#{serializer.name}: ERROR - #{e.message}"
226
- results[size][format][serializer.name] = { error: e.message }
227
138
  end
228
139
  end
229
140
  end
@@ -232,101 +143,160 @@ module Serialbench
232
143
  results
233
144
  end
234
145
 
235
- def serializers_for_format(format)
236
- @serializers.select { |s| s.format == format.to_sym }
237
- end
238
-
239
- def all_serializers
240
- @serializers
241
- end
242
-
243
- private
146
+ def get_serializers_for_benchmark_type(type_name, format)
147
+ serializers = Serializers.for_format(format)
244
148
 
245
- def get_iterations_for_size(size)
246
- case size
247
- when :small
248
- 20
249
- when :medium
250
- 5
251
- when :large
252
- 2
149
+ case type_name
150
+ when 'generation'
151
+ serializers.select(&:supports_generation?)
152
+ when 'streaming'
153
+ serializers.select(&:supports_streaming?)
253
154
  else
254
- 10
155
+ serializers
255
156
  end
256
157
  end
257
158
 
258
- def load_available_serializers
259
- Serializers.available.map(&:new)
159
+ def get_iterations_for_size(size)
160
+ @benchmark_config.iterations.send(size.to_s)
260
161
  end
261
162
 
262
163
  def load_test_data
263
- # Load test data for each format
264
- @test_data = {
265
- small: {},
266
- medium: {},
267
- large: {}
268
- }
164
+ # Determine which data sizes to load based on configuration
165
+ data_sizes = @benchmark_config.data_sizes
269
166
 
270
- # Generate data for each format
271
- @formats.each do |format|
272
- case format
273
- when :xml
274
- @test_data[:small][:xml] = generate_small_xml
275
- @test_data[:medium][:xml] = generate_medium_xml
276
- @test_data[:large][:xml] = generate_large_xml
277
- when :json
278
- @test_data[:small][:json] = generate_small_json
279
- @test_data[:medium][:json] = generate_medium_json
280
- @test_data[:large][:json] = generate_large_json
281
- when :toml
282
- @test_data[:small][:toml] = generate_small_toml
283
- @test_data[:medium][:toml] = generate_medium_toml
284
- @test_data[:large][:toml] = generate_large_toml
167
+ # Initialize test data structure
168
+ @test_data = {}
169
+ data_sizes.each { |size| @test_data[size] = {} }
170
+
171
+ # Generate data for each format and size
172
+ @benchmark_config.formats.each do |format|
173
+ data_sizes.each do |size|
174
+ @test_data[size][format] = generate_test_data(format, size)
285
175
  end
286
176
  end
287
177
 
288
178
  # Try to load real test files if they exist
289
- %w[small medium large].each do |size|
290
- @formats.each do |format|
179
+ data_sizes.each do |size|
180
+ @benchmark_config.formats.each do |format|
291
181
  file_path = "test_data/#{size}.#{format}"
292
- @test_data[size.to_sym][format] = File.read(file_path) if File.exist?(file_path)
182
+ @test_data[size][format] = File.read(file_path) if File.exist?(file_path)
293
183
  end
294
184
  end
295
185
  end
296
186
 
187
+ def generate_test_data(format, size)
188
+ method_name = "generate_#{size}_#{format}"
189
+ send(method_name)
190
+ end
191
+
192
+ # Shared data structure generators
193
+ def small_test_data_structure
194
+ {
195
+ config: {
196
+ database: {
197
+ host: 'localhost',
198
+ port: 5432,
199
+ name: 'myapp',
200
+ user: 'admin',
201
+ password: 'secret'
202
+ },
203
+ cache: {
204
+ enabled: true,
205
+ ttl: 3600
206
+ }
207
+ }
208
+ }
209
+ end
210
+
211
+ def medium_test_data_structure
212
+ {
213
+ users: (1..1000).map do |i|
214
+ {
215
+ id: i,
216
+ name: "User #{i}",
217
+ email: "user#{i}@example.com",
218
+ created_at: "2023-01-#{(i % 28) + 1}T10:00:00Z",
219
+ profile: {
220
+ age: 20 + (i % 50),
221
+ city: "City #{i % 100}",
222
+ preferences: {
223
+ theme: i.even? ? 'dark' : 'light',
224
+ notifications: i % 3 == 0
225
+ }
226
+ }
227
+ }
228
+ end
229
+ }
230
+ end
231
+
232
+ def large_test_data_structure
233
+ {
234
+ dataset: {
235
+ header: {
236
+ created: '2023-01-01T00:00:00Z',
237
+ count: 10_000,
238
+ format: 'data'
239
+ },
240
+ records: (1..10_000).map do |i|
241
+ {
242
+ id: i,
243
+ timestamp: "2023-01-01T#{format('%02d', i % 24)}:#{format('%02d', i % 60)}:#{format('%02d', i % 60)}Z",
244
+ data: {
245
+ field1: "Value #{i}",
246
+ field2: i * 2,
247
+ field3: i % 100 == 0 ? 'special' : 'normal',
248
+ nested: [
249
+ "Item #{i}-1",
250
+ "Item #{i}-2",
251
+ "Item #{i}-3"
252
+ ]
253
+ },
254
+ metadata: {
255
+ source: 'generator',
256
+ version: '1.0',
257
+ checksum: i.to_s(16)
258
+ }
259
+ }
260
+ end
261
+ }
262
+ }
263
+ end
264
+
297
265
  # XML test data generators
298
266
  def generate_small_xml
267
+ data = small_test_data_structure
299
268
  <<~XML
300
269
  <?xml version="1.0" encoding="UTF-8"?>
301
270
  <config>
302
271
  <database>
303
- <host>localhost</host>
304
- <port>5432</port>
305
- <name>myapp</name>
306
- <user>admin</user>
307
- <password>secret</password>
272
+ <host>#{data[:config][:database][:host]}</host>
273
+ <port>#{data[:config][:database][:port]}</port>
274
+ <name>#{data[:config][:database][:name]}</name>
275
+ <user>#{data[:config][:database][:user]}</user>
276
+ <password>#{data[:config][:database][:password]}</password>
308
277
  </database>
309
278
  <cache>
310
- <enabled>true</enabled>
311
- <ttl>3600</ttl>
279
+ <enabled>#{data[:config][:cache][:enabled]}</enabled>
280
+ <ttl>#{data[:config][:cache][:ttl]}</ttl>
312
281
  </cache>
313
282
  </config>
314
283
  XML
315
284
  end
316
285
 
317
286
  def generate_medium_xml
318
- users = (1..1000).map do |i|
287
+ data = medium_test_data_structure
288
+ users = data[:users].map do |user|
319
289
  <<~USER
320
- <user id="#{i}">
321
- <name>User #{i}</name>
322
- <email>user#{i}@example.com</email>
323
- <created_at>2023-01-#{(i % 28) + 1}T10:00:00Z</created_at>
290
+ <user id="#{user[:id]}">
291
+ <name>#{user[:name]}</name>
292
+ <email>#{user[:email]}</email>
293
+ <created_at>#{user[:created_at]}</created_at>
324
294
  <profile>
325
- <age>#{20 + (i % 50)}</age>
326
- <city>City #{i % 100}</city>
295
+ <age>#{user[:profile][:age]}</age>
296
+ <city>#{user[:profile][:city]}</city>
327
297
  <preferences>
328
- <theme>#{i.even? ? 'dark' : 'light'}</theme>
329
- <notifications>#{i % 3 == 0 ? 'true' : 'false'}</notifications>
298
+ <theme>#{user[:profile][:preferences][:theme]}</theme>
299
+ <notifications>#{user[:profile][:preferences][:notifications]}</notifications>
330
300
  </preferences>
331
301
  </profile>
332
302
  </user>
@@ -342,26 +312,26 @@ module Serialbench
342
312
  end
343
313
 
344
314
  def generate_large_xml
345
- records = (1..10_000).map do |i|
315
+ data = large_test_data_structure
316
+ records = data[:dataset][:records].map do |record|
317
+ nested_items = record[:data][:nested].map { |item| " <item>#{item}</item>" }.join("\n")
346
318
  <<~RECORD
347
- <record id="#{i}">
348
- <timestamp>2023-01-01T#{format('%02d', i % 24)}:#{format('%02d', i % 60)}:#{format('%02d', i % 60)}Z</timestamp>
349
- <data>
350
- <field1>Value #{i}</field1>
351
- <field2>#{i * 2}</field2>
352
- <field3>#{i % 100 == 0 ? 'special' : 'normal'}</field3>
353
- <nested>
354
- <item>Item #{i}-1</item>
355
- <item>Item #{i}-2</item>
356
- <item>Item #{i}-3</item>
357
- </nested>
358
- </data>
359
- <metadata>
360
- <source>generator</source>
361
- <version>1.0</version>
362
- <checksum>#{i.to_s(16)}</checksum>
363
- </metadata>
364
- </record>
319
+ <record id="#{record[:id]}">
320
+ <timestamp>#{record[:timestamp]}</timestamp>
321
+ <data>
322
+ <field1>#{record[:data][:field1]}</field1>
323
+ <field2>#{record[:data][:field2]}</field2>
324
+ <field3>#{record[:data][:field3]}</field3>
325
+ <nested>
326
+ #{nested_items}
327
+ </nested>
328
+ </data>
329
+ <metadata>
330
+ <source>#{record[:metadata][:source]}</source>
331
+ <version>#{record[:metadata][:version]}</version>
332
+ <checksum>#{record[:metadata][:checksum]}</checksum>
333
+ </metadata>
334
+ </record>
365
335
  RECORD
366
336
  end.join
367
337
 
@@ -369,8 +339,8 @@ module Serialbench
369
339
  <?xml version="1.0" encoding="UTF-8"?>
370
340
  <dataset>
371
341
  <header>
372
- <created>2023-01-01T00:00:00Z</created>
373
- <count>10000</count>
342
+ <created>#{data[:dataset][:header][:created]}</created>
343
+ <count>#{data[:dataset][:header][:count]}</count>
374
344
  <format>xml</format>
375
345
  </header>
376
346
  <records>
@@ -382,137 +352,96 @@ module Serialbench
382
352
 
383
353
  # JSON test data generators
384
354
  def generate_small_json
385
- require 'json'
386
- JSON.generate({
387
- config: {
388
- database: {
389
- host: 'localhost',
390
- port: 5432,
391
- name: 'myapp',
392
- user: 'admin',
393
- password: 'secret'
394
- },
395
- cache: {
396
- enabled: true,
397
- ttl: 3600
398
- }
399
- }
400
- })
355
+ JSON.generate(small_test_data_structure)
401
356
  end
402
357
 
403
358
  def generate_medium_json
404
- require 'json'
405
- users = (1..1000).map do |i|
406
- {
407
- id: i,
408
- name: "User #{i}",
409
- email: "user#{i}@example.com",
410
- created_at: "2023-01-#{(i % 28) + 1}T10:00:00Z",
411
- profile: {
412
- age: 20 + (i % 50),
413
- city: "City #{i % 100}",
414
- preferences: {
415
- theme: i.even? ? 'dark' : 'light',
416
- notifications: i % 3 == 0
417
- }
418
- }
419
- }
420
- end
421
-
422
- JSON.generate({ users: users })
359
+ JSON.generate(medium_test_data_structure)
423
360
  end
424
361
 
425
362
  def generate_large_json
426
- require 'json'
427
- records = (1..10_000).map do |i|
428
- {
429
- id: i,
430
- timestamp: "2023-01-01T#{format('%02d', i % 24)}:#{format('%02d', i % 60)}:#{format('%02d', i % 60)}Z",
431
- data: {
432
- field1: "Value #{i}",
433
- field2: i * 2,
434
- field3: i % 100 == 0 ? 'special' : 'normal',
435
- nested: [
436
- "Item #{i}-1",
437
- "Item #{i}-2",
438
- "Item #{i}-3"
439
- ]
440
- },
441
- metadata: {
442
- source: 'generator',
443
- version: '1.0',
444
- checksum: i.to_s(16)
445
- }
446
- }
447
- end
363
+ data = large_test_data_structure
364
+ data[:dataset][:header][:format] = 'json'
365
+ JSON.generate(data)
366
+ end
448
367
 
449
- JSON.generate({
450
- dataset: {
451
- header: {
452
- created: '2023-01-01T00:00:00Z',
453
- count: 10_000,
454
- format: 'json'
455
- },
456
- records: records
457
- }
458
- })
368
+ # YAML test data generators
369
+ def generate_small_yaml
370
+ small_test_data_structure.to_yaml
371
+ end
372
+
373
+ def generate_medium_yaml
374
+ medium_test_data_structure.to_yaml
375
+ end
376
+
377
+ def generate_large_yaml
378
+ data = large_test_data_structure
379
+ data[:dataset][:header][:format] = 'yaml'
380
+ data.to_yaml
459
381
  end
460
382
 
461
383
  # TOML test data generators
462
384
  def generate_small_toml
385
+ data = small_test_data_structure
463
386
  <<~TOML
464
387
  [config]
465
388
 
466
389
  [config.database]
467
- host = "localhost"
468
- port = 5432
469
- name = "myapp"
470
- user = "admin"
471
- password = "secret"
390
+ host = "#{data[:config][:database][:host]}"
391
+ port = #{data[:config][:database][:port]}
392
+ name = "#{data[:config][:database][:name]}"
393
+ user = "#{data[:config][:database][:user]}"
394
+ password = "#{data[:config][:database][:password]}"
472
395
 
473
396
  [config.cache]
474
- enabled = true
475
- ttl = 3600
397
+ enabled = #{data[:config][:cache][:enabled]}
398
+ ttl = #{data[:config][:cache][:ttl]}
476
399
  TOML
477
400
  end
478
401
 
479
402
  def generate_medium_toml
480
- (1..100).map do |i| # Smaller for TOML due to verbosity
403
+ data = medium_test_data_structure
404
+ # Use smaller dataset for TOML due to verbosity
405
+ users = data[:users].first(100)
406
+ users.map do |user|
481
407
  <<~USER
482
408
  [[users]]
483
- id = #{i}
484
- name = "User #{i}"
485
- email = "user#{i}@example.com"
486
- created_at = "2023-01-#{(i % 28) + 1}T10:00:00Z"
409
+ id = #{user[:id]}
410
+ name = "#{user[:name]}"
411
+ email = "#{user[:email]}"
412
+ created_at = "#{user[:created_at]}"
487
413
 
488
414
  [users.profile]
489
- age = #{20 + (i % 50)}
490
- city = "City #{i % 100}"
415
+ age = #{user[:profile][:age]}
416
+ city = "#{user[:profile][:city]}"
491
417
 
492
418
  [users.profile.preferences]
493
- theme = "#{i.even? ? 'dark' : 'light'}"
494
- notifications = #{i % 3 == 0}
419
+ theme = "#{user[:profile][:preferences][:theme]}"
420
+ notifications = #{user[:profile][:preferences][:notifications]}
495
421
  USER
496
422
  end.join("\n")
497
423
  end
498
424
 
499
425
  def generate_large_toml
500
- records_toml = (1..1000).map do |i| # Smaller for TOML due to verbosity
426
+ data = large_test_data_structure
427
+ # Use smaller dataset for TOML due to verbosity
428
+ records = data[:dataset][:records].first(1000)
429
+ records_toml = records.map do |record|
501
430
  <<~RECORD
502
431
  [[dataset.records]]
503
- id = #{i}
504
- timestamp = "2023-01-01T#{format('%02d', i % 24)}:#{format('%02d', i % 60)}:#{format('%02d', i % 60)}Z"
432
+ id = #{record[:id]}
433
+ timestamp = "#{record[:timestamp]}"
505
434
 
506
435
  [dataset.records.data]
507
- field1 = "Value #{i}"
508
- field2 = #{i * 2}
509
- field3 = "#{i % 100 == 0 ? 'special' : 'normal'}"
510
- nested = ["Item #{i}-1", "Item #{i}-2", "Item #{i}-3"]
436
+ field1 = "#{record[:data][:field1]}"
437
+ field2 = #{record[:data][:field2]}
438
+ field3 = "#{record[:data][:field3]}"
439
+ nested = #{record[:data][:nested].inspect}
511
440
 
512
441
  [dataset.records.metadata]
513
- source = "generator"
514
- version = "1.0"
515
- checksum = "#{i.to_s(16)}"
442
+ source = "#{record[:metadata][:source]}"
443
+ version = "#{record[:metadata][:version]}"
444
+ checksum = "#{record[:metadata][:checksum]}"
516
445
  RECORD
517
446
  end.join("\n")
518
447
 
@@ -520,21 +449,12 @@ module Serialbench
520
449
  [dataset]
521
450
 
522
451
  [dataset.header]
523
- created = "2023-01-01T00:00:00Z"
524
- count = 1000
452
+ created = "#{data[:dataset][:header][:created]}"
453
+ count = #{records.length}
525
454
  format = "toml"
526
455
 
527
456
  #{records_toml}
528
457
  TOML
529
458
  end
530
-
531
- def collect_environment_info
532
- {
533
- ruby_version: RUBY_VERSION,
534
- ruby_platform: RUBY_PLATFORM,
535
- serializer_versions: @serializers.map { |s| [s.name, s.version] }.to_h,
536
- timestamp: Time.now.iso8601
537
- }
538
- end
539
459
  end
540
460
  end