serialbench 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/benchmark.yml +125 -0
  3. data/.github/workflows/ci.yml +74 -0
  4. data/.rspec +4 -0
  5. data/Gemfile +34 -0
  6. data/README.adoc +592 -0
  7. data/Rakefile +63 -0
  8. data/exe/serialbench +6 -0
  9. data/lib/serialbench/benchmark_runner.rb +540 -0
  10. data/lib/serialbench/chart_generator.rb +821 -0
  11. data/lib/serialbench/cli.rb +438 -0
  12. data/lib/serialbench/memory_profiler.rb +31 -0
  13. data/lib/serialbench/result_formatter.rb +182 -0
  14. data/lib/serialbench/result_merger.rb +1201 -0
  15. data/lib/serialbench/serializers/base_serializer.rb +63 -0
  16. data/lib/serialbench/serializers/json/base_json_serializer.rb +67 -0
  17. data/lib/serialbench/serializers/json/json_serializer.rb +58 -0
  18. data/lib/serialbench/serializers/json/oj_serializer.rb +102 -0
  19. data/lib/serialbench/serializers/json/yajl_serializer.rb +67 -0
  20. data/lib/serialbench/serializers/toml/base_toml_serializer.rb +76 -0
  21. data/lib/serialbench/serializers/toml/toml_rb_serializer.rb +55 -0
  22. data/lib/serialbench/serializers/toml/tomlib_serializer.rb +50 -0
  23. data/lib/serialbench/serializers/xml/base_parser.rb +69 -0
  24. data/lib/serialbench/serializers/xml/base_xml_serializer.rb +71 -0
  25. data/lib/serialbench/serializers/xml/libxml_parser.rb +98 -0
  26. data/lib/serialbench/serializers/xml/libxml_serializer.rb +127 -0
  27. data/lib/serialbench/serializers/xml/nokogiri_parser.rb +111 -0
  28. data/lib/serialbench/serializers/xml/nokogiri_serializer.rb +118 -0
  29. data/lib/serialbench/serializers/xml/oga_parser.rb +85 -0
  30. data/lib/serialbench/serializers/xml/oga_serializer.rb +125 -0
  31. data/lib/serialbench/serializers/xml/ox_parser.rb +64 -0
  32. data/lib/serialbench/serializers/xml/ox_serializer.rb +88 -0
  33. data/lib/serialbench/serializers/xml/rexml_parser.rb +129 -0
  34. data/lib/serialbench/serializers/xml/rexml_serializer.rb +121 -0
  35. data/lib/serialbench/serializers.rb +62 -0
  36. data/lib/serialbench/version.rb +5 -0
  37. data/lib/serialbench.rb +42 -0
  38. data/serialbench.gemspec +51 -0
  39. metadata +239 -0
@@ -0,0 +1,540 @@
1
# frozen_string_literal: true

require 'benchmark'
require 'benchmark/ips'
require 'time'

require_relative 'serializers'

begin
  require 'memory_profiler'
rescue LoadError
  # Memory profiler is optional
end
12
+
13
+ module Serialbench
14
+ class BenchmarkRunner
15
+ attr_reader :serializers, :test_data, :results, :formats
16
+
17
    # Builds a benchmark runner for the given serialization formats.
    #
    # @param formats [Array<Symbol>, Symbol] formats to benchmark
    #   (defaults to FORMATS — a constant defined elsewhere in the gem)
    # @param iterations [Integer, nil] optional iteration-count override,
    #   stored in @options when provided
    # @param warmup [Integer, nil] optional warmup override, stored in @options
    # @param options [Hash] additional options kept verbatim
    def initialize(formats: FORMATS, iterations: nil, warmup: nil, **options)
      @formats = Array(formats)
      @options = options
      @options[:iterations] = iterations if iterations
      @options[:warmup] = warmup if warmup
      @serializers = load_available_serializers
      @test_data = {}
      @results = {}
      load_test_data
    end
27
+
28
    # Runs the full benchmark suite (parsing, generation, memory usage and —
    # when at least one serializer supports it — streaming) across every
    # loaded serializer, format and test-data size.
    #
    # Prints progress to stdout and stores the aggregate in @results.
    #
    # @return [Hash] results keyed by :environment, :parsing, :generation,
    #   :memory_usage and optionally :streaming
    def run_all_benchmarks
      puts 'Serialbench - Running comprehensive serialization performance tests'
      puts '=' * 70
      puts "Available serializers: #{@serializers.map(&:name).join(', ')}"
      puts "Test formats: #{@formats.join(', ')}"
      puts "Test data sizes: #{@test_data.keys.join(', ')}"
      puts

      @results = {
        environment: collect_environment_info,
        parsing: run_parsing_benchmarks,
        generation: run_generation_benchmarks,
        memory_usage: run_memory_benchmarks
      }

      # Add streaming benchmarks if any serializers support it
      streaming_serializers = @serializers.select(&:supports_streaming?)
      @results[:streaming] = run_streaming_benchmarks if streaming_serializers.any?

      @results
    end
49
+
50
    # Public accessor for execution-environment metadata; the data is
    # recollected on every call (no caching).
    #
    # @return [Hash] see #collect_environment_info
    def environment_info
      collect_environment_info
    end
53
+
54
    # Benchmarks document parsing for every size/format/serializer combination.
    #
    # Each serializer gets 3 warmup parses, then a timed loop whose iteration
    # count depends on payload size. Per-serializer failures are recorded as
    # { error: message } instead of aborting the run.
    #
    # @return [Hash] results[size][format][serializer_name] => timing hash
    def run_parsing_benchmarks
      puts 'Running parsing benchmarks...'
      results = {}

      @test_data.each do |size, format_data|
        puts "  Testing #{size} files..."
        results[size] = {}

        format_data.each do |format, data|
          next unless @formats.include?(format)

          results[size][format] = {}
          iterations = get_iterations_for_size(size)

          serializers_for_format(format).each do |serializer|
            next unless serializer.available?

            begin
              # Warmup
              3.times { serializer.parse(data) }

              # Benchmark
              time = Benchmark.realtime do
                iterations.times { serializer.parse(data) }
              end

              results[size][format][serializer.name] = {
                time_per_iterations: time, # total wall-clock time for the whole loop
                time_per_iteration: time / iterations.to_f,
                iterations_per_second: iterations.to_f / time,
                iterations_count: iterations
              }

              puts "    #{format}/#{serializer.name}: #{(time / iterations.to_f * 1000).round(2)}ms per parse"
            rescue StandardError => e
              puts "    #{format}/#{serializer.name}: ERROR - #{e.message}"
              results[size][format][serializer.name] = { error: e.message }
            end
          end
        end
      end

      results
    end
98
+
99
    # Benchmarks document generation (serialization) for every
    # size/format/serializer combination.
    #
    # The payload is parsed once per serializer to obtain a document object,
    # then generation is warmed up 3 times and timed. Failures are recorded
    # per serializer as { error: message }.
    #
    # @return [Hash] results[size][format][serializer_name] => timing hash
    def run_generation_benchmarks
      puts "\nRunning generation benchmarks..."
      results = {}

      @test_data.each do |size, format_data|
        puts "  Testing #{size} files..."
        results[size] = {}

        format_data.each do |format, data|
          next unless @formats.include?(format)

          results[size][format] = {}
          iterations = get_iterations_for_size(size)

          serializers_for_format(format).each do |serializer|
            next unless serializer.available?

            begin
              # Parse document first to get object for generation
              document = serializer.parse(data)

              # Warmup
              3.times { serializer.generate(document) }

              # Benchmark
              time = Benchmark.realtime do
                iterations.times { serializer.generate(document) }
              end

              results[size][format][serializer.name] = {
                time_per_iterations: time, # total wall-clock time for the whole loop
                time_per_iteration: time / iterations.to_f,
                iterations_per_second: iterations.to_f / time,
                iterations_count: iterations
              }

              puts "    #{format}/#{serializer.name}: #{(time / iterations.to_f * 1000).round(2)}ms per generation"
            rescue StandardError => e
              puts "    #{format}/#{serializer.name}: ERROR - #{e.message}"
              results[size][format][serializer.name] = { error: e.message }
            end
          end
        end
      end

      results
    end
146
+
147
+ def run_streaming_benchmarks
148
+ puts "\nRunning streaming benchmarks..."
149
+ results = {}
150
+
151
+ @test_data.each do |size, format_data|
152
+ puts " Testing #{size} files..."
153
+ results[size] = {}
154
+
155
+ format_data.each do |format, data|
156
+ next unless @formats.include?(format)
157
+
158
+ results[size][format] = {}
159
+ iterations = get_iterations_for_size(size)
160
+
161
+ serializers_for_format(format).select(&:supports_streaming?).each do |serializer|
162
+ next unless serializer.available?
163
+
164
+ begin
165
+ # Warmup
166
+ 3.times { serializer.stream_parse(data) { |event, data| } }
167
+
168
+ # Benchmark
169
+ time = Benchmark.realtime do
170
+ iterations.times { serializer.stream_parse(data) { |event, data| } }
171
+ end
172
+
173
+ results[size][format][serializer.name] = {
174
+ time_per_iterations: time,
175
+ time_per_iteration: time / iterations.to_f,
176
+ iterations_per_second: iterations.to_f / time,
177
+ iterations_count: iterations
178
+ }
179
+
180
+ puts " #{format}/#{serializer.name}: #{(time / iterations.to_f * 1000).round(2)}ms per stream parse"
181
+ rescue StandardError => e
182
+ puts " #{format}/#{serializer.name}: ERROR - #{e.message}"
183
+ results[size][format][serializer.name] = { error: e.message }
184
+ end
185
+ end
186
+ end
187
+ end
188
+
189
+ results
190
+ end
191
+
192
    # Profiles memory allocation while parsing each payload 10 times per
    # serializer, using the optional memory_profiler gem.
    #
    # Returns an empty hash when memory_profiler is not installed (its
    # top-of-file require is wrapped in a rescue LoadError).
    #
    # @return [Hash] results[size][format][serializer_name] => allocation stats
    def run_memory_benchmarks
      puts "\nRunning memory usage benchmarks..."
      results = {}

      return results unless defined?(::MemoryProfiler)

      @test_data.each do |size, format_data|
        puts "  Testing #{size} files..."
        results[size] = {}

        format_data.each do |format, data|
          next unless @formats.include?(format)

          results[size][format] = {}

          serializers_for_format(format).each do |serializer|
            next unless serializer.available?

            begin
              # Memory profiling for parsing
              report = ::MemoryProfiler.report do
                10.times { serializer.parse(data) }
              end

              results[size][format][serializer.name] = {
                total_allocated: report.total_allocated,
                total_retained: report.total_retained,
                allocated_memory: report.total_allocated_memsize,
                retained_memory: report.total_retained_memsize
              }

              puts "    #{format}/#{serializer.name}: #{(report.total_allocated_memsize / 1024.0 / 1024.0).round(2)}MB allocated"
            rescue StandardError => e
              puts "    #{format}/#{serializer.name}: ERROR - #{e.message}"
              results[size][format][serializer.name] = { error: e.message }
            end
          end
        end
      end

      results
    end
234
+
235
+ def serializers_for_format(format)
236
+ @serializers.select { |s| s.format == format.to_sym }
237
+ end
238
+
239
+ def all_serializers
240
+ @serializers
241
+ end
242
+
243
+ private
244
+
245
+ def get_iterations_for_size(size)
246
+ case size
247
+ when :small
248
+ 20
249
+ when :medium
250
+ 5
251
+ when :large
252
+ 2
253
+ else
254
+ 10
255
+ end
256
+ end
257
+
258
    # Instantiates every serializer class reported as available by the
    # Serializers registry (defined elsewhere in this gem).
    #
    # @return [Array] one instance per available serializer class
    def load_available_serializers
      Serializers.available.map(&:new)
    end
261
+
262
    # Populates @test_data with one payload string per size (:small, :medium,
    # :large) for each requested format.
    #
    # Synthetic fixtures are generated first; if a real file exists at
    # test_data/<size>.<format> it overrides the generated payload.
    #
    # @return [void]
    def load_test_data
      # Load test data for each format
      @test_data = {
        small: {},
        medium: {},
        large: {}
      }

      # Generate data for each format
      @formats.each do |format|
        case format
        when :xml
          @test_data[:small][:xml] = generate_small_xml
          @test_data[:medium][:xml] = generate_medium_xml
          @test_data[:large][:xml] = generate_large_xml
        when :json
          @test_data[:small][:json] = generate_small_json
          @test_data[:medium][:json] = generate_medium_json
          @test_data[:large][:json] = generate_large_json
        when :toml
          @test_data[:small][:toml] = generate_small_toml
          @test_data[:medium][:toml] = generate_medium_toml
          @test_data[:large][:toml] = generate_large_toml
        end
      end

      # Try to load real test files if they exist
      %w[small medium large].each do |size|
        @formats.each do |format|
          file_path = "test_data/#{size}.#{format}"
          @test_data[size.to_sym][format] = File.read(file_path) if File.exist?(file_path)
        end
      end
    end
296
+
297
+ # XML test data generators
298
    # Synthetic small XML fixture: a short application-config document with
    # database credentials and cache settings.
    #
    # @return [String] XML text
    def generate_small_xml
      <<~XML
        <?xml version="1.0" encoding="UTF-8"?>
        <config>
        <database>
        <host>localhost</host>
        <port>5432</port>
        <name>myapp</name>
        <user>admin</user>
        <password>secret</password>
        </database>
        <cache>
        <enabled>true</enabled>
        <ttl>3600</ttl>
        </cache>
        </config>
      XML
    end
316
+
317
    # Synthetic medium XML fixture: 1000 <user> records with nested profile
    # and preference elements.
    #
    # @return [String] XML text
    def generate_medium_xml
      users = (1..1000).map do |i|
        <<~USER
          <user id="#{i}">
          <name>User #{i}</name>
          <email>user#{i}@example.com</email>
          <created_at>2023-01-#{(i % 28) + 1}T10:00:00Z</created_at>
          <profile>
          <age>#{20 + (i % 50)}</age>
          <city>City #{i % 100}</city>
          <preferences>
          <theme>#{i.even? ? 'dark' : 'light'}</theme>
          <notifications>#{i % 3 == 0 ? 'true' : 'false'}</notifications>
          </preferences>
          </profile>
          </user>
        USER
      end.join

      <<~XML
        <?xml version="1.0" encoding="UTF-8"?>
        <users>
        #{users}
        </users>
      XML
    end
343
+
344
    # Synthetic large XML fixture: 10,000 <record> entries, each with nested
    # data items and metadata, wrapped in a dataset header.
    #
    # @return [String] XML text
    def generate_large_xml
      records = (1..10_000).map do |i|
        <<~RECORD
          <record id="#{i}">
          <timestamp>2023-01-01T#{format('%02d', i % 24)}:#{format('%02d', i % 60)}:#{format('%02d', i % 60)}Z</timestamp>
          <data>
          <field1>Value #{i}</field1>
          <field2>#{i * 2}</field2>
          <field3>#{i % 100 == 0 ? 'special' : 'normal'}</field3>
          <nested>
          <item>Item #{i}-1</item>
          <item>Item #{i}-2</item>
          <item>Item #{i}-3</item>
          </nested>
          </data>
          <metadata>
          <source>generator</source>
          <version>1.0</version>
          <checksum>#{i.to_s(16)}</checksum>
          </metadata>
          </record>
        RECORD
      end.join

      <<~XML
        <?xml version="1.0" encoding="UTF-8"?>
        <dataset>
        <header>
        <created>2023-01-01T00:00:00Z</created>
        <count>10000</count>
        <format>xml</format>
        </header>
        <records>
        #{records}
        </records>
        </dataset>
      XML
    end
382
+
383
+ # JSON test data generators
384
+ def generate_small_json
385
+ require 'json'
386
+ JSON.generate({
387
+ config: {
388
+ database: {
389
+ host: 'localhost',
390
+ port: 5432,
391
+ name: 'myapp',
392
+ user: 'admin',
393
+ password: 'secret'
394
+ },
395
+ cache: {
396
+ enabled: true,
397
+ ttl: 3600
398
+ }
399
+ }
400
+ })
401
+ end
402
+
403
+ def generate_medium_json
404
+ require 'json'
405
+ users = (1..1000).map do |i|
406
+ {
407
+ id: i,
408
+ name: "User #{i}",
409
+ email: "user#{i}@example.com",
410
+ created_at: "2023-01-#{(i % 28) + 1}T10:00:00Z",
411
+ profile: {
412
+ age: 20 + (i % 50),
413
+ city: "City #{i % 100}",
414
+ preferences: {
415
+ theme: i.even? ? 'dark' : 'light',
416
+ notifications: i % 3 == 0
417
+ }
418
+ }
419
+ }
420
+ end
421
+
422
+ JSON.generate({ users: users })
423
+ end
424
+
425
+ def generate_large_json
426
+ require 'json'
427
+ records = (1..10_000).map do |i|
428
+ {
429
+ id: i,
430
+ timestamp: "2023-01-01T#{format('%02d', i % 24)}:#{format('%02d', i % 60)}:#{format('%02d', i % 60)}Z",
431
+ data: {
432
+ field1: "Value #{i}",
433
+ field2: i * 2,
434
+ field3: i % 100 == 0 ? 'special' : 'normal',
435
+ nested: [
436
+ "Item #{i}-1",
437
+ "Item #{i}-2",
438
+ "Item #{i}-3"
439
+ ]
440
+ },
441
+ metadata: {
442
+ source: 'generator',
443
+ version: '1.0',
444
+ checksum: i.to_s(16)
445
+ }
446
+ }
447
+ end
448
+
449
+ JSON.generate({
450
+ dataset: {
451
+ header: {
452
+ created: '2023-01-01T00:00:00Z',
453
+ count: 10_000,
454
+ format: 'json'
455
+ },
456
+ records: records
457
+ }
458
+ })
459
+ end
460
+
461
+ # TOML test data generators
462
    # Synthetic small TOML fixture mirroring the small XML/JSON config
    # document (database credentials plus cache settings).
    #
    # @return [String] TOML text
    def generate_small_toml
      <<~TOML
        [config]

        [config.database]
        host = "localhost"
        port = 5432
        name = "myapp"
        user = "admin"
        password = "secret"

        [config.cache]
        enabled = true
        ttl = 3600
      TOML
    end
478
+
479
    # Synthetic medium TOML fixture: 100 [[users]] tables (deliberately fewer
    # records than the JSON/XML equivalents because TOML is more verbose).
    #
    # @return [String] TOML text
    def generate_medium_toml
      (1..100).map do |i| # Smaller for TOML due to verbosity
        <<~USER
          [[users]]
          id = #{i}
          name = "User #{i}"
          email = "user#{i}@example.com"
          created_at = "2023-01-#{(i % 28) + 1}T10:00:00Z"

          [users.profile]
          age = #{20 + (i % 50)}
          city = "City #{i % 100}"

          [users.profile.preferences]
          theme = "#{i.even? ? 'dark' : 'light'}"
          notifications = #{i % 3 == 0}
        USER
      end.join("\n")
    end
498
+
499
    # Synthetic large TOML fixture: a dataset header plus 1000
    # [[dataset.records]] tables (fewer records than the JSON/XML
    # equivalents because TOML is more verbose).
    #
    # @return [String] TOML text
    def generate_large_toml
      records_toml = (1..1000).map do |i| # Smaller for TOML due to verbosity
        <<~RECORD
          [[dataset.records]]
          id = #{i}
          timestamp = "2023-01-01T#{format('%02d', i % 24)}:#{format('%02d', i % 60)}:#{format('%02d', i % 60)}Z"

          [dataset.records.data]
          field1 = "Value #{i}"
          field2 = #{i * 2}
          field3 = "#{i % 100 == 0 ? 'special' : 'normal'}"
          nested = ["Item #{i}-1", "Item #{i}-2", "Item #{i}-3"]

          [dataset.records.metadata]
          source = "generator"
          version = "1.0"
          checksum = "#{i.to_s(16)}"
        RECORD
      end.join("\n")

      <<~TOML
        [dataset]

        [dataset.header]
        created = "2023-01-01T00:00:00Z"
        count = 1000
        format = "toml"

        #{records_toml}
      TOML
    end
530
+
531
+ def collect_environment_info
532
+ {
533
+ ruby_version: RUBY_VERSION,
534
+ ruby_platform: RUBY_PLATFORM,
535
+ serializer_versions: @serializers.map { |s| [s.name, s.version] }.to_h,
536
+ timestamp: Time.now.iso8601
537
+ }
538
+ end
539
+ end
540
+ end