serialbench 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -278,6 +278,10 @@ module Serialbench
278
278
  @test_data[:small][:json] = generate_small_json
279
279
  @test_data[:medium][:json] = generate_medium_json
280
280
  @test_data[:large][:json] = generate_large_json
281
+ when :yaml
282
+ @test_data[:small][:yaml] = generate_small_yaml
283
+ @test_data[:medium][:yaml] = generate_medium_yaml
284
+ @test_data[:large][:yaml] = generate_large_yaml
281
285
  when :toml
282
286
  @test_data[:small][:toml] = generate_small_toml
283
287
  @test_data[:medium][:toml] = generate_medium_toml
@@ -458,6 +462,84 @@ module Serialbench
458
462
  })
459
463
  end
460
464
 
465
+ # YAML test data generators
466
# Builds the small YAML fixture: one nested configuration document with a
# database section and a cache section.
#
# The hash uses Symbol keys, so Psych writes them in its `:key:` notation.
#
# @return [String] the YAML document
def generate_small_yaml
  require 'yaml'

  database = {
    host: 'localhost',
    port: 5432,
    name: 'myapp',
    user: 'admin',
    password: 'secret'
  }
  cache = {
    enabled: true,
    ttl: 3600
  }

  { config: { database: database, cache: cache } }.to_yaml
end
484
+
485
# Builds the medium YAML fixture: a `users` list with 1000 entries, each
# carrying identity fields and a nested profile.
#
# @return [String] the YAML document
def generate_medium_yaml
  require 'yaml'

  users = (1..1000).map do |i|
    {
      id: i,
      name: "User #{i}",
      email: "user#{i}@example.com",
      # Zero-pad the day (1..28) so the value is a well-formed ISO 8601
      # timestamp, matching the padding used by the large generator.
      created_at: format('2023-01-%02dT10:00:00Z', (i % 28) + 1),
      profile: {
        age: 20 + (i % 50),
        city: "City #{i % 100}",
        preferences: {
          theme: i.even? ? 'dark' : 'light',
          notifications: (i % 3).zero?
        }
      }
    }
  end

  { users: users }.to_yaml
end
506
+
507
# Builds the large YAML fixture: a `dataset` with a header and 10,000
# records, each holding a timestamp, a data section and a metadata section.
#
# @return [String] the YAML document
def generate_large_yaml
  require 'yaml'

  records = (1..10_000).map do |i|
    hour = format('%02d', i % 24)
    # Minutes and seconds intentionally share the same value (i % 60).
    minute_and_second = format('%02d', i % 60)

    {
      id: i,
      timestamp: "2023-01-01T#{hour}:#{minute_and_second}:#{minute_and_second}Z",
      data: {
        field1: "Value #{i}",
        field2: i * 2,
        field3: (i % 100).zero? ? 'special' : 'normal',
        nested: ["Item #{i}-1", "Item #{i}-2", "Item #{i}-3"]
      },
      metadata: {
        source: 'generator',
        version: '1.0',
        checksum: i.to_s(16)
      }
    }
  end

  {
    dataset: {
      header: {
        created: '2023-01-01T00:00:00Z',
        # Derived from the records so the header cannot drift from the data.
        count: records.length,
        format: 'yaml'
      },
      records: records
    }
  }.to_yaml
end
542
+
461
543
  # TOML test data generators
462
544
  def generate_small_toml
463
545
  <<~TOML
@@ -6,7 +6,7 @@ require 'yaml'
6
6
  require 'fileutils'
7
7
 
8
8
  module Serialbench
9
- # Thor-based command line interface for SerialBench
9
+ # Thor-based command line interface for Serialbench
10
10
  class Cli < Thor
11
11
  include Thor::Actions
12
12
 
@@ -17,8 +17,8 @@ module Serialbench
17
17
  This command will test parsing, generation, streaming, and memory usage
18
18
  across XML, JSON, and TOML formats using all available libraries.
19
19
  DESC
20
- option :formats, type: :array, default: %w[xml json toml],
21
- desc: 'Formats to benchmark (xml, json, toml)'
20
+ option :formats, type: :array, default: %w[xml json yaml toml],
21
+ desc: 'Formats to benchmark (xml, json, yaml, toml)'
22
22
  option :output_format, type: :string, default: 'all',
23
23
  desc: 'Output format: all, json, yaml, html'
24
24
  option :parsing_only, type: :boolean, default: false,
@@ -34,11 +34,11 @@ module Serialbench
34
34
  option :warmup, type: :numeric, default: 3,
35
35
  desc: 'Number of warmup iterations'
36
36
  def benchmark
37
- say 'SerialBench - Comprehensive Serialization Performance Tests', :green
37
+ say 'Serialbench - Comprehensive Serialization Performance Tests', :green
38
38
  say '=' * 70, :green
39
39
 
40
40
  # Validate formats
41
- valid_formats = %w[xml json toml]
41
+ valid_formats = %w[xml json yaml toml]
42
42
  invalid_formats = options[:formats] - valid_formats
43
43
  unless invalid_formats.empty?
44
44
  say "Invalid formats: #{invalid_formats.join(', ')}", :red
@@ -94,7 +94,7 @@ module Serialbench
94
94
  show_serializers_for_format(format_sym, serializers)
95
95
  end
96
96
  else
97
- %i[xml json toml].each do |format|
97
+ %i[xml json yaml toml].each do |format|
98
98
  serializers = Serialbench::Serializers.available_for_format(format)
99
99
  next if serializers.empty?
100
100
 
@@ -104,9 +104,9 @@ module Serialbench
104
104
  end
105
105
  end
106
106
 
107
- desc 'version', 'Show SerialBench version'
107
+ desc 'version', 'Show Serialbench version'
108
108
  def version
109
- say "SerialBench version #{Serialbench::VERSION}", :green
109
+ say "Serialbench version #{Serialbench::VERSION}", :green
110
110
  end
111
111
 
112
112
  desc 'merge_results INPUT_DIRS... OUTPUT_DIR', 'Merge benchmark results from multiple runs'
@@ -214,6 +214,190 @@ module Serialbench
214
214
  end
215
215
  end
216
216
 
217
+ desc 'analyze_performance INPUT_DIRS... OUTPUT_FILE', 'Analyze performance across multiple benchmark results'
218
+ long_desc <<~DESC
219
+ Analyze performance data from multiple benchmark runs and generate JSON analysis.
220
+
221
+ INPUT_DIRS should contain results.json files from different benchmark runs.
222
+ OUTPUT_FILE will be a JSON file with detailed performance analysis.
223
+
224
+ Example:
225
+ serialbench analyze_performance artifacts/benchmark-results-*/ performance_analysis.json
226
+ DESC
227
# Collects parsing and generation metrics from several benchmark runs into a
# single JSON analysis file.
#
# Every argument except the last is an input directory that should contain
# `data/results.json` and whose path contains
# `benchmark-results-<platform>-ruby-<version>`; the last argument is the
# path of the JSON report to write. Directories that do not qualify are
# skipped with a warning. Progress is reported through `say`; the process
# exits with status 1 on invalid usage or on an unexpected error.
#
# @param args [Array<String>] INPUT_DIRS... followed by OUTPUT_FILE
# @return [void]
def analyze_performance(*args)
  if args.length < 2
    say 'Error: Need at least one input directory and one output file', :red
    say 'Usage: serialbench analyze_performance INPUT_DIRS... OUTPUT_FILE', :yellow
    exit 1
  end

  # Time#iso8601 comes from the 'time' library; load it here so the report
  # timestamp below cannot fail with NoMethodError.
  require 'time'

  output_file = args.pop
  input_dirs = args

  say "Analyzing performance from #{input_dirs.length} directories", :green

  begin
    results = []

    input_dirs.each do |input_dir|
      results_file = File.join(input_dir, 'data', 'results.json')
      unless File.exist?(results_file)
        say "Warning: Skipping #{input_dir} (no data/results.json found)", :yellow
        next
      end

      # Extract platform and ruby version from the directory name. The
      # platform may itself contain hyphens (e.g. "ubuntu-latest"), so match
      # lazily up to the "-ruby-" separator rather than stopping at the
      # first hyphen.
      match = input_dir.match(%r{benchmark-results-([^/]+?)-ruby-([^/]+)})
      unless match
        say "Warning: Skipping #{input_dir} (name does not match benchmark-results-PLATFORM-ruby-VERSION)", :yellow
        next
      end

      platform, ruby_version = match.captures

      begin
        data = JSON.parse(File.read(results_file))

        # Both sections share one layout: format => serializer => size => metrics.
        %w[parsing generation].each do |operation|
          data[operation]&.each do |format, serializers|
            serializers.each do |serializer, sizes|
              sizes.each do |size, metrics|
                results << {
                  platform: platform,
                  ruby_version: ruby_version,
                  format: format,
                  serializer: serializer,
                  size: size,
                  operation: operation,
                  time_ms: metrics['average_time'] || 0,
                  memory_mb: metrics['memory_usage'] || 0,
                  iterations_per_second: metrics['iterations_per_second'] || 0
                }
              end
            end
          end
        end
      rescue JSON::ParserError => e
        # One unreadable results file should not abort the whole analysis.
        say "Warning: Could not parse #{results_file}: #{e.message}", :yellow
      end
    end

    # Generate analysis report
    distinct = ->(key) { results.map { |row| row[key] }.uniq.sort }
    analysis_report = {
      'summary' => 'Cross-platform performance analysis',
      'generated_at' => Time.now.iso8601,
      'total_data_points' => results.length,
      'platforms' => distinct.call(:platform),
      'ruby_versions' => distinct.call(:ruby_version),
      'formats' => distinct.call(:format),
      'serializers' => distinct.call(:serializer),
      'operations' => distinct.call(:operation),
      'data' => results
    }

    # Write JSON analysis
    File.write(output_file, JSON.pretty_generate(analysis_report))

    say "Performance analysis generated with #{results.length} data points", :green
    say "Platforms: #{analysis_report['platforms'].join(', ')}", :cyan
    say "Ruby versions: #{analysis_report['ruby_versions'].join(', ')}", :cyan
    say "Formats: #{analysis_report['formats'].join(', ')}", :cyan
    say "Output saved to: #{output_file}", :green
  rescue StandardError => e
    say "Error analyzing performance: #{e.message}", :red
    exit 1
  end
end
324
+
325
+ desc 'platform_comparison JSON_FILE OUTPUT_FILE', 'Generate platform comparison report from performance analysis'
326
+ long_desc <<~DESC
327
+ Generate a platform comparison report from performance analysis JSON.
328
+
329
+ JSON_FILE should be the output from analyze_performance command.
330
+ OUTPUT_FILE will be a JSON file with platform comparison statistics.
331
+
332
+ Example:
333
+ serialbench platform_comparison performance_analysis.json platform_comparison.json
334
+ DESC
335
# Builds a per-platform timing comparison from an analysis JSON file.
#
# Reads the `data` array of `json_file`, groups each point's `time_ms` by
# platform, format and operation, and writes average/min/max/standard
# deviation figures for every group to `output_file` as JSON. Progress is
# reported through `say`; the process exits with status 1 when the input
# file is missing or any error occurs while building the report.
#
# @param json_file [String] path to the performance analysis JSON
# @param output_file [String] path of the comparison report to write
# @return [void]
def platform_comparison(json_file, output_file)
  say "Generating platform comparison from #{json_file}", :green

  unless File.exist?(json_file)
    say "JSON file does not exist: #{json_file}", :red
    exit 1
  end

  # Time#iso8601 comes from the 'time' library; load it here so the report
  # timestamp below cannot fail with NoMethodError.
  require 'time'

  begin
    # Read the performance analysis JSON
    analysis_data = JSON.parse(File.read(json_file))
    data_points = analysis_data['data']

    # Fail with a readable message instead of a NoMethodError on nil when
    # the file is not analyze_performance output.
    unless data_points.is_a?(Array)
      raise ArgumentError, "#{json_file} does not contain a 'data' array"
    end

    # Group timings by platform -> format -> operation
    platform_stats = {}

    data_points.each do |point|
      by_format = (platform_stats[point['platform']] ||= {})
      by_operation = (by_format[point['format']] ||= {})
      (by_operation[point['operation']] ||= []) << point['time_ms'].to_f
    end

    # Calculate statistics and generate report
    report = {
      'summary' => 'Cross-platform performance comparison',
      'generated_at' => Time.now.iso8601,
      'source_analysis' => json_file,
      'total_platforms' => platform_stats.keys.length,
      'platforms' => {}
    }

    platform_stats.each do |platform, formats|
      platform_report = (report['platforms'][platform] = {})

      formats.each do |format, operations|
        format_report = (platform_report[format] = {})

        operations.each do |operation, times|
          fastest, slowest = times.minmax
          format_report[operation] = {
            'average_time_ms' => (times.sum / times.length).round(3),
            'sample_count' => times.length,
            'min_time_ms' => fastest.round(3),
            'max_time_ms' => slowest.round(3),
            'std_deviation' => calculate_std_deviation(times).round(3)
          }
        end
      end
    end

    # Write JSON report
    File.write(output_file, JSON.pretty_generate(report))

    say "Platform comparison report generated", :green
    say "Platforms analyzed: #{platform_stats.keys.sort.join(', ')}", :cyan
    say "Output saved to: #{output_file}", :green
  rescue StandardError => e
    say "Error generating platform comparison: #{e.message}", :red
    exit 1
  end
end
400
+
217
401
  private
218
402
 
219
403
  def show_available_serializers(formats)
@@ -242,7 +426,7 @@ module Serialbench
242
426
  serializer = serializer_class.new
243
427
  features = []
244
428
  features << 'streaming' if serializer.supports_streaming?
245
- features << 'built-in' if %w[json rexml].include?(serializer.name)
429
+ features << 'built-in' if %w[json rexml psych].include?(serializer.name)
246
430
 
247
431
  feature_text = features.empty? ? '' : " (#{features.join(', ')})"
248
432
  say " ✓ #{serializer.name} v#{serializer.version}#{feature_text}", :green
@@ -434,5 +618,13 @@ module Serialbench
434
618
  end
435
619
  end
436
620
  end
621
+
622
# Computes the population standard deviation of a list of numbers
# (the squared deviations are divided by the number of values, not n - 1).
#
# @param values [Array<Numeric>] the samples
# @return [Float] the standard deviation; 0.0 for zero or one value
def calculate_std_deviation(values)
  count = values.length
  return 0.0 if count <= 1

  mean = values.sum.to_f / count
  # Sum with a block to avoid building an intermediate array.
  squared_deviations = values.sum { |value| (value - mean)**2 }
  Math.sqrt(squared_deviations / count)
end
437
629
  end
438
630
  end
@@ -180,14 +180,14 @@ module Serialbench
180
180
  <head>
181
181
  <meta charset="UTF-8">
182
182
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
183
- <title>SerialBench - Multi-Ruby Version Comparison</title>
183
+ <title>Serialbench - Multi-Ruby Version Comparison</title>
184
184
  <link rel="stylesheet" href="styles.css">
185
185
  <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
186
186
  </head>
187
187
  <body>
188
188
  <div class="container">
189
189
  <header>
190
- <h1>SerialBench - Multi-Ruby Version Comparison</h1>
190
+ <h1>Serialbench - Multi-Ruby Version Comparison</h1>
191
191
  <p class="subtitle">Comprehensive serialization performance benchmarks across Ruby versions</p>
192
192
  <div class="metadata">
193
193
  <p><strong>Generated:</strong> #{@merged_results[:metadata][:merged_at]}</p>
@@ -531,14 +531,14 @@ module Serialbench
531
531
  <head>
532
532
  <meta charset="UTF-8">
533
533
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
534
- <title>SerialBench - Performance Report</title>
534
+ <title>Serialbench - Performance Report</title>
535
535
  <link rel="stylesheet" href="../assets/css/benchmark_report.css">
536
536
  <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
537
537
  </head>
538
538
  <body>
539
539
  <div class="container">
540
540
  <header>
541
- <h1>SerialBench - Performance Report</h1>
541
+ <h1>Serialbench - Performance Report</h1>
542
542
  <p class="subtitle">Comprehensive serialization performance benchmarks</p>
543
543
  <div class="metadata">
544
544
  <p><strong>Generated:</strong> #{Time.now.strftime('%B %d, %Y at %H:%M')}</p>
@@ -949,7 +949,7 @@ module Serialbench
949
949
 
950
950
  def generate_css
951
951
  css_content = <<~CSS
952
- /* SerialBench Report Styles */
952
+ /* Serialbench Report Styles */
953
953
  :root {
954
954
  --primary-color: #2c3e50;
955
955
  --secondary-color: #3498db;
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'base_json_serializer'
4
+
5
module Serialbench
  module Serializers
    module Json
      # RapidJSON serializer - Ruby bindings for the RapidJSON C++ library
      class RapidjsonSerializer < BaseJsonSerializer
        # @return [Boolean] true when the rapidjson library can be loaded
        def available?
          require_library('rapidjson')
        end

        # @return [String] identifier of this serializer
        def name
          'rapidjson'
        end

        # @return [String] the library version, or 'unknown' when it cannot
        #   be determined
        def version
          require 'rapidjson'
          RapidJSON::VERSION
        rescue LoadError, StandardError
          # LoadError is a ScriptError, not a StandardError, so it has to be
          # named explicitly for a missing gem to yield 'unknown'.
          'unknown'
        end

        # Parses a JSON document with RapidJSON.
        #
        # @param json_string [String] the JSON text
        # @return [Object] whatever RapidJSON.parse returns for the document
        def parse(json_string)
          require 'rapidjson'
          RapidJSON.parse(json_string)
        end

        # Serializes an object to JSON with RapidJSON.
        #
        # @param object [Object] the data to serialize
        # @param options [Hash] accepted for interface compatibility; not used
        # @return [String] the JSON text
        def generate(object, options = {})
          require 'rapidjson'
          RapidJSON.dump(object)
        end

        # @return [Array<String>] capability labels for this serializer
        def features
          %w[parsing generation high-performance c-extension]
        end

        private

        # Attempts to load a library without raising.
        #
        # @param library_name [String] name passed to `require`
        # @return [Boolean] false when the library is not installed
        def require_library(library_name)
          require library_name
          true
        rescue LoadError
          false
        end
      end
    end
  end
end
@@ -15,9 +15,13 @@ module Serialbench
15
15
  Yajl::Parser.parse(json_string)
16
16
  end
17
17
 
18
- def generate(data)
18
+ def generate(data, options = {})
19
19
  require 'yajl'
20
- Yajl::Encoder.encode(data)
20
+ if options[:pretty]
21
+ Yajl::Encoder.encode(data, pretty: true, indent: ' ')
22
+ else
23
+ Yajl::Encoder.encode(data)
24
+ end
21
25
  end
22
26
 
23
27
  def parse_streaming(json_string, &block)
@@ -60,9 +60,9 @@ module Serialbench
60
60
  def build_xml_from_data(xml, data, root_name = 'root')
61
61
  case data
62
62
  when Hash
63
- xml.send(root_name) do
63
+ xml.send(sanitize_element_name(root_name)) do
64
64
  data.each do |key, value|
65
- build_xml_from_data(xml, value, key)
65
+ build_xml_from_data(xml, value, sanitize_element_name(key.to_s))
66
66
  end
67
67
  end
68
68
  when Array
@@ -70,10 +70,28 @@ module Serialbench
70
70
  build_xml_from_data(xml, item, "item_#{index}")
71
71
  end
72
72
  else
73
- xml.send(root_name, data.to_s)
73
+ # Use a safe method that always works
74
+ element_name = sanitize_element_name(root_name)
75
+ if xml.respond_to?(element_name)
76
+ xml.send(element_name, data.to_s)
77
+ else
78
+ # Fallback: create element manually
79
+ xml.tag!(element_name, data.to_s)
80
+ end
74
81
  end
75
82
  end
76
83
 
84
# Turns an arbitrary key into a name that is safe to dispatch through
# `xml.send(name)` on the XML builder.
#
# Characters other than ASCII letters, digits and underscore become `_`;
# empty names and names starting with a digit get an `element_` prefix;
# names that would hit an existing method instead of creating an element
# get a `data_` prefix.
#
# @param name [#to_s] the candidate element name
# @return [String] the sanitized element name
def sanitize_element_name(name)
  sanitized = name.to_s.gsub(/[^a-zA-Z0-9_]/, '_')
  sanitized = "element_#{sanitized}" if sanitized.empty? || sanitized.match?(/\A\d/)

  # Common Nokogiri builder/node methods that must not be shadowed.
  builder_methods = %w[text comment cdata parent children attributes namespace]

  # `send` also reaches every public and private method inherited from
  # Object/Kernel (class, display, format, system, ...), so such names would
  # call that method rather than emit an element.
  collides = builder_methods.include?(sanitized) ||
             Object.method_defined?(sanitized) ||
             Object.private_method_defined?(sanitized)

  collides ? "data_#{sanitized}" : sanitized
end
94
+
77
95
  # SAX handler for streaming
78
96
  class StreamingHandler
79
97
  attr_reader :elements_processed
@@ -26,8 +26,19 @@ module Serialbench
26
26
  REXML::Document.new(xml_string)
27
27
  end
28
28
 
29
- def generate(document, options = {})
29
+ def generate(data, options = {})
30
30
  require 'rexml/document'
31
+
32
+ # If data is already a REXML::Document, use it directly
33
+ if data.is_a?(REXML::Document)
34
+ document = data
35
+ else
36
+ # Convert Hash/other data to XML document
37
+ document = REXML::Document.new
38
+ root = document.add_element('root')
39
+ hash_to_xml(data, root)
40
+ end
41
+
31
42
  indent = options.fetch(:indent, 0)
32
43
  output = String.new
33
44
  if indent > 0
@@ -63,7 +74,7 @@ module Serialbench
63
74
  end
64
75
 
65
76
  def supports_streaming?
66
- true
77
+ false
67
78
  end
68
79
 
69
80
  protected
@@ -79,6 +90,25 @@ module Serialbench
79
90
# @return [Boolean] always false: this serializer reports no validation support
def supports_validation?
  false
end
93
+
94
+ private
95
+
96
# Recursively mirrors a Ruby data structure under an XML element.
#
# A Hash adds one child element per key (named `key.to_s`), an Array adds
# children named `item_<index>`, and any other value becomes the text of
# `parent` via `to_s`.
#
# @param data [Hash, Array, Object] the value to convert
# @param parent [#add_element, #text=] the element that receives the content
def hash_to_xml(data, parent)
  case data
  when Hash
    data.each do |key, value|
      hash_to_xml(value, parent.add_element(key.to_s))
    end
  when Array
    data.each_with_index do |item, index|
      hash_to_xml(item, parent.add_element("item_#{index}"))
    end
  else
    parent.text = data.to_s
  end
end
82
112
  end
83
113
 
84
114
  # SAX handler for REXML streaming
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../base_serializer'
4
+
5
module Serialbench
  module Serializers
    module Yaml
      # Base class for YAML serializers
      class BaseYamlSerializer < BaseSerializer
        # @return [Symbol] the format handled by this serializer family
        def format
          :yaml
        end

        # @return [Boolean] false by default; subclasses may override
        def supports_streaming?
          false # Most YAML parsers don't support streaming
        end

        # @return [Array<String>] capability labels; includes 'streaming'
        #   only when #supports_streaming? is true
        def features
          supported = %w[parsing generation]
          supported << 'streaming' if supports_streaming?
          supported
        end

        # Default YAML generation options
        #
        # @return [Hash] an empty hash
        def default_generation_options
          {}
        end

        # Parse YAML string into Ruby object
        #
        # @param yaml_string [String] the YAML text
        # @raise [NotImplementedError] always; subclasses must override
        def parse(yaml_string)
          raise NotImplementedError, 'Subclasses must implement parse method'
        end

        # Generate YAML string from Ruby object
        #
        # @param object [Object] the data to serialize
        # @param options [Hash] generation options
        # @raise [NotImplementedError] always; subclasses must override
        def generate(object, options = {})
          raise NotImplementedError, 'Subclasses must implement generate method'
        end

        # Stream parse YAML (if supported)
        #
        # @param yaml_string [String] the YAML text
        # @raise [NotImplementedError] always in this base class
        def stream_parse(yaml_string, &block)
          raise NotImplementedError, 'Streaming not supported by this YAML serializer'
        end

        private

        # Attempts to load a library without raising.
        #
        # @param library_name [String] name passed to `require`
        # @return [Boolean] false when the library is not installed
        def require_library(library_name)
          require library_name
          true
        rescue LoadError
          false
        end
      end
    end
  end
end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'base_yaml_serializer'
4
+
5
module Serialbench
  module Serializers
    module Yaml
      # Psych YAML serializer - Ruby's built-in YAML parser
      class PsychSerializer < BaseYamlSerializer
        # @return [Boolean] true when psych can be loaded
        def available?
          require_library('psych')
        end

        # @return [String] identifier of this serializer
        def name
          'psych'
        end

        # @return [String] the loaded Psych version
        def version
          require 'psych'
          Psych::VERSION
        end

        # Parses a YAML document with Psych.
        #
        # @param yaml_string [String] the YAML text
        # @return [Object] the loaded Ruby object
        def parse(yaml_string)
          require 'psych'
          # Date is named below; load it explicitly rather than relying on
          # psych having required it.
          require 'date'

          # `Psych.load` takes `permitted_classes:` from Psych 4.0 on. Psych
          # is a gem that can be upgraded independently of the interpreter,
          # so decide on the library version rather than RUBY_VERSION.
          if Gem::Version.new(Psych::VERSION) >= Gem::Version.new('4.0.0')
            Psych.load(yaml_string, permitted_classes: [Date, Time, Symbol])
          else
            # Older Psych versions use the old API
            Psych.load(yaml_string)
          end
        end

        # Serializes an object to YAML with Psych.
        #
        # @param object [Object] the data to serialize
        # @param options [Hash] accepted for interface compatibility; not used
        # @return [String] the YAML text
        def generate(object, options = {})
          require 'psych'
          Psych.dump(object)
        end

        # @return [Array<String>] capability labels for this serializer
        def features
          %w[parsing generation built-in]
        end

        # `require_library` is inherited from BaseYamlSerializer, so the
        # identical private copy that used to live here is not repeated.
      end
    end
  end
end