RubyGems - serialbench - Versions diffs - 0.1.0 → 0.1.1 - Mend

serialbench 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

checksums.yaml +4 -4
data/.github/workflows/benchmark.yml +173 -30
data/.github/workflows/ci.yml +3 -3
data/.github/workflows/docker.yml +246 -0
data/.github/workflows/release.yml +25 -0
data/Gemfile +5 -30
data/README.adoc +962 -134
data/config/ci.yml +22 -0
data/config/full.yml +30 -0
data/docker/Dockerfile.benchmark +31 -0
data/docker/README.md +214 -0
data/docker/run-benchmarks.sh +356 -0
data/lib/serialbench/benchmark_runner.rb +82 -0
data/lib/serialbench/cli.rb +201 -9
data/lib/serialbench/result_merger.rb +5 -5
data/lib/serialbench/serializers/json/rapidjson_serializer.rb +50 -0
data/lib/serialbench/serializers/json/yajl_serializer.rb +6 -2
data/lib/serialbench/serializers/xml/nokogiri_serializer.rb +21 -3
data/lib/serialbench/serializers/xml/rexml_serializer.rb +32 -2
data/lib/serialbench/serializers/yaml/base_yaml_serializer.rb +55 -0
data/lib/serialbench/serializers/yaml/psych_serializer.rb +54 -0
data/lib/serialbench/serializers/yaml/syck_serializer.rb +65 -0
data/lib/serialbench/serializers.rb +11 -0
data/lib/serialbench/version.rb +1 -1
data/serialbench.gemspec +25 -17
metadata +113 -46

data/lib/serialbench/benchmark_runner.rb CHANGED Viewed

@@ -278,6 +278,10 @@ module Serialbench
           @test_data[:small][:json] = generate_small_json
           @test_data[:medium][:json] = generate_medium_json
           @test_data[:large][:json] = generate_large_json
+        when :yaml
+          @test_data[:small][:yaml] = generate_small_yaml
+          @test_data[:medium][:yaml] = generate_medium_yaml
+          @test_data[:large][:yaml] = generate_large_yaml
         when :toml
           @test_data[:small][:toml] = generate_small_toml
           @test_data[:medium][:toml] = generate_medium_toml
@@ -458,6 +462,84 @@ module Serialbench
                     })
     end
+    # YAML test data generators
+    def generate_small_yaml
+      require 'yaml'
+      {
+        config: {
+          database: {
+            host: 'localhost',
+            port: 5432,
+            name: 'myapp',
+            user: 'admin',
+            password: 'secret'
+          },
+          cache: {
+            enabled: true,
+            ttl: 3600
+          }
+        }
+      }.to_yaml
+    end
+    def generate_medium_yaml
+      require 'yaml'
+      users = (1..1000).map do |i|
+        {
+          id: i,
+          name: "User #{i}",
+          email: "user#{i}@example.com",
+          created_at: "2023-01-#{(i % 28) + 1}T10:00:00Z",
+          profile: {
+            age: 20 + (i % 50),
+            city: "City #{i % 100}",
+            preferences: {
+              theme: i.even? ? 'dark' : 'light',
+              notifications: i % 3 == 0
+            }
+          }
+        }
+      end
+      { users: users }.to_yaml
+    end
+    def generate_large_yaml
+      require 'yaml'
+      records = (1..10_000).map do |i|
+        {
+          id: i,
+          timestamp: "2023-01-01T#{format('%02d', i % 24)}:#{format('%02d', i % 60)}:#{format('%02d', i % 60)}Z",
+          data: {
+            field1: "Value #{i}",
+            field2: i * 2,
+            field3: i % 100 == 0 ? 'special' : 'normal',
+            nested: [
+              "Item #{i}-1",
+              "Item #{i}-2",
+              "Item #{i}-3"
+            ]
+          },
+          metadata: {
+            source: 'generator',
+            version: '1.0',
+            checksum: i.to_s(16)
+          }
+        }
+      end
+      {
+        dataset: {
+          header: {
+            created: '2023-01-01T00:00:00Z',
+            count: 10_000,
+            format: 'yaml'
+          },
+          records: records
+        }
+      }.to_yaml
+    end
     # TOML test data generators
     def generate_small_toml
       <<~TOML

data/lib/serialbench/cli.rb CHANGED Viewed

@@ -6,7 +6,7 @@ require 'yaml'
 require 'fileutils'
 module Serialbench
-  # Thor-based command line interface for SerialBench
+  # Thor-based command line interface for Serialbench
   class Cli < Thor
     include Thor::Actions
@@ -17,8 +17,8 @@ module Serialbench
       This command will test parsing, generation, streaming, and memory usage
       across XML, JSON, and TOML formats using all available libraries.
     DESC
-    option :formats, type: :array, default: %w[xml json toml],
-                     desc: 'Formats to benchmark (xml, json, toml)'
+    option :formats, type: :array, default: %w[xml json yaml toml],
+                     desc: 'Formats to benchmark (xml, json, yaml, toml)'
     option :output_format, type: :string, default: 'all',
                            desc: 'Output format: all, json, yaml, html'
     option :parsing_only, type: :boolean, default: false,
@@ -34,11 +34,11 @@ module Serialbench
     option :warmup, type: :numeric, default: 3,
                     desc: 'Number of warmup iterations'
     def benchmark
-      say 'SerialBench - Comprehensive Serialization Performance Tests', :green
+      say 'Serialbench - Comprehensive Serialization Performance Tests', :green
       say '=' * 70, :green
       # Validate formats
-      valid_formats = %w[xml json toml]
+      valid_formats = %w[xml json yaml toml]
       invalid_formats = options[:formats] - valid_formats
       unless invalid_formats.empty?
         say "Invalid formats: #{invalid_formats.join(', ')}", :red
@@ -94,7 +94,7 @@ module Serialbench
           show_serializers_for_format(format_sym, serializers)
         end
       else
-        %i[xml json toml].each do |format|
+        %i[xml json yaml toml].each do |format|
           serializers = Serialbench::Serializers.available_for_format(format)
           next if serializers.empty?
@@ -104,9 +104,9 @@ module Serialbench
       end
     end
-    desc 'version', 'Show SerialBench version'
+    desc 'version', 'Show Serialbench version'
     def version
-      say "SerialBench version #{Serialbench::VERSION}", :green
+      say "Serialbench version #{Serialbench::VERSION}", :green
     end
     desc 'merge_results INPUT_DIRS... OUTPUT_DIR', 'Merge benchmark results from multiple runs'
@@ -214,6 +214,190 @@ module Serialbench
       end
     end
+    desc 'analyze_performance INPUT_DIRS... OUTPUT_FILE', 'Analyze performance across multiple benchmark results'
+    long_desc <<~DESC
+      Analyze performance data from multiple benchmark runs and generate JSON analysis.
+      INPUT_DIRS should contain results.json files from different benchmark runs.
+      OUTPUT_FILE will be a JSON file with detailed performance analysis.
+      Example:
+        serialbench analyze_performance artifacts/benchmark-results-*/ performance_analysis.json
+    DESC
+    def analyze_performance(*args)
+      if args.length < 2
+        say 'Error: Need at least one input directory and one output file', :red
+        say 'Usage: serialbench analyze_performance INPUT_DIRS... OUTPUT_FILE', :yellow
+        exit 1
+      end
+      output_file = args.pop
+      input_dirs = args
+      say "Analyzing performance from #{input_dirs.length} directories", :green
+      begin
+        results = []
+        input_dirs.each do |input_dir|
+          results_file = File.join(input_dir, 'data', 'results.json')
+          next unless File.exist?(results_file)
+          # Extract platform and ruby version from directory name
+          match = input_dir.match(/benchmark-results-([^-]+)-ruby-([^\/]+)/)
+          next unless match
+          platform = match[1]
+          ruby_version = match[2]
+          begin
+            data = JSON.parse(File.read(results_file))
+            # Process parsing results
+            data['parsing']&.each do |format, serializers|
+              serializers.each do |serializer, sizes|
+                sizes.each do |size, metrics|
+                  results << {
+                    platform: platform,
+                    ruby_version: ruby_version,
+                    format: format,
+                    serializer: serializer,
+                    size: size,
+                    operation: 'parsing',
+                    time_ms: metrics['average_time'] || 0,
+                    memory_mb: metrics['memory_usage'] || 0,
+                    iterations_per_second: metrics['iterations_per_second'] || 0
+                  }
+                end
+              end
+            end
+            # Process generation results
+            data['generation']&.each do |format, serializers|
+              serializers.each do |serializer, sizes|
+                sizes.each do |size, metrics|
+                  results << {
+                    platform: platform,
+                    ruby_version: ruby_version,
+                    format: format,
+                    serializer: serializer,
+                    size: size,
+                    operation: 'generation',
+                    time_ms: metrics['average_time'] || 0,
+                    memory_mb: metrics['memory_usage'] || 0,
+                    iterations_per_second: metrics['iterations_per_second'] || 0
+                  }
+                end
+              end
+            end
+          rescue JSON::ParserError => e
+            say "Warning: Could not parse #{results_file}: #{e.message}", :yellow
+          end
+        end
+        # Generate analysis report
+        analysis_report = {
+          'summary' => 'Cross-platform performance analysis',
+          'generated_at' => Time.now.iso8601,
+          'total_data_points' => results.length,
+          'platforms' => results.map { |r| r[:platform] }.uniq.sort,
+          'ruby_versions' => results.map { |r| r[:ruby_version] }.uniq.sort,
+          'formats' => results.map { |r| r[:format] }.uniq.sort,
+          'serializers' => results.map { |r| r[:serializer] }.uniq.sort,
+          'operations' => results.map { |r| r[:operation] }.uniq.sort,
+          'data' => results
+        }
+        # Write JSON analysis
+        File.write(output_file, JSON.pretty_generate(analysis_report))
+        say "Performance analysis generated with #{results.length} data points", :green
+        say "Platforms: #{analysis_report['platforms'].join(', ')}", :cyan
+        say "Ruby versions: #{analysis_report['ruby_versions'].join(', ')}", :cyan
+        say "Formats: #{analysis_report['formats'].join(', ')}", :cyan
+        say "Output saved to: #{output_file}", :green
+      rescue StandardError => e
+        say "Error analyzing performance: #{e.message}", :red
+        exit 1
+      end
+    end
+    desc 'platform_comparison JSON_FILE OUTPUT_FILE', 'Generate platform comparison report from performance analysis'
+    long_desc <<~DESC
+      Generate a platform comparison report from performance analysis JSON.
+      JSON_FILE should be the output from analyze_performance command.
+      OUTPUT_FILE will be a JSON file with platform comparison statistics.
+      Example:
+        serialbench platform_comparison performance_analysis.json platform_comparison.json
+    DESC
+    def platform_comparison(json_file, output_file)
+      say "Generating platform comparison from #{json_file}", :green
+      unless File.exist?(json_file)
+        say "JSON file does not exist: #{json_file}", :red
+        exit 1
+      end
+      begin
+        # Read the performance analysis JSON
+        analysis_data = JSON.parse(File.read(json_file))
+        data_points = analysis_data['data']
+        # Group by platform and calculate averages
+        platform_stats = {}
+        data_points.each do |point|
+          platform = point['platform']
+          format = point['format']
+          operation = point['operation']
+          time = point['time_ms'].to_f
+          platform_stats[platform] ||= {}
+          platform_stats[platform][format] ||= {}
+          platform_stats[platform][format][operation] ||= []
+          platform_stats[platform][format][operation] << time
+        end
+        # Calculate averages and generate report
+        report = {
+          'summary' => 'Cross-platform performance comparison',
+          'generated_at' => Time.now.iso8601,
+          'source_analysis' => json_file,
+          'total_platforms' => platform_stats.keys.length,
+          'platforms' => {}
+        }
+        platform_stats.each do |platform, formats|
+          report['platforms'][platform] = {}
+          formats.each do |format, operations|
+            report['platforms'][platform][format] = {}
+            operations.each do |operation, times|
+              avg_time = times.sum / times.length
+              report['platforms'][platform][format][operation] = {
+                'average_time_ms' => avg_time.round(3),
+                'sample_count' => times.length,
+                'min_time_ms' => times.min.round(3),
+                'max_time_ms' => times.max.round(3),
+                'std_deviation' => calculate_std_deviation(times).round(3)
+              }
+            end
+          end
+        end
+        # Write JSON report
+        File.write(output_file, JSON.pretty_generate(report))
+        say "Platform comparison report generated", :green
+        say "Platforms analyzed: #{platform_stats.keys.sort.join(', ')}", :cyan
+        say "Output saved to: #{output_file}", :green
+      rescue StandardError => e
+        say "Error generating platform comparison: #{e.message}", :red
+        exit 1
+      end
+    end
     private
     def show_available_serializers(formats)
@@ -242,7 +426,7 @@ module Serialbench
         serializer = serializer_class.new
         features = []
         features << 'streaming' if serializer.supports_streaming?
-        features << 'built-in' if %w[json rexml].include?(serializer.name)
+        features << 'built-in' if %w[json rexml psych].include?(serializer.name)
         feature_text = features.empty? ? '' : " (#{features.join(', ')})"
         say "  ✓ #{serializer.name} v#{serializer.version}#{feature_text}", :green
@@ -434,5 +618,13 @@ module Serialbench
         end
       end
     end
+    def calculate_std_deviation(values)
+      return 0.0 if values.length <= 1
+      mean = values.sum.to_f / values.length
+      variance = values.map { |v| (v - mean)**2 }.sum / values.length
+      Math.sqrt(variance)
+    end
   end
 end

data/lib/serialbench/result_merger.rb CHANGED Viewed

@@ -180,14 +180,14 @@ module Serialbench
         <head>
             <meta charset="UTF-8">
             <meta name="viewport" content="width=device-width, initial-scale=1.0">
-            <title>SerialBench - Multi-Ruby Version Comparison</title>
+            <title>Serialbench - Multi-Ruby Version Comparison</title>
             <link rel="stylesheet" href="styles.css">
             <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
         </head>
         <body>
             <div class="container">
                 <header>
-                    <h1>SerialBench - Multi-Ruby Version Comparison</h1>
+                    <h1>Serialbench - Multi-Ruby Version Comparison</h1>
                     <p class="subtitle">Comprehensive serialization performance benchmarks across Ruby versions</p>
                     <div class="metadata">
                         <p><strong>Generated:</strong> #{@merged_results[:metadata][:merged_at]}</p>
@@ -531,14 +531,14 @@ module Serialbench
         <head>
             <meta charset="UTF-8">
             <meta name="viewport" content="width=device-width, initial-scale=1.0">
-            <title>SerialBench - Performance Report</title>
+            <title>Serialbench - Performance Report</title>
             <link rel="stylesheet" href="../assets/css/benchmark_report.css">
             <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
         </head>
         <body>
             <div class="container">
                 <header>
-                    <h1>SerialBench - Performance Report</h1>
+                    <h1>Serialbench - Performance Report</h1>
                     <p class="subtitle">Comprehensive serialization performance benchmarks</p>
                     <div class="metadata">
                         <p><strong>Generated:</strong> #{Time.now.strftime('%B %d, %Y at %H:%M')}</p>
@@ -949,7 +949,7 @@ module Serialbench
     def generate_css
       css_content = <<~CSS
-        /* SerialBench Report Styles */
+        /* Serialbench Report Styles */
         :root {
           --primary-color: #2c3e50;
           --secondary-color: #3498db;

data/lib/serialbench/serializers/json/rapidjson_serializer.rb ADDED Viewed

@@ -0,0 +1,50 @@
+# frozen_string_literal: true
+require_relative 'base_json_serializer'
+module Serialbench
+  module Serializers
+    module Json
+      # RapidJSON serializer - Ruby bindings for RapidJSON C++ library
+      class RapidjsonSerializer < BaseJsonSerializer
+        def available?
+          require_library('rapidjson')
+        end
+        def name
+          'rapidjson'
+        end
+        def version
+          require 'rapidjson'
+          RapidJSON::VERSION
+        rescue StandardError
+          'unknown'
+        end
+        def parse(json_string)
+          require 'rapidjson'
+          RapidJSON.parse(json_string)
+        end
+        def generate(object, options = {})
+          require 'rapidjson'
+          RapidJSON.dump(object)
+        end
+        def features
+          %w[parsing generation high-performance c-extension]
+        end
+        private
+        def require_library(library_name)
+          require library_name
+          true
+        rescue LoadError
+          false
+        end
+      end
+    end
+  end
+end

data/lib/serialbench/serializers/json/yajl_serializer.rb CHANGED Viewed

@@ -15,9 +15,13 @@ module Serialbench
           Yajl::Parser.parse(json_string)
         end
-        def generate(data)
+        def generate(data, options = {})
           require 'yajl'
-          Yajl::Encoder.encode(data)
+          if options[:pretty]
+            Yajl::Encoder.encode(data, pretty: true, indent: '  ')
+          else
+            Yajl::Encoder.encode(data)
+          end
         end
         def parse_streaming(json_string, &block)

data/lib/serialbench/serializers/xml/nokogiri_serializer.rb CHANGED Viewed

@@ -60,9 +60,9 @@ module Serialbench
         def build_xml_from_data(xml, data, root_name = 'root')
           case data
           when Hash
-            xml.send(root_name) do
+            xml.send(sanitize_element_name(root_name)) do
               data.each do |key, value|
-                build_xml_from_data(xml, value, key)
+                build_xml_from_data(xml, value, sanitize_element_name(key.to_s))
               end
             end
           when Array
@@ -70,10 +70,28 @@ module Serialbench
               build_xml_from_data(xml, item, "item_#{index}")
             end
           else
-            xml.send(root_name, data.to_s)
+            # Use a safe method that always works
+            element_name = sanitize_element_name(root_name)
+            if xml.respond_to?(element_name)
+              xml.send(element_name, data.to_s)
+            else
+              # Fallback: create element manually
+              xml.tag!(element_name, data.to_s)
+            end
           end
         end
+        def sanitize_element_name(name)
+          # Ensure element name is valid XML and safe to use as method name
+          sanitized = name.to_s.gsub(/[^a-zA-Z0-9_]/, '_')
+          # Ensure it starts with a letter
+          sanitized = "element_#{sanitized}" if sanitized.empty? || sanitized =~ /\A\d/
+          # Avoid conflicts with common Nokogiri methods
+          reserved_words = %w[text comment cdata parent children attributes namespace]
+          sanitized = "data_#{sanitized}" if reserved_words.include?(sanitized)
+          sanitized
+        end
         # SAX handler for streaming
         class StreamingHandler
           attr_reader :elements_processed

data/lib/serialbench/serializers/xml/rexml_serializer.rb CHANGED Viewed

@@ -26,8 +26,19 @@ module Serialbench
           REXML::Document.new(xml_string)
         end
-        def generate(document, options = {})
+        def generate(data, options = {})
           require 'rexml/document'
+          # If data is already a REXML::Document, use it directly
+          if data.is_a?(REXML::Document)
+            document = data
+          else
+            # Convert Hash/other data to XML document
+            document = REXML::Document.new
+            root = document.add_element('root')
+            hash_to_xml(data, root)
+          end
           indent = options.fetch(:indent, 0)
           output = String.new
           if indent > 0
@@ -63,7 +74,7 @@ module Serialbench
         end
         def supports_streaming?
-          true
+          false
         end
         protected
@@ -79,6 +90,25 @@ module Serialbench
         def supports_validation?
           false
         end
+        private
+        def hash_to_xml(data, parent)
+          case data
+          when Hash
+            data.each do |key, value|
+              element = parent.add_element(key.to_s)
+              hash_to_xml(value, element)
+            end
+          when Array
+            data.each_with_index do |item, index|
+              element = parent.add_element("item_#{index}")
+              hash_to_xml(item, element)
+            end
+          else
+            parent.text = data.to_s
+          end
+        end
       end
       # SAX handler for REXML streaming

data/lib/serialbench/serializers/yaml/base_yaml_serializer.rb ADDED Viewed

@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+require_relative '../base_serializer'
+module Serialbench
+  module Serializers
+    module Yaml
+      # Base class for YAML serializers
+      class BaseYamlSerializer < BaseSerializer
+        def format
+          :yaml
+        end
+        def supports_streaming?
+          false # Most YAML parsers don't support streaming
+        end
+        def features
+          features = %w[parsing generation]
+          features << 'streaming' if supports_streaming?
+          features
+        end
+        # Default YAML generation options
+        def default_generation_options
+          {}
+        end
+        # Parse YAML string into Ruby object
+        def parse(yaml_string)
+          raise NotImplementedError, 'Subclasses must implement parse method'
+        end
+        # Generate YAML string from Ruby object
+        def generate(object, options = {})
+          raise NotImplementedError, 'Subclasses must implement generate method'
+        end
+        # Stream parse YAML (if supported)
+        def stream_parse(yaml_string, &block)
+          raise NotImplementedError, 'Streaming not supported by this YAML serializer'
+        end
+        private
+        def require_library(library_name)
+          require library_name
+          true
+        rescue LoadError
+          false
+        end
+      end
+    end
+  end
+end

data/lib/serialbench/serializers/yaml/psych_serializer.rb ADDED Viewed

@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+require_relative 'base_yaml_serializer'
+module Serialbench
+  module Serializers
+    module Yaml
+      # Psych YAML serializer - Ruby's built-in YAML parser
+      class PsychSerializer < BaseYamlSerializer
+        def available?
+          require_library('psych')
+        end
+        def name
+          'psych'
+        end
+        def version
+          require 'psych'
+          Psych::VERSION
+        end
+        def parse(yaml_string)
+          require 'psych'
+          # Handle Ruby version compatibility for permitted_classes parameter
+          if RUBY_VERSION >= '3.1.0'
+            Psych.load(yaml_string, permitted_classes: [Date, Time, Symbol])
+          else
+            # For older Ruby versions, use the old API
+            Psych.load(yaml_string)
+          end
+        end
+        def generate(object, options = {})
+          require 'psych'
+          Psych.dump(object)
+        end
+        def features
+          %w[parsing generation built-in]
+        end
+        private
+        def require_library(library_name)
+          require library_name
+          true
+        rescue LoadError
+          false
+        end
+      end
+    end
+  end
+end