RubyGems - mahout - Versions diffs - 1.1.0 - Mend

mahout 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

checksums.yaml +7 -0
data/config/mahout.example.yml +55 -0
data/config/templates/datadog-pgbackrest-check.py +54 -0
data/config/templates/datadog-postgres.yaml.erb +29 -0
data/config/templates/pg_hba.conf.erb +8 -0
data/config/templates/pgbackrest.conf.erb +24 -0
data/config/templates/pgbouncer.ini.erb +13 -0
data/config/templates/postgresql.conf.erb +80 -0
data/exe/mahout +6 -0
data/lib/mahout/backup.rb +51 -0
data/lib/mahout/benchmark.rb +464 -0
data/lib/mahout/cli.rb +577 -0
data/lib/mahout/config.rb +322 -0
data/lib/mahout/disk_benchmark.rb +172 -0
data/lib/mahout/extension_registry.rb +144 -0
data/lib/mahout/health.rb +372 -0
data/lib/mahout/remote.rb +142 -0
data/lib/mahout/restore.rb +88 -0
data/lib/mahout/runner.rb +81 -0
data/lib/mahout/setup/datadog.rb +116 -0
data/lib/mahout/setup/extensions.rb +51 -0
data/lib/mahout/setup/hardening.rb +109 -0
data/lib/mahout/setup/os.rb +198 -0
data/lib/mahout/setup/pgbackrest.rb +125 -0
data/lib/mahout/setup/pgbouncer.rb +32 -0
data/lib/mahout/setup/postgres.rb +140 -0
data/lib/mahout/setup/ssl.rb +29 -0
data/lib/mahout/setup/systemd.rb +205 -0
data/lib/mahout/status.rb +99 -0
data/lib/mahout/step_runner.rb +321 -0
data/lib/mahout/tuner.rb +174 -0
data/lib/mahout/version.rb +3 -0
data/lib/mahout.rb +51 -0
metadata +180 -0

data/lib/mahout/benchmark.rb ADDED Viewed

@@ -0,0 +1,464 @@
+# frozen_string_literal: true
+module Mahout
+  class Benchmark
+    def initialize(runner:, config:)
+      @runner = runner
+      @config = config
+    end
+    STANDARD_CLIENT_LEVELS = [1, 4, 8, 16, 32].freeze
+    FAST_STANDARD_CLIENT_LEVELS = [1, 8, 32].freeze
+    def call(scale: 100, duration: 60, clients: nil, fast: false, standardized: false, stress: false)
+      if stress
+        scale = nil if scale == 100
+        duration = 600 if duration == 60
+      elsif fast && standardized
+        scale = 10
+        duration = 20
+      elsif fast
+        scale = 10
+        duration = 10
+      end
+      @cores = detect_cores
+      @clients = clients || (stress ? [@cores * 2, 64].max : @cores)
+      @duration = duration
+      @db = @config.pg_database
+      detect_hardware(@cores)
+      if stress
+        scale ||= stress_scale
+        init_pgbench(scale)
+        run_stress(scale)
+      elsif standardized
+        init_pgbench(scale)
+        run_standardized(fast: fast)
+      else
+        init_pgbench(scale)
+        run_read_only
+        run_write_heavy
+        run_mixed
+        unless fast
+          run_wal_throughput
+          run_checkpoint_impact
+          run_connection_scaling
+        end
+      end
+    end
+    private
+    def init_pgbench(scale)
+      $stdout.puts("initializing pgbench (scale: #{scale})")
+      @runner.run(
+        "sudo -u postgres pgbench -i -s #{scale} -q #{@db}",
+        sudo: false, timeout: 3600
+      )
+      $stdout.puts("")
+    end
+    def run_read_only
+      $stdout.puts("read-only (SELECT) -- #{@clients} clients, #{@duration}s")
+      result = run_pgbench("-S")
+      print_results(result)
+    end
+    def run_write_heavy
+      $stdout.puts("write-heavy (TPC-B) -- #{@clients} clients, #{@duration}s")
+      result = run_pgbench("")
+      print_results(result)
+    end
+    def run_mixed
+      $stdout.puts("mixed (70% read, 30% write) -- #{@clients} clients, #{@duration}s")
+      custom_script = <<~SQL
+        \\set aid random(1, 100000 * :scale)
+        \\set bid random(1, 1 * :scale)
+        \\set tid random(1, 10 * :scale)
+        \\set delta random(-5000, 5000)
+        BEGIN;
+        SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
+        SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
+        SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
+        SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
+        SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
+        SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
+        SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
+        UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;
+        SELECT sum(abalance) FROM pgbench_accounts WHERE aid = :aid;
+        UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;
+        UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;
+        END;
+      SQL
+      @runner.upload(custom_script, "/tmp/mahout-bench-mixed.sql", mode: "0644", owner: "postgres:postgres")
+      result = run_pgbench("-f /tmp/mahout-bench-mixed.sql")
+      @runner.run("rm -f /tmp/mahout-bench-mixed.sql", allow_failure: true)
+      print_results(result)
+    end
+    def run_wal_throughput
+      $stdout.puts("wal write throughput -- #{@clients} clients, #{@duration}s")
+      $stdout.puts("  (measures WAL generation rate from write-heavy workload)")
+      result = run_pgbench("")
+      return $stdout.puts("") if @runner.dry_run?
+      wal_before = get_wal_position
+      result = run_pgbench("")
+      wal_after = get_wal_position
+      if wal_before && wal_after
+        wal_bytes = wal_after - wal_before
+        wal_mb = wal_bytes / 1024.0 / 1024.0
+        wal_rate = wal_mb / @duration
+        $stdout.puts("  wal generated: #{wal_mb.round(1)}MB in #{@duration}s (#{wal_rate.round(1)}MB/s)")
+      end
+      tps = extract_tps(result.stdout)
+      latency = extract_latency(result.stdout)
+      $stdout.puts("  tps: #{tps}")
+      $stdout.puts("  latency: #{latency}")
+      wal_mount = @config.wal_mount || @config.data_mount
+      disk_result = @runner.run("df -h --output=target,size,used,avail,pcent #{wal_mount} | tail -1", allow_failure: true)
+      $stdout.puts("  wal disk: #{disk_result.stdout.strip}") if disk_result.success?
+      $stdout.puts("")
+    end
+    def run_checkpoint_impact
+      $stdout.puts("checkpoint impact -- measuring tps drop during forced checkpoint")
+      @runner.run(
+        "sudo -u postgres psql -c \"CHECKPOINT\"",
+        sudo: false, allow_failure: true
+      )
+      sleep(2) unless @runner.dry_run?
+      result = @runner.run(
+        "sudo -u postgres pgbench -T 30 -c #{@clients} -j #{@clients} -P 1 #{@db} 2>&1",
+        sudo: false, allow_failure: true, timeout: 120
+      )
+      return $stdout.puts("") if @runner.dry_run?
+      @runner.run(
+        "sudo -u postgres psql -c \"SELECT pg_sleep(10)\" & " \
+        "sleep 5 && sudo -u postgres psql -c \"CHECKPOINT\"",
+        sudo: false, allow_failure: true
+      )
+      progress_lines = result.stdout.lines.select { |l| l.match?(/^progress:/) }
+      if progress_lines.length >= 5
+        tps_values = progress_lines.filter_map { |l| m = l.match(/tps=\s*([\d.]+)/); m && m[1].to_f }
+        unless tps_values.empty?
+          avg = tps_values.sum / tps_values.length
+          min = tps_values.min
+          max = tps_values.max
+          dip = ((max - min) / max * 100).round(1)
+          $stdout.puts("  avg tps: #{avg.round(0)}")
+          $stdout.puts("  min tps: #{min.round(0)}")
+          $stdout.puts("  max tps: #{max.round(0)}")
+          $stdout.puts("  checkpoint dip: #{dip}%")
+          print_checkpoint_assessment(dip)
+        end
+      else
+        tps = extract_tps(result.stdout)
+        $stdout.puts("  tps: #{tps}")
+      end
+      $stdout.puts("")
+    end
+    def print_checkpoint_assessment(dip)
+      if dip < 10
+        $stdout.puts("  assessment: minimal impact, storage handles checkpoints well")
+      elsif dip < 25
+        $stdout.puts("  assessment: normal for block storage with write-heavy workloads")
+      elsif dip < 40
+        $stdout.puts("  assessment: noticeable dip, queries may slow during checkpoints")
+        $stdout.puts("  see README for tuning suggestions")
+      else
+        $stdout.puts("  assessment: significant dip, data device is the bottleneck")
+        $stdout.puts("  see README for tuning suggestions")
+      end
+    end
+    def get_wal_position
+      result = @runner.run(
+        "sudo -u postgres psql -tAc \"SELECT pg_current_wal_lsn()\"",
+        sudo: false, allow_failure: true
+      )
+      return nil unless result.success?
+      lsn = result.stdout.strip
+      parts = lsn.split("/")
+      return nil unless parts.length == 2
+      (parts[0].to_i(16) << 32) + parts[1].to_i(16)
+    end
+    def run_connection_scaling
+      $stdout.puts("connection scaling (read-only, 15s per level)")
+      levels = [1, @clients / 2, @clients, @clients * 2, @clients * 4].uniq.select { |n| n > 0 }
+      levels.each do |n|
+        result = @runner.run(
+          "sudo -u postgres pgbench -S -T 15 -c #{n} -j #{[n, @clients].min} " \
+          "--latency-limit=100 --log-prefix=/tmp/mahout-bench #{@db} 2>&1",
+          sudo: false, allow_failure: true
+        )
+        next if @runner.dry_run?
+        tps = extract_tps(result.stdout)
+        latency = extract_latency(result.stdout)
+        $stdout.puts("  #{n} clients: #{tps} tps, #{latency}")
+      end
+      @runner.run("rm -f /tmp/mahout-bench*", allow_failure: true)
+      $stdout.puts("")
+    end
+    def stress_scale
+      result = @runner.run(
+        "sudo -u postgres psql -tAc \"SELECT setting FROM pg_settings WHERE name = 'shared_buffers'\"",
+        sudo: false, allow_failure: true
+      )
+      return 500 unless result.success?
+      shared_buffers_8k = result.stdout.strip.to_i
+      shared_buffers_mb = shared_buffers_8k * 8 / 1024
+      target_mb = shared_buffers_mb * 3
+      scale = (target_mb / 16.0).ceil
+      scale = [[scale, 100].max, 5000].min
+      $stdout.puts("shared_buffers: #{shared_buffers_mb}MB, target dataset: #{target_mb}MB, scale: #{scale}")
+      scale
+    end
+    def run_stress(scale)
+      $stdout.puts("write stress -- #{@clients} clients, #{@duration}s, scale #{scale}")
+      $stdout.puts("")
+      @runner.run(
+        "sudo -u postgres psql -c \"CHECKPOINT\"",
+        sudo: false, allow_failure: true
+      )
+      sleep(2) unless @runner.dry_run?
+      wal_before = get_wal_position
+      result = @runner.run(
+        "sudo -u postgres pgbench -T #{@duration} -c #{@clients} -j #{@cores} -P 1 #{@db} 2>&1",
+        sudo: false, allow_failure: true, timeout: @duration + 120
+      )
+      return $stdout.puts("") if @runner.dry_run?
+      wal_after = get_wal_position
+      progress_lines = result.stdout.lines.select { |l| l.match?(/^progress:/) }
+      tps_values = progress_lines.filter_map { |l| m = l.match(/tps=\s*([\d.]+)/); m && m[1].to_f }
+      if tps_values.length >= 10
+        avg = tps_values.sum / tps_values.length
+        min = tps_values.min
+        max = tps_values.max
+        stddev = Math.sqrt(tps_values.sum { |v| (v - avg)**2 } / tps_values.length)
+        first_quarter = tps_values[0...(tps_values.length / 4)]
+        last_quarter = tps_values[(tps_values.length * 3 / 4)..]
+        first_avg = first_quarter.sum / first_quarter.length
+        last_avg = last_quarter.sum / last_quarter.length
+        degradation = ((first_avg - last_avg) / first_avg * 100).round(1)
+        sorted = tps_values.sort
+        p50 = sorted[sorted.length / 2]
+        p5 = sorted[(sorted.length * 0.05).to_i]
+        p95 = sorted[(sorted.length * 0.95).to_i]
+        $stdout.puts("tps avg: #{avg.round(0)}")
+        $stdout.puts("tps min: #{min.round(0)}")
+        $stdout.puts("tps max: #{max.round(0)}")
+        $stdout.puts("tps stddev: #{stddev.round(0)}")
+        $stdout.puts("tps p5: #{p5.round(0)}")
+        $stdout.puts("tps p50: #{p50.round(0)}")
+        $stdout.puts("tps p95: #{p95.round(0)}")
+        $stdout.puts("")
+        $stdout.puts("first 25% avg tps: #{first_avg.round(0)}")
+        $stdout.puts("last 25% avg tps: #{last_avg.round(0)}")
+        $stdout.puts("degradation: #{degradation}%")
+        dips = tps_values.each_cons(2).count { |a, b| b < a * 0.7 }
+        $stdout.puts("checkpoint dips (>30% drop): #{dips}")
+      else
+        tps = extract_tps(result.stdout)
+        latency = extract_latency(result.stdout)
+        $stdout.puts("tps: #{tps}")
+        $stdout.puts("latency: #{latency}")
+      end
+      if wal_before && wal_after
+        wal_bytes = wal_after - wal_before
+        wal_mb = wal_bytes / 1024.0 / 1024.0
+        wal_rate = wal_mb / @duration
+        $stdout.puts("")
+        $stdout.puts("wal generated: #{wal_mb.round(0)}MB (#{wal_rate.round(1)}MB/s)")
+      end
+      latency = extract_latency(result.stdout)
+      p99 = extract_percentile(result.stdout)
+      $stdout.puts("latency: #{latency}")
+      $stdout.puts("p99: #{p99}") if p99
+      $stdout.puts("")
+    end
+    def run_standardized(fast: false)
+      levels = fast ? FAST_STANDARD_CLIENT_LEVELS : STANDARD_CLIENT_LEVELS
+      $stdout.puts("standardized benchmark#{fast ? " (fast)" : ""} -- #{levels.join(", ")} clients, #{@duration}s per test")
+      $stdout.puts("")
+      upload_mixed_script unless fast
+      levels.each do |n|
+        threads = [n, @cores].min
+        $stdout.puts("#{n} clients")
+        $stdout.puts("  read-only (SELECT)")
+        result = run_pgbench_at("-S", n, threads)
+        print_results(result, indent: 4)
+        $stdout.puts("  write-heavy (TPC-B)")
+        result = run_pgbench_at("", n, threads)
+        print_results(result, indent: 4)
+        unless fast
+          $stdout.puts("  mixed (70/30)")
+          result = run_pgbench_at("-f /tmp/mahout-bench-mixed.sql", n, threads)
+          print_results(result, indent: 4)
+        end
+      end
+      @runner.run("rm -f /tmp/mahout-bench-mixed.sql", allow_failure: true) unless fast
+    end
+    def upload_mixed_script
+      custom_script = <<~SQL
+        \\set aid random(1, 100000 * :scale)
+        \\set bid random(1, 1 * :scale)
+        \\set tid random(1, 10 * :scale)
+        \\set delta random(-5000, 5000)
+        BEGIN;
+        SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
+        SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
+        SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
+        SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
+        SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
+        SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
+        SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
+        UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;
+        SELECT sum(abalance) FROM pgbench_accounts WHERE aid = :aid;
+        UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;
+        UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;
+        END;
+      SQL
+      @runner.upload(custom_script, "/tmp/mahout-bench-mixed.sql", mode: "0644", owner: "postgres:postgres")
+    end
+    def detect_hardware(cores)
+      $stdout.puts("cores: #{cores}")
+      ram = @runner.run("free -m | grep Mem | awk '{print $2}'", allow_failure: true)
+      $stdout.puts("ram: #{ram.stdout.strip}MB") if ram.success?
+      cpu = @runner.run("lscpu | grep 'Model name'", allow_failure: true)
+      if cpu.success?
+        model = cpu.stdout.strip.sub(/^Model name:\s*/, "")
+        $stdout.puts("cpu: #{model}")
+      end
+      disk = @runner.run("lsblk -ndo MODEL #{@config.data_device}", allow_failure: true)
+      if disk.success? && !disk.stdout.strip.empty?
+        $stdout.puts("disk: #{disk.stdout.strip}")
+      end
+      $stdout.puts("data: #{@config.data_device} -> #{@config.data_mount}")
+      if @config.wal_device
+        $stdout.puts("wal: #{@config.wal_device} -> #{@config.wal_mount}")
+      else
+        $stdout.puts("wal: same as data")
+      end
+      $stdout.puts("profile: #{@config.profile}")
+      $stdout.puts("")
+    end
+    def run_pgbench_at(extra_flags, clients, threads)
+      @runner.run(
+        "sudo -u postgres pgbench #{extra_flags} -T #{@duration} -c #{clients} -j #{threads} " \
+        "-P 5 --latency-limit=100 #{@db} 2>&1",
+        sudo: false, allow_failure: true, timeout: @duration + 60
+      )
+    end
+    def run_pgbench(extra_flags)
+      @runner.run(
+        "sudo -u postgres pgbench #{extra_flags} -T #{@duration} -c #{@clients} -j #{@clients} " \
+        "-P 5 --latency-limit=100 #{@db} 2>&1",
+        sudo: false, allow_failure: true, timeout: @duration + 60
+      )
+    end
+    def print_results(result, indent: 2)
+      pad = " " * indent
+      return $stdout.puts("") if @runner.dry_run?
+      output = result.stdout
+      tps = extract_tps(output)
+      latency = extract_latency(output)
+      p99 = extract_percentile(output)
+      $stdout.puts("#{pad}tps: #{tps}")
+      $stdout.puts("#{pad}latency: #{latency}")
+      $stdout.puts("#{pad}p99: #{p99}") if p99
+      $stdout.puts("")
+    end
+    def extract_tps(output)
+      match = output.match(/tps = ([\d.]+).*excluding/i) || output.match(/tps = ([\d.]+)/i)
+      match ? match[1] : "n/a"
+    end
+    def extract_latency(output)
+      avg = output.match(/latency average\s*=\s*([\d.]+)\s*ms/)
+      stddev = output.match(/latency stddev\s*=\s*([\d.]+)\s*ms/)
+      parts = []
+      parts << "avg #{avg[1]}ms" if avg
+      parts << "stddev #{stddev[1]}ms" if stddev
+      parts.empty? ? "n/a" : parts.join(", ")
+    end
+    def extract_percentile(output)
+      match = output.match(/latency.*?(\d+)th percentile\s*=?\s*([\d.]+)\s*ms/i)
+      return nil unless match
+      "#{match[2]}ms"
+    end
+    def detect_cores
+      result = @runner.run("nproc", allow_failure: true)
+      result.success? ? result.stdout.strip.to_i : 4
+    end
+  end
+end