RubyGems - mahout - Versions diffs - 1.1.0 - Mend

mahout 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

checksums.yaml +7 -0
data/config/mahout.example.yml +55 -0
data/config/templates/datadog-pgbackrest-check.py +54 -0
data/config/templates/datadog-postgres.yaml.erb +29 -0
data/config/templates/pg_hba.conf.erb +8 -0
data/config/templates/pgbackrest.conf.erb +24 -0
data/config/templates/pgbouncer.ini.erb +13 -0
data/config/templates/postgresql.conf.erb +80 -0
data/exe/mahout +6 -0
data/lib/mahout/backup.rb +51 -0
data/lib/mahout/benchmark.rb +464 -0
data/lib/mahout/cli.rb +577 -0
data/lib/mahout/config.rb +322 -0
data/lib/mahout/disk_benchmark.rb +172 -0
data/lib/mahout/extension_registry.rb +144 -0
data/lib/mahout/health.rb +372 -0
data/lib/mahout/remote.rb +142 -0
data/lib/mahout/restore.rb +88 -0
data/lib/mahout/runner.rb +81 -0
data/lib/mahout/setup/datadog.rb +116 -0
data/lib/mahout/setup/extensions.rb +51 -0
data/lib/mahout/setup/hardening.rb +109 -0
data/lib/mahout/setup/os.rb +198 -0
data/lib/mahout/setup/pgbackrest.rb +125 -0
data/lib/mahout/setup/pgbouncer.rb +32 -0
data/lib/mahout/setup/postgres.rb +140 -0
data/lib/mahout/setup/ssl.rb +29 -0
data/lib/mahout/setup/systemd.rb +205 -0
data/lib/mahout/status.rb +99 -0
data/lib/mahout/step_runner.rb +321 -0
data/lib/mahout/tuner.rb +174 -0
data/lib/mahout/version.rb +3 -0
data/lib/mahout.rb +51 -0
metadata +180 -0

data/lib/mahout/cli.rb ADDED Viewed

@@ -0,0 +1,577 @@
+# frozen_string_literal: true
+require "thor"
+require "io/console"
+require "fileutils"
+require "json"
+module Mahout
+  class CLI < Thor
+    # Thor hook. Returning true asks Thor to terminate the process with a
+    # failing exit status when a command errors (per Thor's documented
+    # `exit_on_failure?` contract) instead of returning normally.
+    def self.exit_on_failure?
+      true
+    end
+    # Options accepted by every command of this CLI.
+    # host, user and key are forwarded to Config as overrides by load_config;
+    # dry_run, verbose and log_file are forwarded to Runner by build_runner.
+    class_option :host, type: :string, desc: "target server"
+    class_option :user, type: :string, desc: "SSH user"
+    class_option :key, type: :string, desc: "SSH private key path"
+    class_option :config, type: :string, default: "mahout.yml", desc: "config file path"
+    class_option :config_json, type: :string, desc: "config as JSON string (overrides --config)"
+    class_option :dry_run, type: :boolean, default: false, desc: "log commands without executing"
+    class_option :verbose, type: :boolean, default: false, desc: "show full command output"
+    class_option :log_file, type: :string, desc: "write output to log file"
+    # Comma-separated list of the step names declared in StepRunner::STEPS.
+    # Interpolated into the help text of the `setup` command and its options,
+    # so StepRunner must already be loaded when this class body is evaluated.
+    STEP_NAMES = StepRunner::STEPS.map(&:to_s).join(", ")
+    # Skeleton secrets file written by the `new` command. Every value is left
+    # blank for the user to fill in; the datadog key is commented out.
+    SECRETS_TEMPLATE = <<~YAML
+      pg_password:
+      s3_key:
+      s3_secret:
+      # datadog_api_key:
+    YAML
+    # Starter configuration written by the `new` command.
+    # The placeholders "my-instance" and "my_instance" are replaced with the
+    # instance name (hyphenated and underscored forms) before the file is
+    # written. Everything inside the heredoc is emitted verbatim, including
+    # the YAML comments that document optional settings.
+    CONFIG_TEMPLATE = <<~YAML
+      server:
+        host: 127.0.0.1
+        user: root
+        key: ~/.ssh/id_rsa
+      instance:
+        name: my-instance
+        environment: production
+      # conservative | balanced | aggressive
+      profile: balanced
+      postgres:
+        version: 17
+        port: 5432
+        database: my_instance
+        username: my_instance
+        # override any postgresql.conf setting after tuning
+        # these take precedence over tuned values
+        # timeouts are not set by the tuner -- add them here if needed
+        overrides: {}
+          # statement_timeout: 30s
+          # lock_timeout: 10s
+          # idle_in_transaction_session_timeout: 300000
+          # log_min_duration_statement: 1000
+          # temp_file_limit: 20GB
+          # work_mem: 256MB
+          # maintenance_work_mem: 2GB
+          # max_connections: 500
+          # checkpoint_timeout: 1800
+      # pin specific tuner values instead of auto-detecting
+      # these override values computed from hardware + profile
+      # tuner:
+      #   profile:
+      #     shared_buffers: 8GB
+      #     effective_cache_size: 24GB
+      #     work_mem: 64MB
+      #     maintenance_work_mem: 2GB
+      #     wal_buffers: 64MB
+      #     hash_mem_multiplier: 4.0
+      #     max_connections: 300
+      #     max_wal_size: 8GB
+      #     min_wal_size: 2GB
+      #     checkpoint_completion_target: 0.95
+      #     checkpoint_timeout: 900
+      #     checkpoint_warning: 30s
+      #     random_page_cost: 1.0
+      #     effective_io_concurrency: 300
+      #     huge_pages: "on"
+      #     max_parallel_workers_per_gather: 4
+      #     max_parallel_workers: 8
+      #     max_worker_processes: 12
+      #     parallel_leader_participation: "off"
+      #     superuser_reserved_connections: 5
+      #     autovacuum_max_workers: 8
+      #     autovacuum_naptime: 10s
+      #     autovacuum_vacuum_scale_factor: 0.01
+      #     autovacuum_analyze_scale_factor: 0.005
+      #     autovacuum_vacuum_cost_limit: 2000
+      #     autovacuum_vacuum_cost_delay: 2ms
+      #     autovacuum_work_mem: 2GB
+      #     vacuum_buffer_usage_limit: 2GB
+      #     bgwriter_lru_maxpages: 800
+      #     pgbouncer_default_pool_size: 40
+      #     pgbouncer_max_client_conn: 600
+      #     statement_timeout: 30s
+      #     lock_timeout: 10s
+      #     idle_in_transaction_session_timeout: 300000
+      #     archive_timeout: 300
+      #     temp_file_limit: 10GB
+      #     wal_level: replica
+      #     pg_stat_statements_max: 10000
+      #     pg_stat_statements_track: all
+      #     pg_stat_statements_track_utility: "on"
+      storage:
+        data_device: /dev/sdb
+        data_mount: /mnt/pgdata
+        # wal_device: /dev/nvme1n1
+        # wal_mount: /mnt/pgwal
+      pgbouncer:
+        port: 6432
+        pool_mode: transaction  # transaction | session | statement
+      pgbackrest:
+        enabled: true              # set to false to disable pgbackrest entirely
+        stanza: my-instance
+        repo_type: s3
+        s3_bucket: my-instance-backups
+        s3_endpoint: s3.amazonaws.com  # or R2: https://<id>.r2.cloudflarestorage.com
+        s3_region: us-east-1           # use "auto" for R2
+        s3_uri_style: host             # host for AWS, path for R2/MinIO
+        retention_full: 4
+        process_max: 4                 # auto-scales to CPU cores (max 12) during setup
+        schedule:
+          full: "Sun *-*-* 02:00:00"
+          diff: "Mon..Sat *-*-* 02:00:00"
+          incr: "*-*-* *:30:00"
+      datadog:
+        enabled: false
+        site: datadoghq.com            # datadoghq.eu for EU, ddog-gov.com for gov
+        tags:
+          - service:my-instance
+          - env:production
+      hardening:
+        allowed_cidrs:
+          - 10.0.0.0/8
+        ssl_required: true
+        ufw: true
+        ssh_hardening: true
+        ssh_password_auth: false
+        ssh_max_auth_tries: 3
+        fail2ban: true
+        fail2ban_maxretry: 5
+        fail2ban_bantime: 3600         # seconds
+        unattended_upgrades: true
+      # available: pg_stat_statements, pg_trgm, btree_gist, pgcrypto,
+      #   hstore, postgis, timescaledb, pg_cron, pg_partman, pg_repack,
+      #   pgvector, pg_prewarm
+      extensions:
+        - pg_stat_statements
+        - pg_trgm
+        - btree_gist
+      # path to custom templates directory (export defaults with: mahout templates)
+      # templates_path: templates
+      secrets_file: config/my-instance.secrets.yaml
+      health:
+        thresholds:
+          connection_warn_pct: 75
+          connection_critical_pct: 90
+          disk_warn_pct: 80
+          disk_critical_pct: 90
+          backup_warn_hours: 25
+          backup_critical_hours: 49
+          xid_warn: 500000000
+          xid_critical: 1000000000
+          long_query_minutes: 5
+          held_lock_minutes: 1
+    YAML
+    desc "retune", "re-apply tuning profile (os, postgres, pgbouncer)"
+    def retune
+      config = load_config
+      remote = build_remote(config)
+      runner = build_runner(remote, config)
+      %w[os postgres pgbouncer].each do |step|
+        StepRunner.new(
+          runner: runner,
+          remote: remote,
+          config: config,
+          only: step
+        ).call
+      end
+    rescue StepFailed => e
+      exit(1)
+    ensure
+      runner&.close
+      remote&.close
+    end
+    desc "setup", "provision a bare Ubuntu 24.04 instance"
+    long_desc <<~DESC
+      Runs all setup steps in order. Idempotent -- safe to re-run.
+      Steps: #{STEP_NAMES}
+    DESC
+    option :resume, type: :boolean, default: false, desc: "skip completed steps"
+    option :from_step, type: :string, desc: "start from a specific step (#{STEP_NAMES})"
+    option :only, type: :string, desc: "run a single step (#{STEP_NAMES})"
+    option :exclude, type: :string, desc: "comma-separated steps to exclude"
+    def setup
+      config = load_config
+      prompt_secrets(config)
+      remote = build_remote(config)
+      runner = build_runner(remote, config)
+      step_runner = StepRunner.new(
+        runner: runner,
+        remote: remote,
+        config: config,
+        resume: options[:resume],
+        from_step: options[:from_step],
+        only: options[:only],
+        skip_steps: options[:exclude]&.split(",")&.map(&:strip)
+      )
+      step_runner.call
+    rescue StepFailed => e
+      exit(1)
+    ensure
+      runner&.close
+      remote&.close
+    end
+    desc "backup", "trigger a manual backup"
+    option :type, type: :string, default: "full", desc: "full|diff|incr"
+    def backup
+      config = load_config
+      remote = build_remote(config)
+      runner = build_runner(remote, config)
+      Backup.new(runner: runner, config: config).call(type: options[:type])
+    ensure
+      runner&.close
+      remote&.close
+    end
+    desc "restore", "restore from pgBackRest backup"
+    option :latest, type: :boolean, default: true
+    option :target_time, type: :string, desc: "PITR target timestamp"
+    option :target_xid, type: :string, desc: "PITR target transaction ID"
+    option :stanza, type: :string, desc: "override stanza name"
+    option :setup_first, type: :boolean, default: false, desc: "run full setup before restoring (for bare instances)"
+    def restore
+      config = load_config
+      remote = build_remote(config)
+      runner = build_runner(remote, config)
+      if options[:setup_first]
+        prompt_secrets(config)
+        step_runner = StepRunner.new(
+          runner: runner,
+          remote: remote,
+          config: config,
+          skip_steps: %w[pgbackrest systemd]
+        )
+        step_runner.call
+        render_pgbackrest_config(runner, config) if config.pgbackrest_enabled?
+      end
+      Restore.new(runner: runner, config: config).call(
+        latest: options[:latest],
+        target_time: options[:target_time],
+        target_xid: options[:target_xid],
+        stanza: options[:stanza]
+      )
+      if options[:setup_first]
+        if config.pgbackrest_enabled?
+          $stdout.puts("running post-restore setup (pgbackrest, systemd)")
+          StepRunner.new(
+            runner: runner,
+            remote: remote,
+            config: config,
+            only: "pgbackrest"
+          ).call
+        else
+          $stdout.puts("running post-restore setup (systemd)")
+        end
+        StepRunner.new(
+          runner: runner,
+          remote: remote,
+          config: config,
+          only: "systemd"
+        ).call
+      end
+    ensure
+      runner&.close
+      remote&.close
+    end
+    desc "health", "check instance health"
+    option :json, type: :boolean, default: false, desc: "output JSON"
+    def health
+      config = load_config
+      remote = build_remote(config)
+      runner = build_runner(remote, config)
+      exit_code = Health.new(runner: runner, config: config).call(json: options[:json])
+      exit(exit_code) if exit_code > 0
+    ensure
+      runner&.close
+      remote&.close
+    end
+    desc "status", "show instance status overview"
+    def status
+      config = load_config
+      remote = build_remote(config)
+      runner = build_runner(remote, config)
+      Status.new(runner: runner, config: config).call
+    ensure
+      runner&.close
+      remote&.close
+    end
+    desc "console", "open a psql session on the server"
+    def console
+      config = load_config
+      key_path = File.expand_path(config.key)
+      db = config.pg_database
+      exec("ssh", "-t", "-i", key_path, "#{config.user}@#{config.host}", "sudo -u postgres psql #{db}")
+    end
+    desc "seed", "create a test table with sample data for verifying backup/restore"
+    def seed
+      config = load_config
+      remote = build_remote(config)
+      runner = build_runner(remote, config)
+      db = config.pg_database
+      sql = <<~SQL
+        CREATE TABLE IF NOT EXISTS mahout_seed (
+          id serial PRIMARY KEY,
+          message text NOT NULL,
+          created_at timestamptz NOT NULL DEFAULT now()
+        );
+        INSERT INTO mahout_seed (message) VALUES ('seed row at ' || now()::text);
+        SELECT id, message, created_at FROM mahout_seed ORDER BY id;
+      SQL
+      runner.upload(sql, "/tmp/mahout-seed.sql", mode: "0644", owner: "postgres:postgres")
+      result = runner.run("sudo -u postgres psql -d #{db} -f /tmp/mahout-seed.sql", sudo: false)
+      runner.run("rm -f /tmp/mahout-seed.sql")
+      $stdout.puts("")
+      $stdout.puts("seed data in #{db}:")
+      $stdout.puts(result.stdout) unless runner.dry_run?
+      $stdout.puts("")
+      $stdout.puts("after restore, verify with:")
+      $stdout.puts("  mahout seed --config #{options[:config]}")
+      $stdout.puts("the rows above should still be there")
+    ensure
+      runner&.close
+      remote&.close
+    end
+    desc "validate", "validate config file without connecting"
+    def validate
+      config = load_config
+      $stdout.puts("config: #{options[:config]}")
+      $stdout.puts("host: #{config.host}")
+      $stdout.puts("database: #{config.pg_database}")
+      $stdout.puts("user: #{config.pg_username}")
+      $stdout.puts("profile: #{config.profile}")
+      $stdout.puts("stanza: #{config.pgbackrest_stanza}") if config.pgbackrest_enabled?
+      $stdout.puts("pgbackrest: disabled") unless config.pgbackrest_enabled?
+      $stdout.puts("extensions: #{config.extensions.join(", ")}")
+      $stdout.puts("config is valid")
+    rescue ConfigError => e
+      $stderr.puts("config validation failed: #{e.message}")
+      exit(1)
+    end
+    desc "new NAME", "generate config and secrets files (e.g. mahout new prod)"
+    option :dir, type: :string, default: "config", desc: "output directory"
+    def new(name)
+      dir = options[:dir]
+      config_path = File.join(dir, "#{name}.yaml")
+      secrets_path = File.join(dir, "#{name}.secrets.yaml")
+      FileUtils.mkdir_p(dir)
+      if File.exist?(config_path)
+        $stderr.puts("#{config_path} already exists, not overwriting")
+        exit(1)
+      end
+      underscored = name.tr("-", "_")
+      config = CONFIG_TEMPLATE
+        .gsub("my-instance", name)
+        .gsub("my_instance", underscored)
+      File.write(config_path, config)
+      $stdout.puts("wrote #{config_path}")
+      unless File.exist?(secrets_path)
+        File.write(secrets_path, SECRETS_TEMPLATE)
+        $stdout.puts("wrote #{secrets_path}")
+      end
+      gitignore_entry = "*.secrets.yaml"
+      if File.exist?(".gitignore")
+        existing = File.read(".gitignore")
+        unless existing.include?(gitignore_entry)
+          File.open(".gitignore", "a") { |f| f.puts(gitignore_entry) }
+          $stdout.puts("added #{gitignore_entry} to .gitignore")
+        end
+      end
+      $stdout.puts("")
+      $stdout.puts("next steps:")
+      $stdout.puts("  1. edit #{config_path} and fill in your server, storage, and s3 details")
+      $stdout.puts("  2. edit #{secrets_path} with your passwords and keys")
+      $stdout.puts("  3. mahout validate --config #{config_path}")
+      $stdout.puts("  4. mahout setup --config #{config_path}")
+    end
+    desc "templates", "export built-in templates for customization"
+    option :dir, type: :string, default: "templates", desc: "output directory"
+    def templates
+      dir = options[:dir]
+      FileUtils.mkdir_p(dir)
+      Dir.glob(File.join(Mahout::DEFAULT_TEMPLATES_PATH, "*")).each do |src|
+        dest = File.join(dir, File.basename(src))
+        if File.exist?(dest)
+          $stdout.puts("skip #{dest} (already exists)")
+        else
+          FileUtils.cp(src, dest)
+          $stdout.puts("wrote #{dest}")
+        end
+      end
+      $stdout.puts("")
+      $stdout.puts("add templates_path: #{dir} to your config to use these as overrides")
+    end
+    desc "disk_benchmark", "benchmark disk I/O on data and WAL mounts"
+    def disk_benchmark
+      config = load_config
+      remote = build_remote(config)
+      runner = build_runner(remote, config)
+      DiskBenchmark.new(runner: runner, config: config).call
+    ensure
+      runner&.close
+      remote&.close
+    end
+    desc "benchmark", "run pgbench"
+    option :scale, type: :numeric, default: 100
+    option :duration, type: :numeric, default: 60
+    option :clients, type: :numeric, desc: "number of clients"
+    option :fast, type: :boolean, default: false, desc: "quick run (scale 10, 10s, skip wal/checkpoint/scaling)"
+    option :standardized, type: :boolean, default: false, desc: "fixed client counts (1,4,8,16,32) for cross-hardware comparison"
+    option :stress, type: :boolean, default: false, desc: "sustained write load (scale 500, 64+ clients, 10 min)"
+    def benchmark
+      config = load_config
+      remote = build_remote(config)
+      runner = build_runner(remote, config)
+      Benchmark.new(runner: runner, config: config).call(
+        scale: options[:scale],
+        duration: options[:duration],
+        clients: options[:clients],
+        fast: options[:fast],
+        standardized: options[:standardized],
+        stress: options[:stress]
+      )
+    ensure
+      runner&.close
+      remote&.close
+    end
+    private
+    def load_config
+      overrides = {}
+      overrides[:host] = options[:host] if options[:host]
+      overrides[:user] = options[:user] if options[:user]
+      overrides[:key] = options[:key] if options[:key]
+      if options[:config_json]
+        json_data = JSON.parse(options[:config_json])
+        Config.new(path: options[:config], merge: json_data, overrides: overrides)
+      else
+        Config.new(path: options[:config], overrides: overrides)
+      end
+    end
+    def build_remote(config)
+      Remote.new(host: config.host, user: config.user, key: config.key, ssh_options: config.ssh_options)
+    end
+    def build_runner(remote, config = nil)
+      Runner.new(
+        remote: remote,
+        dry_run: options[:dry_run],
+        verbose: options[:verbose],
+        log_file: options[:log_file],
+        hostname: config&.host,
+        templates_path: config&.templates_path
+      )
+    end
+    def render_pgbackrest_config(runner, config)
+      s3_key = config.secret("s3_key") || ""
+      s3_secret = config.secret("s3_secret") || ""
+      runner.run("mkdir -p /etc/pgbackrest")
+      runner.render_and_upload(
+        "pgbackrest.conf.erb",
+        "/etc/pgbackrest/pgbackrest.conf",
+        locals: {
+          config: config,
+          s3_key: s3_key,
+          s3_secret: s3_secret,
+          process_max: config.pgbackrest_process_max
+        },
+        mode: "0600",
+        owner: "postgres:postgres"
+      )
+    end
+    def prompt_secrets(config)
+      prompt_secret(config, "pg_password", "postgres password for #{config.pg_username}")
+      if pgbackrest_steps_will_run?(config)
+        prompt_secret(config, "s3_key", "pgbackrest S3 access key")
+        prompt_secret(config, "s3_secret", "pgbackrest S3 secret key")
+      end
+      return unless config.datadog_enabled? && datadog_steps_will_run?
+      prompt_secret(config, "datadog_api_key", "datadog API key")
+    end
+    def prompt_secret(config, key, label)
+      return if config.secret(key) && !config.secret(key).empty?
+      config.set_secret(key, prompt_password(label))
+    end
+    def pgbackrest_steps_will_run?(config)
+      return false unless config.pgbackrest_enabled?
+      return false if options[:only] && !%w[pgbackrest systemd].include?(options[:only])
+      skipped = options[:exclude]&.split(",")&.map(&:strip) || []
+      return false if skipped.include?("pgbackrest")
+      true
+    end
+    def datadog_steps_will_run?
+      return false if options[:only] && options[:only] != "datadog"
+      skipped = options[:exclude]&.split(",")&.map(&:strip) || []
+      return false if skipped.include?("datadog")
+      true
+    end
+    def prompt_password(label)
+      return "test" if options[:dry_run]
+      $stdout.print("#{label}: ")
+      IO.console.noecho { $stdin.gets&.chomp } || ""
+    ensure
+      $stdout.puts
+    end
+  end
+end