mahout 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/mahout/cli.rb ADDED
@@ -0,0 +1,577 @@
1
+ # frozen_string_literal: true
2
+
3
require "thor"
require "io/console"
require "fileutils"
require "json"
6
+
7
+ module Mahout
8
+ class CLI < Thor
9
class << self
  # Thor hook: answering true makes the CLI exit with a failure status when a
  # command fails, instead of only printing the error.
  def exit_on_failure?
    true
  end
end
12
+
13
# Flags shared by every command, declared from one table so each flag's type,
# default and help text sit side by side. Declaration order is preserved.
{
  host: {type: :string, desc: "target server"},
  user: {type: :string, desc: "SSH user"},
  key: {type: :string, desc: "SSH private key path"},
  config: {type: :string, default: "mahout.yml", desc: "config file path"},
  config_json: {type: :string, desc: "config as JSON string (overrides --config)"},
  dry_run: {type: :boolean, default: false, desc: "log commands without executing"},
  verbose: {type: :boolean, default: false, desc: "show full command output"},
  log_file: {type: :string, desc: "write output to log file"}
}.each do |flag, settings|
  class_option(flag, settings)
end
21
+
22
# Comma-separated names of StepRunner::STEPS, interpolated into the help text
# of the `setup` command. Frozen so the shared constant cannot be mutated.
STEP_NAMES = StepRunner::STEPS.map(&:to_s).join(", ").freeze
23
+
24
+ SECRETS_TEMPLATE = <<~YAML
25
+ pg_password:
26
+ s3_key:
27
+ s3_secret:
28
+ # datadog_api_key:
29
+ YAML
30
+
31
+ CONFIG_TEMPLATE = <<~YAML
32
+ server:
33
+ host: 127.0.0.1
34
+ user: root
35
+ key: ~/.ssh/id_rsa
36
+
37
+ instance:
38
+ name: my-instance
39
+ environment: production
40
+
41
+ # conservative | balanced | aggressive
42
+ profile: balanced
43
+
44
+ postgres:
45
+ version: 17
46
+ port: 5432
47
+ database: my_instance
48
+ username: my_instance
49
+ # override any postgresql.conf setting after tuning
50
+ # these take precedence over tuned values
51
+ # timeouts are not set by the tuner -- add them here if needed
52
+ overrides: {}
53
+ # statement_timeout: 30s
54
+ # lock_timeout: 10s
55
+ # idle_in_transaction_session_timeout: 300000
56
+ # log_min_duration_statement: 1000
57
+ # temp_file_limit: 20GB
58
+ # work_mem: 256MB
59
+ # maintenance_work_mem: 2GB
60
+ # max_connections: 500
61
+ # checkpoint_timeout: 1800
62
+
63
+ # pin specific tuner values instead of auto-detecting
64
+ # these override values computed from hardware + profile
65
+ # tuner:
66
+ # profile:
67
+ # shared_buffers: 8GB
68
+ # effective_cache_size: 24GB
69
+ # work_mem: 64MB
70
+ # maintenance_work_mem: 2GB
71
+ # wal_buffers: 64MB
72
+ # hash_mem_multiplier: 4.0
73
+ # max_connections: 300
74
+ # max_wal_size: 8GB
75
+ # min_wal_size: 2GB
76
+ # checkpoint_completion_target: 0.95
77
+ # checkpoint_timeout: 900
78
+ # checkpoint_warning: 30s
79
+ # random_page_cost: 1.0
80
+ # effective_io_concurrency: 300
81
+ # huge_pages: "on"
82
+ # max_parallel_workers_per_gather: 4
83
+ # max_parallel_workers: 8
84
+ # max_worker_processes: 12
85
+ # parallel_leader_participation: "off"
86
+ # superuser_reserved_connections: 5
87
+ # autovacuum_max_workers: 8
88
+ # autovacuum_naptime: 10s
89
+ # autovacuum_vacuum_scale_factor: 0.01
90
+ # autovacuum_analyze_scale_factor: 0.005
91
+ # autovacuum_vacuum_cost_limit: 2000
92
+ # autovacuum_vacuum_cost_delay: 2ms
93
+ # autovacuum_work_mem: 2GB
94
+ # vacuum_buffer_usage_limit: 2GB
95
+ # bgwriter_lru_maxpages: 800
96
+ # pgbouncer_default_pool_size: 40
97
+ # pgbouncer_max_client_conn: 600
98
+ # statement_timeout: 30s
99
+ # lock_timeout: 10s
100
+ # idle_in_transaction_session_timeout: 300000
101
+ # archive_timeout: 300
102
+ # temp_file_limit: 10GB
103
+ # wal_level: replica
104
+ # pg_stat_statements_max: 10000
105
+ # pg_stat_statements_track: all
106
+ # pg_stat_statements_track_utility: "on"
107
+
108
+ storage:
109
+ data_device: /dev/sdb
110
+ data_mount: /mnt/pgdata
111
+ # wal_device: /dev/nvme1n1
112
+ # wal_mount: /mnt/pgwal
113
+
114
+ pgbouncer:
115
+ port: 6432
116
+ pool_mode: transaction # transaction | session | statement
117
+
118
+ pgbackrest:
119
+ enabled: true # set to false to disable pgbackrest entirely
120
+ stanza: my-instance
121
+ repo_type: s3
122
+ s3_bucket: my-instance-backups
123
+ s3_endpoint: s3.amazonaws.com # or R2: https://<id>.r2.cloudflarestorage.com
124
+ s3_region: us-east-1 # use "auto" for R2
125
+ s3_uri_style: host # host for AWS, path for R2/MinIO
126
+ retention_full: 4
127
+ process_max: 4 # auto-scales to CPU cores (max 12) during setup
128
+ schedule:
129
+ full: "Sun *-*-* 02:00:00"
130
+ diff: "Mon..Sat *-*-* 02:00:00"
131
+ incr: "*-*-* *:30:00"
132
+
133
+ datadog:
134
+ enabled: false
135
+ site: datadoghq.com # datadoghq.eu for EU, ddog-gov.com for gov
136
+ tags:
137
+ - service:my-instance
138
+ - env:production
139
+
140
+ hardening:
141
+ allowed_cidrs:
142
+ - 10.0.0.0/8
143
+ ssl_required: true
144
+ ufw: true
145
+ ssh_hardening: true
146
+ ssh_password_auth: false
147
+ ssh_max_auth_tries: 3
148
+ fail2ban: true
149
+ fail2ban_maxretry: 5
150
+ fail2ban_bantime: 3600 # seconds
151
+ unattended_upgrades: true
152
+
153
+ # available: pg_stat_statements, pg_trgm, btree_gist, pgcrypto,
154
+ # hstore, postgis, timescaledb, pg_cron, pg_partman, pg_repack,
155
+ # pgvector, pg_prewarm
156
+ extensions:
157
+ - pg_stat_statements
158
+ - pg_trgm
159
+ - btree_gist
160
+
161
+ # path to custom templates directory (export defaults with: mahout templates)
162
+ # templates_path: templates
163
+
164
+ secrets_file: config/my-instance.secrets.yaml
165
+
166
+ health:
167
+ thresholds:
168
+ connection_warn_pct: 75
169
+ connection_critical_pct: 90
170
+ disk_warn_pct: 80
171
+ disk_critical_pct: 90
172
+ backup_warn_hours: 25
173
+ backup_critical_hours: 49
174
+ xid_warn: 500000000
175
+ xid_critical: 1000000000
176
+ long_query_minutes: 5
177
+ held_lock_minutes: 1
178
+ YAML
179
+
180
# Re-runs the os, postgres and pgbouncer steps, in that order, using a fresh
# StepRunner restricted to one step each time. A StepFailed ends the process
# with status 1; the runner and remote are closed on every exit path.
desc "retune", "re-apply tuning profile (os, postgres, pgbouncer)"
def retune
  config = load_config
  remote = build_remote(config)
  runner = build_runner(remote, config)

  %w[os postgres pgbouncer].each do |step|
    StepRunner.new(
      runner: runner,
      remote: remote,
      config: config,
      only: step
    ).call
  end
rescue StepFailed
  # The exception object is never inspected, so it is not bound to a local.
  exit(1)
ensure
  runner&.close
  remote&.close
end
200
+
201
# Runs the setup steps through StepRunner. Missing secrets are prompted for
# before the remote is built. --resume, --from_step, --only and --exclude are
# forwarded to StepRunner; --exclude is split on commas and each entry is
# stripped. A StepFailed ends the process with status 1; the runner and remote
# are closed on every exit path.
desc "setup", "provision a bare Ubuntu 24.04 instance"
long_desc <<~DESC
  Runs all setup steps in order. Idempotent -- safe to re-run.

  Steps: #{STEP_NAMES}
DESC
option :resume, type: :boolean, default: false, desc: "skip completed steps"
option :from_step, type: :string, desc: "start from a specific step (#{STEP_NAMES})"
option :only, type: :string, desc: "run a single step (#{STEP_NAMES})"
option :exclude, type: :string, desc: "comma-separated steps to exclude"
def setup
  config = load_config
  prompt_secrets(config)
  remote = build_remote(config)
  runner = build_runner(remote, config)

  step_runner = StepRunner.new(
    runner: runner,
    remote: remote,
    config: config,
    resume: options[:resume],
    from_step: options[:from_step],
    only: options[:only],
    skip_steps: options[:exclude]&.split(",")&.map(&:strip)
  )
  step_runner.call
rescue StepFailed
  # The exception object is never inspected, so it is not bound to a local.
  exit(1)
ensure
  runner&.close
  remote&.close
end
233
+
234
# Triggers a backup of the requested type via Backup#call. The type is limited
# to the three values named in the option's help text, so a typo is rejected
# by Thor's option parsing instead of being passed through to Backup.
desc "backup", "trigger a manual backup"
option :type, type: :string, default: "full", enum: %w[full diff incr], desc: "full|diff|incr"
def backup
  config = load_config
  remote = build_remote(config)
  runner = build_runner(remote, config)
  Backup.new(runner: runner, config: config).call(type: options[:type])
ensure
  runner&.close
  remote&.close
end
245
+
246
# Restores through Restore#call, forwarding --latest, --target_time,
# --target_xid and --stanza.
#
# With --setup_first the restore is bracketed by setup work:
#   1. before: prompt for secrets, run every step except pgbackrest and
#      systemd, then upload pgbackrest.conf when pgbackrest is enabled;
#   2. after: run the pgbackrest step (only when enabled), then systemd.
#
# A StepFailed from any of those StepRunner calls ends the process with
# status 1, matching `setup` and `retune`. The runner and remote are closed on
# every exit path.
desc "restore", "restore from pgBackRest backup"
option :latest, type: :boolean, default: true
option :target_time, type: :string, desc: "PITR target timestamp"
option :target_xid, type: :string, desc: "PITR target transaction ID"
option :stanza, type: :string, desc: "override stanza name"
option :setup_first, type: :boolean, default: false, desc: "run full setup before restoring (for bare instances)"
def restore
  config = load_config
  remote = build_remote(config)
  runner = build_runner(remote, config)

  if options[:setup_first]
    prompt_secrets(config)
    step_runner = StepRunner.new(
      runner: runner,
      remote: remote,
      config: config,
      skip_steps: %w[pgbackrest systemd]
    )
    step_runner.call

    render_pgbackrest_config(runner, config) if config.pgbackrest_enabled?
  end

  Restore.new(runner: runner, config: config).call(
    latest: options[:latest],
    target_time: options[:target_time],
    target_xid: options[:target_xid],
    stanza: options[:stanza]
  )

  if options[:setup_first]
    if config.pgbackrest_enabled?
      $stdout.puts("running post-restore setup (pgbackrest, systemd)")
      StepRunner.new(
        runner: runner,
        remote: remote,
        config: config,
        only: "pgbackrest"
      ).call
    else
      $stdout.puts("running post-restore setup (systemd)")
    end
    StepRunner.new(
      runner: runner,
      remote: remote,
      config: config,
      only: "systemd"
    ).call
  end
rescue StepFailed
  # Same handling as `setup`/`retune`: exit non-zero rather than letting the
  # exception escape the command.
  exit(1)
ensure
  runner&.close
  remote&.close
end
300
+
301
# Runs Health#call (JSON output when --json is set) and exits with the value
# it returns whenever that value is greater than zero.
desc "health", "check instance health"
option :json, type: :boolean, default: false, desc: "output JSON"
def health
  cfg = load_config
  ssh = build_remote(cfg)
  executor = build_runner(ssh, cfg)

  status_code = Health.new(runner: executor, config: cfg).call(json: options[:json])
  exit(status_code) if status_code.positive?
ensure
  executor&.close
  ssh&.close
end
313
+
314
# Builds the remote and runner from the loaded config, delegates to
# Status#call, and closes both afterwards.
desc "status", "show instance status overview"
def status
  cfg = load_config
  ssh = build_remote(cfg)
  executor = build_runner(ssh, cfg)
  Status.new(runner: executor, config: cfg).call
ensure
  executor&.close
  ssh&.close
end
324
+
325
# Replaces the current process (Kernel#exec) with an interactive ssh session
# that runs psql as the postgres user against the configured database.
desc "console", "open a psql session on the server"
def console
  cfg = load_config
  identity_file = File.expand_path(cfg.key)
  database = cfg.pg_database

  ssh_command = [
    "ssh", "-t",
    "-i", identity_file,
    "#{cfg.user}@#{cfg.host}",
    "sudo -u postgres psql #{database}"
  ]
  exec(*ssh_command)
end
332
+
333
# Uploads a small SQL script, runs it with psql as the postgres user, removes
# the script, and prints the resulting rows plus a hint for re-checking them
# after a restore. Each invocation inserts one more row into mahout_seed.
desc "seed", "create a test table with sample data for verifying backup/restore"
def seed
  cfg = load_config
  ssh = build_remote(cfg)
  executor = build_runner(ssh, cfg)
  database = cfg.pg_database

  seed_sql = <<~SQL
    CREATE TABLE IF NOT EXISTS mahout_seed (
      id serial PRIMARY KEY,
      message text NOT NULL,
      created_at timestamptz NOT NULL DEFAULT now()
    );
    INSERT INTO mahout_seed (message) VALUES ('seed row at ' || now()::text);
    SELECT id, message, created_at FROM mahout_seed ORDER BY id;
  SQL

  executor.upload(seed_sql, "/tmp/mahout-seed.sql", mode: "0644", owner: "postgres:postgres")
  psql_result = executor.run("sudo -u postgres psql -d #{database} -f /tmp/mahout-seed.sql", sudo: false)
  executor.run("rm -f /tmp/mahout-seed.sql")

  $stdout.puts("", "seed data in #{database}:")
  # psql output is only printed when the runner is not in dry-run mode.
  $stdout.puts(psql_result.stdout) unless executor.dry_run?
  $stdout.puts(
    "",
    "after restore, verify with:",
    " mahout seed --config #{options[:config]}",
    "the rows above should still be there"
  )
ensure
  executor&.close
  ssh&.close
end
365
+
366
# Loads the config and prints a short summary of it. No remote or runner is
# built here. A ConfigError is reported on stderr and exits with status 1.
desc "validate", "validate config file without connecting"
def validate
  cfg = load_config
  out = $stdout

  out.puts("config: #{options[:config]}")
  out.puts("host: #{cfg.host}")
  out.puts("database: #{cfg.pg_database}")
  out.puts("user: #{cfg.pg_username}")
  out.puts("profile: #{cfg.profile}")
  if cfg.pgbackrest_enabled?
    out.puts("stanza: #{cfg.pgbackrest_stanza}")
  else
    out.puts("pgbackrest: disabled")
  end
  out.puts("extensions: #{cfg.extensions.join(", ")}")
  out.puts("config is valid")
rescue ConfigError => error
  $stderr.puts("config validation failed: #{error.message}")
  exit(1)
end
382
+
383
# Writes <dir>/<name>.yaml from CONFIG_TEMPLATE and, unless it already exists,
# <dir>/<name>.secrets.yaml from SECRETS_TEMPLATE. An existing config file is
# never overwritten (exits with status 1). When a .gitignore exists in the
# working directory, "*.secrets.yaml" is appended to it if not yet present.
desc "new NAME", "generate config and secrets files (e.g. mahout new prod)"
option :dir, type: :string, default: "config", desc: "output directory"
def new(name)
  dir = options[:dir]
  config_path = File.join(dir, "#{name}.yaml")
  secrets_path = File.join(dir, "#{name}.secrets.yaml")

  FileUtils.mkdir_p(dir)

  if File.exist?(config_path)
    $stderr.puts("#{config_path} already exists, not overwriting")
    exit(1)
  end

  # Replace both placeholder spellings in a single pass. Two chained gsubs
  # would re-substitute inside an already inserted name (e.g. NAME
  # "my_instance_2" became "my_instance_2_2"), and a plain string replacement
  # would treat backslashes in NAME as back-references. The hash form inserts
  # the values literally.
  placeholders = {"my-instance" => name, "my_instance" => name.tr("-", "_")}
  config = CONFIG_TEMPLATE.gsub(/my[-_]instance/, placeholders)
  File.write(config_path, config)
  $stdout.puts("wrote #{config_path}")

  unless File.exist?(secrets_path)
    # The file will hold passwords and API keys: create it owner-only.
    File.write(secrets_path, SECRETS_TEMPLATE, perm: 0o600)
    $stdout.puts("wrote #{secrets_path}")
  end

  gitignore_entry = "*.secrets.yaml"
  if File.exist?(".gitignore")
    existing = File.read(".gitignore")
    unless existing.include?(gitignore_entry)
      File.open(".gitignore", "a") do |f|
        # Start a new line first when the file does not end with one, so the
        # entry is not glued onto the previous pattern.
        f.puts unless existing.empty? || existing.end_with?("\n")
        f.puts(gitignore_entry)
      end
      $stdout.puts("added #{gitignore_entry} to .gitignore")
    end
  end

  $stdout.puts("")
  $stdout.puts("next steps:")
  $stdout.puts(" 1. edit #{config_path} and fill in your server, storage, and s3 details")
  $stdout.puts(" 2. edit #{secrets_path} with your passwords and keys")
  $stdout.puts(" 3. mahout validate --config #{config_path}")
  $stdout.puts(" 4. mahout setup --config #{config_path}")
end
425
+
426
# Copies every entry directly under Mahout::DEFAULT_TEMPLATES_PATH into the
# output directory, leaving files that already exist there untouched.
desc "templates", "export built-in templates for customization"
option :dir, type: :string, default: "templates", desc: "output directory"
def templates
  out_dir = options[:dir]
  FileUtils.mkdir_p(out_dir)

  bundled = Dir.glob(File.join(Mahout::DEFAULT_TEMPLATES_PATH, "*"))
  bundled.each do |source|
    target = File.join(out_dir, File.basename(source))

    if File.exist?(target)
      $stdout.puts("skip #{target} (already exists)")
      next
    end

    FileUtils.cp(source, target)
    $stdout.puts("wrote #{target}")
  end

  $stdout.puts("", "add templates_path: #{out_dir} to your config to use these as overrides")
end
445
+
446
# Builds the remote and runner from the loaded config, delegates to
# DiskBenchmark#call, and closes both afterwards.
desc "disk_benchmark", "benchmark disk I/O on data and WAL mounts"
def disk_benchmark
  cfg = load_config
  ssh = build_remote(cfg)
  executor = build_runner(ssh, cfg)
  DiskBenchmark.new(runner: executor, config: cfg).call
ensure
  executor&.close
  ssh&.close
end
456
+
457
# Forwards the six benchmark flags, unchanged, as keyword arguments to
# Benchmark#call; the runner and remote are closed afterwards.
desc "benchmark", "run pgbench"
option :scale, type: :numeric, default: 100
option :duration, type: :numeric, default: 60
option :clients, type: :numeric, desc: "number of clients"
option :fast, type: :boolean, default: false, desc: "quick run (scale 10, 10s, skip wal/checkpoint/scaling)"
option :standardized, type: :boolean, default: false, desc: "fixed client counts (1,4,8,16,32) for cross-hardware comparison"
option :stress, type: :boolean, default: false, desc: "sustained write load (scale 500, 64+ clients, 10 min)"
def benchmark
  cfg = load_config
  ssh = build_remote(cfg)
  executor = build_runner(ssh, cfg)

  bench = Benchmark.new(runner: executor, config: cfg)
  flags = %i[scale duration clients fast standardized stress]
  settings = flags.each_with_object({}) { |flag, acc| acc[flag] = options[flag] }
  bench.call(**settings)
ensure
  executor&.close
  ssh&.close
end
480
+
481
+ private
482
+
483
# Builds the Config for the current invocation.
#
# --host, --user and --key, when given, are passed as overrides. When
# --config-json is given it is parsed and passed as `merge:` alongside the
# config path.
#
# Raises ConfigError when --config-json is not valid JSON, so the problem is
# reported like any other config problem (e.g. by `validate`) instead of
# escaping as a raw JSON::ParserError.
def load_config
  overrides = {}
  %i[host user key].each do |flag|
    overrides[flag] = options[flag] if options[flag]
  end

  raw_json = options[:config_json]
  return Config.new(path: options[:config], overrides: overrides) unless raw_json

  begin
    json_data = JSON.parse(raw_json)
  rescue JSON::ParserError => e
    raise ConfigError, "--config-json is not valid JSON: #{e.message}"
  end

  Config.new(path: options[:config], merge: json_data, overrides: overrides)
end
496
+
497
# Creates the Remote for the host, user, key and ssh options held by +config+.
def build_remote(config)
  connection = {
    host: config.host,
    user: config.user,
    key: config.key,
    ssh_options: config.ssh_options
  }
  Remote.new(**connection)
end
500
+
501
# Creates the Runner for +remote+ from the global --dry_run, --verbose and
# --log_file flags. +config+ is optional: without it, hostname and
# templates_path are passed as nil.
def build_runner(remote, config = nil)
  settings = {
    remote: remote,
    dry_run: options[:dry_run],
    verbose: options[:verbose],
    log_file: options[:log_file],
    hostname: config&.host,
    templates_path: config&.templates_path
  }
  Runner.new(**settings)
end
511
+
512
# Ensures /etc/pgbackrest exists, then renders pgbackrest.conf.erb to
# /etc/pgbackrest/pgbackrest.conf (mode 0600, owner postgres:postgres).
# Missing S3 secrets are passed to the template as empty strings.
def render_pgbackrest_config(runner, config)
  access_key, secret_key = %w[s3_key s3_secret].map { |name| config.secret(name) || "" }
  runner.run("mkdir -p /etc/pgbackrest")

  template_locals = {
    config: config,
    s3_key: access_key,
    s3_secret: secret_key,
    process_max: config.pgbackrest_process_max
  }
  runner.render_and_upload(
    "pgbackrest.conf.erb",
    "/etc/pgbackrest/pgbackrest.conf",
    locals: template_locals,
    mode: "0600",
    owner: "postgres:postgres"
  )
end
529
+
530
# Asks for each secret the upcoming run needs: always the postgres password,
# the two S3 credentials when pgbackrest steps will run, and the Datadog API
# key when Datadog is enabled and its step will run.
def prompt_secrets(config)
  prompt_secret(config, "pg_password", "postgres password for #{config.pg_username}")

  if pgbackrest_steps_will_run?(config)
    [
      ["s3_key", "pgbackrest S3 access key"],
      ["s3_secret", "pgbackrest S3 secret key"]
    ].each { |secret_name, label| prompt_secret(config, secret_name, label) }
  end

  prompt_secret(config, "datadog_api_key", "datadog API key") if config.datadog_enabled? && datadog_steps_will_run?
end
542
+
543
# Prompts for the secret +key+ and stores the answer on +config+, unless a
# non-empty value is already present.
#
# The existing value is read once and compared through #to_s, so a non-String
# scalar (for example a numeric password) counts as present instead of raising
# NoMethodError on #empty?. nil, false and "" still trigger the prompt.
def prompt_secret(config, key, label)
  existing = config.secret(key)
  return if existing && !existing.to_s.empty?

  config.set_secret(key, prompt_password(label))
end
548
+
549
# True when pgbackrest is enabled in +config+, --only (if given) is either
# "pgbackrest" or "systemd", and "pgbackrest" is not listed in --exclude.
def pgbackrest_steps_will_run?(config)
  return false unless config.pgbackrest_enabled?

  selected = options[:only]
  relevant_steps = %w[pgbackrest systemd]
  return false if selected && !relevant_steps.include?(selected)

  excluded = (options[:exclude] || "").split(",").map(&:strip)
  !excluded.include?("pgbackrest")
end
558
+
559
# True unless --only names a step other than "datadog" or --exclude lists
# "datadog".
def datadog_steps_will_run?
  selected = options[:only]
  return false if selected && selected != "datadog"

  excluded = (options[:exclude] || "").split(",").map(&:strip)
  !excluded.include?("datadog")
end
567
+
568
# Prints "<label>: " and reads one line from $stdin, returning it without the
# trailing newline ("" at end of input). Under --dry_run nothing is read and
# the placeholder "test" is returned.
#
# Echo is switched off through IO.console when a console is available. Without
# one (IO.console returns nil, e.g. no controlling terminal) the line is read
# as-is rather than failing with NoMethodError on nil.
def prompt_password(label)
  return "test" if options[:dry_run]

  $stdout.print("#{label}: ")
  console = IO.console
  answer =
    if console
      console.noecho { $stdin.gets }
    else
      $stdin.gets
    end
  answer&.chomp || ""
ensure
  # With echo off the user's Enter key is not shown, so end the line here.
  $stdout.puts
end
576
+ end
577
+ end