mahout 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 9a23149a2c8d4ea854eaeb79118864ed439ef1a3a76a67000c824547b80d5feb
4
+ data.tar.gz: 913d23fa0ea7103a707fc10ffda3494956a0e1f439588e24375c85493402b43a
5
+ SHA512:
6
+ metadata.gz: 4ed8db2538f03d29b17f0b7bc1fbc28f98e7847a585ce8ca375e84263d7f0d80ed18e44eaa5a57e16492b7104931f7112fa196dd578a89127c73597b19dc29f5
7
+ data.tar.gz: f4126989a7b9761bb60d7fa81941208ecbb3a44fe0bd5b124140006ece9c816138b2f8d19cd0cb7c93cd3abab8f604d79f747b27570c9252964c289508ee66eb
@@ -0,0 +1,55 @@
1
+ server:
2
+ host: 10.0.1.5
3
+ user: root
4
+ key: ~/.ssh/id_rsa
5
+
6
+ instance:
7
+ name: mahout-prod
8
+ environment: production
9
+
10
+ postgres:
11
+ version: 17
12
+ port: 5432
13
+ database: mahout
14
+ username: mahout
15
+
16
+ profile: aggressive
17
+
18
+ storage:
19
+ data_device: /dev/sdb
20
+ data_mount: /mnt/pgdata
21
+ wal_device: /dev/nvme0n1
22
+ wal_mount: /mnt/pgwal
23
+
24
+ pgbouncer:
25
+ port: 6432
26
+ pool_mode: transaction
27
+
28
+ pgbackrest:
29
+ enabled: true # set to false to disable pgbackrest entirely
30
+ stanza: mahout
31
+ repo_type: s3
32
+ s3_bucket: mahout-backups
33
+ s3_endpoint: s3.amazonaws.com
34
+ s3_region: us-east-1
35
+ s3_uri_style: host
36
+ retention_full: 4
37
+ process_max: 4
38
+
39
+ datadog:
40
+ enabled: true
41
+ tags:
42
+ - service:mahout
43
+ - env:production
44
+
45
+ hardening:
46
+ allowed_cidrs:
47
+ - 10.0.0.0/8
48
+ - 172.16.0.0/12
49
+ ssl_required: true
50
+
51
+ extensions:
52
+ - pg_stat_statements
53
+ - pg_trgm
54
+ - btree_gist
55
+ - pgcrypto
@@ -0,0 +1,54 @@
1
+ import json
2
+ import subprocess
3
+ import time
4
+
5
+ from datadog_checks.base import AgentCheck
6
+
7
+
8
class PgBackRestCheck(AgentCheck):
    """Datadog Agent check that reports pgBackRest backup status for one stanza.

    Each run executes ``pgbackrest info --output json`` as the ``postgres``
    user (via ``sudo``) and emits:

    * service check ``pgbackrest.can_connect``:
        - CRITICAL when the command times out, exits non-zero, or its output
          cannot be parsed,
        - WARNING when the command succeeds but returns no stanza info,
        - OK otherwise;
    * gauge ``pgbackrest.backup.age_seconds``: seconds since the last listed
      backup's ``timestamp.stop``;
    * gauge ``pgbackrest.backup.size_bytes``: ``info.size`` of that backup;
    * gauge ``pgbackrest.wal.archive_lag``: currently always 0 (placeholder),
      emitted only when the stanza reports archive information.
    """

    def check(self, instance):
        """Run one collection cycle.

        Args:
            instance: The instance configuration mapping. Only the optional
                ``stanza`` key is read (defaults to ``"main"``).
        """
        stanza = instance.get("stanza", "main")
        service_check_name = "pgbackrest.can_connect"

        # Stage 1: run pgbackrest and parse its output. Any failure here means
        # the backup status could not be obtained, so it maps to a single
        # non-OK service check and the run ends.
        try:
            result = subprocess.run(
                ["sudo", "-u", "postgres", "pgbackrest", "--stanza", stanza, "info", "--output", "json"],
                capture_output=True,
                text=True,
                timeout=30,
            )

            if result.returncode != 0:
                self.service_check(service_check_name, self.CRITICAL, message=result.stderr)
                return

            info = json.loads(result.stdout)
            if not info:
                self.service_check(service_check_name, self.WARNING, message="no stanza info")
                return

            stanza_info = info[0]
            backups = stanza_info.get("backup", [])
            archive = stanza_info.get("archive", [])
        except subprocess.TimeoutExpired:
            self.service_check(service_check_name, self.CRITICAL, message="timeout")
            return
        except Exception as e:
            self.service_check(service_check_name, self.CRITICAL, message=str(e))
            return

        # Stage 2: the stanza info was read successfully. From here on a
        # problem with an individual metric must not flip the service check,
        # otherwise the same run would report both OK and CRITICAL.
        self.service_check(service_check_name, self.OK)

        if backups:
            self._report_latest_backup(stanza, backups)

        if archive:
            # Placeholder: no lag is computed yet, a constant 0 is reported so
            # the metric exists whenever archive information is present.
            self.gauge("pgbackrest.wal.archive_lag", 0, tags=[f"stanza:{stanza}"])

    def _report_latest_backup(self, stanza, backups):
        """Emit age and size gauges for the last entry of ``backups``.

        A malformed entry (missing or mistyped fields) is logged and skipped
        instead of being raised.
        """
        try:
            latest = backups[-1]
            # timestamp.stop is subtracted from time.time(), i.e. it is
            # treated as a Unix epoch value in seconds.
            age_seconds = int(time.time()) - latest["timestamp"]["stop"]
            backup_size = latest["info"]["size"]
            backup_type = latest["type"]
        except (KeyError, IndexError, TypeError) as e:
            self.log.warning(
                "skipping pgbackrest backup metrics for stanza %s: malformed backup entry (%r)",
                stanza,
                e,
            )
            return

        tags = [f"stanza:{stanza}", f"backup_type:{backup_type}"]
        self.gauge("pgbackrest.backup.age_seconds", age_seconds, tags=tags)
        self.gauge("pgbackrest.backup.size_bytes", backup_size, tags=tags)
@@ -0,0 +1,29 @@
1
+ init_config:
2
+
3
+ instances:
4
+ - host: 127.0.0.1
5
+ port: <%= config.pg_port %>
6
+ username: datadog
7
+ password: datadog
8
+ dbname: <%= config.pg_database %>
9
+ collect_activity_metrics: true
10
+ collect_database_size_metrics: true
11
+ collect_default_database: false
12
+ collect_wal_metrics: true
13
+ query_metrics:
14
+ enabled: true
15
+ query_activity:
16
+ enabled: true
17
+ query_samples:
18
+ enabled: true
19
+ explain_parameterized_queries: true
20
+ explain_function: datadog.explain_statement
21
+ collect_bloat_metrics: true
22
+ relations:
23
+ - relation_regex: .*
24
+ schemas:
25
+ - public
26
+ tags:
27
+ <%- config.datadog_tags.each do |tag| -%>
28
+ - <%= tag %>
29
+ <%- end -%>
@@ -0,0 +1,8 @@
1
+ local all all peer
2
+ host all all 127.0.0.1/32 scram-sha-256
3
+ host all all ::1/128 scram-sha-256
4
+ <%- config.allowed_cidrs.each do |cidr| -%>
5
+ hostssl all all <%= cidr %> scram-sha-256
6
+ <%- end -%>
7
+ host all all 0.0.0.0/0 reject
8
+ host all all ::/0 reject
@@ -0,0 +1,24 @@
1
+ [global]
2
+ repo1-type=s3
3
+ repo1-path=/pgbackrest/<%= config.pgbackrest_stanza %>
4
+ repo1-s3-bucket=<%= config.pgbackrest_s3_bucket %>
5
+ repo1-s3-endpoint=<%= config.pgbackrest_s3_endpoint %>
6
+ repo1-s3-region=<%= config.pgbackrest_s3_region %>
7
+ repo1-s3-key=<%= s3_key %>
8
+ repo1-s3-key-secret=<%= s3_secret %>
9
+ repo1-s3-uri-style=<%= config.pgbackrest_s3_uri_style %>
10
+ repo1-retention-full=<%= config.pgbackrest_retention_full %>
11
+ repo1-cipher-type=none
12
+
13
+ process-max=<%= process_max %>
14
+ compress-type=zst
15
+ compress-level=3
16
+ delta=y
17
+ start-fast=y
18
+
19
+ log-level-console=info
20
+ log-level-file=detail
21
+
22
+ [<%= config.pgbackrest_stanza %>]
23
+ pg1-path=<%= config.pg_data_dir %>
24
+ pg1-port=<%= config.pg_port %>
@@ -0,0 +1,13 @@
1
+ [databases]
2
+ <%= config.pg_database %> = host=127.0.0.1 port=<%= config.pg_port %> dbname=<%= config.pg_database %>
3
+
4
+ [pgbouncer]
5
+ listen_addr = *
6
+ listen_port = <%= config.pgbouncer_port %>
7
+ auth_type = scram-sha-256
8
+ auth_file = /etc/pgbouncer/userlist.txt
9
+ pool_mode = <%= config.pgbouncer_pool_mode %>
10
+ default_pool_size = <%= tuned[:pgbouncer_default_pool_size] || 25 %>
11
+ max_client_conn = <%= tuned[:pgbouncer_max_client_conn] || 400 %>
12
+ admin_users = postgres
13
+ stats_users = postgres
@@ -0,0 +1,80 @@
1
+ listen_addresses = '*'
2
+ port = <%= config.pg_port %>
3
+ max_connections = <%= tuned[:max_connections] %>
4
+ superuser_reserved_connections = <%= tuned[:superuser_reserved_connections] %>
5
+
6
+ tcp_keepalives_idle = 60
7
+ tcp_keepalives_interval = 10
8
+ tcp_keepalives_count = 6
9
+
10
+ shared_buffers = <%= tuned[:shared_buffers] %>
11
+ effective_cache_size = <%= tuned[:effective_cache_size] %>
12
+ work_mem = <%= tuned[:work_mem] %>
13
+ maintenance_work_mem = <%= tuned[:maintenance_work_mem] %>
14
+ wal_buffers = <%= tuned[:wal_buffers] %>
15
+ hash_mem_multiplier = <%= tuned[:hash_mem_multiplier] %>
16
+
17
+ huge_pages = <%= tuned[:huge_pages] %>
18
+
19
+ max_wal_size = <%= tuned[:max_wal_size] %>
20
+ min_wal_size = <%= tuned[:min_wal_size] %>
21
+ checkpoint_completion_target = <%= tuned[:checkpoint_completion_target] %>
22
+ checkpoint_timeout = <%= tuned[:checkpoint_timeout] %>
23
+ checkpoint_warning = <%= tuned[:checkpoint_warning] %>
24
+
25
+ wal_level = <%= tuned[:wal_level] %>
26
+ archive_timeout = <%= tuned[:archive_timeout] %>
27
+
28
+ random_page_cost = <%= tuned[:random_page_cost] %>
29
+ effective_io_concurrency = <%= tuned[:effective_io_concurrency] %>
30
+
31
+ max_parallel_workers_per_gather = <%= tuned[:max_parallel_workers_per_gather] %>
32
+ max_parallel_workers = <%= tuned[:max_parallel_workers] %>
33
+ max_worker_processes = <%= tuned[:max_worker_processes] %>
34
+ parallel_leader_participation = <%= tuned[:parallel_leader_participation] %>
35
+
36
+ autovacuum_max_workers = <%= tuned[:autovacuum_max_workers] %>
37
+ autovacuum_naptime = <%= tuned[:autovacuum_naptime] %>
38
+ autovacuum_vacuum_scale_factor = <%= tuned[:autovacuum_vacuum_scale_factor] %>
39
+ autovacuum_analyze_scale_factor = <%= tuned[:autovacuum_analyze_scale_factor] %>
40
+ autovacuum_vacuum_cost_limit = <%= tuned[:autovacuum_vacuum_cost_limit] %>
41
+ autovacuum_vacuum_cost_delay = <%= tuned[:autovacuum_vacuum_cost_delay] %>
42
+ autovacuum_work_mem = <%= tuned[:autovacuum_work_mem] %>
43
+ vacuum_buffer_usage_limit = <%= tuned[:vacuum_buffer_usage_limit] %>
44
+
45
+ bgwriter_lru_maxpages = <%= tuned[:bgwriter_lru_maxpages] %>
46
+
47
+ <%- unless preload_libraries.empty? -%>
48
+ shared_preload_libraries = '<%= preload_libraries.join(",") %>'
49
+ <%- end -%>
50
+
51
+ password_encryption = scram-sha-256
52
+
53
+ log_line_prefix = '<%= tuned[:log_line_prefix] %>'
54
+ log_min_duration_statement = 500
55
+ log_checkpoints = on
56
+ log_lock_waits = on
57
+ log_temp_files = 0
58
+ log_connections = on
59
+ log_disconnections = on
60
+
61
+ track_io_timing = on
62
+
63
+ pg_stat_statements.max = <%= tuned[:pg_stat_statements_max] %>
64
+ pg_stat_statements.track = <%= tuned[:pg_stat_statements_track] %>
65
+ pg_stat_statements.track_utility = <%= tuned[:pg_stat_statements_track_utility] %>
66
+
67
+ pg_prewarm.autoprewarm = on
68
+ pg_prewarm.autoprewarm_interval = 300
69
+
70
+ jit = off
71
+
72
+ data_directory = '<%= config.pg_data_dir %>'
73
+
74
+ restore_command = 'pgbackrest --stanza=<%= config.pgbackrest_stanza %> archive-get %f %p'
75
+
76
+ <%- config.postgres_overrides.each do |key, value| -%>
77
+ <%= key %> = <%= value %>
78
+ <%- end -%>
79
+
80
+ include_dir = 'conf.d'
data/exe/mahout ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "mahout"
5
+
6
+ Mahout::CLI.start(ARGV)
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
module Mahout
  # Triggers an on-demand pgBackRest backup for the configured stanza, runs
  # `pgbackrest check` afterwards and prints a short summary of the last
  # backup listed by `pgbackrest info`.
  class Backup
    # Backup types accepted by `pgbackrest backup --type`.
    BACKUP_TYPES = %w[full diff incr].freeze

    # runner: object used to execute commands; must respond to
    #         `run(command, sudo:)` and `dry_run?`.
    # config: object exposing `pgbackrest_enabled?` and `pgbackrest_stanza`.
    def initialize(runner:, config:)
      @runner = runner
      @config = config
    end

    # Runs the backup, the post-backup check and the info query, in that order.
    #
    # type: one of BACKUP_TYPES (String or Symbol); defaults to "full".
    #
    # Raises Error when pgbackrest is disabled in the config or when +type+
    # is not a known backup type. The type is validated because it is
    # interpolated into a shell command string.
    def call(type: "full")
      raise Error, "pgbackrest is disabled in config" unless @config.pgbackrest_enabled?

      type = type.to_s
      unless BACKUP_TYPES.include?(type)
        raise Error, "unknown backup type #{type.inspect} (expected one of: #{BACKUP_TYPES.join(", ")})"
      end

      stanza = @config.pgbackrest_stanza

      $stdout.puts("starting #{type} backup for stanza #{stanza}")

      # The commands already switch to the postgres user themselves, hence
      # sudo: false is passed to the runner.
      @runner.run(
        "sudo -u postgres pgbackrest --stanza=#{stanza} --type=#{type} backup",
        sudo: false
      )

      @runner.run(
        "sudo -u postgres pgbackrest --stanza=#{stanza} check",
        sudo: false
      )

      result = @runner.run(
        "sudo -u postgres pgbackrest --stanza=#{stanza} info --output json",
        sudo: false
      )

      $stdout.puts("backup complete")
      # In dry-run mode there is no real command output to summarize.
      print_summary(result) unless @runner.dry_run?
    end

    private

    # Prints type, start timestamp and size of the last backup found in the
    # JSON produced by `pgbackrest info`. Prints nothing when no stanza or
    # no backup is listed.
    def print_summary(result)
      info = JSON.parse(result.stdout)
      return if info.empty?

      backups = info.first["backup"]
      return if backups.nil? || backups.empty?

      latest = backups.last
      # dig keeps the summary best-effort: a missing nested field prints as
      # blank instead of raising NoMethodError after a successful backup.
      $stdout.puts("  type: #{latest["type"]}")
      $stdout.puts("  timestamp: #{latest.dig("timestamp", "start")}")
      $stdout.puts("  size: #{latest.dig("info", "size")}")
    end
  end
end