@rulebricks/cli 2.1.7 → 2.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -16
- package/cluster-setup/aws/README.md +96 -47
- package/cluster-setup/aws/check-aws-access.sh +216 -52
- package/cluster-setup/aws/parameters.json +13 -0
- package/cluster-setup/aws/rulebricks-cluster.cfn.yaml +355 -0
- package/cluster-setup/azure/README.md +103 -55
- package/cluster-setup/azure/check-aks-prereqs.sh +236 -56
- package/cluster-setup/azure/parameters.json +30 -0
- package/cluster-setup/azure/rulebricks-cluster.bicep +546 -0
- package/cluster-setup/gcp/README.md +51 -34
- package/cluster-setup/gcp/check-gke-prereqs.sh +222 -60
- package/dist/commands/backup.d.ts +5 -0
- package/dist/commands/backup.js +104 -0
- package/dist/commands/deploy.d.ts +3 -1
- package/dist/commands/deploy.js +226 -326
- package/dist/commands/destroy.d.ts +1 -1
- package/dist/commands/destroy.js +73 -123
- package/dist/commands/init.d.ts +5 -1
- package/dist/commands/init.js +78 -54
- package/dist/commands/list.d.ts +1 -0
- package/dist/commands/list.js +74 -0
- package/dist/commands/open.d.ts +1 -1
- package/dist/commands/open.js +4 -12
- package/dist/commands/redeploy.d.ts +6 -0
- package/dist/commands/redeploy.js +310 -0
- package/dist/commands/restore.d.ts +5 -0
- package/dist/commands/restore.js +338 -0
- package/dist/commands/status.js +62 -49
- package/dist/commands/upgrade.js +74 -51
- package/dist/components/DNSWaitScreen.d.ts +5 -1
- package/dist/components/DNSWaitScreen.js +47 -41
- package/dist/components/Wizard/WizardContext.d.ts +157 -36
- package/dist/components/Wizard/WizardContext.js +872 -160
- package/dist/components/Wizard/steps/CloudProviderStep.js +192 -107
- package/dist/components/Wizard/steps/DomainStep.js +5 -24
- package/dist/components/Wizard/steps/ExternalServicesStep.d.ts +6 -0
- package/dist/components/Wizard/steps/ExternalServicesStep.js +645 -0
- package/dist/components/Wizard/steps/FeatureConfigStep.d.ts +2 -1
- package/dist/components/Wizard/steps/FeatureConfigStep.js +739 -425
- package/dist/components/Wizard/steps/FeaturesStep.js +31 -35
- package/dist/components/Wizard/steps/ObservabilityStep.d.ts +6 -0
- package/dist/components/Wizard/steps/ObservabilityStep.js +137 -0
- package/dist/components/Wizard/steps/ReviewStep.d.ts +2 -1
- package/dist/components/Wizard/steps/ReviewStep.js +56 -12
- package/dist/components/Wizard/steps/StorageStep.d.ts +9 -0
- package/dist/components/Wizard/steps/StorageStep.js +592 -0
- package/dist/components/Wizard/steps/SupabaseCredentialsStep.js +20 -21
- package/dist/components/Wizard/steps/VersionStep.js +45 -23
- package/dist/components/Wizard/steps/index.d.ts +3 -3
- package/dist/components/Wizard/steps/index.js +3 -3
- package/dist/components/common/CommandApproval.d.ts +12 -0
- package/dist/components/common/CommandApproval.js +91 -0
- package/dist/components/common/DeploymentPicker.d.ts +14 -0
- package/dist/components/common/DeploymentPicker.js +16 -0
- package/dist/components/common/index.d.ts +2 -0
- package/dist/components/common/index.js +2 -0
- package/dist/index.js +94 -62
- package/dist/lib/cloudCli.d.ts +134 -63
- package/dist/lib/cloudCli.js +512 -220
- package/dist/lib/clusterSetupDefaults.d.ts +30 -0
- package/dist/lib/clusterSetupDefaults.js +64 -0
- package/dist/lib/commandApproval.d.ts +26 -0
- package/dist/lib/commandApproval.js +114 -0
- package/dist/lib/config.d.ts +12 -10
- package/dist/lib/config.js +91 -33
- package/dist/lib/configFixtures.d.ts +5 -0
- package/dist/lib/configFixtures.js +513 -0
- package/dist/lib/deploymentHealth.d.ts +32 -0
- package/dist/lib/deploymentHealth.js +157 -0
- package/dist/lib/dns.d.ts +1 -1
- package/dist/lib/dns.js +19 -1
- package/dist/lib/dns.test.d.ts +1 -0
- package/dist/lib/dns.test.js +27 -0
- package/dist/lib/dockerHub.d.ts +12 -1
- package/dist/lib/dockerHub.js +18 -8
- package/dist/lib/helm.d.ts +4 -0
- package/dist/lib/helm.js +16 -0
- package/dist/lib/helmValues.d.ts +25 -0
- package/dist/lib/helmValues.js +1841 -289
- package/dist/lib/helmValues.test.d.ts +1 -0
- package/dist/lib/helmValues.test.js +1012 -0
- package/dist/lib/htpasswd.d.ts +1 -0
- package/dist/lib/htpasswd.js +15 -0
- package/dist/lib/kubernetes.d.ts +124 -17
- package/dist/lib/kubernetes.js +576 -145
- package/dist/lib/secrets.d.ts +23 -0
- package/dist/lib/secrets.js +158 -0
- package/dist/lib/validateValues.d.ts +31 -0
- package/dist/lib/validateValues.js +253 -0
- package/dist/lib/versions.d.ts +82 -11
- package/dist/lib/versions.js +131 -31
- package/dist/lib/versions.test.d.ts +1 -0
- package/dist/lib/versions.test.js +81 -0
- package/dist/lib/wizardSteps.d.ts +14 -0
- package/dist/lib/wizardSteps.js +23 -0
- package/dist/lib/workloadIdentity.d.ts +26 -0
- package/dist/lib/workloadIdentity.js +323 -0
- package/dist/lib/workloadIdentity.test.d.ts +1 -0
- package/dist/lib/workloadIdentity.test.js +57 -0
- package/dist/types/index.d.ts +1860 -164
- package/dist/types/index.js +518 -295
- package/package.json +9 -4
- package/schema/values.schema.json +1934 -0
- package/cluster-setup/aws/cluster.yaml +0 -33
- package/cluster-setup/azure/main.bicep +0 -282
- package/cluster-setup/azure/main.parameters.json +0 -21
- package/dist/components/Wizard/steps/CredentialsStep.d.ts +0 -6
- package/dist/components/Wizard/steps/CredentialsStep.js +0 -22
- package/dist/components/Wizard/steps/DeploymentModeStep.d.ts +0 -5
- package/dist/components/Wizard/steps/DeploymentModeStep.js +0 -26
- package/dist/components/Wizard/steps/TierStep.d.ts +0 -6
- package/dist/components/Wizard/steps/TierStep.js +0 -29
- package/dist/lib/terraform.d.ts +0 -66
- package/dist/lib/terraform.js +0 -754
- package/terraform/aws/main.tf +0 -355
- package/terraform/azure/main.tf +0 -371
- package/terraform/gcp/main.tf +0 -407
package/dist/lib/helmValues.js
CHANGED
|
@@ -1,7 +1,103 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { getReleaseName, isSupportedDnsProvider, validateRemoteWriteConfig, } from "../types/index.js";
|
|
2
2
|
import { saveHelmValues, getHelmValuesPath } from "./config.js";
|
|
3
|
+
import { assertValidHelmValues } from "./validateValues.js";
|
|
4
|
+
import { SUPABASE_POSTGRES_IMAGE_REPOSITORY, SUPABASE_POSTGRES_IMAGE_TAG, DEFAULT_IMAGE_REGISTRY, IMAGE_REPOSITORIES, IMAGE_DIGESTS, KAFKA_PROXY_IMAGE, } from "./versions.js";
|
|
5
|
+
import { createHmac } from "crypto";
|
|
3
6
|
import fs from "fs/promises";
|
|
4
7
|
import YAML from "yaml";
|
|
8
|
+
/**
 * Names of the Kubernetes Secrets the CLI creates in k8s secret mode.
 *
 * Shared by the value generator (which sets the secretRef fields) and
 * src/lib/secrets.ts (which creates the Secrets) so the two always agree.
 *
 * The base MUST be the Helm release name, not config.name. Most chart
 * consumers read the secretRef *value* (name-agnostic), but a few templates
 * hardcode the canonical <release>-* name - e.g. templates/migration-job.yaml
 * derives DB_PASSWORD from `{{ .Release.Name }}-supabase-db`. Naming these
 * secrets after the release keeps the CLI a faithful drop-in for the
 * unmodified chart, so the chart never has to be customized to match the CLI.
 *
 * @param config Deployment config; only `config.name` is read, and it is
 *   passed through getReleaseName() to obtain the base.
 * @returns An object with keys app, db, dbBootstrap, jwt, dashboard, realtime
 *   and smtp (in that order), each valued `<release>-<suffix>`.
 */
export function deploymentSecretNames(config) {
    const releaseName = getReleaseName(config.name);
    // Insertion order here is the key order of the returned object.
    const suffixByKey = {
        app: "app-secrets",
        db: "supabase-db",
        dbBootstrap: "supabase-db-bootstrap",
        jwt: "supabase-jwt",
        dashboard: "supabase-dashboard",
        realtime: "supabase-realtime",
        smtp: "supabase-smtp",
    };
    return Object.fromEntries(Object.entries(suffixByKey).map(([key, suffix]) => [key, `${releaseName}-${suffix}`]));
}
|
|
30
|
+
// Baseline Kafka topic partitioning. These are NOT user-tunable sizing knobs
// (tiers were removed); they are a structural contract that must stay
// consistent across three places at once:
//   1. the kafka.provisioning topic partitions,
//   2. rulebricks.hps.workers.solutionPartitions (the worker-fleet concurrency
//      ceiling the chart cross-checks), and
//   3. the worker KEDA maxReplicaCount (validated to be <= solutionPartitions).
// They mirror the Helm chart's own defaults, so operators who need a different
// size tune the chart values directly. Partitions can never be decreased, so
// the solution topic is sized with generous headroom up front; idle partitions
// are effectively free.
const SOLUTION_TOPIC_PARTITIONS = 128;
const LOGS_TOPIC_PARTITIONS = 24;

// RPC + log topics: replication factor 1. RPC traffic is transient and
// latency-sensitive (the HPS producer's acks=-1 would otherwise wait on full
// ISR replication); the in-cluster broker is single-replica by default.
const TOPIC_REPLICATION_FACTOR = 1;

// global.version must be empty or a semantic version per the chart schema. The
// CLI normally pins a real version, but migrated/legacy configs can carry
// "latest"; emitting that would fail chart validation, so it is omitted instead
// and the chart falls back to its default.
const SEMVER_PATTERN = /^\d+\.\d+\.\d+(-[0-9A-Za-z.-]+)?$/;

// Healthy defaults for the decision-log archive that ClickHouse reads: flush a
// gzipped NDJSON file at ~64 MiB (uncompressed) or after 5 minutes, whichever
// comes first. Users can override these in their Helm values.
//
// max_bytes MUST stay well below the Vector pod's memory limit
// (vector.resources.limits.memory in the chart): the object-storage sink
// buffers the whole uncompressed batch in memory before it flushes, so a batch
// sized at or above the pod limit gets OOMKilled before it can ever write a
// blob - which silently disables decision-log export entirely. 64 MiB leaves
// comfortable headroom under the chart's 1 GiB Vector limit while still
// producing large, scan-efficient files for ClickHouse.
const DECISION_LOG_BATCH = { max_bytes: 64 * 1024 * 1024, timeout_secs: 5 * 60 };

// Fixed `iat` / `exp` claims (Unix seconds) that signSupabaseJwt() stamps into
// every Supabase API key it signs.
const SUPABASE_JWT_ISSUED_AT = 1641769200; // 2022-01-09T23:00:00Z
const SUPABASE_JWT_EXPIRES_AT = 4102444800; // 2100-01-01T00:00:00Z
|
|
64
|
+
// VRL that normalizes the Kafka decision-log envelope into the ClickHouse column
// types. Inlined as a real multi-line string (not a chart `{{ include }}`) so
// that YAML.stringify / Helm's toYaml emit it as a block scalar. A templated
// single-line include gets rendered into a single-quoted YAML scalar, whose
// newlines YAML folds into spaces - collapsing the statements onto one line and
// breaking VRL parsing. Keep in sync with rulebricks.vector.normalizeLogs.
//
// Written as a template literal: every line below is one VRL statement, and the
// string has no leading or trailing newline.
const VECTOR_NORMALIZE_LOGS_VRL = `parsed, err = parse_json(string!(.message))
if err == null {
 . = parsed
}
.timestamp = parse_timestamp!(to_string(.timestamp) ?? to_string(now()), format: "%+")
.api_key = to_string(.api_key) ?? ""
.user_id = to_string(.user_id) ?? null
.environment = to_string(.environment) ?? null
.ip = to_string(.ip) ?? null
.method = to_string(.method) ?? null
.url = to_string(.url) ?? ""
.status = to_int(.status) ?? 0
.rule_name = to_string(.rule_name) ?? null
.rule_id = to_string(.rule_id) ?? null
.rule_slug = to_string(.rule_slug) ?? null
.rule_version = to_string(.rule_version) ?? null
.operation = to_string(.operation) ?? null
.level = to_string(.level) ?? "info"
.error = to_string(.error) ?? null
.trace_id = to_string(.trace_id) ?? null
.span_id = to_string(.span_id) ?? null
.request = to_string(.request) ?? "null"
.response = to_string(.response) ?? "null"
.decision = to_string(.decision) ?? "{}"
.params = to_string(.params) ?? "{}"`;
|
|
97
|
+
/**
 * Builds the object-storage key prefix used for decision-log files.
 *
 * The base path comes from `config.storage.paths.decisionLogs` (default
 * "decision-logs") with leading and trailing slashes removed, followed by a
 * Hive-style `year=/month=/day=/hour=` partition suffix. The strftime-style
 * placeholders are emitted verbatim here.
 *
 * A configured path that consists only of slashes (e.g. "/") is treated as
 * unset: stripping it would otherwise leave an empty base and produce a prefix
 * that starts with "/", i.e. an empty leading path segment in the bucket.
 *
 * @param config Deployment config; only `config.storage?.paths?.decisionLogs`
 *   is read.
 * @returns {string} The prefix, always ending in "/" and never starting with one.
 */
function decisionLogPathPrefix(config) {
    const defaultPath = "decision-logs";
    const configuredPath = config.storage?.paths?.decisionLogs || defaultPath;
    // Fall back again after trimming so "/" or "//" cannot yield an empty base.
    const basePath = configuredPath.replace(/^\/+|\/+$/g, "") || defaultPath;
    return `${basePath}/year=%Y/month=%m/day=%d/hour=%H/`;
}
|
|
5
101
|
/**
|
|
6
102
|
* Generates Vector sink configuration based on logging settings
|
|
7
103
|
*/
|
|
@@ -10,84 +106,85 @@ function generateVectorSinks(config) {
|
|
|
10
106
|
// Console sink is always enabled
|
|
11
107
|
console: {
|
|
12
108
|
type: "console",
|
|
13
|
-
inputs: ["
|
|
109
|
+
inputs: ["normalize_logs"],
|
|
14
110
|
encoding: {
|
|
15
111
|
codec: "json",
|
|
16
112
|
},
|
|
17
113
|
},
|
|
18
114
|
};
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
config.
|
|
22
|
-
const { sink, bucket, region } = config.features.logging;
|
|
23
|
-
switch (sink) {
|
|
24
|
-
// Cloud Storage sinks
|
|
115
|
+
if (config.storage) {
|
|
116
|
+
const storage = config.storage;
|
|
117
|
+
switch (config.storage.provider) {
|
|
25
118
|
case "s3":
|
|
26
|
-
sinks.
|
|
119
|
+
sinks.decision_logs = {
|
|
27
120
|
type: "aws_s3",
|
|
28
|
-
inputs: ["
|
|
29
|
-
bucket: bucket,
|
|
30
|
-
region: region,
|
|
31
|
-
key_prefix:
|
|
121
|
+
inputs: ["normalize_logs"],
|
|
122
|
+
bucket: storage.bucket,
|
|
123
|
+
region: storage.region,
|
|
124
|
+
key_prefix: decisionLogPathPrefix(config),
|
|
125
|
+
filename_extension: "ndjson",
|
|
32
126
|
compression: "gzip",
|
|
33
|
-
encoding: {
|
|
34
|
-
|
|
35
|
-
},
|
|
127
|
+
encoding: { codec: "json" },
|
|
128
|
+
framing: { method: "newline_delimited" },
|
|
129
|
+
batch: { ...DECISION_LOG_BATCH },
|
|
36
130
|
};
|
|
37
131
|
break;
|
|
38
|
-
case "azure-blob":
|
|
39
|
-
|
|
40
|
-
throw new Error("Azure Blob logging requires a storage account.");
|
|
41
|
-
}
|
|
42
|
-
const azureBlobSink = {
|
|
132
|
+
case "azure-blob": {
|
|
133
|
+
const sink = {
|
|
43
134
|
type: "azure_blob",
|
|
44
|
-
inputs: ["
|
|
45
|
-
account_name: bucket,
|
|
46
|
-
container_name:
|
|
47
|
-
blob_prefix:
|
|
135
|
+
inputs: ["normalize_logs"],
|
|
136
|
+
account_name: storage.bucket,
|
|
137
|
+
container_name: storage.azureBlobContainer || "rulebricks",
|
|
138
|
+
blob_prefix: decisionLogPathPrefix(config),
|
|
139
|
+
// azure_blob has no filename_extension (unlike aws_s3/gcs); it always
|
|
140
|
+
// writes ".log" (".log.gz" when compressed). ClickHouse globs on *.gz.
|
|
48
141
|
compression: "gzip",
|
|
49
|
-
encoding: {
|
|
50
|
-
|
|
51
|
-
},
|
|
142
|
+
encoding: { codec: "json" },
|
|
143
|
+
framing: { method: "newline_delimited" },
|
|
144
|
+
batch: { ...DECISION_LOG_BATCH },
|
|
52
145
|
};
|
|
53
|
-
if (config.
|
|
54
|
-
|
|
55
|
-
throw new Error("Azure Blob connection string auth requires a secret ref.");
|
|
56
|
-
}
|
|
57
|
-
azureBlobSink.connection_string = "${AZURE_STORAGE_CONNECTION_STRING}";
|
|
146
|
+
if (config.storage.cloudAuthMode === "secret") {
|
|
147
|
+
sink.connection_string = "${AZURE_STORAGE_CONNECTION_STRING}";
|
|
58
148
|
}
|
|
59
149
|
else {
|
|
60
|
-
|
|
61
|
-
!config.features.logging.azureBlobTenantId) {
|
|
62
|
-
throw new Error("Azure Blob workload identity requires client ID and tenant ID.");
|
|
63
|
-
}
|
|
64
|
-
azureBlobSink.auth = {
|
|
150
|
+
sink.auth = {
|
|
65
151
|
azure_credential_kind: "workload_identity",
|
|
66
|
-
client_id: config.
|
|
67
|
-
tenant_id: config.
|
|
152
|
+
client_id: config.storage.azureBlobClientId,
|
|
153
|
+
tenant_id: config.storage.azureBlobTenantId,
|
|
68
154
|
token_file_path: "/var/run/secrets/azure/tokens/azure-identity-token",
|
|
69
155
|
};
|
|
70
156
|
}
|
|
71
|
-
sinks.
|
|
157
|
+
sinks.decision_logs = sink;
|
|
72
158
|
break;
|
|
159
|
+
}
|
|
73
160
|
case "gcs":
|
|
74
|
-
sinks.
|
|
161
|
+
sinks.decision_logs = {
|
|
75
162
|
type: "gcp_cloud_storage",
|
|
76
|
-
inputs: ["
|
|
77
|
-
bucket: bucket,
|
|
78
|
-
key_prefix:
|
|
163
|
+
inputs: ["normalize_logs"],
|
|
164
|
+
bucket: storage.bucket,
|
|
165
|
+
key_prefix: decisionLogPathPrefix(config),
|
|
166
|
+
filename_extension: "ndjson",
|
|
79
167
|
compression: "gzip",
|
|
80
|
-
encoding: {
|
|
81
|
-
|
|
82
|
-
},
|
|
168
|
+
encoding: { codec: "json" },
|
|
169
|
+
framing: { method: "newline_delimited" },
|
|
170
|
+
batch: { ...DECISION_LOG_BATCH },
|
|
83
171
|
};
|
|
84
172
|
break;
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
// Add external logging-platform sink if configured. Decision logs always go
|
|
176
|
+
// to object storage via the decision_logs sink above; this is an additional
|
|
177
|
+
// platform destination (Datadog, Splunk, etc.).
|
|
178
|
+
if (config.features.logging.sink !== "console" &&
|
|
179
|
+
config.features.logging.sink !== "pending") {
|
|
180
|
+
const { sink, bucket, region } = config.features.logging;
|
|
181
|
+
switch (sink) {
|
|
85
182
|
// Logging platform sinks
|
|
86
183
|
// For platforms, bucket is repurposed for API key/token, region for site/URL
|
|
87
184
|
case "datadog":
|
|
88
185
|
sinks.datadog = {
|
|
89
186
|
type: "datadog_logs",
|
|
90
|
-
inputs: ["
|
|
187
|
+
inputs: ["normalize_logs"],
|
|
91
188
|
default_api_key: bucket, // API key stored in bucket field
|
|
92
189
|
site: region || "datadoghq.com", // Site stored in region field
|
|
93
190
|
compression: "gzip",
|
|
@@ -99,7 +196,7 @@ function generateVectorSinks(config) {
|
|
|
99
196
|
case "splunk":
|
|
100
197
|
sinks.splunk = {
|
|
101
198
|
type: "splunk_hec_logs",
|
|
102
|
-
inputs: ["
|
|
199
|
+
inputs: ["normalize_logs"],
|
|
103
200
|
endpoint: region, // URL stored in region field
|
|
104
201
|
default_token: bucket, // HEC token stored in bucket field
|
|
105
202
|
compression: "gzip",
|
|
@@ -114,7 +211,7 @@ function generateVectorSinks(config) {
|
|
|
114
211
|
const esConfig = JSON.parse(bucket || "{}");
|
|
115
212
|
sinks.elasticsearch = {
|
|
116
213
|
type: "elasticsearch",
|
|
117
|
-
inputs: ["
|
|
214
|
+
inputs: ["normalize_logs"],
|
|
118
215
|
endpoints: [esConfig.url],
|
|
119
216
|
bulk: {
|
|
120
217
|
index: esConfig.index || "rulebricks-logs",
|
|
@@ -134,7 +231,7 @@ function generateVectorSinks(config) {
|
|
|
134
231
|
// Fallback if JSON parsing fails
|
|
135
232
|
sinks.elasticsearch = {
|
|
136
233
|
type: "elasticsearch",
|
|
137
|
-
inputs: ["
|
|
234
|
+
inputs: ["normalize_logs"],
|
|
138
235
|
endpoints: [bucket],
|
|
139
236
|
bulk: {
|
|
140
237
|
index: region || "rulebricks-logs",
|
|
@@ -145,7 +242,7 @@ function generateVectorSinks(config) {
|
|
|
145
242
|
case "loki":
|
|
146
243
|
sinks.loki = {
|
|
147
244
|
type: "loki",
|
|
148
|
-
inputs: ["
|
|
245
|
+
inputs: ["normalize_logs"],
|
|
149
246
|
endpoint: bucket, // Loki URL stored in bucket field
|
|
150
247
|
labels: {
|
|
151
248
|
app: "rulebricks",
|
|
@@ -159,7 +256,7 @@ function generateVectorSinks(config) {
|
|
|
159
256
|
case "newrelic":
|
|
160
257
|
sinks.newrelic = {
|
|
161
258
|
type: "new_relic",
|
|
162
|
-
inputs: ["
|
|
259
|
+
inputs: ["normalize_logs"],
|
|
163
260
|
license_key: bucket, // License key stored in bucket field
|
|
164
261
|
account_id: region, // Account ID stored in region field
|
|
165
262
|
api: "logs",
|
|
@@ -172,7 +269,7 @@ function generateVectorSinks(config) {
|
|
|
172
269
|
case "axiom":
|
|
173
270
|
sinks.axiom = {
|
|
174
271
|
type: "axiom",
|
|
175
|
-
inputs: ["
|
|
272
|
+
inputs: ["normalize_logs"],
|
|
176
273
|
token: bucket, // API token stored in bucket field
|
|
177
274
|
dataset: region || "rulebricks", // Dataset stored in region field
|
|
178
275
|
compression: "gzip",
|
|
@@ -186,20 +283,32 @@ function generateVectorSinks(config) {
|
|
|
186
283
|
return sinks;
|
|
187
284
|
}
|
|
188
285
|
function generateVectorEnv(config) {
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
286
|
+
// Kafka connection settings come from the templated vector-kafka-env ConfigMap
|
|
287
|
+
// so the in-cluster vs external (and bridge) decision lives in one place.
|
|
288
|
+
const configMapKeys = [
|
|
289
|
+
"KAFKA_BOOTSTRAP_SERVERS",
|
|
290
|
+
"KAFKA_TLS_ENABLED",
|
|
291
|
+
"KAFKA_SASL_ENABLED",
|
|
292
|
+
"KAFKA_SASL_MECHANISM",
|
|
293
|
+
"KAFKA_LOG_TOPIC",
|
|
294
|
+
];
|
|
295
|
+
const env = configMapKeys.map((key) => ({
|
|
296
|
+
name: key,
|
|
297
|
+
valueFrom: { configMapKeyRef: { name: "vector-kafka-env", key } },
|
|
298
|
+
}));
|
|
299
|
+
// SASL credentials (inline PLAIN/SCRAM). Optional so in-cluster/token-auth
|
|
300
|
+
// deploys work without the secret existing.
|
|
301
|
+
for (const key of ["KAFKA_SASL_USERNAME", "KAFKA_SASL_PASSWORD"]) {
|
|
302
|
+
env.push({
|
|
303
|
+
name: key,
|
|
192
304
|
valueFrom: {
|
|
193
|
-
|
|
194
|
-
name: "vector-kafka-env",
|
|
195
|
-
key: "KAFKA_BOOTSTRAP_SERVERS",
|
|
196
|
-
},
|
|
305
|
+
secretKeyRef: { name: "vector-kafka-credentials", key, optional: true },
|
|
197
306
|
},
|
|
198
|
-
}
|
|
199
|
-
|
|
200
|
-
const azureBlobSecretRef = config.
|
|
201
|
-
if (config.
|
|
202
|
-
config.
|
|
307
|
+
});
|
|
308
|
+
}
|
|
309
|
+
const azureBlobSecretRef = config.storage?.azureBlobConnectionStringSecretRef;
|
|
310
|
+
if (config.storage?.provider === "azure-blob" &&
|
|
311
|
+
config.storage.cloudAuthMode === "secret" &&
|
|
203
312
|
azureBlobSecretRef) {
|
|
204
313
|
env.push({
|
|
205
314
|
name: "AZURE_STORAGE_CONNECTION_STRING",
|
|
@@ -211,20 +320,20 @@ function generateVectorEnv(config) {
|
|
|
211
320
|
return env;
|
|
212
321
|
}
|
|
213
322
|
function generateVectorServiceAccount(config) {
|
|
323
|
+
// AWS uses EKS Pod Identity: NO eks.amazonaws.com/role-arn annotation - the
|
|
324
|
+
// CLI's workload-identity step creates a namespace-scoped association for this
|
|
325
|
+
// SA (to a role granting both the object-storage and MSK access Vector needs).
|
|
326
|
+
// Azure/GCP still annotate the SA, which is how their workload identity binds.
|
|
214
327
|
const annotations = {};
|
|
215
|
-
if (config.
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
}
|
|
219
|
-
if (config.features.logging.sink === "azure-blob" &&
|
|
220
|
-
config.features.logging.cloudAuthMode !== "secret" &&
|
|
221
|
-
config.features.logging.azureBlobClientId) {
|
|
328
|
+
if (config.storage?.provider === "azure-blob" &&
|
|
329
|
+
config.storage.cloudAuthMode !== "secret" &&
|
|
330
|
+
config.storage.azureBlobClientId) {
|
|
222
331
|
annotations["azure.workload.identity/client-id"] =
|
|
223
|
-
config.
|
|
332
|
+
config.storage.azureBlobClientId;
|
|
224
333
|
}
|
|
225
|
-
if (config.
|
|
334
|
+
if (config.storage?.provider === "gcs" && config.storage.gcpServiceAccountEmail) {
|
|
226
335
|
annotations["iam.gke.io/gcp-service-account"] =
|
|
227
|
-
config.
|
|
336
|
+
config.storage.gcpServiceAccountEmail;
|
|
228
337
|
}
|
|
229
338
|
return {
|
|
230
339
|
create: true,
|
|
@@ -233,9 +342,11 @@ function generateVectorServiceAccount(config) {
|
|
|
233
342
|
};
|
|
234
343
|
}
|
|
235
344
|
function generateVectorPodLabels(config) {
|
|
236
|
-
const labels = {
|
|
237
|
-
|
|
238
|
-
|
|
345
|
+
const labels = {
|
|
346
|
+
"rulebricks.com/workload-group": "infrastructure",
|
|
347
|
+
};
|
|
348
|
+
if (config.storage?.provider === "azure-blob" &&
|
|
349
|
+
config.storage.cloudAuthMode !== "secret") {
|
|
239
350
|
labels["azure.workload.identity/use"] = "true";
|
|
240
351
|
}
|
|
241
352
|
return labels;
|
|
@@ -258,6 +369,48 @@ function secretKeySelector(ref) {
|
|
|
258
369
|
key: ref.key,
|
|
259
370
|
};
|
|
260
371
|
}
|
|
372
|
+
/**
 * Serializes a value to JSON and returns the base64url encoding (URL-safe
 * alphabet, no padding) of that JSON text.
 *
 * @param value Any JSON-serializable value.
 * @returns {string} base64url text of `JSON.stringify(value)`.
 */
function base64UrlJson(value) {
    const jsonText = JSON.stringify(value);
    const jsonBytes = Buffer.from(jsonText);
    return jsonBytes.toString("base64url");
}
|
|
375
|
+
/**
 * Signs a Supabase API key.
 *
 * Self-hosted Supabase derives the anon and service_role API keys from the JWT
 * secret: each is an HS256 JWT (role: anon / service_role) signed with the
 * secret.
 * https://supabase.com/docs/guides/self-hosting/self-hosted-auth-keys
 *
 * The claims are `role`, `iss: "supabase"`, and the fixed
 * SUPABASE_JWT_ISSUED_AT / SUPABASE_JWT_EXPIRES_AT timestamps, so the same
 * (role, secret) pair always yields the same token.
 *
 * @param role Value of the `role` claim.
 * @param secret HMAC-SHA256 key used to sign the token.
 * @returns {string} Compact JWT: `<header>.<payload>.<signature>`, base64url.
 */
export function signSupabaseJwt(role, secret) {
    const claims = {
        role,
        iss: "supabase",
        iat: SUPABASE_JWT_ISSUED_AT,
        exp: SUPABASE_JWT_EXPIRES_AT,
    };
    const signingInput = [
        base64UrlJson({ alg: "HS256", typ: "JWT" }),
        base64UrlJson(claims),
    ].join(".");
    const mac = createHmac("sha256", secret);
    mac.update(signingInput);
    return [signingInput, mac.digest("base64url")].join(".");
}
|
|
390
|
+
/**
 * Derives the two extra secrets Supabase Realtime needs.
 *
 * Realtime needs SECRET_KEY_BASE (signs/encrypts its tokens) and a 16-byte
 * DB_ENC_KEY (encrypts tenant DB creds). Both are derived deterministically
 * from the JWT secret so they are stable across redeploys with no extra state
 * to persist, and anchored to the one root secret the operator already manages.
 *
 * Each value is the hex HMAC-SHA256 of a fixed label, keyed with the JWT secret.
 *
 * @param jwtSecret HMAC key (the Supabase JWT secret).
 * @returns {{ secretKeyBase: string, dbEncKey: string }} `secretKeyBase` is the
 *   full 64-character hex digest; `dbEncKey` is the first 16 hex characters of
 *   its digest.
 */
export function deriveRealtimeSecrets(jwtSecret) {
    const hexMacOf = (label) => createHmac("sha256", jwtSecret).update(label).digest("hex");
    return {
        secretKeyBase: hexMacOf("supabase-realtime-secret-key-base"), // 64 chars
        // Realtime requires exactly 16 bytes
        dbEncKey: hexMacOf("supabase-realtime-db-enc-key").substring(0, 16),
    };
}
|
|
404
|
+
/**
 * Normalizes a remote_write URL: removes every ASCII control character
 * (U+0000-U+001F and U+007F) anywhere in the string, then trims surrounding
 * whitespace.
 *
 * The usual culprit is the trailing carriage return that sneaks in when the URL
 * is pasted from a CRLF file or captured from command output. A stray "\r"
 * corrupts the URL the Prometheus operator hands to remote_write, so it is
 * normalized at the source.
 *
 * @param {string} url Raw URL text.
 * @returns {string} The cleaned URL.
 */
function sanitizeRemoteWriteUrl(url) {
    // eslint-disable-next-line no-control-regex
    const controlChars = /[\u0000-\u001F\u007F]/g;
    const withoutControlChars = url.replace(controlChars, "");
    return withoutControlChars.trim();
}
|
|
261
414
|
function generateRemoteWriteSpec(config) {
|
|
262
415
|
if (config.features.monitoring.destination === "local-grafana") {
|
|
263
416
|
return [];
|
|
@@ -265,11 +418,18 @@ function generateRemoteWriteSpec(config) {
|
|
|
265
418
|
const remoteWrite = config.features.monitoring.remoteWrite;
|
|
266
419
|
if (!remoteWrite) {
|
|
267
420
|
return config.features.monitoring.remoteWriteUrl
|
|
268
|
-
? [{ url: config.features.monitoring.remoteWriteUrl }]
|
|
421
|
+
? [{ url: sanitizeRemoteWriteUrl(config.features.monitoring.remoteWriteUrl) }]
|
|
269
422
|
: [];
|
|
270
423
|
}
|
|
424
|
+
// Enforce the same per-destination/auth requirements the wizard and Zod
|
|
425
|
+
// schema do. This is unreachable for CLI-generated configs (they are gated
|
|
426
|
+
// earlier) but guards hand-edited values and keeps one source of truth.
|
|
427
|
+
const remoteWriteErrors = validateRemoteWriteConfig(remoteWrite);
|
|
428
|
+
if (remoteWriteErrors.length > 0) {
|
|
429
|
+
throw new Error(remoteWriteErrors.join(" "));
|
|
430
|
+
}
|
|
271
431
|
const base = {
|
|
272
|
-
url: remoteWrite.url,
|
|
432
|
+
url: sanitizeRemoteWriteUrl(remoteWrite.url),
|
|
273
433
|
};
|
|
274
434
|
switch (remoteWrite.destination) {
|
|
275
435
|
case "aws-amp":
|
|
@@ -294,12 +454,125 @@ function generateRemoteWriteSpec(config) {
|
|
|
294
454
|
return [base];
|
|
295
455
|
}
|
|
296
456
|
}
|
|
457
|
+
/**
 * Reports whether ClickStack is enabled for this deployment.
 *
 * Reads `config.features.observability.clickstack.enabled`; when the
 * observability section, the clickstack section, or the flag itself is missing
 * (null/undefined), ClickStack counts as enabled.
 *
 * @param config Deployment config; `config.features` must exist.
 * @returns The configured flag, or `true` when it is not set.
 */
function isClickStackEnabled(config) {
    const explicitFlag = config.features.observability?.clickstack?.enabled;
    if (explicitFlag === undefined || explicitFlag === null) {
        return true;
    }
    return explicitFlag;
}
|
|
460
|
+
/**
 * Builds the values object for the clickstack subchart: ClickHouse settings,
 * HyperDX, the OTel collector (agent + gateway), and FerretDB.
 *
 * @param enabled Emitted as the top-level `enabled` flag and reused for
 *   hyperdx, the hyperdx ingress, the collector agent, ferretdb, and ferretdb
 *   persistence.
 * @param config Deployment config. Reads
 *   `features.observability.clickstack.telemetryRetentionDays` (default 7) and
 *   `imageRegistry` (default DEFAULT_IMAGE_REGISTRY).
 * @param storageClass Emitted as `ferretdb.persistence.storageClassName`.
 * @param infrastructurePodLabels Emitted (same object reference) as `podLabels`
 *   on hyperdx, the collector agent, the collector gateway, and ferretdb.
 * @param operationalDaemonSetTolerations Emitted as the collector agent's
 *   `tolerations`.
 * @returns The clickstack values object; key order is significant to the
 *   serialized output and is kept stable.
 */
function generateClickStackValues(enabled, config, storageClass, infrastructurePodLabels, operationalDaemonSetTolerations) {
    const clickstack = config.features.observability?.clickstack;
    const telemetryRetentionDays = clickstack?.telemetryRetentionDays ?? 7;
    // NOTE(review): this function previously read
    // clickstack.clickHouseStorageSize (default "100Gi") into a local that was
    // never used, so the setting has no effect on the values built here. The
    // dead local is removed; confirm the size is applied wherever the ClickHouse
    // volume itself is configured.

    // Registry host for the clickstack images. The clickstack subchart routes
    // these through its own image helper, so the split { registry, repository }
    // shape lets global.imageRegistry + digest pinning flow through.
    const reg = config.imageRegistry || DEFAULT_IMAGE_REGISTRY;

    // Image reference for one IMAGE_REPOSITORIES entry, in the split shape.
    const imageFor = (name) => ({
        registry: reg,
        repository: IMAGE_REPOSITORIES[name].repository,
        tag: IMAGE_REPOSITORIES[name].tag,
        pullPolicy: "IfNotPresent",
    });
    // Kubernetes-style resources block from request/limit CPU and memory.
    const resourcesOf = (requestCpu, requestMemory, limitCpu, limitMemory) => ({
        requests: { cpu: requestCpu, memory: requestMemory },
        limits: { cpu: limitCpu, memory: limitMemory },
    });

    return {
        enabled,
        clickhouse: {
            database: "otel",
            username: "rulebricks",
            existingSecret: "",
            existingSecretKey: "admin-password",
            retentionDays: telemetryRetentionDays,
            ttl: "",
        },
        hyperdx: {
            enabled,
            image: imageFor("hyperdx"),
            resources: resourcesOf("250m", "512Mi", "1000m", "1Gi"),
            ingress: {
                enabled,
                className: "traefik",
                hostname: "",
                allowedIPs: [],
            },
            podLabels: infrastructurePodLabels,
        },
        collector: {
            image: imageFor("clickstackOtelCollector"),
            memoryLimitMiB: 800,
            agent: {
                enabled,
                securityContext: {
                    runAsUser: 0,
                    runAsGroup: 0,
                },
                resources: resourcesOf("100m", "256Mi", "500m", "512Mi"),
                tolerations: operationalDaemonSetTolerations,
                podLabels: infrastructurePodLabels,
            },
            gateway: {
                replicas: 1,
                resources: resourcesOf("250m", "512Mi", "2000m", "1Gi"),
                podLabels: infrastructurePodLabels,
            },
        },
        ferretdb: {
            enabled,
            image: imageFor("ferretdb"),
            postgresImage: imageFor("postgresDocumentdb"),
            auth: {
                username: "hyperdx",
                password: "",
                existingSecret: "",
                existingSecretKey: "password",
            },
            persistence: {
                enabled,
                size: "10Gi",
                storageClassName: storageClass,
            },
            resources: {
                ferretdb: resourcesOf("100m", "256Mi", "500m", "512Mi"),
                postgres: resourcesOf("250m", "512Mi", "1000m", "1Gi"),
            },
            podLabels: infrastructurePodLabels,
            podAnnotations: {
                "cluster-autoscaler.kubernetes.io/safe-to-evict": "false",
            },
        },
    };
}
|
|
297
570
|
function generatePrometheusServiceAccount(config) {
|
|
571
|
+
// AWS (AMP remote write) uses EKS Pod Identity - the association is created by
|
|
572
|
+
// the CLI's workload-identity step, so no eks.amazonaws.com/role-arn annotation.
|
|
573
|
+
// Azure Monitor still annotates the SA for its workload identity.
|
|
298
574
|
const annotations = {};
|
|
299
575
|
const remoteWrite = config.features.monitoring.remoteWrite;
|
|
300
|
-
if (remoteWrite?.destination === "aws-amp" && remoteWrite.awsRoleArn) {
|
|
301
|
-
annotations["eks.amazonaws.com/role-arn"] = remoteWrite.awsRoleArn;
|
|
302
|
-
}
|
|
303
576
|
if (remoteWrite?.destination === "azure-monitor" &&
|
|
304
577
|
remoteWrite.authType === "workload-identity" &&
|
|
305
578
|
remoteWrite.clientId) {
|
|
@@ -343,8 +616,16 @@ function generateAzureMonitorRemoteWrite(remoteWrite, base) {
|
|
|
343
616
|
if (!remoteWrite.clientId || !remoteWrite.tenantId) {
|
|
344
617
|
throw new Error("Azure Monitor remote_write workload identity requires client ID and tenant ID.");
|
|
345
618
|
}
|
|
346
|
-
|
|
347
|
-
|
|
619
|
+
// The prometheus-operator AzureAD schema supports only managedIdentity,
|
|
620
|
+
// oauth, and sdk (there is no "workloadIdentity" field - emitting it makes
|
|
621
|
+
// the operator reject the whole remoteWrite with "must provide Azure Managed
|
|
622
|
+
// Identity or Azure OAuth or Azure SDK", which silently prevents the
|
|
623
|
+
// Prometheus StatefulSet from being created). For AKS workload identity we
|
|
624
|
+
// use the Azure SDK credential: it reads the projected token + AZURE_CLIENT_ID
|
|
625
|
+
// injected by the workload-identity webhook (driven by the prometheus
|
|
626
|
+
// ServiceAccount's azure.workload.identity/client-id annotation and the
|
|
627
|
+
// azure.workload.identity/use pod label), so only the tenant ID is needed here.
|
|
628
|
+
azureAd.sdk = {
|
|
348
629
|
tenantId: remoteWrite.tenantId,
|
|
349
630
|
};
|
|
350
631
|
}
|
|
@@ -392,57 +673,690 @@ function generateGenericRemoteWrite(remoteWrite, base) {
|
|
|
392
673
|
return base;
|
|
393
674
|
}
|
|
394
675
|
/**
 * Builds the Kafka broker configuration map (Kafka.spec.kafka.config for
 * Strimzi). Every key is a Kafka property name standing in for one of the
 * former KAFKA_CFG_* tuning env vars. Keep this list in lockstep with the
 * chart's kafka.config.
 *
 * @returns {Record<string, string>} Broker property names mapped to string values.
 */
function generateKafkaConfig() {
    const brokerSettings = [
        ["auto.create.topics.enable", "true"],
        ["log.retention.hours", "24"],
        ["num.partitions", "12"],
        ["num.network.threads", "8"],
        ["num.io.threads", "8"],
        ["socket.send.buffer.bytes", "1048576"],
        ["socket.receive.buffer.bytes", "1048576"],
        ["socket.request.max.bytes", "209715200"],
        // Broker-wide record size ceiling; per-topic max.message.bytes values
        // are sized against it.
        ["message.max.bytes", "2097152"],
        ["replica.fetch.max.bytes", "4194304"],
        // Broker-wide default retention; the application topics set tighter caps.
        ["log.retention.bytes", "536870912"],
        ["log.segment.bytes", "1073741824"],
        ["num.replica.fetchers", "4"],
        ["queued.max.requests", "10000"],
        ["replica.socket.receive.buffer.bytes", "1048576"],
        ["log.cleaner.dedupe.buffer.size", "268435456"],
        ["log.cleaner.io.buffer.size", "1048576"],
    ];
    return Object.fromEntries(brokerSettings);
}
|
|
703
|
+
/**
 * Effective Kafka topic prefix as HPS/Vector/KEDA will see it.
 *
 * Mirrors generateAppLogging: in-cluster Kafka runs UNPREFIXED (dedicated
 * broker, and prefixing would desync chart-side consumers from producers);
 * external Kafka uses the explicit prefix, falling back to the chart default
 * ("com.rulebricks.") when none is configured.
 *
 * @param {object} config - Deployment configuration.
 * @returns {string} The prefix to prepend to topic names ("" for no prefix).
 */
function effectiveTopicPrefix(config) {
    if (!isExternalKafka(config)) {
        return "";
    }
    // `??` rather than `!== undefined`: a null prefix (e.g. an empty
    // `topicPrefix:` key in YAML) must fall back to the default instead of
    // being interpolated into topic names as the literal text "null".
    // An explicit "" still disables prefixing.
    return config.externalServices?.kafka?.external?.topicPrefix ?? "com.rulebricks.";
}
|
|
716
|
+
/**
 * Explicit topic list for Kafka (the kafka.provisioning block).
 *
 * The list is consumed by BOTH the subchart provisioning Job (which creates
 * the topics) and the chart's kafka-topic-align Job (which idempotently
 * converges pre-existing topics on upgrade). Names use the SAME prefix that is
 * written to app.logging.kafkaTopicPrefix; the chart fails the render if the
 * two ever diverge.
 *
 * Sizing policy (baseline constants, mirroring the chart defaults):
 * - solution / solution-response use SOLUTION_TOPIC_PARTITIONS, the
 *   worker-fleet concurrency CEILING (partitions can never be decreased;
 *   workers are sized separately by the cluster autoscaler). RF stays 1
 *   because RPC traffic is transient and latency-sensitive, and the HPS
 *   producer's acks=-1 would otherwise wait on full ISR replication.
 * - logs uses LOGS_TOPIC_PARTITIONS (durable data feeding the Vector ->
 *   object storage pipeline).
 *
 * @param {object} config - Deployment configuration.
 * @returns {Array<object>} Topic definitions, or [] when topics are customer-managed.
 */
function generateKafkaTopics(config) {
    // External brokers reachable by a plain client (SCRAM / Event Hubs / GCP,
    // no bridge) stay customer-managed. External MSK IAM is different: the
    // chart's kafka-topic-provision Job creates topics on the managed broker
    // through the proxy bridge, and MSK Serverless won't auto-create them.
    const customerManaged = isExternalKafka(config) && !kafkaUsesBridge(config);
    if (customerManaged) {
        return [];
    }
    const prefix = effectiveTopicPrefix(config);
    // Shared (same object reference) by both RPC topics.
    const rpcTopicConfig = {
        "retention.ms": "300000",
        "segment.ms": "300000",
        "segment.bytes": "67108864",
        "retention.bytes": "67108864",
        "max.message.bytes": "2097152",
    };
    const logsTopicConfig = {
        "retention.ms": "86400000",
        "retention.bytes": "268435456",
        "max.message.bytes": "2097152",
    };
    const rpcTopics = ["solution", "solution-response"].map((suffix) => ({
        name: `${prefix}${suffix}`,
        partitions: SOLUTION_TOPIC_PARTITIONS,
        replicas: TOPIC_REPLICATION_FACTOR,
        config: rpcTopicConfig,
    }));
    const logsTopic = {
        name: `${prefix}logs`,
        partitions: LOGS_TOPIC_PARTITIONS,
        replicas: TOPIC_REPLICATION_FACTOR,
        config: logsTopicConfig,
    };
    return [...rpcTopics, logsTopic];
}
|
|
775
|
+
/**
 * Soft pod anti-affinity for workers: a weight-50 preference to avoid nodes
 * (topology key kubernetes.io/hostname) that already run pods labeled
 * rulebricks.com/workload-group=infrastructure.
 *
 * @returns {object} An affinity fragment containing only `podAntiAffinity`.
 */
function generateWorkerPodAntiAffinity() {
    const infrastructureSelector = {
        matchExpressions: [
            {
                key: "rulebricks.com/workload-group",
                operator: "In",
                values: ["infrastructure"],
            },
        ],
    };
    const avoidInfrastructureHosts = {
        weight: 50,
        podAffinityTerm: {
            labelSelector: infrastructureSelector,
            topologyKey: "kubernetes.io/hostname",
        },
    };
    return {
        podAntiAffinity: {
            preferredDuringSchedulingIgnoredDuringExecution: [avoidInfrastructureHosts],
        },
    };
}
|
|
798
|
+
/**
 * Assembles a scheduling fragment from optional pieces. A key is present only
 * when its argument is truthy, so callers can spread the result into a values
 * block without introducing empty `affinity` / `tolerations` entries.
 *
 * @param {Array<object>} [tolerations] - Pod tolerations, if any.
 * @param {object} [affinity] - Pod affinity rules, if any.
 * @returns {object} Object with `affinity` and/or `tolerations`, possibly empty.
 */
function generateScheduling(tolerations, affinity) {
    const scheduling = {};
    if (affinity) {
        scheduling.affinity = affinity;
    }
    if (tolerations) {
        scheduling.tolerations = tolerations;
    }
    return scheduling;
}
|
|
804
|
+
/**
 * Burst-pool scheduling, always on.
 *
 * Cluster-setup provisions a dedicated worker pool that is both labeled and
 * tainted rulebricks.com/pool=burst (one big Deallocate-parked node on Azure,
 * or an on-demand nodegroup on AWS). Workers tolerate the taint and SOFTLY
 * prefer the label. On clusters without such a pool both settings are inert,
 * so BYO clusters schedule exactly as before with no configuration required.
 */

/** Toleration for the burst pool's NoSchedule taint. */
const BURST_POOL_TOLERATION = {
    key: "rulebricks.com/pool",
    operator: "Equal",
    value: "burst",
    effect: "NoSchedule",
};

/** Weight-100 soft node-affinity preference for nodes carrying the burst-pool label. */
const BURST_POOL_NODE_PREFERENCE = {
    weight: 100,
    preference: {
        matchExpressions: [
            {
                // The pool's label uses the same key/value pair as its taint.
                key: BURST_POOL_TOLERATION.key,
                operator: "In",
                values: [BURST_POOL_TOLERATION.value],
            },
        ],
    },
};
|
|
826
|
+
/**
 * Builds the backup values block.
 *
 * Backups are enabled only when the database is self-hosted, Postgres is not
 * marked external, and `config.backup.enabled` is exactly `true`. The backup
 * CronJob streams pg_dump from the running DB (using supabase.db.image) and
 * uploads it with rclone, so no backup-specific image is set here; the chart's
 * default rclone image applies unless overridden in values.
 *
 * @param {object} config - Deployment configuration.
 * @returns {{enabled: boolean, schedule: string, retentionDays: number}}
 *   Falls back to a "0 2 * * *" schedule and 7 retention days when unset.
 */
function generateBackupValues(config) {
    const { backup } = config;
    const selfHosted = config.database.type === "self-hosted";
    const postgresIsExternal = config.externalServices?.postgres?.mode === "external";
    const enabled = selfHosted && !postgresIsExternal && backup?.enabled === true;
    return {
        enabled,
        schedule: backup?.schedule || "0 2 * * *",
        retentionDays: backup?.retentionDays || 7,
    };
}
|
|
839
|
+
/**
 * Reports whether the deployment points at an external Redis, i.e.
 * `externalServices.redis.mode` is exactly "external".
 *
 * @param {object} config - Deployment configuration.
 * @returns {boolean}
 */
function isExternalRedis(config) {
    const redisMode = config.externalServices?.redis?.mode;
    return redisMode === "external";
}
|
|
842
|
+
/**
 * Reports whether the deployment points at an external Kafka, i.e.
 * `externalServices.kafka.mode` is exactly "external".
 *
 * @param {object} config - Deployment configuration.
 * @returns {boolean}
 */
function isExternalKafka(config) {
    const kafkaMode = config.externalServices?.kafka?.mode;
    return kafkaMode === "external";
}
|
|
845
|
+
/**
 * Whether the Vector kafka-proxy bridge sidecar is required.
 *
 * Only AWS MSK IAM needs it: Vector's kafka source can't speak token
 * mechanisms, whereas Azure Event Hubs and GCP both use SASL PLAIN/SCRAM,
 * which Vector handles directly. MSK IAM is recognized by either the
 * "aws-msk-iam" preset or the "aws-iam" SASL mechanism on external Kafka.
 *
 * @param {object} config - Deployment configuration.
 * @returns {boolean}
 */
function kafkaUsesBridge(config) {
    if (!isExternalKafka(config)) {
        return false;
    }
    const { preset, sasl } = config.externalServices?.kafka?.external ?? {};
    return preset === "aws-msk-iam" || sasl?.mechanism === "aws-iam";
}
|
|
856
|
+
/**
 * Whether Vector's kafka source connects with a direct PLAIN/SCRAM credential
 * and therefore needs a username/password.
 *
 * This mirrors the vector-kafka-env ConfigMap, which only sets
 * KAFKA_SASL_ENABLED=true for external, non-token, non-bridge mechanisms (and
 * where vector-kafka-credentials is populated). For the in-cluster, bridge,
 * and token-auth paths SASL is disabled, so username and password MUST be
 * omitted: an empty env default (${VAR:-}) renders unquoted via Helm's toYaml,
 * Vector reads it as YAML null, and rejects it at startup ("invalid type: unit
 * value, expected any valid TOML value").
 *
 * @param {object} config - Deployment configuration.
 * @returns {boolean}
 */
function kafkaUsesDirectSasl(config) {
    const noDirectConnection = !isExternalKafka(config) || kafkaUsesBridge(config);
    if (noDirectConnection) {
        return false;
    }
    const mechanism = config.externalServices?.kafka?.external?.sasl?.mechanism;
    // Token-based mechanisms carry no username/password.
    const tokenMechanisms = ["aws-iam", "oauthbearer"];
    return Boolean(mechanism) && !tokenMechanisms.includes(mechanism);
}
|
|
876
|
+
/**
 * Builds the rulebricks.redis block.
 *
 * - Embedded Redis: pod labels, core scheduling and persistence with the
 *   deployment's storage class. Sizing (resources, persistence size) is left
 *   to the chart defaults.
 * - External (managed) Redis: disables the in-cluster instance and emits the
 *   connection settings, including optional password, existing-secret
 *   reference and HTTP API details only when they are configured.
 *
 * @param {object} config - Deployment configuration.
 * @param {string} storageClass - StorageClass for the embedded Redis volume.
 * @param {object} infrastructurePodLabels - Labels applied to the Redis pods.
 * @param {object} coreScheduling - Scheduling fragment spread into the block.
 * @returns {object} The redis values block.
 */
function generateRedisBlock(config, storageClass, infrastructurePodLabels, coreScheduling) {
    if (isExternalRedis(config)) {
        const settings = config.externalServices?.redis?.external ?? {};
        const external = {
            host: settings.host ?? "",
            port: settings.port ?? 6379,
            tls: { enabled: settings.tls ?? false },
            ...(settings.password ? { password: settings.password } : {}),
            ...(settings.existingSecret
                ? {
                    existingSecret: settings.existingSecret,
                    existingSecretKey: settings.existingSecretKey || "redis-password",
                }
                : {}),
            ...(settings.httpApi?.enabled
                ? {
                    httpApi: {
                        enabled: true,
                        url: settings.httpApi.url ?? "",
                        token: settings.httpApi.token ?? "",
                    },
                }
                : {}),
        };
        return { enabled: false, external };
    }
    return {
        podLabels: infrastructurePodLabels,
        ...coreScheduling,
        persistence: { enabled: true, storageClass },
    };
}
|
|
918
|
+
/**
 * Builds the cache observability values: the Valkey admin UI and the Redis
 * metrics exporter.
 *
 * The Valkey admin ingress is enabled only when its exposure is "ingress"; in
 * that case the hostname falls back to `valkey.<config.domain>`. The Redis
 * exporter defaults to enabled, the Valkey admin UI to disabled.
 *
 * @param {object} config - Deployment configuration (reads features.cache and domain).
 * @param {object} infrastructurePodLabels - Labels applied to both workloads.
 * @returns {{valkeyAdmin: object, redisExporter: object}}
 */
function generateCacheObservabilityBlock(config, infrastructurePodLabels) {
    const { valkeyAdmin, redisExporter } = config.features.cache ?? {};
    const exposedViaIngress = valkeyAdmin?.exposure === "ingress";

    let ingressHostname = "";
    if (exposedViaIngress) {
        ingressHostname = valkeyAdmin?.hostname || `valkey.${config.domain}`;
    }

    const ingress = {
        enabled: exposedViaIngress,
        hostname: ingressHostname,
        basicAuth: {
            users: valkeyAdmin?.basicAuthUsers ?? [],
            existingSecret: valkeyAdmin?.basicAuthExistingSecret ?? "",
        },
        allowedIPs: valkeyAdmin?.allowedIPs ?? [],
    };

    return {
        valkeyAdmin: {
            enabled: valkeyAdmin?.enabled ?? false,
            exposure: valkeyAdmin?.exposure ?? "internal",
            podLabels: infrastructurePodLabels,
            ingress,
        },
        redisExporter: {
            enabled: redisExporter?.enabled ?? true,
            podLabels: infrastructurePodLabels,
        },
    };
}
|
|
946
|
+
/**
 * Builds the Kafka exporter values block.
 *
 * An explicit `features.cache.kafkaExporter.enabled` setting always wins;
 * otherwise the exporter is on for in-cluster Kafka and off for external
 * Kafka. `brokers` carries the external broker list and is empty for
 * in-cluster Kafka.
 *
 * @param {object} config - Deployment configuration.
 * @param {object} infrastructurePodLabels - Labels applied to the exporter pods.
 * @returns {{enabled: boolean, podLabels: object, brokers: string}}
 */
function generateKafkaExporterBlock(config, infrastructurePodLabels) {
    const external = isExternalKafka(config);
    const explicitlyRequested = config.features.cache?.kafkaExporter?.enabled;

    let brokers = "";
    if (external) {
        brokers = config.externalServices?.kafka?.external?.brokers ?? "";
    }

    return {
        enabled: explicitlyRequested ?? !external,
        podLabels: infrastructurePodLabels,
        brokers,
    };
}
|
|
957
|
+
/**
 * Builds the rulebricks.app.logging block. Decision logging is always enabled;
 * external Kafka adds brokers + SSL/SASL, while embedded Kafka auto-discovers
 * the in-cluster Kafka service.
 *
 * @param {object} config - Deployment configuration.
 * @returns {object} Logging values. For external Kafka, `kafkaTopicPrefix` is
 *   present only when a prefix was explicitly configured, and `kafkaSasl` only
 *   when a SASL mechanism is set.
 */
function generateAppLogging(config) {
    if (!isExternalKafka(config)) {
        return {
            enabled: true,
            kafkaBrokers: "", // Auto-discover from Kafka subchart
            kafkaTopic: "logs",
            // The in-cluster app/HPS produce to unprefixed topics (logs, solution,
            // solution-response). The chart default prefix ("com.rulebricks.") is meant
            // for shared/managed Kafka collision avoidance, but when applied here it
            // makes the chart-side consumers diverge from the producers: Vector would
            // subscribe to "com.rulebricks.logs" (no data) and the KEDA worker trigger
            // would watch "com.rulebricks.solution" (no lag signal). Disable prefixing
            // for the dedicated in-cluster broker so everything lines up.
            kafkaTopicPrefix: "",
        };
    }
    const ext = config.externalServices?.kafka?.external ?? {};
    const logging = {
        enabled: true,
        kafkaBrokers: ext.brokers ?? "",
        kafkaTopic: ext.topic || "logs",
        kafkaSsl: ext.ssl ?? false,
    };
    // Topic prefix: emit only when explicitly provided (incl. "" to disable). When
    // omitted, the chart default (com.rulebricks.) applies via value merge. A null
    // prefix (e.g. an empty `topicPrefix:` key in YAML) counts as omitted: emitting
    // `kafkaTopicPrefix: null` would make Helm delete the chart default rather
    // than keep it.
    if (ext.topicPrefix !== undefined && ext.topicPrefix !== null) {
        logging.kafkaTopicPrefix = ext.topicPrefix;
    }
    if (ext.sasl?.mechanism) {
        // Copy only the SASL fields that are actually set.
        const sasl = { mechanism: ext.sasl.mechanism };
        if (ext.sasl.region)
            sasl.region = ext.sasl.region;
        if (ext.sasl.username)
            sasl.username = ext.sasl.username;
        if (ext.sasl.password)
            sasl.password = ext.sasl.password;
        if (ext.sasl.existingSecret)
            sasl.existingSecret = ext.sasl.existingSecret;
        logging.kafkaSasl = sasl;
    }
    return logging;
}
|
|
1004
|
+
/**
 * HPS service account values.
 *
 * When external Kafka uses MSK IAM, HPS authenticates to the broker with its
 * pod's cloud identity. Under EKS Pod Identity that identity comes from a
 * namespace-scoped association (created by the CLI's workload-identity step
 * for the `<release>-hps` SA), NOT from an eks.amazonaws.com/role-arn
 * annotation. The SA is therefore only CREATED here, without annotations, so
 * the association has a subject to bind. In every other case no SA is created.
 *
 * @param {object} config - Deployment configuration.
 * @returns {{create: boolean, annotations: object}}
 */
function generateHpsServiceAccount(config) {
    const needsServiceAccount = Boolean(kafkaUsesBridge(config));
    return { create: needsServiceAccount, annotations: {} };
}
|
|
1017
|
+
/**
 * Top-level kafkaBridge block consumed by the Vector env ConfigMap.
 *
 * The bridge is enabled only for AWS MSK IAM, where a kafka-proxy sidecar
 * fronts the brokers for Vector; otherwise `{ enabled: false }` is returned.
 *
 * @param {object} config - Deployment configuration.
 * @returns {object} Bridge settings (provider, region, brokers, local port,
 *   proxy image, AWS role ARN), with "" for any unset string.
 */
function generateKafkaBridge(config) {
    const bridged = kafkaUsesBridge(config);
    if (!bridged) {
        return { enabled: false };
    }
    const external = config.externalServices?.kafka?.external ?? {};
    const { sasl, identity } = external;
    return {
        enabled: true,
        provider: "aws",
        region: sasl?.region ?? "",
        brokers: external.brokers ?? "",
        localPort: 19092,
        image: KAFKA_PROXY_IMAGE,
        awsRoleArn: identity?.awsRoleArn ?? "",
    };
}
|
|
1036
|
+
/**
 * kafka-proxy sidecar definition for the Vector pod (AWS MSK IAM only).
 *
 * Each upstream broker in the comma-separated `brokers` list is mapped to a
 * sequential local port on 127.0.0.1, starting at 19092, and the proxy is
 * started with TLS and AWS_MSK_IAM SASL enabled for the configured region.
 *
 * @param {object} config - Deployment configuration.
 * @returns {Array<object>|undefined} A single-container list, or undefined
 *   when the bridge is not in use or no brokers are configured.
 */
function generateVectorExtraContainers(config) {
    if (!kafkaUsesBridge(config)) {
        return undefined;
    }
    const ext = config.externalServices?.kafka?.external ?? {};

    // Normalize the broker list: trim whitespace and drop empty entries.
    const upstreamBrokers = [];
    for (const entry of (ext.brokers ?? "").split(",")) {
        const broker = entry.trim();
        if (broker) {
            upstreamBrokers.push(broker);
        }
    }
    if (upstreamBrokers.length === 0) {
        return undefined;
    }

    const firstLocalPort = 19092;
    const mappingArgs = [];
    const ports = [];
    upstreamBrokers.forEach((broker, offset) => {
        const localPort = firstLocalPort + offset;
        mappingArgs.push(`--bootstrap-server-mapping=${broker},127.0.0.1:${localPort}`);
        ports.push({ containerPort: localPort });
    });

    const proxyContainer = {
        name: "kafka-proxy",
        image: KAFKA_PROXY_IMAGE,
        args: [
            "server",
            ...mappingArgs,
            "--tls-enable",
            "--sasl-enable",
            "--sasl-method=AWS_MSK_IAM",
            `--sasl-aws-region=${ext.sasl?.region ?? ""}`,
        ],
        ports,
    };
    return [proxyContainer];
}
|
|
1068
|
+
// VRL program for the Vector agent's remap transform. It parses JSON app/HPS
// log lines, lifts trace_id/span_id (for logs<->traces correlation) and the
// log level to top-level fields, and flattens useful Kubernetes metadata.
// Keep in sync with charts/.../values.yaml vector-agent.customConfig.transforms.
const VECTOR_APP_LOGS_VRL = [
    // Structured fields are lifted only when the message is a JSON object.
    'parsed, err = parse_json(to_string(.message) ?? "")',
    "if err == null && is_object(parsed) {",
    " .log = parsed",
    " .trace_id = parsed.trace_id",
    " .span_id = parsed.span_id",
    ' if exists(parsed.level) { .level = to_string(parsed.level) ?? "info" }',
    "}",
    // Flatten Kubernetes metadata: top-level field <- .kubernetes.<source>.
    ...Object.entries({
        pod: "pod_name",
        namespace: "pod_namespace",
        container: "container_name",
        node: "pod_node_name",
    }).map(([field, source]) => `.${field} = .kubernetes.${source}`),
].join("\n");
|
|
418
1084
|
/**
 * global.tracing block (in-cluster OTel Collector -> pluggable trace backend).
 *
 * Emits the common settings plus exactly one destination-specific sub-block:
 * `elastic` (the default destination), `otlp`, or `azureMonitor` for any other
 * destination value. Credentials are included only when they match the
 * selected auth mode and are set.
 *
 * @param {object} config - Deployment configuration.
 * @returns {object|undefined} The tracing block, or undefined when tracing is
 *   disabled so the key is omitted entirely.
 */
function generateTracingGlobal(config) {
    const tracing = config.features.tracing;
    if (!tracing?.enabled) {
        return undefined;
    }
    const destination = tracing.destination ?? "elastic";
    const common = {
        enabled: true,
        destination,
        samplingRatio: tracing.samplingRatio ?? 1,
        // RB image dict for the parent chart's otel-collector deployment. The
        // rulebricks.image helper requires image.repository and applies
        // global.imageRegistry to the host.
        collector: {
            image: {
                registry: config.imageRegistry || DEFAULT_IMAGE_REGISTRY,
                repository: IMAGE_REPOSITORIES.opentelemetryCollector.repository,
                tag: IMAGE_REPOSITORIES.opentelemetryCollector.tag,
            },
        },
    };

    switch (destination) {
        case "elastic": {
            const source = tracing.elastic ?? {};
            const authMode = source.authMode ?? "secret-token";
            const useSecretToken = authMode === "secret-token" && source.secretToken;
            const useApiKey = authMode === "api-key" && source.apiKey;
            return {
                ...common,
                elastic: {
                    endpoint: source.endpoint ?? "",
                    authMode,
                    tlsInsecureSkipVerify: false,
                    ...(useSecretToken ? { secretToken: source.secretToken } : {}),
                    ...(useApiKey ? { apiKey: source.apiKey } : {}),
                },
            };
        }
        case "otlp": {
            const source = tracing.otlp ?? {};
            const authMode = source.authMode ?? "none";
            const otlp = {
                endpoint: source.endpoint ?? "",
                authMode,
                tlsInsecureSkipVerify: source.tlsInsecureSkipVerify ?? false,
            };
            if (authMode === "bearer" && source.token) {
                otlp.token = source.token;
            }
            else if (authMode === "api-key" && source.apiKey) {
                otlp.apiKey = source.apiKey;
            }
            else if (authMode === "header") {
                otlp.headerName = source.headerName ?? "Authorization";
                if (source.headerValue) {
                    otlp.headerValue = source.headerValue;
                }
            }
            // Extra headers are passed through regardless of auth mode.
            const hasExtraHeaders = source.headers && Object.keys(source.headers).length > 0;
            if (hasExtraHeaders) {
                otlp.headers = source.headers;
            }
            return { ...common, otlp };
        }
        default: {
            // azure-monitor
            const connectionString = tracing.azureMonitor?.connectionString ?? "";
            return { ...common, azureMonitor: { connectionString } };
        }
    }
}
|
|
1155
|
+
/**
 * traefik.tracing block: makes Traefik the root span and propagates the W3C
 * traceparent to backends by exporting OTLP over HTTP to the release's
 * otel-collector service.
 *
 * @param {object} config - Deployment configuration.
 * @param {string} releaseName - Helm release name, used to address the collector.
 * @returns {object} The OTLP settings, or an empty object when neither
 *   ClickStack nor tracing is enabled.
 */
function generateTraefikTracing(config, releaseName) {
    const tracesWanted = isClickStackEnabled(config) || config.features.tracing?.enabled;
    if (!tracesWanted) {
        return {};
    }
    const collectorEndpoint = `http://${releaseName}-otel-collector:4318/v1/traces`;
    return {
        otlp: {
            enabled: true,
            http: { enabled: true, endpoint: collectorEndpoint },
        },
    };
}
|
|
1172
|
+
/**
 * vector-agent block: a second Vector deployment (role Agent / DaemonSet) that
 * tails pod logs and ships them to a customer-managed backend. The backend is
 * chosen by `features.logging.appLogs.destination`: "loki", "generic" (HTTP
 * POST), or Elasticsearch for any other value (the default). Decision logs are
 * unaffected; they stay in ClickHouse via the `vector` aggregator.
 *
 * @param {object} config - Deployment configuration.
 * @param {object} podLabels - Labels applied to the agent pods.
 * @param {Array<object>} tolerations - Tolerations applied to the agent pods.
 * @returns {object} The agent values, or `{ enabled: false }` when app-log
 *   shipping is disabled.
 */
function generateVectorAgent(config, podLabels, tolerations) {
    const appLogs = config.features.logging.appLogs;
    if (!appLogs?.enabled) {
        return { enabled: false };
    }

    // Pick the sink (and the key it is registered under) for the destination.
    let sinkName;
    let sink;
    switch (appLogs.destination ?? "elasticsearch") {
        case "loki": {
            const loki = appLogs.loki ?? {};
            const defaultLabels = {
                app: "rulebricks",
                namespace: "{{ namespace }}",
                pod: "{{ pod }}",
                container: "{{ container }}",
            };
            sinkName = "loki";
            sink = {
                type: "loki",
                inputs: ["app_logs"],
                endpoint: loki.endpoint,
                labels: loki.labels ?? defaultLabels,
                encoding: { codec: "json" },
            };
            break;
        }
        case "generic": {
            const generic = appLogs.generic ?? {};
            sinkName = "generic_http";
            sink = {
                type: "http",
                inputs: ["app_logs"],
                uri: generic.endpoint,
                method: "post",
                encoding: { codec: "json" },
            };
            if (generic.authHeader) {
                sink.request = { headers: { Authorization: generic.authHeader } };
            }
            break;
        }
        default: {
            const es = appLogs.elasticsearch ?? {};
            const authMode = es.authMode ?? "basic";
            sinkName = "elasticsearch";
            sink = {
                type: "elasticsearch",
                inputs: ["app_logs"],
                endpoints: [es.endpoint],
                mode: "bulk",
                bulk: { index: es.index || "rulebricks-app-logs" },
                tls: { verify_certificate: es.verifyCertificate ?? true },
            };
            if (authMode === "basic") {
                sink.auth = { strategy: "basic", user: es.username, password: es.password };
            }
            else if (authMode === "api-key") {
                sink.request = { headers: { Authorization: `ApiKey ${es.apiKey}` } };
            }
            break;
        }
    }

    const customConfig = {
        data_dir: "/vector-data-dir",
        sources: {
            kubernetes_logs: {
                type: "kubernetes_logs",
                // Skip both Vector deployments: the aggregator
                // (app.kubernetes.io/name=vector) re-emits decision logs on stdout
                // (those belong in ClickHouse, not Elasticsearch) and the agent
                // itself (vector-agent) to avoid a self-scrape loop.
                extra_label_selector: "app.kubernetes.io/name notin (vector,vector-agent)",
            },
        },
        transforms: {
            app_logs: {
                type: "remap",
                inputs: ["kubernetes_logs"],
                source: VECTOR_APP_LOGS_VRL,
            },
        },
        sinks: { [sinkName]: sink },
    };

    return {
        enabled: true,
        role: "Agent",
        podLabels,
        // Follow active worker pools without tolerating shutdown, out-of-service,
        // or unreachable node taints.
        tolerations,
        resources: {
            requests: { cpu: "100m", memory: "256Mi" },
            limits: { cpu: "500m", memory: "512Mi" },
        },
        customConfig,
    };
}
|
|
1267
|
+
/**
|
|
1268
|
+
* Builds Helm values from the deployment configuration.
|
|
1269
|
+
*/
|
|
1270
|
+
export function buildHelmValues(config, options = {}) {
|
|
1271
|
+
if (config.database.type === "self-hosted" &&
|
|
1272
|
+
!config.database.supabaseJwtSecret) {
|
|
1273
|
+
throw new Error("Self-hosted Supabase is missing a JWT secret. Run `rulebricks redeploy <name>` to regenerate deployment credentials, or set database.supabaseJwtSecret in config.yaml.");
|
|
1274
|
+
}
|
|
1275
|
+
if (config.features.ai.enabled && !config.features.ai.openaiApiKey) {
|
|
1276
|
+
throw new Error("AI features are enabled but the OpenAI API key is missing. Run `rulebricks redeploy <name>` and enter your OpenAI API key, or disable AI features in config.yaml.");
|
|
1277
|
+
}
|
|
1278
|
+
const { tlsEnabled = true, secretMode = "inline" } = options;
|
|
424
1279
|
const useLocalGrafana = config.features.monitoring.destination === "local-grafana";
|
|
425
1280
|
// Determine if external-dns should be enabled
|
|
426
1281
|
const externalDnsEnabled = config.dns.autoManage && isSupportedDnsProvider(config.dns.provider);
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
1282
|
+
const gcpDiskType = config.infrastructure.nodeArchitecture === "amd64"
|
|
1283
|
+
? "pd-balanced"
|
|
1284
|
+
: "hyperdisk-balanced";
|
|
1285
|
+
// Prefer the live cluster's StorageClass. Provider defaults are only a
|
|
1286
|
+
// fallback for legacy configs that predate capability scanning.
|
|
1287
|
+
const storageClass = config.infrastructure.storageClass ||
|
|
1288
|
+
(config.infrastructure.provider === "aws"
|
|
1289
|
+
? "gp3"
|
|
1290
|
+
: config.infrastructure.provider === "gcp"
|
|
1291
|
+
? gcpDiskType
|
|
1292
|
+
: config.infrastructure.provider === "azure"
|
|
1293
|
+
? "managed-premium"
|
|
1294
|
+
: "gp3");
|
|
1295
|
+
const shouldApplyArm64Toleration = config.infrastructure.arm64TolerationRequired ?? false;
|
|
1296
|
+
const architectureTolerations = shouldApplyArm64Toleration
|
|
1297
|
+
? [
|
|
1298
|
+
{
|
|
1299
|
+
key: "kubernetes.io/arch",
|
|
1300
|
+
operator: "Equal",
|
|
1301
|
+
value: "arm64",
|
|
1302
|
+
effect: "NoSchedule",
|
|
1303
|
+
},
|
|
1304
|
+
]
|
|
1305
|
+
: undefined;
|
|
1306
|
+
const coreScheduling = generateScheduling(architectureTolerations);
|
|
1307
|
+
// Workers always tolerate + softly prefer the optional burst pool
|
|
1308
|
+
// (rulebricks.com/pool=burst). The preference is soft, so clusters without a
|
|
1309
|
+
// burst pool schedule workers on ordinary capacity exactly as before.
|
|
1310
|
+
const workerTolerations = [
|
|
1311
|
+
...(architectureTolerations ?? []),
|
|
1312
|
+
BURST_POOL_TOLERATION,
|
|
445
1313
|
];
|
|
1314
|
+
const operationalDaemonSetTolerations = workerTolerations;
|
|
1315
|
+
const workerScheduling = generateScheduling(workerTolerations, {
|
|
1316
|
+
...generateWorkerPodAntiAffinity(),
|
|
1317
|
+
nodeAffinity: {
|
|
1318
|
+
preferredDuringSchedulingIgnoredDuringExecution: [
|
|
1319
|
+
BURST_POOL_NODE_PREFERENCE,
|
|
1320
|
+
],
|
|
1321
|
+
},
|
|
1322
|
+
});
|
|
1323
|
+
const infrastructurePodLabels = {
|
|
1324
|
+
"rulebricks.com/workload-group": "infrastructure",
|
|
1325
|
+
};
|
|
1326
|
+
const applicationPodLabels = {
|
|
1327
|
+
"rulebricks.com/workload-group": "application",
|
|
1328
|
+
};
|
|
1329
|
+
const productVersion = config.version;
|
|
1330
|
+
// Scheduling priority tiers. The chart creates release-scoped
|
|
1331
|
+
// PriorityClasses (<release>-critical / <release>-burst); stateful
|
|
1332
|
+
// infrastructure references the critical class so it can always preempt
|
|
1333
|
+
// burst workers to reschedule, and workers reference the burst class so
|
|
1334
|
+
// they are strictly the first preemption victims. Subchart values cannot
|
|
1335
|
+
// template release names, so the CLI emits them as literals.
|
|
1336
|
+
const releaseName = getReleaseName(config.name);
|
|
1337
|
+
const criticalPriorityClass = `${releaseName}-critical`;
|
|
1338
|
+
const burstPriorityClass = `${releaseName}-burst`;
|
|
1339
|
+
// Subcharts that don't honor global.imagePullSecrets (keda, strimzi, traefik,
|
|
1340
|
+
// vector) need the pull secret on their own key so their pods can pull the
|
|
1341
|
+
// private docker.io/rulebricks/* images from index.docker.io.
|
|
1342
|
+
const rulebricksPullSecret = [{ name: `${releaseName}-regcred` }];
|
|
1343
|
+
// Registry host for every image. Empty config.imageRegistry => docker.io. When
|
|
1344
|
+
// set, the host is rewritten into global.imageRegistry (which kube-prometheus-stack
|
|
1345
|
+
// and our subcharts honor) and into each of the six Tier-2 charts' own image
|
|
1346
|
+
// keys below, always keeping the rulebricks/<name> path.
|
|
1347
|
+
const reg = config.imageRegistry || DEFAULT_IMAGE_REGISTRY;
|
|
1348
|
+
const clickStackEnabled = isClickStackEnabled(config);
|
|
1349
|
+
const clickStackConfig = config.features.observability?.clickstack;
|
|
1350
|
+
const clickHouseStorageSize = clickStackConfig?.clickHouseStorageSize ?? "100Gi";
|
|
1351
|
+
// Distributed tracing (self-hosted only). Lives under global so the
|
|
1352
|
+
// rulebricks subchart deployments can read it; the collector + traefik are
|
|
1353
|
+
// wired below from the same source.
|
|
1354
|
+
const tracingGlobal = clickStackEnabled ? undefined : generateTracingGlobal(config);
|
|
1355
|
+
// Never let the cluster-autoscaler evict single-replica stateful pods
|
|
1356
|
+
// during node scale-down; an evicted broker/db stalls the whole pipeline.
|
|
1357
|
+
const safeToEvictAnnotations = {
|
|
1358
|
+
"cluster-autoscaler.kubernetes.io/safe-to-evict": "false",
|
|
1359
|
+
};
|
|
446
1360
|
// Build global.supabase configuration
|
|
447
1361
|
const supabaseGlobalConfig = config.database.type === "supabase-cloud"
|
|
448
1362
|
? {
|
|
@@ -452,27 +1366,51 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
452
1366
|
accessToken: config.database.supabaseAccessToken || undefined,
|
|
453
1367
|
projectRef: config.database.supabaseProjectRef || undefined,
|
|
454
1368
|
}
|
|
455
|
-
: {
|
|
456
|
-
jwtSecret
|
|
457
|
-
|
|
458
|
-
|
|
1369
|
+
: (() => {
|
|
1370
|
+
const jwtSecret = config.database.supabaseJwtSecret || "";
|
|
1371
|
+
return {
|
|
1372
|
+
jwtSecret: jwtSecret || undefined,
|
|
1373
|
+
anonKey: jwtSecret ? signSupabaseJwt("anon", jwtSecret) : undefined,
|
|
1374
|
+
serviceKey: jwtSecret
|
|
1375
|
+
? signSupabaseJwt("service_role", jwtSecret)
|
|
1376
|
+
: undefined,
|
|
1377
|
+
};
|
|
1378
|
+
})();
|
|
1379
|
+
// Always emit email configuration so auth pods receive template/subject env
|
|
1380
|
+
// vars regardless of Helm merge order. Custom values take precedence over
|
|
1381
|
+
// built-in defaults when explicitly enabled.
|
|
1382
|
+
const customEmails = config.features.customEmails;
|
|
1383
|
+
if (customEmails?.enabled &&
|
|
1384
|
+
customEmails.subjects &&
|
|
1385
|
+
customEmails.templates) {
|
|
1386
|
+
supabaseGlobalConfig.emails = {
|
|
1387
|
+
subjects: {
|
|
1388
|
+
invite: customEmails.subjects.invite,
|
|
1389
|
+
confirmation: customEmails.subjects.confirmation,
|
|
1390
|
+
recovery: customEmails.subjects.recovery,
|
|
1391
|
+
emailChange: customEmails.subjects.emailChange,
|
|
1392
|
+
},
|
|
1393
|
+
templates: {
|
|
1394
|
+
invite: customEmails.templates.invite,
|
|
1395
|
+
confirmation: customEmails.templates.confirmation,
|
|
1396
|
+
recovery: customEmails.templates.recovery,
|
|
1397
|
+
emailChange: customEmails.templates.emailChange,
|
|
1398
|
+
},
|
|
459
1399
|
};
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
config.features.customEmails.subjects &&
|
|
463
|
-
config.features.customEmails.templates) {
|
|
1400
|
+
}
|
|
1401
|
+
else {
|
|
464
1402
|
supabaseGlobalConfig.emails = {
|
|
465
1403
|
subjects: {
|
|
466
|
-
invite:
|
|
467
|
-
confirmation:
|
|
468
|
-
recovery:
|
|
469
|
-
emailChange:
|
|
1404
|
+
invite: "Join your team on Rulebricks",
|
|
1405
|
+
confirmation: "Confirm Your Email",
|
|
1406
|
+
recovery: "Reset Your Password",
|
|
1407
|
+
emailChange: "Confirm Email Change",
|
|
470
1408
|
},
|
|
471
1409
|
templates: {
|
|
472
|
-
invite:
|
|
473
|
-
confirmation:
|
|
474
|
-
recovery:
|
|
475
|
-
emailChange:
|
|
1410
|
+
invite: "https://prefix-files.s3.us-west-2.amazonaws.com/templates/invite.html",
|
|
1411
|
+
confirmation: "https://prefix-files.s3.us-west-2.amazonaws.com/templates/verify.html",
|
|
1412
|
+
recovery: "https://prefix-files.s3.us-west-2.amazonaws.com/templates/password_change.html",
|
|
1413
|
+
emailChange: "https://prefix-files.s3.us-west-2.amazonaws.com/templates/email_change.html",
|
|
476
1414
|
},
|
|
477
1415
|
};
|
|
478
1416
|
}
|
|
@@ -485,7 +1423,30 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
485
1423
|
email: config.adminEmail,
|
|
486
1424
|
tlsEnabled,
|
|
487
1425
|
licenseKey: config.licenseKey,
|
|
1426
|
+
// Pull secret for the private docker.io/rulebricks/* images. References the
|
|
1427
|
+
// license registry secret <release>-regcred (index.docker.io, authed by the
|
|
1428
|
+
// license PAT). kube-prometheus-stack + cert-manager honor this global value;
|
|
1429
|
+
// keda, traefik, vector and the strimzi operator each get the same secret on
|
|
1430
|
+
// their own key below.
|
|
1431
|
+
imagePullSecrets: [{ name: `${releaseName}-regcred` }],
|
|
1432
|
+
// Single registry-host override (empty => docker.io/rulebricks/*). Honored by
|
|
1433
|
+
// kube-prometheus-stack and our subcharts; the CLI also rewrites the host into
|
|
1434
|
+
// the other Tier-2 charts' native image keys below.
|
|
1435
|
+
...(config.imageRegistry ? { imageRegistry: config.imageRegistry } : {}),
|
|
1436
|
+
// Generated name->sha256 digest map (empty until the helm repo's mirror
|
|
1437
|
+
// pipeline populates IMAGE_DIGESTS). When a name is present the chart image
|
|
1438
|
+
// helper pins @sha256 instead of :tag.
|
|
1439
|
+
imageDigests: IMAGE_DIGESTS,
|
|
1440
|
+
...(productVersion && SEMVER_PATTERN.test(productVersion)
|
|
1441
|
+
? { version: productVersion }
|
|
1442
|
+
: {}),
|
|
488
1443
|
externalDnsEnabled,
|
|
1444
|
+
// Scheduling priority tiers (the chart renders release-scoped
|
|
1445
|
+
// <release>-critical and <release>-burst PriorityClasses).
|
|
1446
|
+
priorityClasses: { enabled: true },
|
|
1447
|
+
clickstack: {
|
|
1448
|
+
enabled: clickStackEnabled,
|
|
1449
|
+
},
|
|
489
1450
|
// SMTP Configuration
|
|
490
1451
|
smtp: {
|
|
491
1452
|
host: config.smtp.host,
|
|
@@ -516,62 +1477,164 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
516
1477
|
: {
|
|
517
1478
|
enabled: false,
|
|
518
1479
|
},
|
|
1480
|
+
storage: config.storage
|
|
1481
|
+
? {
|
|
1482
|
+
// One provider, one identity, one bucket/container. decision-logs and
|
|
1483
|
+
// db-backups are key prefixes under paths.* within it.
|
|
1484
|
+
provider: config.storage.provider,
|
|
1485
|
+
bucket: config.storage.bucket,
|
|
1486
|
+
region: config.storage.region,
|
|
1487
|
+
s3: {
|
|
1488
|
+
iamRoleArn: config.storage.awsIamRoleArn || "",
|
|
1489
|
+
existingSecret: { name: "" },
|
|
1490
|
+
},
|
|
1491
|
+
azure: {
|
|
1492
|
+
authMode: config.storage.cloudAuthMode === "secret"
|
|
1493
|
+
? "connection-string"
|
|
1494
|
+
: "workload-identity",
|
|
1495
|
+
clientId: config.storage.azureBlobClientId || "",
|
|
1496
|
+
tenantId: config.storage.azureBlobTenantId || "",
|
|
1497
|
+
container: config.storage.azureBlobContainer || "",
|
|
1498
|
+
connectionStringSecretRef: config.storage.azureBlobConnectionStringSecretRef || {
|
|
1499
|
+
name: "",
|
|
1500
|
+
key: "",
|
|
1501
|
+
},
|
|
1502
|
+
},
|
|
1503
|
+
gcp: {
|
|
1504
|
+
serviceAccountEmail: config.storage.gcpServiceAccountEmail || "",
|
|
1505
|
+
},
|
|
1506
|
+
paths: {
|
|
1507
|
+
decisionLogs: config.storage.paths?.decisionLogs || "decision-logs",
|
|
1508
|
+
dbBackups: config.storage.paths?.dbBackups || "db-backups",
|
|
1509
|
+
},
|
|
1510
|
+
}
|
|
1511
|
+
: undefined,
|
|
1512
|
+
// Distributed tracing (omitted entirely when disabled).
|
|
1513
|
+
...(tracingGlobal ? { tracing: tracingGlobal } : {}),
|
|
519
1514
|
},
|
|
1515
|
+
clickstack: generateClickStackValues(clickStackEnabled, config, storageClass, infrastructurePodLabels, operationalDaemonSetTolerations),
|
|
1516
|
+
backup: generateBackupValues(config),
|
|
520
1517
|
// =============================================================================
|
|
521
1518
|
// RULEBRICKS APPLICATION STACK
|
|
522
1519
|
// =============================================================================
|
|
523
1520
|
rulebricks: {
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
image: {
|
|
528
|
-
repository: "index.docker.io/rulebricks/app",
|
|
529
|
-
tag: config.appVersion,
|
|
530
|
-
pullPolicy: "IfNotPresent",
|
|
531
|
-
},
|
|
532
|
-
}
|
|
533
|
-
: {}),
|
|
534
|
-
replicaCount: tierConfig.appReplicas,
|
|
535
|
-
resources: tierConfig.appResources,
|
|
536
|
-
tolerations: arm64Tolerations,
|
|
537
|
-
// Logging configuration
|
|
538
|
-
logging: {
|
|
1521
|
+
metrics: {
|
|
1522
|
+
enabled: true,
|
|
1523
|
+
serviceMonitor: {
|
|
539
1524
|
enabled: true,
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
1525
|
+
interval: "30s",
|
|
1526
|
+
scrapeTimeout: "10s",
|
|
1527
|
+
},
|
|
1528
|
+
app: {
|
|
1529
|
+
path: "/api/metrics",
|
|
543
1530
|
},
|
|
1531
|
+
hps: {
|
|
1532
|
+
path: "/metrics",
|
|
1533
|
+
},
|
|
1534
|
+
worker: {
|
|
1535
|
+
path: "/metrics",
|
|
1536
|
+
port: 3000,
|
|
1537
|
+
},
|
|
1538
|
+
},
|
|
1539
|
+
app: {
|
|
1540
|
+
image: {
|
|
1541
|
+
// Split shape: the rulebricks-chart.image helper applies
|
|
1542
|
+
// global.imageRegistry to the host + digest pinning. The host NEVER
|
|
1543
|
+
// goes in repository.
|
|
1544
|
+
registry: reg,
|
|
1545
|
+
repository: IMAGE_REPOSITORIES.app,
|
|
1546
|
+
pullPolicy: "IfNotPresent",
|
|
1547
|
+
},
|
|
1548
|
+
// Replica count and resources fall back to the chart defaults.
|
|
1549
|
+
podLabels: infrastructurePodLabels,
|
|
1550
|
+
...coreScheduling,
|
|
1551
|
+
// Logging configuration (in-cluster auto-discovery or external Kafka)
|
|
1552
|
+
logging: generateAppLogging(config),
|
|
544
1553
|
},
|
|
545
1554
|
// HPS (High Performance Server)
|
|
546
1555
|
hps: {
|
|
547
1556
|
enabled: true,
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
1557
|
+
image: {
|
|
1558
|
+
// Split shape (see app.image): host comes from global.imageRegistry via
|
|
1559
|
+
// the rulebricks-chart.image helper, never baked into repository.
|
|
1560
|
+
registry: reg,
|
|
1561
|
+
repository: IMAGE_REPOSITORIES.hps,
|
|
1562
|
+
pullPolicy: "Always",
|
|
1563
|
+
},
|
|
1564
|
+
// Replica count and resources fall back to the chart defaults.
|
|
1565
|
+
podLabels: applicationPodLabels,
|
|
1566
|
+
...coreScheduling,
|
|
1567
|
+
// Gather-plane autoscaling: HPS parses every chunk response, so its
|
|
1568
|
+
// capacity scales with request rate (load testing showed a fixed
|
|
1569
|
+
// gather plane plateaus throughput while workers idle). Conservative
|
|
1570
|
+
// one-pod-at-a-time scaling - each scale event rebalances the
|
|
1571
|
+
// response consumer group and can time out in-flight requests. Only the
|
|
1572
|
+
// enable flag is set here; min/max and thresholds use the chart
|
|
1573
|
+
// defaults.
|
|
1574
|
+
keda: {
|
|
1575
|
+
enabled: true,
|
|
1576
|
+
},
|
|
1577
|
+
// Warm the hps/worker images onto active worker-capable nodes so burst
|
|
1578
|
+
// scale-outs skip the image pull without targeting shutdown nodes.
|
|
1579
|
+
imagePrepull: {
|
|
1580
|
+
enabled: true,
|
|
1581
|
+
tolerations: operationalDaemonSetTolerations,
|
|
1582
|
+
},
|
|
1583
|
+
extraEnv: [
|
|
1584
|
+
// FLOW_CHUNK_MAX_ITEMS is the #1 throughput dial. Each chunk is one
|
|
1585
|
+
// Kafka round-trip (gather -> solution -> worker -> solution-response
|
|
1586
|
+
// -> gather), so throughput ~= (broker messages/sec) x (payloads per
|
|
1587
|
+
// message). Bigger chunks = fewer messages per solution = less broker
|
|
1588
|
+
// and coordination overhead. Benchmarks: 10 -> 50 gave +27%, and on
|
|
1589
|
+
// small payloads 100 -> 1000 gave another ~1.6x (22k -> 35k sol/s),
|
|
1590
|
+
// until the bottleneck moved off the broker onto worker CPU.
|
|
1591
|
+
// 500 keeps typical bulk requests to 1-2 messages. The byte bound
|
|
1592
|
+
// (CHUNK_MAX_BYTES, default 256 KiB in HPS) caps message size
|
|
1593
|
+
// regardless, so large payloads stay under Kafka's 2 MiB
|
|
1594
|
+
// max.message.bytes. High-throughput, small-payload deployments can
|
|
1595
|
+
// raise this much higher (and CHUNK_MAX_BYTES with it); the only costs
|
|
1596
|
+
// are per-request latency (one worker processes a whole chunk) and the
|
|
1597
|
+
// 2 MiB cap on the larger response message (avg output x chunk size
|
|
1598
|
+
// must stay < 2 MiB, so lower this for output-heavy flows).
|
|
1599
|
+
{ name: "FLOW_CHUNK_MAX_ITEMS", value: "500" },
|
|
1600
|
+
],
|
|
1601
|
+
// Service account (annotated with the MSK IAM role for external Kafka)
|
|
1602
|
+
serviceAccount: generateHpsServiceAccount(config),
|
|
560
1603
|
// HPS Workers with KEDA autoscaling
|
|
561
1604
|
workers: {
|
|
562
1605
|
enabled: true,
|
|
563
|
-
|
|
1606
|
+
// Workers consume the solution topic directly, so under external MSK
|
|
1607
|
+
// IAM they need their own cloud identity - not the shared/default SA.
|
|
1608
|
+
// Same rule as HPS: a dedicated `<release>-hps-worker` SA (no role-arn
|
|
1609
|
+
// annotation) that the CLI's workload-identity step binds to the Kafka
|
|
1610
|
+
// role via Pod Identity.
|
|
1611
|
+
serviceAccount: generateHpsServiceAccount(config),
|
|
1612
|
+
// Partition count of the solution request topic (also exported to
|
|
1613
|
+
// HPS as MAX_WORKERS). Must match kafka.provisioning above; it is
|
|
1614
|
+
// the fleet-concurrency ceiling, NOT a worker count. Replica count
|
|
1615
|
+
// and resources fall back to the chart defaults.
|
|
1616
|
+
solutionPartitions: SOLUTION_TOPIC_PARTITIONS,
|
|
564
1617
|
keda: {
|
|
565
1618
|
enabled: true,
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
1619
|
+
// Poll fast so bursts are detected within seconds; the chart's
|
|
1620
|
+
// ScaledObject defaults add exponential scale-up (double every
|
|
1621
|
+
// 15s) and smooth scale-down (5-min window, -25%/min) behavior.
|
|
1622
|
+
// min/max replica counts fall back to the chart defaults.
|
|
1623
|
+
pollingInterval: 5,
|
|
569
1624
|
cooldownPeriod: 300,
|
|
1625
|
+
// Lag is measured in MESSAGES; with chunked bulk dispatch each
|
|
1626
|
+
// message is a bounded unit of work (~50-150ms), so 50 messages
|
|
1627
|
+
// approximates 5-8s of backlog for a single worker - one replica
|
|
1628
|
+
// is added per ~5s of fleet backlog, biasing toward early
|
|
1629
|
+
// scale-out for bursty traffic.
|
|
570
1630
|
lagThreshold: 50,
|
|
571
1631
|
cpuThreshold: 25,
|
|
572
1632
|
},
|
|
573
|
-
|
|
574
|
-
|
|
1633
|
+
podLabels: applicationPodLabels,
|
|
1634
|
+
// Burst tier: first preemption victims, so critical infrastructure
|
|
1635
|
+
// can always reschedule during an aggressive scale-out.
|
|
1636
|
+
priorityClassName: burstPriorityClass,
|
|
1637
|
+
...workerScheduling,
|
|
575
1638
|
},
|
|
576
1639
|
},
|
|
577
1640
|
// Ingress configuration
|
|
@@ -580,74 +1643,138 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
580
1643
|
className: "traefik",
|
|
581
1644
|
paths: [{ path: "/", pathType: "Prefix" }],
|
|
582
1645
|
},
|
|
583
|
-
// Redis configuration
|
|
584
|
-
redis:
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
persistence: {
|
|
588
|
-
enabled: true,
|
|
589
|
-
size: tierConfig.redisPersistenceSize,
|
|
590
|
-
storageClass: storageClass,
|
|
591
|
-
},
|
|
592
|
-
},
|
|
1646
|
+
// Redis configuration (in-cluster sizing or external connection settings)
|
|
1647
|
+
redis: generateRedisBlock(config, storageClass, infrastructurePodLabels, coreScheduling),
|
|
1648
|
+
cache: generateCacheObservabilityBlock(config, infrastructurePodLabels),
|
|
1649
|
+
kafkaExporter: generateKafkaExporterBlock(config, infrastructurePodLabels),
|
|
593
1650
|
},
|
|
594
1651
|
// =============================================================================
|
|
595
1652
|
// KAFKA (Message Queue)
|
|
596
1653
|
// =============================================================================
|
|
597
1654
|
kafka: {
|
|
598
|
-
enabled:
|
|
599
|
-
//
|
|
600
|
-
|
|
1655
|
+
enabled: !isExternalKafka(config),
|
|
1656
|
+
// Apache Kafka version (must be one the bundled DHI Strimzi operator
|
|
1657
|
+
// supports; DHI strimzi 1.0.1 ships Kafka 4.2.0).
|
|
1658
|
+
version: "4.2.0",
|
|
1659
|
+
// Single combined controller+broker node (KRaft, no ZooKeeper).
|
|
1660
|
+
replicas: TOPIC_REPLICATION_FACTOR,
|
|
1661
|
+
storage: {
|
|
1662
|
+
size: "20Gi",
|
|
1663
|
+
class: storageClass,
|
|
1664
|
+
},
|
|
1665
|
+
// Critical tier: the broker must always be able to preempt burst workers.
|
|
1666
|
+
priorityClassName: criticalPriorityClass,
|
|
1667
|
+
config: generateKafkaConfig(),
|
|
1668
|
+
jvm: {
|
|
1669
|
+
xms: "1g",
|
|
1670
|
+
xmx: "1g",
|
|
1671
|
+
extraOpts: {
|
|
1672
|
+
UseZGC: "true",
|
|
1673
|
+
AlwaysPreTouch: "true",
|
|
1674
|
+
MaxDirectMemorySize: "256M",
|
|
1675
|
+
},
|
|
1676
|
+
},
|
|
1677
|
+
metrics: {
|
|
601
1678
|
enabled: true,
|
|
1679
|
+
serviceMonitor: { enabled: true },
|
|
602
1680
|
},
|
|
603
|
-
|
|
604
|
-
|
|
1681
|
+
// Topics, reconciled by the Strimzi Topic Operator (KafkaTopic CRs) for the
|
|
1682
|
+
// in-cluster broker, or created by the kafka-topic-provision Job for an
|
|
1683
|
+
// external MSK IAM broker.
|
|
1684
|
+
topics: generateKafkaTopics(config),
|
|
1685
|
+
// When false, the chart never creates topics on an external broker - the
|
|
1686
|
+
// operator manages them (and the workload role needs no CreateTopic).
|
|
1687
|
+
provisioning: {
|
|
1688
|
+
enabled: config.externalServices?.kafka?.external?.provisionTopics ?? true,
|
|
605
1689
|
},
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
1690
|
+
},
|
|
1691
|
+
// Strimzi operator: pull secret so the operator pod pulls the private
|
|
1692
|
+
// rulebricks/* image from index.docker.io.
|
|
1693
|
+
"strimzi-kafka-operator": {
|
|
1694
|
+
image: { imagePullSecrets: rulebricksPullSecret },
|
|
1695
|
+
},
|
|
1696
|
+
// =============================================================================
|
|
1697
|
+
// VECTOR KAFKA BRIDGE (AWS MSK IAM token auth)
|
|
1698
|
+
// =============================================================================
|
|
1699
|
+
kafkaBridge: generateKafkaBridge(config),
|
|
1700
|
+
clickhouse: {
|
|
1701
|
+
enabled: true,
|
|
1702
|
+
// Critical tier: single replica must preempt burst workers to
|
|
1703
|
+
// reschedule; never autoscaler-evicted on scale-down.
|
|
1704
|
+
priorityClassName: criticalPriorityClass,
|
|
1705
|
+
podAnnotations: safeToEvictAnnotations,
|
|
1706
|
+
auth: {
|
|
1707
|
+
username: "rulebricks",
|
|
1708
|
+
password: "",
|
|
1709
|
+
existingSecret: '{{ printf "%s-clickhouse-credentials" .Release.Name }}',
|
|
1710
|
+
existingSecretKey: "admin-password",
|
|
1711
|
+
},
|
|
1712
|
+
persistence: clickStackEnabled
|
|
1713
|
+
? {
|
|
619
1714
|
enabled: true,
|
|
620
|
-
size: tierConfig.kafkaStorage,
|
|
621
1715
|
storageClass: storageClass,
|
|
1716
|
+
size: clickHouseStorageSize,
|
|
1717
|
+
}
|
|
1718
|
+
: { enabled: false },
|
|
1719
|
+
resources: clickStackEnabled
|
|
1720
|
+
? {
|
|
1721
|
+
requests: { cpu: "1000m", memory: "4Gi" },
|
|
1722
|
+
limits: { cpu: "4", memory: "12Gi" },
|
|
1723
|
+
}
|
|
1724
|
+
: {
|
|
1725
|
+
requests: { cpu: "500m", memory: "2Gi" },
|
|
1726
|
+
limits: { cpu: "2", memory: "6Gi" },
|
|
622
1727
|
},
|
|
623
|
-
|
|
624
|
-
|
|
1728
|
+
serviceAccount: {
|
|
1729
|
+
create: true,
|
|
1730
|
+
annotations: {},
|
|
625
1731
|
},
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
controller: {
|
|
631
|
-
protocol: "PLAINTEXT",
|
|
632
|
-
},
|
|
633
|
-
interbroker: {
|
|
634
|
-
protocol: "PLAINTEXT",
|
|
1732
|
+
metrics: {
|
|
1733
|
+
enabled: true,
|
|
1734
|
+
serviceMonitor: {
|
|
1735
|
+
enabled: true,
|
|
635
1736
|
},
|
|
636
1737
|
},
|
|
1738
|
+
queryLimits: {
|
|
1739
|
+
maxMemoryUsage: 4294967296,
|
|
1740
|
+
maxThreads: 4,
|
|
1741
|
+
maxExecutionTime: 120,
|
|
1742
|
+
maxRowsToRead: 50000000,
|
|
1743
|
+
readOverflowMode: "break",
|
|
1744
|
+
},
|
|
1745
|
+
otelQueryLimits: {
|
|
1746
|
+
maxMemoryUsage: 4294967296,
|
|
1747
|
+
maxThreads: 8,
|
|
1748
|
+
maxExecutionTime: 120,
|
|
1749
|
+
},
|
|
1750
|
+
otelDatabase: "otel",
|
|
1751
|
+
// config.d / users.d / the decision-log view are rendered by the parent
|
|
1752
|
+
// chart's clickhouse templates (no longer passed as Bitnami subchart values).
|
|
637
1753
|
},
|
|
638
1754
|
// =============================================================================
|
|
639
1755
|
// TRAEFIK (Ingress Controller)
|
|
640
1756
|
// =============================================================================
|
|
641
1757
|
traefik: {
|
|
642
1758
|
enabled: true,
|
|
1759
|
+
// traefik has no global.imageRegistry path: set registry + repository
|
|
1760
|
+
// directly (host = reg, rulebricks/* path).
|
|
1761
|
+
image: {
|
|
1762
|
+
registry: reg,
|
|
1763
|
+
repository: IMAGE_REPOSITORIES.traefik,
|
|
1764
|
+
},
|
|
1765
|
+
deployment: {
|
|
1766
|
+
imagePullSecrets: rulebricksPullSecret,
|
|
1767
|
+
},
|
|
643
1768
|
ingressClass: {
|
|
644
1769
|
name: "traefik",
|
|
645
1770
|
},
|
|
646
|
-
|
|
1771
|
+
...coreScheduling,
|
|
647
1772
|
autoscaling: {
|
|
648
1773
|
enabled: true,
|
|
649
1774
|
minReplicas: 1,
|
|
650
|
-
|
|
1775
|
+
// Headroom for colocated clients pushing multi-hundred-RPS bulk
|
|
1776
|
+
// traffic through the ingress.
|
|
1777
|
+
maxReplicas: 4,
|
|
651
1778
|
},
|
|
652
1779
|
resources: {
|
|
653
1780
|
requests: {
|
|
@@ -670,11 +1797,26 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
670
1797
|
websecure: {
|
|
671
1798
|
port: 8443,
|
|
672
1799
|
exposedPort: 443,
|
|
673
|
-
tls
|
|
674
|
-
|
|
1800
|
+
// traefik 41.x moved per-entrypoint TLS under ports.<name>.http.tls
|
|
1801
|
+
// (the old ports.<name>.tls location is rejected by the chart schema).
|
|
1802
|
+
http: {
|
|
1803
|
+
tls: {
|
|
1804
|
+
enabled: tlsEnabled,
|
|
1805
|
+
},
|
|
1806
|
+
},
|
|
1807
|
+
},
|
|
1808
|
+
},
|
|
1809
|
+
metrics: {
|
|
1810
|
+
prometheus: {
|
|
1811
|
+
enabled: true,
|
|
1812
|
+
serviceMonitor: {
|
|
1813
|
+
enabled: false,
|
|
675
1814
|
},
|
|
676
1815
|
},
|
|
677
1816
|
},
|
|
1817
|
+
// OTLP tracing: ingress becomes the root span and propagates traceparent
|
|
1818
|
+
// to backends. Empty object when tracing is disabled.
|
|
1819
|
+
tracing: generateTraefikTracing(config, releaseName),
|
|
678
1820
|
persistence: {
|
|
679
1821
|
enabled: false,
|
|
680
1822
|
},
|
|
@@ -684,7 +1826,29 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
684
1826
|
// =============================================================================
|
|
685
1827
|
keda: {
|
|
686
1828
|
enabled: true,
|
|
687
|
-
|
|
1829
|
+
imagePullSecrets: rulebricksPullSecret,
|
|
1830
|
+
// keda reads global.image.registry (NOT global.imageRegistry) for the host;
|
|
1831
|
+
// set it plus the rulebricks/* repositories for all three sub-images.
|
|
1832
|
+
global: {
|
|
1833
|
+
image: {
|
|
1834
|
+
registry: reg,
|
|
1835
|
+
},
|
|
1836
|
+
},
|
|
1837
|
+
image: {
|
|
1838
|
+
keda: {
|
|
1839
|
+
registry: reg,
|
|
1840
|
+
repository: IMAGE_REPOSITORIES.keda,
|
|
1841
|
+
},
|
|
1842
|
+
metricsApiServer: {
|
|
1843
|
+
registry: reg,
|
|
1844
|
+
repository: IMAGE_REPOSITORIES.kedaMetricsApiServer,
|
|
1845
|
+
},
|
|
1846
|
+
webhooks: {
|
|
1847
|
+
registry: reg,
|
|
1848
|
+
repository: IMAGE_REPOSITORIES.kedaAdmissionWebhooks,
|
|
1849
|
+
},
|
|
1850
|
+
},
|
|
1851
|
+
...coreScheduling,
|
|
688
1852
|
crds: {
|
|
689
1853
|
install: false, // CRDs managed in parent chart
|
|
690
1854
|
},
|
|
@@ -694,13 +1858,41 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
694
1858
|
// =============================================================================
|
|
695
1859
|
"cert-manager": {
|
|
696
1860
|
enabled: tlsEnabled,
|
|
697
|
-
|
|
698
|
-
|
|
1861
|
+
// CRDs managed in parent chart (cert-manager v1.15+ uses crds.enabled,
|
|
1862
|
+
// not the deprecated installCRDs flag).
|
|
1863
|
+
crds: { enabled: false },
|
|
1864
|
+
// cert-manager prepends image.registry to image.repository, so set both per
|
|
1865
|
+
// component (host = reg, rulebricks/cert-manager-* path).
|
|
1866
|
+
image: {
|
|
1867
|
+
registry: reg,
|
|
1868
|
+
repository: IMAGE_REPOSITORIES.certManagerController,
|
|
1869
|
+
},
|
|
1870
|
+
...coreScheduling,
|
|
699
1871
|
webhook: {
|
|
700
|
-
|
|
1872
|
+
image: {
|
|
1873
|
+
registry: reg,
|
|
1874
|
+
repository: IMAGE_REPOSITORIES.certManagerWebhook,
|
|
1875
|
+
},
|
|
1876
|
+
...coreScheduling,
|
|
701
1877
|
},
|
|
702
1878
|
cainjector: {
|
|
703
|
-
|
|
1879
|
+
image: {
|
|
1880
|
+
registry: reg,
|
|
1881
|
+
repository: IMAGE_REPOSITORIES.certManagerCainjector,
|
|
1882
|
+
},
|
|
1883
|
+
...coreScheduling,
|
|
1884
|
+
},
|
|
1885
|
+
startupapicheck: {
|
|
1886
|
+
image: {
|
|
1887
|
+
registry: reg,
|
|
1888
|
+
repository: IMAGE_REPOSITORIES.certManagerStartupapicheck,
|
|
1889
|
+
},
|
|
1890
|
+
},
|
|
1891
|
+
acmesolver: {
|
|
1892
|
+
image: {
|
|
1893
|
+
registry: reg,
|
|
1894
|
+
repository: IMAGE_REPOSITORIES.certManagerAcmesolver,
|
|
1895
|
+
},
|
|
704
1896
|
},
|
|
705
1897
|
},
|
|
706
1898
|
// Cluster Issuer for Let's Encrypt
|
|
@@ -714,12 +1906,20 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
714
1906
|
// =============================================================================
|
|
715
1907
|
vector: {
|
|
716
1908
|
enabled: true,
|
|
1909
|
+
// vector's image.repository is the FULL path including host (no separate
|
|
1910
|
+
// registry field), so the reg host is prefixed here.
|
|
1911
|
+
image: {
|
|
1912
|
+
repository: `${reg}/${IMAGE_REPOSITORIES.vector}`,
|
|
1913
|
+
pullSecrets: rulebricksPullSecret,
|
|
1914
|
+
},
|
|
717
1915
|
role: "Stateless-Aggregator",
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
tolerations: arm64Tolerations,
|
|
1916
|
+
// Replica count and resources fall back to the chart defaults.
|
|
1917
|
+
...coreScheduling,
|
|
721
1918
|
serviceAccount: generateVectorServiceAccount(config),
|
|
722
1919
|
podLabels: generateVectorPodLabels(config),
|
|
1920
|
+
...(generateVectorExtraContainers(config)
|
|
1921
|
+
? { extraContainers: generateVectorExtraContainers(config) }
|
|
1922
|
+
: {}),
|
|
723
1923
|
service: {
|
|
724
1924
|
enabled: true,
|
|
725
1925
|
ports: [{ name: "api", port: 8686, protocol: "TCP", targetPort: 8686 }],
|
|
@@ -731,90 +1931,274 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
731
1931
|
kafka: {
|
|
732
1932
|
type: "kafka",
|
|
733
1933
|
bootstrap_servers: "${KAFKA_BOOTSTRAP_SERVERS:-rulebricks-kafka:9092}",
|
|
734
|
-
|
|
1934
|
+
// KAFKA_LOG_TOPIC carries the namespace prefix (e.g. com.rulebricks.logs).
|
|
1935
|
+
topics: ["${KAFKA_LOG_TOPIC:-logs}"],
|
|
735
1936
|
group_id: "vector-consumers",
|
|
736
1937
|
auto_offset_reset: "latest",
|
|
1938
|
+
// TLS + SASL driven by env from vector-kafka-env (disabled for
|
|
1939
|
+
// in-cluster Kafka and the kafka-proxy bridge path).
|
|
1940
|
+
tls: { enabled: "${KAFKA_TLS_ENABLED:-false}" },
|
|
1941
|
+
sasl: {
|
|
1942
|
+
enabled: "${KAFKA_SASL_ENABLED:-false}",
|
|
1943
|
+
mechanism: "${KAFKA_SASL_MECHANISM:-PLAIN}",
|
|
1944
|
+
// username/password are only emitted for external Kafka using a
|
|
1945
|
+
// direct PLAIN/SCRAM credential (where vector-kafka-credentials is
|
|
1946
|
+
// populated). Emitting them with an empty default would render as
|
|
1947
|
+
// YAML null and crash Vector at config load; omitting the keys
|
|
1948
|
+
// leaves them unset (valid) whenever SASL is disabled.
|
|
1949
|
+
...(kafkaUsesDirectSasl(config)
|
|
1950
|
+
? {
|
|
1951
|
+
username: "${KAFKA_SASL_USERNAME}",
|
|
1952
|
+
password: "${KAFKA_SASL_PASSWORD}",
|
|
1953
|
+
}
|
|
1954
|
+
: {}),
|
|
1955
|
+
},
|
|
1956
|
+
},
|
|
1957
|
+
},
|
|
1958
|
+
transforms: {
|
|
1959
|
+
normalize_logs: {
|
|
1960
|
+
type: "remap",
|
|
1961
|
+
inputs: ["kafka"],
|
|
1962
|
+
source: VECTOR_NORMALIZE_LOGS_VRL,
|
|
737
1963
|
},
|
|
738
1964
|
},
|
|
739
1965
|
sinks: generateVectorSinks(config),
|
|
740
1966
|
},
|
|
741
1967
|
},
|
|
742
1968
|
// =============================================================================
|
|
1969
|
+
// VECTOR AGENT (Application / container logs -> Elasticsearch)
|
|
1970
|
+
// =============================================================================
|
|
1971
|
+
"vector-agent": clickStackEnabled
|
|
1972
|
+
? { enabled: false }
|
|
1973
|
+
: {
|
|
1974
|
+
...generateVectorAgent(config, infrastructurePodLabels, operationalDaemonSetTolerations),
|
|
1975
|
+
// Full-path repository (see vector above) + pull secret.
|
|
1976
|
+
image: {
|
|
1977
|
+
repository: `${reg}/${IMAGE_REPOSITORIES.vector}`,
|
|
1978
|
+
pullSecrets: rulebricksPullSecret,
|
|
1979
|
+
},
|
|
1980
|
+
},
|
|
1981
|
+
// =============================================================================
|
|
743
1982
|
// SUPABASE (Self-hosted Database)
|
|
744
1983
|
// =============================================================================
|
|
745
1984
|
supabase: {
|
|
746
1985
|
enabled: config.database.type === "self-hosted",
|
|
747
1986
|
...(config.database.type === "self-hosted"
|
|
748
|
-
? {
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
1987
|
+
? (() => {
|
|
1988
|
+
// External managed Postgres (AWS RDS / Azure Flexible Server): the
|
|
1989
|
+
// self-hosted Supabase services run against it instead of the
|
|
1990
|
+
// bundled in-cluster database.
|
|
1991
|
+
const pgExt = config.externalServices?.postgres?.mode === "external"
|
|
1992
|
+
? config.externalServices?.postgres?.external
|
|
1993
|
+
: undefined;
|
|
1994
|
+
return {
|
|
1995
|
+
secret: {
|
|
1996
|
+
db: {
|
|
1997
|
+
username: "postgres",
|
|
1998
|
+
// Shared service-role password (authenticator / auth_admin /
|
|
1999
|
+
// replication_admin). With an external DB the bootstrap hook
|
|
2000
|
+
// sets the roles to this same value.
|
|
2001
|
+
password: config.database.supabaseDbPassword,
|
|
2002
|
+
database: pgExt?.database || "postgres",
|
|
2003
|
+
},
|
|
2004
|
+
dashboard: {
|
|
2005
|
+
username: config.database.supabaseDashboardUser || "supabase",
|
|
2006
|
+
password: config.database.supabaseDashboardPass,
|
|
2007
|
+
},
|
|
2008
|
+
jwt: {
|
|
2009
|
+
secret: config.database.supabaseJwtSecret,
|
|
2010
|
+
},
|
|
2011
|
+
// SECRET_KEY_BASE / DB_ENC_KEY, derived from the JWT secret
|
|
2012
|
+
// (stable across redeploys). The chart no longer ships defaults.
|
|
2013
|
+
realtime: deriveRealtimeSecrets(config.database.supabaseJwtSecret || ""),
|
|
754
2014
|
},
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
2015
|
+
...(pgExt
|
|
2016
|
+
? {
|
|
2017
|
+
// One switch: enabling externalDatabase disables the bundled
|
|
2018
|
+
// Postgres and runs the bootstrap hook to initialize the
|
|
2019
|
+
// managed instance. db.enabled=false is explicit so chart
|
|
2020
|
+
// schema rules keyed off it hold.
|
|
2021
|
+
db: { enabled: false },
|
|
2022
|
+
externalDatabase: {
|
|
2023
|
+
enabled: true,
|
|
2024
|
+
host: pgExt.host ?? "",
|
|
2025
|
+
port: pgExt.port ?? 5432,
|
|
2026
|
+
bootstrap: {
|
|
2027
|
+
enabled: pgExt.bootstrap?.enabled ?? true,
|
|
2028
|
+
masterUsername: pgExt.bootstrap?.masterUsername ?? "postgres",
|
|
2029
|
+
masterPassword: pgExt.bootstrap?.masterPassword ?? "",
|
|
2030
|
+
appRole: pgExt.bootstrap?.appRole ?? "postgres",
|
|
2031
|
+
},
|
|
2032
|
+
},
|
|
2033
|
+
}
|
|
2034
|
+
: {
|
|
2035
|
+
db: {
|
|
2036
|
+
// Explicit so chart schema rules that key off
|
|
2037
|
+
// supabase.db.enabled (e.g. Database Backup Storage
|
|
2038
|
+
// Validation) hold without relying on subchart-default
|
|
2039
|
+
// coalescing.
|
|
2040
|
+
enabled: true,
|
|
2041
|
+
image: {
|
|
2042
|
+
// Split shape: the supabase.image helper applies
|
|
2043
|
+
// global.imageRegistry to the host. Host never in repository.
|
|
2044
|
+
registry: reg,
|
|
2045
|
+
repository: SUPABASE_POSTGRES_IMAGE_REPOSITORY,
|
|
2046
|
+
tag: SUPABASE_POSTGRES_IMAGE_TAG,
|
|
2047
|
+
pullPolicy: "IfNotPresent",
|
|
2048
|
+
},
|
|
2049
|
+
podLabels: infrastructurePodLabels,
|
|
2050
|
+
// Critical tier: the primary datastore must preempt burst
|
|
2051
|
+
// workers to reschedule; never autoscaler-evicted.
|
|
2052
|
+
// Resources and persistence size fall back to chart
|
|
2053
|
+
// defaults.
|
|
2054
|
+
priorityClassName: criticalPriorityClass,
|
|
2055
|
+
podAnnotations: safeToEvictAnnotations,
|
|
2056
|
+
...coreScheduling,
|
|
2057
|
+
persistence: {
|
|
2058
|
+
enabled: true,
|
|
2059
|
+
storageClassName: storageClass,
|
|
2060
|
+
},
|
|
2061
|
+
},
|
|
2062
|
+
}),
|
|
2063
|
+
auth: {
|
|
2064
|
+
// Explicit public URLs so GoTrue never falls back to the
|
|
2065
|
+
// in-cluster Kong service name when global.domain propagation
|
|
2066
|
+
// is lost (e.g. after manual patching or partial upgrades).
|
|
2067
|
+
siteUrl: `https://${config.domain}`,
|
|
2068
|
+
externalUrl: `https://supabase.${config.domain}`,
|
|
2069
|
+
...coreScheduling,
|
|
758
2070
|
},
|
|
759
|
-
|
|
760
|
-
|
|
2071
|
+
rest: {
|
|
2072
|
+
...coreScheduling,
|
|
761
2073
|
},
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
resources: tierConfig.dbResources,
|
|
765
|
-
tolerations: arm64Tolerations,
|
|
766
|
-
persistence: {
|
|
767
|
-
enabled: true,
|
|
768
|
-
size: tierConfig.dbPersistenceSize,
|
|
769
|
-
storageClassName: storageClass,
|
|
2074
|
+
realtime: {
|
|
2075
|
+
...coreScheduling,
|
|
770
2076
|
},
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
tolerations: arm64Tolerations,
|
|
774
|
-
},
|
|
775
|
-
rest: {
|
|
776
|
-
tolerations: arm64Tolerations,
|
|
777
|
-
},
|
|
778
|
-
realtime: {
|
|
779
|
-
tolerations: arm64Tolerations,
|
|
780
|
-
},
|
|
781
|
-
meta: {
|
|
782
|
-
tolerations: arm64Tolerations,
|
|
783
|
-
},
|
|
784
|
-
kong: {
|
|
785
|
-
tolerations: arm64Tolerations,
|
|
786
|
-
ingress: {
|
|
787
|
-
enabled: true,
|
|
788
|
-
className: "traefik",
|
|
789
|
-
annotations: {},
|
|
2077
|
+
meta: {
|
|
2078
|
+
...coreScheduling,
|
|
790
2079
|
},
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
2080
|
+
kong: {
|
|
2081
|
+
...coreScheduling,
|
|
2082
|
+
ingress: {
|
|
2083
|
+
enabled: true,
|
|
2084
|
+
className: "traefik",
|
|
2085
|
+
// The supabase subchart's kong ingress does NOT emit Traefik's
|
|
2086
|
+
// router.entrypoints/router.tls annotations the way the app
|
|
2087
|
+
// ingress does — without them Traefik only builds a web (HTTP)
|
|
2088
|
+
// router, so https://supabase.<domain> 404s and the app can't
|
|
2089
|
+
// reach Supabase. Inject them via the subchart's annotations
|
|
2090
|
+
// passthrough (kong/ingress.yaml ranges over these), matching
|
|
2091
|
+
// charts/rulebricks/templates/ingress.yaml.
|
|
2092
|
+
annotations: {
|
|
2093
|
+
"traefik.ingress.kubernetes.io/router.entrypoints": tlsEnabled ? "websecure" : "web",
|
|
2094
|
+
"traefik.ingress.kubernetes.io/router.tls": tlsEnabled
|
|
2095
|
+
? "true"
|
|
2096
|
+
: "false",
|
|
2097
|
+
},
|
|
2098
|
+
},
|
|
2099
|
+
},
|
|
2100
|
+
studio: {
|
|
2101
|
+
...coreScheduling,
|
|
2102
|
+
},
|
|
2103
|
+
};
|
|
2104
|
+
})()
|
|
796
2105
|
: {}),
|
|
797
2106
|
},
|
|
798
2107
|
// =============================================================================
|
|
799
2108
|
// MONITORING
|
|
800
2109
|
// =============================================================================
|
|
801
2110
|
monitoring: {
|
|
802
|
-
enabled:
|
|
2111
|
+
enabled: true,
|
|
803
2112
|
},
|
|
804
2113
|
"kube-prometheus-stack": {
|
|
805
|
-
enabled:
|
|
2114
|
+
enabled: true,
|
|
2115
|
+
// kube-prometheus-stack honors the parent global.imageRegistry for the host
|
|
2116
|
+
// automatically; the CLI sets the rulebricks/* repository defaults (and the
|
|
2117
|
+
// reg host explicitly) for every sub-image so a bare helm install also pulls
|
|
2118
|
+
// rulebricks/*.
|
|
806
2119
|
alertmanager: {
|
|
807
2120
|
enabled: false,
|
|
2121
|
+
alertmanagerSpec: {
|
|
2122
|
+
image: {
|
|
2123
|
+
registry: reg,
|
|
2124
|
+
repository: IMAGE_REPOSITORIES.alertmanager,
|
|
2125
|
+
},
|
|
2126
|
+
},
|
|
2127
|
+
},
|
|
2128
|
+
prometheusOperator: {
|
|
2129
|
+
image: {
|
|
2130
|
+
registry: reg,
|
|
2131
|
+
repository: IMAGE_REPOSITORIES.prometheusOperator,
|
|
2132
|
+
},
|
|
2133
|
+
prometheusConfigReloader: {
|
|
2134
|
+
image: {
|
|
2135
|
+
registry: reg,
|
|
2136
|
+
repository: IMAGE_REPOSITORIES.prometheusConfigReloader,
|
|
2137
|
+
},
|
|
2138
|
+
},
|
|
2139
|
+
admissionWebhooks: {
|
|
2140
|
+
patch: {
|
|
2141
|
+
image: {
|
|
2142
|
+
registry: reg,
|
|
2143
|
+
repository: IMAGE_REPOSITORIES.kubeWebhookCertgen,
|
|
2144
|
+
},
|
|
2145
|
+
},
|
|
2146
|
+
},
|
|
2147
|
+
},
|
|
2148
|
+
"kube-state-metrics": {
|
|
2149
|
+
image: {
|
|
2150
|
+
registry: reg,
|
|
2151
|
+
repository: IMAGE_REPOSITORIES.kubeStateMetrics,
|
|
2152
|
+
},
|
|
2153
|
+
},
|
|
2154
|
+
"prometheus-node-exporter": {
|
|
2155
|
+
image: {
|
|
2156
|
+
registry: reg,
|
|
2157
|
+
repository: IMAGE_REPOSITORIES.nodeExporter,
|
|
2158
|
+
},
|
|
808
2159
|
},
|
|
809
2160
|
grafana: {
|
|
810
2161
|
enabled: useLocalGrafana,
|
|
2162
|
+
image: {
|
|
2163
|
+
registry: reg,
|
|
2164
|
+
repository: IMAGE_REPOSITORIES.grafana,
|
|
2165
|
+
},
|
|
2166
|
+
// Dashboard sidecar imports the provisioned Rulebricks dashboards
|
|
2167
|
+
// (ConfigMaps labeled grafana_dashboard="1") when in-cluster Grafana
|
|
2168
|
+
// is enabled.
|
|
2169
|
+
sidecar: {
|
|
2170
|
+
image: {
|
|
2171
|
+
registry: reg,
|
|
2172
|
+
repository: IMAGE_REPOSITORIES.k8sSidecar,
|
|
2173
|
+
},
|
|
2174
|
+
...(useLocalGrafana
|
|
2175
|
+
? {
|
|
2176
|
+
dashboards: {
|
|
2177
|
+
enabled: true,
|
|
2178
|
+
label: "grafana_dashboard",
|
|
2179
|
+
labelValue: "1",
|
|
2180
|
+
searchNamespace: "ALL",
|
|
2181
|
+
folderAnnotation: "grafana_folder",
|
|
2182
|
+
provider: { foldersFromFilesStructure: true },
|
|
2183
|
+
},
|
|
2184
|
+
}
|
|
2185
|
+
: {}),
|
|
2186
|
+
},
|
|
811
2187
|
},
|
|
812
2188
|
prometheus: {
|
|
813
|
-
enabled:
|
|
2189
|
+
enabled: true,
|
|
814
2190
|
serviceAccount: generatePrometheusServiceAccount(config),
|
|
815
2191
|
prometheusSpec: {
|
|
816
2192
|
retention: "30d",
|
|
2193
|
+
image: {
|
|
2194
|
+
registry: reg,
|
|
2195
|
+
repository: IMAGE_REPOSITORIES.prometheus,
|
|
2196
|
+
},
|
|
817
2197
|
podMetadata: generatePrometheusPodMetadata(config),
|
|
2198
|
+
serviceMonitorSelectorNilUsesHelmValues: false,
|
|
2199
|
+
serviceMonitorSelector: {},
|
|
2200
|
+
podMonitorSelectorNilUsesHelmValues: false,
|
|
2201
|
+
podMonitorSelector: {},
|
|
818
2202
|
storageSpec: {
|
|
819
2203
|
volumeClaimTemplate: {
|
|
820
2204
|
spec: {
|
|
@@ -828,7 +2212,9 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
828
2212
|
},
|
|
829
2213
|
},
|
|
830
2214
|
},
|
|
831
|
-
remoteWrite:
|
|
2215
|
+
remoteWrite: [
|
|
2216
|
+
...(clickStackEnabled ? [] : generateRemoteWriteSpec(config)),
|
|
2217
|
+
],
|
|
832
2218
|
},
|
|
833
2219
|
},
|
|
834
2220
|
},
|
|
@@ -836,20 +2222,21 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
836
2222
|
// STORAGE CLASS
|
|
837
2223
|
// =============================================================================
|
|
838
2224
|
storageClass: {
|
|
839
|
-
create:
|
|
2225
|
+
create: false,
|
|
840
2226
|
name: storageClass,
|
|
841
|
-
provisioner: config.infrastructure.
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
2227
|
+
provisioner: config.infrastructure.storageProvisioner ||
|
|
2228
|
+
(config.infrastructure.provider === "aws"
|
|
2229
|
+
? "ebs.csi.aws.com"
|
|
2230
|
+
: config.infrastructure.provider === "gcp"
|
|
2231
|
+
? "pd.csi.storage.gke.io"
|
|
2232
|
+
: config.infrastructure.provider === "azure"
|
|
2233
|
+
? "disk.csi.azure.com"
|
|
2234
|
+
: "ebs.csi.aws.com"),
|
|
848
2235
|
// Parameters for the StorageClass - must include type for disk provisioning
|
|
849
2236
|
parameters: config.infrastructure.provider === "aws"
|
|
850
2237
|
? { type: "gp3" }
|
|
851
2238
|
: config.infrastructure.provider === "gcp"
|
|
852
|
-
? { type:
|
|
2239
|
+
? { type: gcpDiskType }
|
|
853
2240
|
: config.infrastructure.provider === "azure"
|
|
854
2241
|
? { skuName: "Premium_LRS" }
|
|
855
2242
|
: { type: "gp3" },
|
|
@@ -864,7 +2251,13 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
864
2251
|
"external-dns": externalDnsEnabled
|
|
865
2252
|
? {
|
|
866
2253
|
enabled: true,
|
|
867
|
-
|
|
2254
|
+
// external-dns has NO image.registry field: image.repository is the
|
|
2255
|
+
// FULL path including host (reg prefix + rulebricks/external-dns).
|
|
2256
|
+
image: {
|
|
2257
|
+
repository: `${reg}/${IMAGE_REPOSITORIES.externalDns}`,
|
|
2258
|
+
},
|
|
2259
|
+
// external-dns 1.21+ idiom: provider is an object ({name: ...}).
|
|
2260
|
+
provider: { name: getExternalDnsProvider(config.dns.provider) },
|
|
868
2261
|
domainFilters: [config.domain],
|
|
869
2262
|
sources: ["ingress", "service"],
|
|
870
2263
|
policy: "upsert-only",
|
|
@@ -873,6 +2266,149 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
873
2266
|
enabled: false,
|
|
874
2267
|
},
|
|
875
2268
|
};
|
|
2269
|
+
// The managed-Postgres migration hook (templates/migration-job.yaml) reads the
|
|
2270
|
+
// DB host/port from .Values.migrations.externalDb — a SEPARATE seam from
|
|
2271
|
+
// supabase.externalDatabase.* — and its `pg_isready -h $DB_HOST` loop hangs
|
|
2272
|
+
// forever (empty host) if it is unset. Wire it for external Postgres. We only
|
|
2273
|
+
// set host/port: DB_PASSWORD falls back to the <release>-supabase-db secret and
|
|
2274
|
+
// DB_USER/DB_NAME default to "postgres", which match deploymentSecretNames()
|
|
2275
|
+
// and the bootstrap app role.
|
|
2276
|
+
const migrationsPgExt = config.database.type === "self-hosted" &&
|
|
2277
|
+
config.externalServices?.postgres?.mode === "external"
|
|
2278
|
+
? config.externalServices.postgres.external
|
|
2279
|
+
: undefined;
|
|
2280
|
+
if (migrationsPgExt) {
|
|
2281
|
+
values.migrations = {
|
|
2282
|
+
externalDb: {
|
|
2283
|
+
host: migrationsPgExt.host ?? "",
|
|
2284
|
+
// Chart schema requires a string here (the template quotes it).
|
|
2285
|
+
port: String(migrationsPgExt.port ?? 5432),
|
|
2286
|
+
// Run migrations as the master/app_role. The bootstrap hook creates the
|
|
2287
|
+
// service login roles (authenticator, supabase_auth_admin, …) with the
|
|
2288
|
+
// service password but deliberately does NOT change the master's
|
|
2289
|
+
// password (bootstrap.sql runs "as the master user (named postgres)").
|
|
2290
|
+
// So the migrate hook must authenticate with the MASTER credential, not
|
|
2291
|
+
// the service password in <release>-supabase-db (that would 401). Point
|
|
2292
|
+
// DB_PASSWORD at the bootstrap Secret's master-password.
|
|
2293
|
+
existingSecret: deploymentSecretNames(config).dbBootstrap,
|
|
2294
|
+
existingSecretKey: "master-password",
|
|
2295
|
+
},
|
|
2296
|
+
};
|
|
2297
|
+
}
|
|
2298
|
+
// In k8s secret mode, the CLI creates Kubernetes Secrets and the chart reads
|
|
2299
|
+
// them by reference. Point the chart's secretRef seams at those Secrets and
|
|
2300
|
+
// strip every plaintext secret out of the generated values.
|
|
2301
|
+
if (secretMode === "k8s") {
|
|
2302
|
+
return redactSecretsToRefs(values, config);
|
|
2303
|
+
}
|
|
2304
|
+
return values;
|
|
2305
|
+
}
|
|
2306
|
+
/**
 * Converts generated Helm values to k8s secret mode.
 *
 * Every chart `secretRef` seam is pointed at the Secret name returned by
 * `deploymentSecretNames(config)`, and the matching inline plaintext
 * credentials are removed so they are not carried in the generated values.
 *
 * The `values` object is modified in place and the same reference is returned.
 *
 * Deliberately left inline:
 *  - `global.supabase.anonKey`: the public Supabase key. The app ConfigMap
 *    reads it at template time and there is no secretRef seam for it, so
 *    removing it would leave the browser client with an empty key.
 *  - `global.licenseKey`: the chart builds the image-pull secret from it at
 *    template time and an imagePullSecret cannot be sourced from a secretRef;
 *    removing it would make private image pulls fail. It already lives in the
 *    deployment's config.yaml, so keeping it inline adds no new exposure.
 *
 * @param {object} values - Generated Helm values (mutated).
 * @param {object} config - Deployment configuration.
 * @returns {object} The same `values` object, with secrets replaced by refs.
 */
export function redactSecretsToRefs(values, config) {
    const names = deploymentSecretNames(config);
    const globalValues = values.global ?? {};
    const supabaseValues = values.supabase ?? {};

    // External managed Postgres only applies to the self-hosted database type.
    let externalPostgres;
    if (config.database.type === "self-hosted" &&
        config.externalServices?.postgres?.mode === "external") {
        externalPostgres = config.externalServices.postgres.external;
    }

    // Builds a new key map on every call so the two places that use it never
    // share a single object reference.
    const connectionKeyMap = () => ({
        host: "host",
        port: "port",
        username: "username",
        password: "password",
        database: "database",
    });

    // Removes the listed keys from a section when that section is present.
    const dropKeys = (section, keys) => {
        if (!section) {
            return;
        }
        for (const key of keys) {
            delete section[key];
        }
    };

    // App-level consolidated secret: a single secretRef supplies every app cred.
    globalValues.secrets = { ...(globalValues.secrets ?? {}), secretRef: names.app };

    // Inline app/global secrets go; non-secret config (host/from/url) stays.
    dropKeys(globalValues.smtp, ["user", "pass"]);
    // anonKey is intentionally absent from this list (public key, see above).
    dropKeys(globalValues.supabase, ["jwtSecret", "serviceKey", "accessToken"]);
    dropKeys(globalValues.ai, ["openaiApiKey"]);
    dropKeys(globalValues.sso, ["clientId", "clientSecret"]);

    // Supabase subchart: each inline secret block becomes a secretRef.
    if (supabaseValues.secret) {
        const secretRefs = {
            db: externalPostgres
                ? { secretRef: names.db, secretRefKey: connectionKeyMap() }
                : { secretRef: names.db },
            jwt: { secretRef: names.jwt },
            dashboard: { secretRef: names.dashboard },
            realtime: { secretRef: names.realtime },
        };
        // Supabase auth (GoTrue) SMTP ref is added only when SMTP credentials
        // are configured; global.smtp user/pass were stripped above.
        if (config.smtp?.user || config.smtp?.pass) {
            secretRefs.smtp = { secretRef: names.smtp };
        }
        supabaseValues.secret = secretRefs;
    }

    const currentExternalDb = supabaseValues.externalDatabase;
    if (externalPostgres && currentExternalDb) {
        supabaseValues.externalDatabase = {
            ...currentExternalDb,
            // Newer charts read host/port/user/pass/db from this one Secret; the
            // inline host/port spread in above remain for older charts that do
            // not yet support host/port secret keys.
            secretRef: names.db,
            secretRefKey: connectionKeyMap(),
            bootstrap: {
                ...(currentExternalDb.bootstrap ?? {}),
                secretRef: names.dbBootstrap,
                // Master credentials move into the hook Secret in k8s mode.
                masterUsername: undefined,
                masterPassword: undefined,
            },
        };
    }

    values.global = globalValues;
    values.supabase = supabaseValues;
    return values;
}
|
|
2405
|
+
/**
 * Generates the Helm values for a deployment and persists them.
 *
 * Pipeline: build the values from the deployment configuration, validate
 * them, then save them under the deployment's name.
 *
 * @param {object} config - Deployment configuration; `config.name` identifies
 *   the deployment the values are saved for.
 * @param {object} [options={}] - Passed through unchanged to `buildHelmValues`.
 * @returns {Promise<void>} Settles once `saveHelmValues` has completed.
 */
export async function generateHelmValues(config, options = {}) {
    const helmValues = buildHelmValues(config, options);

    // Last-line guardrail: validation runs before the save so values the chart
    // would reject are never written or deployed (presumably by throwing —
    // confirm against assertValidHelmValues).
    assertValidHelmValues(helmValues);

    await saveHelmValues(config.name, helmValues);
}
|
|
878
2414
|
/**
|
|
@@ -908,6 +2444,22 @@ export async function updateHelmValuesForTLS(deploymentName, tlsEnabled) {
|
|
|
908
2444
|
}
|
|
909
2445
|
}
|
|
910
2446
|
}
|
|
2447
|
+
// Keep the supabase kong ingress on the right Traefik entrypoint. The
|
|
2448
|
+
// subchart doesn't emit router.entrypoints/tls itself, so on the TLS-toggle
|
|
2449
|
+
// path (not a full regen) HTTPS to supabase.<domain> would 404 without this.
|
|
2450
|
+
// Mirrors what buildHelmValues sets on the kong ingress annotations.
|
|
2451
|
+
const supabase = values.supabase;
|
|
2452
|
+
const kongIngress = supabase?.kong
|
|
2453
|
+
?.ingress;
|
|
2454
|
+
if (kongIngress && typeof kongIngress === "object") {
|
|
2455
|
+
kongIngress.annotations = {
|
|
2456
|
+
...kongIngress.annotations,
|
|
2457
|
+
"traefik.ingress.kubernetes.io/router.entrypoints": tlsEnabled
|
|
2458
|
+
? "websecure"
|
|
2459
|
+
: "web",
|
|
2460
|
+
"traefik.ingress.kubernetes.io/router.tls": tlsEnabled ? "true" : "false",
|
|
2461
|
+
};
|
|
2462
|
+
}
|
|
911
2463
|
// Save updated values
|
|
912
2464
|
await fs.writeFile(valuesPath, YAML.stringify(values), "utf8");
|
|
913
2465
|
}
|