@rulebricks/cli 2.1.7 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -16
- package/cluster-setup/aws/README.md +96 -47
- package/cluster-setup/aws/check-aws-access.sh +216 -52
- package/cluster-setup/aws/parameters.json +13 -0
- package/cluster-setup/aws/rulebricks-cluster.cfn.yaml +355 -0
- package/cluster-setup/azure/README.md +103 -55
- package/cluster-setup/azure/check-aks-prereqs.sh +236 -56
- package/cluster-setup/azure/parameters.json +30 -0
- package/cluster-setup/azure/rulebricks-cluster.bicep +546 -0
- package/cluster-setup/gcp/README.md +51 -34
- package/cluster-setup/gcp/check-gke-prereqs.sh +222 -60
- package/dist/commands/backup.d.ts +5 -0
- package/dist/commands/backup.js +104 -0
- package/dist/commands/deploy.d.ts +3 -1
- package/dist/commands/deploy.js +226 -326
- package/dist/commands/destroy.d.ts +1 -1
- package/dist/commands/destroy.js +73 -123
- package/dist/commands/init.d.ts +5 -1
- package/dist/commands/init.js +78 -54
- package/dist/commands/list.d.ts +1 -0
- package/dist/commands/list.js +74 -0
- package/dist/commands/open.d.ts +1 -1
- package/dist/commands/open.js +4 -12
- package/dist/commands/redeploy.d.ts +6 -0
- package/dist/commands/redeploy.js +310 -0
- package/dist/commands/restore.d.ts +5 -0
- package/dist/commands/restore.js +338 -0
- package/dist/commands/status.js +62 -49
- package/dist/commands/upgrade.js +74 -51
- package/dist/components/DNSWaitScreen.d.ts +5 -1
- package/dist/components/DNSWaitScreen.js +47 -41
- package/dist/components/Wizard/WizardContext.d.ts +157 -36
- package/dist/components/Wizard/WizardContext.js +872 -160
- package/dist/components/Wizard/steps/CloudProviderStep.js +192 -107
- package/dist/components/Wizard/steps/DomainStep.js +5 -24
- package/dist/components/Wizard/steps/ExternalServicesStep.d.ts +6 -0
- package/dist/components/Wizard/steps/ExternalServicesStep.js +645 -0
- package/dist/components/Wizard/steps/FeatureConfigStep.d.ts +2 -1
- package/dist/components/Wizard/steps/FeatureConfigStep.js +739 -425
- package/dist/components/Wizard/steps/FeaturesStep.js +31 -35
- package/dist/components/Wizard/steps/ObservabilityStep.d.ts +6 -0
- package/dist/components/Wizard/steps/ObservabilityStep.js +137 -0
- package/dist/components/Wizard/steps/ReviewStep.d.ts +2 -1
- package/dist/components/Wizard/steps/ReviewStep.js +56 -12
- package/dist/components/Wizard/steps/StorageStep.d.ts +9 -0
- package/dist/components/Wizard/steps/StorageStep.js +592 -0
- package/dist/components/Wizard/steps/SupabaseCredentialsStep.js +20 -21
- package/dist/components/Wizard/steps/VersionStep.js +45 -23
- package/dist/components/Wizard/steps/index.d.ts +3 -3
- package/dist/components/Wizard/steps/index.js +3 -3
- package/dist/components/common/CommandApproval.d.ts +12 -0
- package/dist/components/common/CommandApproval.js +91 -0
- package/dist/components/common/DeploymentPicker.d.ts +14 -0
- package/dist/components/common/DeploymentPicker.js +16 -0
- package/dist/components/common/index.d.ts +2 -0
- package/dist/components/common/index.js +2 -0
- package/dist/index.js +94 -62
- package/dist/lib/cloudCli.d.ts +134 -63
- package/dist/lib/cloudCli.js +512 -220
- package/dist/lib/clusterSetupDefaults.d.ts +30 -0
- package/dist/lib/clusterSetupDefaults.js +64 -0
- package/dist/lib/commandApproval.d.ts +26 -0
- package/dist/lib/commandApproval.js +114 -0
- package/dist/lib/config.d.ts +12 -10
- package/dist/lib/config.js +91 -33
- package/dist/lib/configFixtures.d.ts +5 -0
- package/dist/lib/configFixtures.js +513 -0
- package/dist/lib/deploymentHealth.d.ts +32 -0
- package/dist/lib/deploymentHealth.js +157 -0
- package/dist/lib/dns.d.ts +1 -1
- package/dist/lib/dns.js +19 -1
- package/dist/lib/dns.test.d.ts +1 -0
- package/dist/lib/dns.test.js +27 -0
- package/dist/lib/dockerHub.d.ts +12 -1
- package/dist/lib/dockerHub.js +18 -8
- package/dist/lib/helm.d.ts +4 -0
- package/dist/lib/helm.js +16 -0
- package/dist/lib/helmValues.d.ts +25 -0
- package/dist/lib/helmValues.js +1762 -289
- package/dist/lib/helmValues.test.d.ts +1 -0
- package/dist/lib/helmValues.test.js +966 -0
- package/dist/lib/htpasswd.d.ts +1 -0
- package/dist/lib/htpasswd.js +15 -0
- package/dist/lib/kubernetes.d.ts +124 -17
- package/dist/lib/kubernetes.js +576 -145
- package/dist/lib/secrets.d.ts +23 -0
- package/dist/lib/secrets.js +158 -0
- package/dist/lib/validateValues.d.ts +31 -0
- package/dist/lib/validateValues.js +253 -0
- package/dist/lib/versions.d.ts +82 -11
- package/dist/lib/versions.js +131 -31
- package/dist/lib/versions.test.d.ts +1 -0
- package/dist/lib/versions.test.js +81 -0
- package/dist/lib/wizardSteps.d.ts +14 -0
- package/dist/lib/wizardSteps.js +23 -0
- package/dist/lib/workloadIdentity.d.ts +26 -0
- package/dist/lib/workloadIdentity.js +323 -0
- package/dist/lib/workloadIdentity.test.d.ts +1 -0
- package/dist/lib/workloadIdentity.test.js +57 -0
- package/dist/types/index.d.ts +1860 -164
- package/dist/types/index.js +518 -295
- package/package.json +9 -4
- package/schema/values.schema.json +1934 -0
- package/cluster-setup/aws/cluster.yaml +0 -33
- package/cluster-setup/azure/main.bicep +0 -282
- package/cluster-setup/azure/main.parameters.json +0 -21
- package/dist/components/Wizard/steps/CredentialsStep.d.ts +0 -6
- package/dist/components/Wizard/steps/CredentialsStep.js +0 -22
- package/dist/components/Wizard/steps/DeploymentModeStep.d.ts +0 -5
- package/dist/components/Wizard/steps/DeploymentModeStep.js +0 -26
- package/dist/components/Wizard/steps/TierStep.d.ts +0 -6
- package/dist/components/Wizard/steps/TierStep.js +0 -29
- package/dist/lib/terraform.d.ts +0 -66
- package/dist/lib/terraform.js +0 -754
- package/terraform/aws/main.tf +0 -355
- package/terraform/azure/main.tf +0 -371
- package/terraform/gcp/main.tf +0 -407
package/dist/lib/helmValues.js
CHANGED
|
@@ -1,7 +1,96 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { getReleaseName, isSupportedDnsProvider, validateRemoteWriteConfig, } from "../types/index.js";
|
|
2
2
|
import { saveHelmValues, getHelmValuesPath } from "./config.js";
|
|
3
|
+
import { assertValidHelmValues } from "./validateValues.js";
|
|
4
|
+
import { SUPABASE_POSTGRES_IMAGE_REPOSITORY, SUPABASE_POSTGRES_IMAGE_TAG, DEFAULT_IMAGE_REGISTRY, IMAGE_REPOSITORIES, IMAGE_DIGESTS, KAFKA_PROXY_IMAGE, } from "./versions.js";
|
|
5
|
+
import { createHmac } from "crypto";
|
|
3
6
|
import fs from "fs/promises";
|
|
4
7
|
import YAML from "yaml";
|
|
8
|
+
// Kubernetes Secret names used in k8s secret mode. Both the value generator
// (which fills in the secretRef fields) and src/lib/secrets.ts (which creates
// the Secrets) rely on this single mapping so the two sides cannot drift apart.
/**
 * Returns the per-deployment Secret names, each prefixed with the deployment name.
 *
 * @param {{ name: string }} config - Deployment config; only `name` is read.
 * @returns {{ app: string, db: string, dbBootstrap: string, jwt: string,
 *   dashboard: string, realtime: string, smtp: string }} Secret name per role.
 */
export function deploymentSecretNames(config) {
    const prefix = config.name;
    // Role -> name suffix. Insertion order is the key order of the result.
    const suffixByRole = [
        ["app", "app-secrets"],
        ["db", "supabase-db"],
        ["dbBootstrap", "supabase-db-bootstrap"],
        ["jwt", "supabase-jwt"],
        ["dashboard", "supabase-dashboard"],
        ["realtime", "supabase-realtime"],
        ["smtp", "supabase-smtp"],
    ];
    return Object.fromEntries(suffixByRole.map(([role, suffix]) => [role, `${prefix}-${suffix}`]));
}
|
|
23
|
+
// Baseline Kafka topic partition counts. Tiers were removed, so these are not
// user-facing sizing knobs; they are a structural contract that has to agree in
// three places at once:
//   1. the kafka.provisioning topic partitions,
//   2. rulebricks.hps.workers.solutionPartitions (the worker-fleet concurrency
//      ceiling the chart cross-checks), and
//   3. the worker KEDA maxReplicaCount (validated to be <= solutionPartitions).
// The numbers mirror the Helm chart's own defaults; operators who need another
// size tune the chart values directly. Because a partition count can only grow,
// the solution topic is given generous headroom up front - idle partitions are
// effectively free.
const SOLUTION_TOPIC_PARTITIONS = 128;
const LOGS_TOPIC_PARTITIONS = 24;
// Replication factor for the RPC and log topics. RPC traffic is transient and
// latency-sensitive (with acks=-1 the HPS producer would otherwise wait on full
// ISR replication), and the in-cluster broker is single-replica by default.
const TOPIC_REPLICATION_FACTOR = 1;
// The chart schema accepts global.version only when it is empty or a semantic
// version. The CLI normally pins a real version, but migrated/legacy configs can
// still carry "latest"; that would fail chart validation, so such values are
// omitted and the chart falls back to its default.
const SEMVER_PATTERN = /^\d+\.\d+\.\d+(-[0-9A-Za-z.-]+)?$/;
// Default batching for the decision-log archive that ClickHouse reads: flush a
// gzipped NDJSON file at ~64 MiB (uncompressed) or after 5 minutes, whichever
// comes first. Users can override these in their Helm values.
//
// max_bytes MUST stay well below the Vector pod's memory limit
// (vector.resources.limits.memory in the chart). The object-storage sink holds
// the whole uncompressed batch in memory until it flushes, so a batch at or
// above the pod limit is OOMKilled before any blob is written - silently
// disabling decision-log export. 64 MiB leaves comfortable headroom under the
// chart's 1 GiB Vector limit while still producing large, scan-efficient files.
const DECISION_LOG_BATCH = {
    max_bytes: 64 * 1024 * 1024, // 67108864
    timeout_secs: 5 * 60, // 300
};
// Fixed `iat` / `exp` claim values (Unix seconds: 2022-01-09T23:00:00Z and
// 2100-01-01T00:00:00Z) for the Supabase API-key JWTs. Because they are
// constants, a signed key depends only on the role and the JWT secret.
const SUPABASE_JWT_ISSUED_AT = 1641769200;
const SUPABASE_JWT_EXPIRES_AT = 4102444800;
|
|
57
|
+
// VRL program that coerces the Kafka decision-log envelope into the ClickHouse
// column types. It is kept as a genuine multi-line string (rather than a chart
// `{{ include }}`) so YAML.stringify / Helm's toYaml emit a block scalar. A
// templated single-line include is rendered as a single-quoted YAML scalar whose
// newlines YAML folds into spaces, which puts every statement on one line and
// breaks VRL parsing. Keep in sync with rulebricks.vector.normalizeLogs.
//
// The literal below is whitespace-sensitive: every line starts at column 0
// (apart from the single leading space on the assignment inside the `if`), and
// there is no trailing newline.
const VECTOR_NORMALIZE_LOGS_VRL = `parsed, err = parse_json(string!(.message))
if err == null {
 . = parsed
}
.timestamp = parse_timestamp!(to_string(.timestamp) ?? to_string(now()), format: "%+")
.api_key = to_string(.api_key) ?? ""
.user_id = to_string(.user_id) ?? null
.environment = to_string(.environment) ?? null
.ip = to_string(.ip) ?? null
.method = to_string(.method) ?? null
.url = to_string(.url) ?? ""
.status = to_int(.status) ?? 0
.rule_name = to_string(.rule_name) ?? null
.rule_id = to_string(.rule_id) ?? null
.rule_slug = to_string(.rule_slug) ?? null
.rule_version = to_string(.rule_version) ?? null
.operation = to_string(.operation) ?? null
.level = to_string(.level) ?? "info"
.error = to_string(.error) ?? null
.trace_id = to_string(.trace_id) ?? null
.span_id = to_string(.span_id) ?? null
.request = to_string(.request) ?? "null"
.response = to_string(.response) ?? "null"
.decision = to_string(.decision) ?? "{}"
.params = to_string(.params) ?? "{}"`;
|
|
90
|
+
/**
 * Builds the object-key prefix under which decision-log files are written.
 *
 * The configured folder (`config.storage.paths.decisionLogs`) has its leading
 * and trailing slashes removed and is followed by a Hive-style, hourly
 * `year=/month=/day=/hour=` layout. The `%Y`/`%m`/`%d`/`%H` tokens are emitted
 * literally; this string is passed to the storage sinks as their
 * key_prefix / blob_prefix.
 *
 * @param {object} config - Deployment config; `storage.paths.decisionLogs` is optional.
 * @returns {string} Prefix ending in "/", never starting with "/".
 */
function decisionLogPathPrefix(config) {
    const defaultFolder = "decision-logs";
    const configured = config.storage?.paths?.decisionLogs || defaultFolder;
    // A value made only of slashes (e.g. "/") trims to "". Fall back to the
    // default so the prefix never starts with "/" (an empty top-level folder).
    const folder = configured.replace(/^\/+|\/+$/g, "") || defaultFolder;
    return `${folder}/year=%Y/month=%m/day=%d/hour=%H/`;
}
|
|
5
94
|
/**
|
|
6
95
|
* Generates Vector sink configuration based on logging settings
|
|
7
96
|
*/
|
|
@@ -10,84 +99,85 @@ function generateVectorSinks(config) {
|
|
|
10
99
|
// Console sink is always enabled
|
|
11
100
|
console: {
|
|
12
101
|
type: "console",
|
|
13
|
-
inputs: ["
|
|
102
|
+
inputs: ["normalize_logs"],
|
|
14
103
|
encoding: {
|
|
15
104
|
codec: "json",
|
|
16
105
|
},
|
|
17
106
|
},
|
|
18
107
|
};
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
config.
|
|
22
|
-
const { sink, bucket, region } = config.features.logging;
|
|
23
|
-
switch (sink) {
|
|
24
|
-
// Cloud Storage sinks
|
|
108
|
+
if (config.storage) {
|
|
109
|
+
const storage = config.storage;
|
|
110
|
+
switch (config.storage.provider) {
|
|
25
111
|
case "s3":
|
|
26
|
-
sinks.
|
|
112
|
+
sinks.decision_logs = {
|
|
27
113
|
type: "aws_s3",
|
|
28
|
-
inputs: ["
|
|
29
|
-
bucket: bucket,
|
|
30
|
-
region: region,
|
|
31
|
-
key_prefix:
|
|
114
|
+
inputs: ["normalize_logs"],
|
|
115
|
+
bucket: storage.bucket,
|
|
116
|
+
region: storage.region,
|
|
117
|
+
key_prefix: decisionLogPathPrefix(config),
|
|
118
|
+
filename_extension: "ndjson",
|
|
32
119
|
compression: "gzip",
|
|
33
|
-
encoding: {
|
|
34
|
-
|
|
35
|
-
},
|
|
120
|
+
encoding: { codec: "json" },
|
|
121
|
+
framing: { method: "newline_delimited" },
|
|
122
|
+
batch: { ...DECISION_LOG_BATCH },
|
|
36
123
|
};
|
|
37
124
|
break;
|
|
38
|
-
case "azure-blob":
|
|
39
|
-
|
|
40
|
-
throw new Error("Azure Blob logging requires a storage account.");
|
|
41
|
-
}
|
|
42
|
-
const azureBlobSink = {
|
|
125
|
+
case "azure-blob": {
|
|
126
|
+
const sink = {
|
|
43
127
|
type: "azure_blob",
|
|
44
|
-
inputs: ["
|
|
45
|
-
account_name: bucket,
|
|
46
|
-
container_name:
|
|
47
|
-
blob_prefix:
|
|
128
|
+
inputs: ["normalize_logs"],
|
|
129
|
+
account_name: storage.bucket,
|
|
130
|
+
container_name: storage.azureBlobContainer || "rulebricks",
|
|
131
|
+
blob_prefix: decisionLogPathPrefix(config),
|
|
132
|
+
// azure_blob has no filename_extension (unlike aws_s3/gcs); it always
|
|
133
|
+
// writes ".log" (".log.gz" when compressed). ClickHouse globs on *.gz.
|
|
48
134
|
compression: "gzip",
|
|
49
|
-
encoding: {
|
|
50
|
-
|
|
51
|
-
},
|
|
135
|
+
encoding: { codec: "json" },
|
|
136
|
+
framing: { method: "newline_delimited" },
|
|
137
|
+
batch: { ...DECISION_LOG_BATCH },
|
|
52
138
|
};
|
|
53
|
-
if (config.
|
|
54
|
-
|
|
55
|
-
throw new Error("Azure Blob connection string auth requires a secret ref.");
|
|
56
|
-
}
|
|
57
|
-
azureBlobSink.connection_string = "${AZURE_STORAGE_CONNECTION_STRING}";
|
|
139
|
+
if (config.storage.cloudAuthMode === "secret") {
|
|
140
|
+
sink.connection_string = "${AZURE_STORAGE_CONNECTION_STRING}";
|
|
58
141
|
}
|
|
59
142
|
else {
|
|
60
|
-
|
|
61
|
-
!config.features.logging.azureBlobTenantId) {
|
|
62
|
-
throw new Error("Azure Blob workload identity requires client ID and tenant ID.");
|
|
63
|
-
}
|
|
64
|
-
azureBlobSink.auth = {
|
|
143
|
+
sink.auth = {
|
|
65
144
|
azure_credential_kind: "workload_identity",
|
|
66
|
-
client_id: config.
|
|
67
|
-
tenant_id: config.
|
|
145
|
+
client_id: config.storage.azureBlobClientId,
|
|
146
|
+
tenant_id: config.storage.azureBlobTenantId,
|
|
68
147
|
token_file_path: "/var/run/secrets/azure/tokens/azure-identity-token",
|
|
69
148
|
};
|
|
70
149
|
}
|
|
71
|
-
sinks.
|
|
150
|
+
sinks.decision_logs = sink;
|
|
72
151
|
break;
|
|
152
|
+
}
|
|
73
153
|
case "gcs":
|
|
74
|
-
sinks.
|
|
154
|
+
sinks.decision_logs = {
|
|
75
155
|
type: "gcp_cloud_storage",
|
|
76
|
-
inputs: ["
|
|
77
|
-
bucket: bucket,
|
|
78
|
-
key_prefix:
|
|
156
|
+
inputs: ["normalize_logs"],
|
|
157
|
+
bucket: storage.bucket,
|
|
158
|
+
key_prefix: decisionLogPathPrefix(config),
|
|
159
|
+
filename_extension: "ndjson",
|
|
79
160
|
compression: "gzip",
|
|
80
|
-
encoding: {
|
|
81
|
-
|
|
82
|
-
},
|
|
161
|
+
encoding: { codec: "json" },
|
|
162
|
+
framing: { method: "newline_delimited" },
|
|
163
|
+
batch: { ...DECISION_LOG_BATCH },
|
|
83
164
|
};
|
|
84
165
|
break;
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
// Add external logging-platform sink if configured. Decision logs always go
|
|
169
|
+
// to object storage via the decision_logs sink above; this is an additional
|
|
170
|
+
// platform destination (Datadog, Splunk, etc.).
|
|
171
|
+
if (config.features.logging.sink !== "console" &&
|
|
172
|
+
config.features.logging.sink !== "pending") {
|
|
173
|
+
const { sink, bucket, region } = config.features.logging;
|
|
174
|
+
switch (sink) {
|
|
85
175
|
// Logging platform sinks
|
|
86
176
|
// For platforms, bucket is repurposed for API key/token, region for site/URL
|
|
87
177
|
case "datadog":
|
|
88
178
|
sinks.datadog = {
|
|
89
179
|
type: "datadog_logs",
|
|
90
|
-
inputs: ["
|
|
180
|
+
inputs: ["normalize_logs"],
|
|
91
181
|
default_api_key: bucket, // API key stored in bucket field
|
|
92
182
|
site: region || "datadoghq.com", // Site stored in region field
|
|
93
183
|
compression: "gzip",
|
|
@@ -99,7 +189,7 @@ function generateVectorSinks(config) {
|
|
|
99
189
|
case "splunk":
|
|
100
190
|
sinks.splunk = {
|
|
101
191
|
type: "splunk_hec_logs",
|
|
102
|
-
inputs: ["
|
|
192
|
+
inputs: ["normalize_logs"],
|
|
103
193
|
endpoint: region, // URL stored in region field
|
|
104
194
|
default_token: bucket, // HEC token stored in bucket field
|
|
105
195
|
compression: "gzip",
|
|
@@ -114,7 +204,7 @@ function generateVectorSinks(config) {
|
|
|
114
204
|
const esConfig = JSON.parse(bucket || "{}");
|
|
115
205
|
sinks.elasticsearch = {
|
|
116
206
|
type: "elasticsearch",
|
|
117
|
-
inputs: ["
|
|
207
|
+
inputs: ["normalize_logs"],
|
|
118
208
|
endpoints: [esConfig.url],
|
|
119
209
|
bulk: {
|
|
120
210
|
index: esConfig.index || "rulebricks-logs",
|
|
@@ -134,7 +224,7 @@ function generateVectorSinks(config) {
|
|
|
134
224
|
// Fallback if JSON parsing fails
|
|
135
225
|
sinks.elasticsearch = {
|
|
136
226
|
type: "elasticsearch",
|
|
137
|
-
inputs: ["
|
|
227
|
+
inputs: ["normalize_logs"],
|
|
138
228
|
endpoints: [bucket],
|
|
139
229
|
bulk: {
|
|
140
230
|
index: region || "rulebricks-logs",
|
|
@@ -145,7 +235,7 @@ function generateVectorSinks(config) {
|
|
|
145
235
|
case "loki":
|
|
146
236
|
sinks.loki = {
|
|
147
237
|
type: "loki",
|
|
148
|
-
inputs: ["
|
|
238
|
+
inputs: ["normalize_logs"],
|
|
149
239
|
endpoint: bucket, // Loki URL stored in bucket field
|
|
150
240
|
labels: {
|
|
151
241
|
app: "rulebricks",
|
|
@@ -159,7 +249,7 @@ function generateVectorSinks(config) {
|
|
|
159
249
|
case "newrelic":
|
|
160
250
|
sinks.newrelic = {
|
|
161
251
|
type: "new_relic",
|
|
162
|
-
inputs: ["
|
|
252
|
+
inputs: ["normalize_logs"],
|
|
163
253
|
license_key: bucket, // License key stored in bucket field
|
|
164
254
|
account_id: region, // Account ID stored in region field
|
|
165
255
|
api: "logs",
|
|
@@ -172,7 +262,7 @@ function generateVectorSinks(config) {
|
|
|
172
262
|
case "axiom":
|
|
173
263
|
sinks.axiom = {
|
|
174
264
|
type: "axiom",
|
|
175
|
-
inputs: ["
|
|
265
|
+
inputs: ["normalize_logs"],
|
|
176
266
|
token: bucket, // API token stored in bucket field
|
|
177
267
|
dataset: region || "rulebricks", // Dataset stored in region field
|
|
178
268
|
compression: "gzip",
|
|
@@ -186,20 +276,32 @@ function generateVectorSinks(config) {
|
|
|
186
276
|
return sinks;
|
|
187
277
|
}
|
|
188
278
|
function generateVectorEnv(config) {
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
279
|
+
// Kafka connection settings come from the templated vector-kafka-env ConfigMap
|
|
280
|
+
// so the in-cluster vs external (and bridge) decision lives in one place.
|
|
281
|
+
const configMapKeys = [
|
|
282
|
+
"KAFKA_BOOTSTRAP_SERVERS",
|
|
283
|
+
"KAFKA_TLS_ENABLED",
|
|
284
|
+
"KAFKA_SASL_ENABLED",
|
|
285
|
+
"KAFKA_SASL_MECHANISM",
|
|
286
|
+
"KAFKA_LOG_TOPIC",
|
|
287
|
+
];
|
|
288
|
+
const env = configMapKeys.map((key) => ({
|
|
289
|
+
name: key,
|
|
290
|
+
valueFrom: { configMapKeyRef: { name: "vector-kafka-env", key } },
|
|
291
|
+
}));
|
|
292
|
+
// SASL credentials (inline PLAIN/SCRAM). Optional so in-cluster/token-auth
|
|
293
|
+
// deploys work without the secret existing.
|
|
294
|
+
for (const key of ["KAFKA_SASL_USERNAME", "KAFKA_SASL_PASSWORD"]) {
|
|
295
|
+
env.push({
|
|
296
|
+
name: key,
|
|
192
297
|
valueFrom: {
|
|
193
|
-
|
|
194
|
-
name: "vector-kafka-env",
|
|
195
|
-
key: "KAFKA_BOOTSTRAP_SERVERS",
|
|
196
|
-
},
|
|
298
|
+
secretKeyRef: { name: "vector-kafka-credentials", key, optional: true },
|
|
197
299
|
},
|
|
198
|
-
}
|
|
199
|
-
|
|
200
|
-
const azureBlobSecretRef = config.
|
|
201
|
-
if (config.
|
|
202
|
-
config.
|
|
300
|
+
});
|
|
301
|
+
}
|
|
302
|
+
const azureBlobSecretRef = config.storage?.azureBlobConnectionStringSecretRef;
|
|
303
|
+
if (config.storage?.provider === "azure-blob" &&
|
|
304
|
+
config.storage.cloudAuthMode === "secret" &&
|
|
203
305
|
azureBlobSecretRef) {
|
|
204
306
|
env.push({
|
|
205
307
|
name: "AZURE_STORAGE_CONNECTION_STRING",
|
|
@@ -211,20 +313,20 @@ function generateVectorEnv(config) {
|
|
|
211
313
|
return env;
|
|
212
314
|
}
|
|
213
315
|
function generateVectorServiceAccount(config) {
|
|
316
|
+
// AWS uses EKS Pod Identity: NO eks.amazonaws.com/role-arn annotation - the
|
|
317
|
+
// CLI's workload-identity step creates a namespace-scoped association for this
|
|
318
|
+
// SA (to a role granting both the object-storage and MSK access Vector needs).
|
|
319
|
+
// Azure/GCP still annotate the SA, which is how their workload identity binds.
|
|
214
320
|
const annotations = {};
|
|
215
|
-
if (config.
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
}
|
|
219
|
-
if (config.features.logging.sink === "azure-blob" &&
|
|
220
|
-
config.features.logging.cloudAuthMode !== "secret" &&
|
|
221
|
-
config.features.logging.azureBlobClientId) {
|
|
321
|
+
if (config.storage?.provider === "azure-blob" &&
|
|
322
|
+
config.storage.cloudAuthMode !== "secret" &&
|
|
323
|
+
config.storage.azureBlobClientId) {
|
|
222
324
|
annotations["azure.workload.identity/client-id"] =
|
|
223
|
-
config.
|
|
325
|
+
config.storage.azureBlobClientId;
|
|
224
326
|
}
|
|
225
|
-
if (config.
|
|
327
|
+
if (config.storage?.provider === "gcs" && config.storage.gcpServiceAccountEmail) {
|
|
226
328
|
annotations["iam.gke.io/gcp-service-account"] =
|
|
227
|
-
config.
|
|
329
|
+
config.storage.gcpServiceAccountEmail;
|
|
228
330
|
}
|
|
229
331
|
return {
|
|
230
332
|
create: true,
|
|
@@ -233,9 +335,11 @@ function generateVectorServiceAccount(config) {
|
|
|
233
335
|
};
|
|
234
336
|
}
|
|
235
337
|
function generateVectorPodLabels(config) {
|
|
236
|
-
const labels = {
|
|
237
|
-
|
|
238
|
-
|
|
338
|
+
const labels = {
|
|
339
|
+
"rulebricks.com/workload-group": "infrastructure",
|
|
340
|
+
};
|
|
341
|
+
if (config.storage?.provider === "azure-blob" &&
|
|
342
|
+
config.storage.cloudAuthMode !== "secret") {
|
|
239
343
|
labels["azure.workload.identity/use"] = "true";
|
|
240
344
|
}
|
|
241
345
|
return labels;
|
|
@@ -258,6 +362,48 @@ function secretKeySelector(ref) {
|
|
|
258
362
|
key: ref.key,
|
|
259
363
|
};
|
|
260
364
|
}
|
|
365
|
+
/**
 * Serializes a value to JSON and encodes it as unpadded base64url.
 *
 * @param {*} value - Any JSON-serializable value.
 * @returns {string} base64url text of `JSON.stringify(value)`.
 */
function base64UrlJson(value) {
    const json = JSON.stringify(value);
    const bytes = Buffer.from(json);
    return bytes.toString("base64url");
}
|
|
368
|
+
// Self-hosted Supabase derives its anon and service_role API keys from the JWT
// secret: each key is an HS256 JWT whose `role` claim is anon / service_role,
// signed with that secret.
// https://supabase.com/docs/guides/self-hosting/self-hosted-auth-keys
/**
 * Signs a Supabase API-key JWT for the given role.
 *
 * @param {string} role - Value of the `role` claim (e.g. "anon", "service_role").
 * @param {string} secret - HMAC key used for the HS256 signature.
 * @returns {string} Compact JWT: `<header>.<payload>.<signature>`, all base64url.
 */
export function signSupabaseJwt(role, secret) {
    const claims = {
        role,
        iss: "supabase",
        iat: SUPABASE_JWT_ISSUED_AT,
        exp: SUPABASE_JWT_EXPIRES_AT,
    };
    const signingInput = [{ alg: "HS256", typ: "JWT" }, claims]
        .map((segment) => base64UrlJson(segment))
        .join(".");
    const hmac = createHmac("sha256", secret);
    hmac.update(signingInput);
    return [signingInput, hmac.digest("base64url")].join(".");
}
|
|
383
|
+
// Realtime needs SECRET_KEY_BASE (signs/encrypts its tokens) and a 16-byte
// DB_ENC_KEY (encrypts tenant DB creds). Both are derived deterministically from
// the JWT secret, so they stay stable across redeploys, need no extra persisted
// state, and hang off the one root secret the operator already manages.
/**
 * Derives the Realtime secrets from the Supabase JWT secret.
 *
 * Each value is the hex HMAC-SHA256 of a fixed label, keyed by `jwtSecret`.
 *
 * @param {string} jwtSecret - Root secret used as the HMAC key.
 * @returns {{ secretKeyBase: string, dbEncKey: string }} `secretKeyBase` is the
 *   full 64-character hex digest; `dbEncKey` is the first 16 characters of its digest.
 */
export function deriveRealtimeSecrets(jwtSecret) {
    const hexMacOf = (label) => createHmac("sha256", jwtSecret).update(label).digest("hex");
    const secretKeyBase = hexMacOf("supabase-realtime-secret-key-base"); // 64 chars
    // Realtime requires exactly 16 bytes, so keep only the first 16 hex characters.
    const dbEncKey = hexMacOf("supabase-realtime-db-enc-key").slice(0, 16);
    return { secretKeyBase, dbEncKey };
}
|
|
397
|
+
/**
 * Normalizes a remote_write URL by deleting every ASCII control character
 * (U+0000-U+001F and U+007F) and then trimming surrounding whitespace.
 *
 * The usual offender is a trailing carriage return picked up when the URL is
 * pasted from a CRLF file or captured from command output. A stray "\r"
 * corrupts the URL the Prometheus operator hands to remote_write, so it is
 * cleaned at the source.
 *
 * @param {string} url - Raw URL text.
 * @returns {string} The cleaned URL.
 */
function sanitizeRemoteWriteUrl(url) {
    // eslint-disable-next-line no-control-regex
    const controlCharacters = /[\u0000-\u001F\u007F]/g;
    const printableOnly = url.replace(controlCharacters, "");
    return printableOnly.trim();
}
|
|
261
407
|
function generateRemoteWriteSpec(config) {
|
|
262
408
|
if (config.features.monitoring.destination === "local-grafana") {
|
|
263
409
|
return [];
|
|
@@ -265,11 +411,18 @@ function generateRemoteWriteSpec(config) {
|
|
|
265
411
|
const remoteWrite = config.features.monitoring.remoteWrite;
|
|
266
412
|
if (!remoteWrite) {
|
|
267
413
|
return config.features.monitoring.remoteWriteUrl
|
|
268
|
-
? [{ url: config.features.monitoring.remoteWriteUrl }]
|
|
414
|
+
? [{ url: sanitizeRemoteWriteUrl(config.features.monitoring.remoteWriteUrl) }]
|
|
269
415
|
: [];
|
|
270
416
|
}
|
|
417
|
+
// Enforce the same per-destination/auth requirements the wizard and Zod
|
|
418
|
+
// schema do. This is unreachable for CLI-generated configs (they are gated
|
|
419
|
+
// earlier) but guards hand-edited values and keeps one source of truth.
|
|
420
|
+
const remoteWriteErrors = validateRemoteWriteConfig(remoteWrite);
|
|
421
|
+
if (remoteWriteErrors.length > 0) {
|
|
422
|
+
throw new Error(remoteWriteErrors.join(" "));
|
|
423
|
+
}
|
|
271
424
|
const base = {
|
|
272
|
-
url: remoteWrite.url,
|
|
425
|
+
url: sanitizeRemoteWriteUrl(remoteWrite.url),
|
|
273
426
|
};
|
|
274
427
|
switch (remoteWrite.destination) {
|
|
275
428
|
case "aws-amp":
|
|
@@ -294,12 +447,125 @@ function generateRemoteWriteSpec(config) {
|
|
|
294
447
|
return [base];
|
|
295
448
|
}
|
|
296
449
|
}
|
|
450
|
+
/**
 * Reports whether the ClickStack observability stack is enabled.
 *
 * @param {object} config - Deployment config; `features` must be present.
 * @returns {*} `features.observability.clickstack.enabled` when it is set,
 *   otherwise `true` (enabled by default when the setting is missing or null).
 */
function isClickStackEnabled(config) {
    const { observability } = config.features;
    const configured = observability?.clickstack?.enabled;
    if (configured === undefined || configured === null) {
        return true;
    }
    return configured;
}
|
|
453
|
+
/**
 * Builds the values object for the ClickStack section (ClickHouse settings,
 * HyperDX, the OTel collector agent/gateway, and FerretDB with its Postgres).
 *
 * `enabled` is copied to the top-level `enabled` flag and to hyperdx,
 * hyperdx.ingress, collector.agent, ferretdb and ferretdb.persistence.
 *
 * @param {boolean} enabled - Whether the stack and its sub-components are on.
 * @param {object} config - Deployment config; reads `imageRegistry` and
 *   `features.observability.clickstack.telemetryRetentionDays` (default 7).
 * @param {string} storageClass - Storage class for the FerretDB volume.
 * @param {object} infrastructurePodLabels - Labels applied to every pod here
 *   (the same object reference is reused for each component).
 * @param {Array} operationalDaemonSetTolerations - Tolerations for the collector agent.
 * @returns {object} Values object; key order is preserved for YAML output.
 */
function generateClickStackValues(enabled, config, storageClass, infrastructurePodLabels, operationalDaemonSetTolerations) {
    const clickstack = config.features.observability?.clickstack;
    const telemetryRetentionDays = clickstack?.telemetryRetentionDays ?? 7;
    // NOTE(review): the previous version also computed
    // `clickstack?.clickHouseStorageSize ?? "100Gi"` but never emitted it. The
    // unused local was removed; confirm the ClickHouse volume size is applied
    // elsewhere, otherwise that user setting is silently ignored.

    // Registry host for the clickstack images. The clickstack subchart routes
    // them through its own image helper, so the split { registry, repository }
    // shape lets global.imageRegistry + digest pinning flow through.
    const registry = config.imageRegistry || DEFAULT_IMAGE_REGISTRY;
    // Image block for one IMAGE_REPOSITORIES entry.
    const imageFor = (name) => ({
        registry,
        repository: IMAGE_REPOSITORIES[name].repository,
        tag: IMAGE_REPOSITORIES[name].tag,
        pullPolicy: "IfNotPresent",
    });
    // Kubernetes resources block from request/limit CPU and memory quantities.
    const resourcesOf = (requestCpu, requestMemory, limitCpu, limitMemory) => ({
        requests: { cpu: requestCpu, memory: requestMemory },
        limits: { cpu: limitCpu, memory: limitMemory },
    });
    return {
        enabled,
        clickhouse: {
            database: "otel",
            username: "rulebricks",
            existingSecret: "",
            existingSecretKey: "admin-password",
            retentionDays: telemetryRetentionDays,
            ttl: "",
        },
        hyperdx: {
            enabled,
            image: imageFor("hyperdx"),
            resources: resourcesOf("250m", "512Mi", "1000m", "1Gi"),
            ingress: {
                enabled,
                className: "traefik",
                hostname: "",
                allowedIPs: [],
            },
            podLabels: infrastructurePodLabels,
        },
        collector: {
            image: imageFor("clickstackOtelCollector"),
            memoryLimitMiB: 800,
            agent: {
                enabled,
                // The agent container runs as uid/gid 0 (root).
                securityContext: {
                    runAsUser: 0,
                    runAsGroup: 0,
                },
                resources: resourcesOf("100m", "256Mi", "500m", "512Mi"),
                tolerations: operationalDaemonSetTolerations,
                podLabels: infrastructurePodLabels,
            },
            gateway: {
                replicas: 1,
                resources: resourcesOf("250m", "512Mi", "2000m", "1Gi"),
                podLabels: infrastructurePodLabels,
            },
        },
        ferretdb: {
            enabled,
            image: imageFor("ferretdb"),
            postgresImage: imageFor("postgresDocumentdb"),
            auth: {
                username: "hyperdx",
                password: "",
                existingSecret: "",
                existingSecretKey: "password",
            },
            persistence: {
                enabled,
                size: "10Gi",
                storageClassName: storageClass,
            },
            resources: {
                ferretdb: resourcesOf("100m", "256Mi", "500m", "512Mi"),
                postgres: resourcesOf("250m", "512Mi", "1000m", "1Gi"),
            },
            podLabels: infrastructurePodLabels,
            podAnnotations: {
                // Tells the cluster autoscaler not to evict this pod.
                "cluster-autoscaler.kubernetes.io/safe-to-evict": "false",
            },
        },
    };
}
|
|
297
563
|
function generatePrometheusServiceAccount(config) {
|
|
564
|
+
// AWS (AMP remote write) uses EKS Pod Identity - the association is created by
|
|
565
|
+
// the CLI's workload-identity step, so no eks.amazonaws.com/role-arn annotation.
|
|
566
|
+
// Azure Monitor still annotates the SA for its workload identity.
|
|
298
567
|
const annotations = {};
|
|
299
568
|
const remoteWrite = config.features.monitoring.remoteWrite;
|
|
300
|
-
if (remoteWrite?.destination === "aws-amp" && remoteWrite.awsRoleArn) {
|
|
301
|
-
annotations["eks.amazonaws.com/role-arn"] = remoteWrite.awsRoleArn;
|
|
302
|
-
}
|
|
303
569
|
if (remoteWrite?.destination === "azure-monitor" &&
|
|
304
570
|
remoteWrite.authType === "workload-identity" &&
|
|
305
571
|
remoteWrite.clientId) {
|
|
@@ -343,8 +609,16 @@ function generateAzureMonitorRemoteWrite(remoteWrite, base) {
|
|
|
343
609
|
if (!remoteWrite.clientId || !remoteWrite.tenantId) {
|
|
344
610
|
throw new Error("Azure Monitor remote_write workload identity requires client ID and tenant ID.");
|
|
345
611
|
}
|
|
346
|
-
|
|
347
|
-
|
|
612
|
+
// The prometheus-operator AzureAD schema supports only managedIdentity,
|
|
613
|
+
// oauth, and sdk (there is no "workloadIdentity" field - emitting it makes
|
|
614
|
+
// the operator reject the whole remoteWrite with "must provide Azure Managed
|
|
615
|
+
// Identity or Azure OAuth or Azure SDK", which silently prevents the
|
|
616
|
+
// Prometheus StatefulSet from being created). For AKS workload identity we
|
|
617
|
+
// use the Azure SDK credential: it reads the projected token + AZURE_CLIENT_ID
|
|
618
|
+
// injected by the workload-identity webhook (driven by the prometheus
|
|
619
|
+
// ServiceAccount's azure.workload.identity/client-id annotation and the
|
|
620
|
+
// azure.workload.identity/use pod label), so only the tenant ID is needed here.
|
|
621
|
+
azureAd.sdk = {
|
|
348
622
|
tenantId: remoteWrite.tenantId,
|
|
349
623
|
};
|
|
350
624
|
}
|
|
@@ -392,57 +666,690 @@ function generateGenericRemoteWrite(remoteWrite, base) {
|
|
|
392
666
|
return base;
|
|
393
667
|
}
|
|
394
668
|
/**
|
|
395
|
-
* Generates Kafka
|
|
669
|
+
* Generates the Kafka broker config map (Kafka.spec.kafka.config for Strimzi).
|
|
670
|
+
* These are the former KAFKA_CFG_* tuning env vars, as their Kafka property
|
|
671
|
+
* names. Kept in lockstep with the chart's kafka.config.
|
|
672
|
+
*/
|
|
673
|
+
function generateKafkaConfig() {
    // Byte-sized limits are written as multiples of one mebibyte so each
    // threshold is readable at a glance; every value is stringified on the
    // way out, so the returned map only ever contains strings.
    const MIB = 1024 * 1024;
    const brokerSettings = [
        ["auto.create.topics.enable", true],
        ["log.retention.hours", 24],
        ["num.partitions", 12],
        ["num.network.threads", 8],
        ["num.io.threads", 8],
        ["socket.send.buffer.bytes", MIB],
        ["socket.receive.buffer.bytes", MIB],
        ["socket.request.max.bytes", 200 * MIB],
        // Broker-wide max record size; per-topic max.message.bytes caps are
        // expected to fit under this ceiling.
        ["message.max.bytes", 2 * MIB],
        ["replica.fetch.max.bytes", 4 * MIB],
        // Broker-wide default retention; the application topics carry
        // tighter caps of their own.
        ["log.retention.bytes", 512 * MIB],
        ["log.segment.bytes", 1024 * MIB],
        ["num.replica.fetchers", 4],
        ["queued.max.requests", 10000],
        ["replica.socket.receive.buffer.bytes", MIB],
        ["log.cleaner.dedupe.buffer.size", 256 * MIB],
        ["log.cleaner.io.buffer.size", MIB],
    ];
    return Object.fromEntries(brokerSettings.map(([property, value]) => [property, String(value)]));
}
|
|
696
|
+
/**
 * Resolves the Kafka topic prefix as HPS/Vector/KEDA will see it.
 *
 * Mirrors generateAppLogging: in-cluster Kafka runs UNPREFIXED (dedicated
 * broker, and prefixing would desync chart-side consumers from producers);
 * external Kafka uses the explicitly configured prefix and otherwise falls
 * back to the chart default.
 *
 * @param {object} config - Deployment configuration.
 * @returns {string} "" for in-cluster Kafka; for external Kafka the
 *   configured `topicPrefix` (an explicit "" disables prefixing) or
 *   "com.rulebricks." when none is configured.
 */
function effectiveTopicPrefix(config) {
    if (!isExternalKafka(config)) {
        return "";
    }
    // `??` rather than `!== undefined`: a null prefix (for example an empty
    // `topicPrefix:` key in YAML) is "not provided", not a value. Letting it
    // through would interpolate the literal text "null" into topic names.
    return config.externalServices?.kafka?.external?.topicPrefix ?? "com.rulebricks.";
}
|
|
709
|
+
/**
 * Explicit topic list for the kafka.provisioning block.
 *
 * The list is consumed by BOTH the subchart provisioning Job (creates the
 * topics) and the chart's kafka-topic-align Job (idempotently converges
 * pre-existing topics on upgrade). Names are built from the SAME prefix that
 * is written to app.logging.kafkaTopicPrefix - the chart fails the render if
 * the two ever diverge.
 *
 * Sizing policy (baseline constants, mirroring the chart defaults):
 * - solution / solution-response use SOLUTION_TOPIC_PARTITIONS, the
 *   worker-fleet concurrency CEILING (partitions can never be decreased;
 *   workers are sized separately by the cluster autoscaler). RF stays 1:
 *   RPC traffic is transient and latency-sensitive, and the HPS producer's
 *   acks=-1 would otherwise wait on full ISR replication.
 * - logs uses LOGS_TOPIC_PARTITIONS (durable data feeding the Vector ->
 *   object storage pipeline).
 *
 * @param {object} config - Deployment configuration.
 * @returns {Array<object>} Topic descriptors (name, partitions, replicas,
 *   config), or an empty array when topics are customer-managed.
 */
function generateKafkaTopics(config) {
    // External MSK IAM goes through the proxy bridge and the chart's
    // kafka-topic-provision Job creates the topics there (MSK Serverless
    // won't auto-create them), so that path must be populated. Any other
    // external broker (SCRAM / Event Hubs / GCP, no bridge) stays
    // customer-managed.
    const customerManaged = isExternalKafka(config) && !kafkaUsesBridge(config);
    if (customerManaged) {
        return [];
    }
    const prefix = effectiveTopicPrefix(config);
    const describeTopic = (suffix, partitions, topicConfig) => ({
        name: `${prefix}${suffix}`,
        partitions,
        replicas: TOPIC_REPLICATION_FACTOR,
        config: topicConfig,
    });
    // Both RPC topics deliberately share this one settings object.
    const rpcTopicConfig = {
        "retention.ms": "300000",
        "segment.ms": "300000",
        "segment.bytes": "67108864",
        "retention.bytes": "67108864",
        "max.message.bytes": "2097152",
    };
    const logsTopicConfig = {
        "retention.ms": "86400000",
        "retention.bytes": "268435456",
        "max.message.bytes": "2097152",
    };
    return [
        describeTopic("solution", SOLUTION_TOPIC_PARTITIONS, rpcTopicConfig),
        describeTopic("solution-response", SOLUTION_TOPIC_PARTITIONS, rpcTopicConfig),
        describeTopic("logs", LOGS_TOPIC_PARTITIONS, logsTopicConfig),
    ];
}
|
|
768
|
+
/**
 * Builds a soft (preferred, weight 50) pod anti-affinity term that steers a
 * pod away from nodes (topology key kubernetes.io/hostname) already running
 * pods labeled rulebricks.com/workload-group=infrastructure.
 *
 * @returns {object} An object with a single `podAntiAffinity` key.
 */
function generateWorkerPodAntiAffinity() {
    const infrastructureSelector = {
        matchExpressions: [
            {
                key: "rulebricks.com/workload-group",
                operator: "In",
                values: ["infrastructure"],
            },
        ],
    };
    const avoidInfrastructureNodes = {
        weight: 50,
        podAffinityTerm: {
            labelSelector: infrastructureSelector,
            topologyKey: "kubernetes.io/hostname",
        },
    };
    return {
        podAntiAffinity: {
            preferredDuringSchedulingIgnoredDuringExecution: [avoidInfrastructureNodes],
        },
    };
}
|
|
791
|
+
/**
 * Assembles a scheduling fragment containing only the parts that were given.
 *
 * @param {Array<object>} [tolerations] - Included as `tolerations` when truthy.
 * @param {object} [affinity] - Included as `affinity` when truthy.
 * @returns {object} Object with `affinity` and/or `tolerations` (in that key
 *   order), or an empty object when neither is provided.
 */
function generateScheduling(tolerations, affinity) {
    const scheduling = {};
    if (affinity) {
        scheduling.affinity = affinity;
    }
    if (tolerations) {
        scheduling.tolerations = tolerations;
    }
    return scheduling;
}
|
|
797
|
+
/**
 * Burst-pool scheduling primitives, applied unconditionally.
 *
 * Cluster-setup provisions a dedicated worker pool that is both labeled and
 * tainted rulebricks.com/pool=burst (one big Deallocate-parked node on Azure,
 * or an on-demand nodegroup on AWS). Workers tolerate the taint and SOFTLY
 * prefer the label. On clusters that have no such pool both settings are
 * inert, so BYO clusters schedule exactly as before - no configuration is
 * required either way.
 */
// Toleration matching the burst pool's NoSchedule taint.
const BURST_POOL_TOLERATION = {
    key: "rulebricks.com/pool",
    operator: "Equal",
    value: "burst",
    effect: "NoSchedule",
};
// Preferred (weight 100) node-affinity term selecting burst-pool nodes by label.
const BURST_POOL_NODE_PREFERENCE = {
    weight: 100,
    preference: {
        matchExpressions: [
            {
                key: "rulebricks.com/pool",
                operator: "In",
                values: ["burst"],
            },
        ],
    },
};
|
|
819
|
+
/**
 * Builds the backup values block.
 *
 * Backups are only switched on when the database is self-hosted, Postgres is
 * not configured as an external service, and `config.backup.enabled` is
 * exactly `true`.
 *
 * @param {object} config - Deployment configuration.
 * @returns {{enabled: boolean, schedule: string, retentionDays: number}}
 *   `schedule` falls back to "0 2 * * *" and `retentionDays` to 7 when the
 *   configured value is missing or falsy.
 */
function generateBackupValues(config) {
    const backup = config.backup;
    const isSelfHosted = config.database.type === "self-hosted";
    const postgresIsExternal = config.externalServices?.postgres?.mode === "external";
    // The backup CronJob streams pg_dump from the running DB (using
    // supabase.db.image) and uploads it with rclone, so no backup-specific
    // image is emitted here; the chart's default rclone image applies unless
    // overridden in values.
    return {
        enabled: isSelfHosted && !postgresIsExternal && backup?.enabled === true,
        schedule: backup?.schedule || "0 2 * * *",
        retentionDays: backup?.retentionDays || 7,
    };
}
|
|
832
|
+
/**
 * Reports whether Redis is configured as an external service.
 *
 * @param {object} config - Deployment configuration.
 * @returns {boolean} True only when externalServices.redis.mode is "external".
 */
function isExternalRedis(config) {
    const redisMode = config.externalServices?.redis?.mode;
    return redisMode === "external";
}
|
|
835
|
+
/**
 * Reports whether Kafka is configured as an external service.
 *
 * @param {object} config - Deployment configuration.
 * @returns {boolean} True only when externalServices.kafka.mode is "external".
 */
function isExternalKafka(config) {
    const kafkaMode = config.externalServices?.kafka?.mode;
    return kafkaMode === "external";
}
|
|
838
|
+
/**
 * Decides whether the Vector kafka-proxy bridge sidecar is required.
 *
 * Only AWS MSK IAM needs it: Vector's kafka source can't speak token
 * mechanisms, whereas Azure Event Hubs and GCP both use SASL PLAIN/SCRAM,
 * which Vector handles directly.
 *
 * @param {object} config - Deployment configuration.
 * @returns {boolean} True when Kafka is external and either the preset is
 *   "aws-msk-iam" or the SASL mechanism is "aws-iam".
 */
function kafkaUsesBridge(config) {
    if (isExternalKafka(config)) {
        const { preset, sasl } = config.externalServices?.kafka?.external ?? {};
        return preset === "aws-msk-iam" || sasl?.mechanism === "aws-iam";
    }
    return false;
}
|
|
849
|
+
/**
 * Decides whether Vector's kafka source connects with a direct PLAIN/SCRAM
 * credential and therefore needs a username/password.
 *
 * This mirrors the vector-kafka-env ConfigMap, which only sets
 * KAFKA_SASL_ENABLED=true for external, non-token, non-bridge mechanisms
 * (and where vector-kafka-credentials is populated). On the in-cluster,
 * bridge, and token-auth paths SASL is disabled, so username and password
 * MUST be omitted: an empty env default (${VAR:-}) renders unquoted via
 * Helm's toYaml and Vector reads the value as YAML null, which it rejects at
 * startup ("invalid type: unit value, expected any valid TOML value").
 *
 * @param {object} config - Deployment configuration.
 * @returns {boolean} True only for external, non-bridge Kafka whose SASL
 *   mechanism is set and is neither "aws-iam" nor "oauthbearer".
 */
function kafkaUsesDirectSasl(config) {
    if (!isExternalKafka(config) || kafkaUsesBridge(config)) {
        return false;
    }
    const mechanism = config.externalServices?.kafka?.external?.sasl?.mechanism;
    const tokenMechanisms = ["aws-iam", "oauthbearer"];
    return Boolean(mechanism) && !tokenMechanisms.includes(mechanism);
}
|
|
869
|
+
/**
 * Builds the rulebricks.redis block.
 *
 * With embedded Redis only deployment-specific settings are emitted (pod
 * labels, scheduling, persistence storage class); with managed Redis the
 * in-cluster instance is disabled and the external connection settings are
 * emitted instead.
 *
 * @param {object} config - Deployment configuration.
 * @param {string} storageClass - Storage class for the in-cluster volume.
 * @param {object} infrastructurePodLabels - Labels for the in-cluster pod.
 * @param {object} coreScheduling - Scheduling fragment spread into the
 *   in-cluster block.
 * @returns {object} The redis values block.
 */
function generateRedisBlock(config, storageClass, infrastructurePodLabels, coreScheduling) {
    if (isExternalRedis(config)) {
        const ext = config.externalServices?.redis?.external ?? {};
        const external = {
            host: ext.host ?? "",
            port: ext.port ?? 6379,
            tls: { enabled: ext.tls ?? false },
            // Optional credentials and HTTP API settings appear only when set.
            ...(ext.password ? { password: ext.password } : {}),
            ...(ext.existingSecret
                ? {
                    existingSecret: ext.existingSecret,
                    existingSecretKey: ext.existingSecretKey || "redis-password",
                }
                : {}),
            ...(ext.httpApi?.enabled
                ? {
                    httpApi: {
                        enabled: true,
                        url: ext.httpApi.url ?? "",
                        token: ext.httpApi.token ?? "",
                    },
                }
                : {}),
        };
        return { enabled: false, external };
    }
    // Sizing (resources, persistence size) falls back to the chart defaults;
    // only the deployment-specific storage class is set here.
    return {
        podLabels: infrastructurePodLabels,
        ...coreScheduling,
        persistence: { enabled: true, storageClass },
    };
}
|
|
911
|
+
/**
 * Builds the `valkeyAdmin` and `redisExporter` value blocks from
 * `config.features.cache`.
 *
 * Valkey Admin is off unless enabled and defaults to "internal" exposure;
 * its ingress is enabled only for exposure "ingress", in which case the
 * hostname falls back to `valkey.<config.domain>`. The Redis exporter is on
 * unless explicitly configured otherwise.
 *
 * @param {object} config - Deployment configuration.
 * @param {object} infrastructurePodLabels - Labels applied to both pods.
 * @returns {{valkeyAdmin: object, redisExporter: object}}
 */
function generateCacheObservabilityBlock(config, infrastructurePodLabels) {
    const { valkeyAdmin: admin, redisExporter: exporter } = config.features.cache ?? {};
    const exposedViaIngress = admin?.exposure === "ingress";
    let ingressHostname = "";
    if (exposedViaIngress) {
        ingressHostname = admin?.hostname || `valkey.${config.domain}`;
    }
    const valkeyAdmin = {
        enabled: admin?.enabled ?? false,
        exposure: admin?.exposure ?? "internal",
        podLabels: infrastructurePodLabels,
        ingress: {
            enabled: exposedViaIngress,
            hostname: ingressHostname,
            basicAuth: {
                users: admin?.basicAuthUsers ?? [],
                existingSecret: admin?.basicAuthExistingSecret ?? "",
            },
            allowedIPs: admin?.allowedIPs ?? [],
        },
    };
    const redisExporter = {
        enabled: exporter?.enabled ?? true,
        podLabels: infrastructurePodLabels,
    };
    return { valkeyAdmin, redisExporter };
}
|
|
939
|
+
/**
 * Builds the Kafka exporter values block.
 *
 * An explicit `features.cache.kafkaExporter.enabled` always wins; when it is
 * not set the exporter defaults to on for in-cluster Kafka and off for
 * external Kafka. `brokers` is only filled in for external Kafka.
 *
 * @param {object} config - Deployment configuration.
 * @param {object} infrastructurePodLabels - Labels applied to the exporter pod.
 * @returns {{enabled: boolean, podLabels: object, brokers: string}}
 */
function generateKafkaExporterBlock(config, infrastructurePodLabels) {
    const externalKafka = isExternalKafka(config);
    const explicitlyRequested = config.features.cache?.kafkaExporter?.enabled;
    let brokers = "";
    if (externalKafka) {
        brokers = config.externalServices?.kafka?.external?.brokers ?? "";
    }
    return {
        enabled: explicitlyRequested ?? !externalKafka,
        podLabels: infrastructurePodLabels,
        brokers,
    };
}
|
|
950
|
+
/**
 * Builds the rulebricks.app.logging block.
 *
 * Decision logging is always enabled. External Kafka contributes brokers,
 * topic, SSL and (when a mechanism is configured) SASL settings; embedded
 * Kafka leaves the brokers empty so the in-cluster Kafka service is
 * auto-discovered.
 *
 * @param {object} config - Deployment configuration.
 * @returns {object} The logging values block.
 */
function generateAppLogging(config) {
    if (isExternalKafka(config)) {
        const ext = config.externalServices?.kafka?.external ?? {};
        const logging = {
            enabled: true,
            kafkaBrokers: ext.brokers ?? "",
            kafkaTopic: ext.topic || "logs",
            kafkaSsl: ext.ssl ?? false,
        };
        // Topic prefix: emitted only when explicitly provided (including ""
        // to disable). When omitted, the chart default (com.rulebricks.)
        // applies via value merge.
        if (ext.topicPrefix !== undefined) {
            logging.kafkaTopicPrefix = ext.topicPrefix;
        }
        const mechanism = ext.sasl?.mechanism;
        if (mechanism) {
            const sasl = { mechanism };
            // Copy across only the optional SASL fields that are set.
            for (const field of ["region", "username", "password", "existingSecret"]) {
                const value = ext.sasl[field];
                if (value) {
                    sasl[field] = value;
                }
            }
            logging.kafkaSasl = sasl;
        }
        return logging;
    }
    return {
        enabled: true,
        kafkaBrokers: "", // Auto-discover from Kafka subchart
        kafkaTopic: "logs",
        // The in-cluster app/HPS produce to unprefixed topics (logs,
        // solution, solution-response). The chart default prefix
        // ("com.rulebricks.") exists for collision avoidance on
        // shared/managed Kafka; applied here it would make the chart-side
        // consumers diverge from the producers: Vector would subscribe to
        // "com.rulebricks.logs" (no data) and the KEDA worker trigger would
        // watch "com.rulebricks.solution" (no lag signal). Prefixing is
        // therefore disabled for the dedicated in-cluster broker.
        kafkaTopicPrefix: "",
    };
}
|
|
997
|
+
/**
 * HPS service account values.
 *
 * When external Kafka uses MSK IAM, HPS authenticates to the broker with its
 * pod's cloud identity. Under EKS Pod Identity that identity comes from a
 * namespace-scoped association (created by the CLI's workload-identity step
 * for the `<release>-hps` SA), NOT from an eks.amazonaws.com/role-arn
 * annotation - so the SA is only CREATED here, without annotations, to give
 * the association a subject to bind.
 *
 * @param {object} config - Deployment configuration.
 * @returns {{create: boolean, annotations: object}} `create` is true only
 *   on the bridge (MSK IAM) path; `annotations` is always empty.
 */
function generateHpsServiceAccount(config) {
    const needsServiceAccount = Boolean(kafkaUsesBridge(config));
    return { create: needsServiceAccount, annotations: {} };
}
|
|
1010
|
+
/**
 * Top-level kafkaBridge block consumed by the Vector env ConfigMap.
 *
 * Enabled only for AWS MSK IAM, where a kafka-proxy sidecar fronts the
 * brokers for Vector; in every other case the block is just
 * `{ enabled: false }`.
 *
 * @param {object} config - Deployment configuration.
 * @returns {object} The kafkaBridge values block.
 */
function generateKafkaBridge(config) {
    if (kafkaUsesBridge(config)) {
        const { brokers, sasl, identity } = config.externalServices?.kafka?.external ?? {};
        return {
            enabled: true,
            provider: "aws",
            region: sasl?.region ?? "",
            brokers: brokers ?? "",
            localPort: 19092,
            image: KAFKA_PROXY_IMAGE,
            awsRoleArn: identity?.awsRoleArn ?? "",
        };
    }
    return { enabled: false };
}
|
|
1029
|
+
/**
|
|
1030
|
+
* kafka-proxy sidecar for the Vector pod (AWS MSK IAM). Maps each upstream
|
|
1031
|
+
* broker to a sequential local port and authenticates with the pod's IRSA role.
|
|
396
1032
|
*/
|
|
397
|
-
function
|
|
1033
|
+
function generateVectorExtraContainers(config) {
    if (!kafkaUsesBridge(config)) {
        return undefined;
    }
    const ext = config.externalServices?.kafka?.external ?? {};
    // Comma-separated broker list -> trimmed, non-empty entries.
    const upstreamBrokers = [];
    for (const entry of (ext.brokers ?? "").split(",")) {
        const broker = entry.trim();
        if (broker) {
            upstreamBrokers.push(broker);
        }
    }
    if (upstreamBrokers.length === 0) {
        return undefined;
    }
    // Each upstream broker gets its own sequential local port, starting at
    // 19092, exposed by the sidecar on 127.0.0.1.
    const firstLocalPort = 19092;
    const args = ["server"];
    const ports = [];
    upstreamBrokers.forEach((broker, index) => {
        const localPort = firstLocalPort + index;
        args.push(`--bootstrap-server-mapping=${broker},127.0.0.1:${localPort}`);
        ports.push({ containerPort: localPort });
    });
    args.push(
        "--tls-enable",
        "--sasl-enable",
        "--sasl-method=AWS_MSK_IAM",
        `--sasl-aws-region=${ext.sasl?.region ?? ""}`,
    );
    return [
        {
            name: "kafka-proxy",
            image: KAFKA_PROXY_IMAGE,
            args,
            ports,
        },
    ];
}
|
|
1061
|
+
// VRL program for the Vector agent's remap transform. It parses JSON app/HPS
// log lines, lifts trace_id/span_id for logs<->traces correlation, and
// flattens useful Kubernetes metadata onto the event. Kept in sync with
// charts/.../values.yaml vector-agent.customConfig.transforms.
const VECTOR_APP_LOGS_VRL = `parsed, err = parse_json(to_string(.message) ?? "")
if err == null && is_object(parsed) {
 .log = parsed
 .trace_id = parsed.trace_id
 .span_id = parsed.span_id
 if exists(parsed.level) { .level = to_string(parsed.level) ?? "info" }
}
.pod = .kubernetes.pod_name
.namespace = .kubernetes.pod_namespace
.container = .kubernetes.container_name
.node = .kubernetes.pod_node_name`;
|
|
418
1077
|
/**
|
|
419
|
-
*
|
|
1078
|
+
* global.tracing block (in-cluster OTel Collector -> pluggable trace backend).
|
|
1079
|
+
* Emits the destination-specific sub-block (elastic | otlp | azure-monitor) and
|
|
1080
|
+
* returns undefined when tracing is disabled so it is omitted entirely.
|
|
420
1081
|
*/
|
|
421
|
-
|
|
422
|
-
const
|
|
423
|
-
|
|
1082
|
+
function generateTracingGlobal(config) {
    const tracing = config.features.tracing;
    if (!tracing?.enabled) {
        return undefined;
    }
    const destination = tracing.destination ?? "elastic";
    const collectorImage = IMAGE_REPOSITORIES.opentelemetryCollector;
    const common = {
        enabled: true,
        destination,
        samplingRatio: tracing.samplingRatio ?? 1,
        // RB image dict for the parent chart's otel-collector deployment.
        // The rulebricks.image helper requires image.repository and applies
        // global.imageRegistry to the host.
        collector: {
            image: {
                registry: config.imageRegistry || DEFAULT_IMAGE_REGISTRY,
                repository: collectorImage.repository,
                tag: collectorImage.tag,
            },
        },
    };
    switch (destination) {
        case "elastic": {
            const source = tracing.elastic ?? {};
            const authMode = source.authMode ?? "secret-token";
            const elastic = {
                endpoint: source.endpoint ?? "",
                authMode,
                tlsInsecureSkipVerify: false,
            };
            // Only the credential matching the selected auth mode is emitted.
            if (authMode === "secret-token" && source.secretToken) {
                elastic.secretToken = source.secretToken;
            }
            else if (authMode === "api-key" && source.apiKey) {
                elastic.apiKey = source.apiKey;
            }
            return { ...common, elastic };
        }
        case "otlp": {
            const source = tracing.otlp ?? {};
            const authMode = source.authMode ?? "none";
            const otlp = {
                endpoint: source.endpoint ?? "",
                authMode,
                tlsInsecureSkipVerify: source.tlsInsecureSkipVerify ?? false,
            };
            switch (authMode) {
                case "bearer":
                    if (source.token) {
                        otlp.token = source.token;
                    }
                    break;
                case "api-key":
                    if (source.apiKey) {
                        otlp.apiKey = source.apiKey;
                    }
                    break;
                case "header":
                    otlp.headerName = source.headerName ?? "Authorization";
                    if (source.headerValue) {
                        otlp.headerValue = source.headerValue;
                    }
                    break;
                default:
                    break;
            }
            // Extra static headers are independent of the auth mode.
            if (source.headers && Object.keys(source.headers).length > 0) {
                otlp.headers = source.headers;
            }
            return { ...common, otlp };
        }
        default: {
            // Any other destination is handled as azure-monitor.
            const source = tracing.azureMonitor ?? {};
            return {
                ...common,
                azureMonitor: { connectionString: source.connectionString ?? "" },
            };
        }
    }
}
|
|
1148
|
+
/**
 * traefik.tracing block: makes Traefik the root span and propagates the W3C
 * traceparent to backends.
 *
 * @param {object} config - Deployment configuration.
 * @param {string} releaseName - Release name; used to build the in-cluster
 *   `<release>-otel-collector` OTLP/HTTP endpoint.
 * @returns {object} The OTLP tracing block, or an empty object when neither
 *   ClickStack nor tracing is enabled.
 */
function generateTraefikTracing(config, releaseName) {
    const tracingActive = isClickStackEnabled(config) || config.features.tracing?.enabled;
    if (!tracingActive) {
        return {};
    }
    const collectorEndpoint = `http://${releaseName}-otel-collector:4318/v1/traces`;
    return {
        otlp: {
            enabled: true,
            http: { enabled: true, endpoint: collectorEndpoint },
        },
    };
}
|
|
1165
|
+
/**
 * vector-agent block: a second Vector deployment (role Agent / DaemonSet)
 * that tails all pod logs and ships them to the configured app-log
 * destination (Elasticsearch by default, or Loki / a generic HTTP endpoint).
 * Decision logs are unaffected (they stay in ClickHouse via the `vector`
 * aggregator).
 *
 * @param {object} config - Deployment configuration.
 * @param {object} podLabels - Labels applied to the agent pods.
 * @param {Array<object>} tolerations - Tolerations applied to the agent pods.
 * @returns {object} `{ enabled: false }` when app logs are disabled,
 *   otherwise the full vector-agent values block.
 */
function generateVectorAgent(config, podLabels, tolerations) {
    const appLogs = config.features.logging.appLogs;
    if (!appLogs?.enabled) {
        return { enabled: false };
    }
    let sinkName;
    let sink;
    switch (appLogs.destination ?? "elasticsearch") {
        case "loki": {
            const loki = appLogs.loki ?? {};
            sinkName = "loki";
            sink = {
                type: "loki",
                inputs: ["app_logs"],
                endpoint: loki.endpoint,
                labels: loki.labels ?? {
                    app: "rulebricks",
                    namespace: "{{ namespace }}",
                    pod: "{{ pod }}",
                    container: "{{ container }}",
                },
                encoding: { codec: "json" },
            };
            break;
        }
        case "generic": {
            const generic = appLogs.generic ?? {};
            sinkName = "generic_http";
            sink = {
                type: "http",
                inputs: ["app_logs"],
                uri: generic.endpoint,
                method: "post",
                encoding: { codec: "json" },
            };
            if (generic.authHeader) {
                sink.request = { headers: { Authorization: generic.authHeader } };
            }
            break;
        }
        default: {
            // Every other destination value is treated as Elasticsearch.
            const es = appLogs.elasticsearch ?? {};
            const authMode = es.authMode ?? "basic";
            sinkName = "elasticsearch";
            sink = {
                type: "elasticsearch",
                inputs: ["app_logs"],
                endpoints: [es.endpoint],
                mode: "bulk",
                bulk: { index: es.index || "rulebricks-app-logs" },
                tls: { verify_certificate: es.verifyCertificate ?? true },
            };
            if (authMode === "basic") {
                sink.auth = { strategy: "basic", user: es.username, password: es.password };
            }
            else if (authMode === "api-key") {
                sink.request = { headers: { Authorization: `ApiKey ${es.apiKey}` } };
            }
            break;
        }
    }
    const sources = {
        kubernetes_logs: {
            type: "kubernetes_logs",
            // Skip both Vector deployments: the aggregator
            // (app.kubernetes.io/name=vector) re-emits decision logs on
            // stdout (those belong in ClickHouse, not Elasticsearch), and
            // the agent itself (vector-agent) is excluded to avoid a
            // self-scrape loop.
            extra_label_selector: "app.kubernetes.io/name notin (vector,vector-agent)",
        },
    };
    const transforms = {
        app_logs: {
            type: "remap",
            inputs: ["kubernetes_logs"],
            source: VECTOR_APP_LOGS_VRL,
        },
    };
    return {
        enabled: true,
        role: "Agent",
        podLabels,
        // Follow active worker pools without tolerating shutdown,
        // out-of-service, or unreachable node taints.
        tolerations,
        resources: {
            requests: { cpu: "100m", memory: "256Mi" },
            limits: { cpu: "500m", memory: "512Mi" },
        },
        customConfig: {
            data_dir: "/vector-data-dir",
            sources,
            transforms,
            sinks: { [sinkName]: sink },
        },
    };
}
|
|
1260
|
+
/**
|
|
1261
|
+
* Builds Helm values from the deployment configuration.
|
|
1262
|
+
*/
|
|
1263
|
+
export function buildHelmValues(config, options = {}) {
|
|
1264
|
+
if (config.database.type === "self-hosted" &&
|
|
1265
|
+
!config.database.supabaseJwtSecret) {
|
|
1266
|
+
throw new Error("Self-hosted Supabase is missing a JWT secret. Run `rulebricks redeploy <name>` to regenerate deployment credentials, or set database.supabaseJwtSecret in config.yaml.");
|
|
1267
|
+
}
|
|
1268
|
+
if (config.features.ai.enabled && !config.features.ai.openaiApiKey) {
|
|
1269
|
+
throw new Error("AI features are enabled but the OpenAI API key is missing. Run `rulebricks redeploy <name>` and enter your OpenAI API key, or disable AI features in config.yaml.");
|
|
1270
|
+
}
|
|
1271
|
+
const { tlsEnabled = true, secretMode = "inline" } = options;
|
|
424
1272
|
const useLocalGrafana = config.features.monitoring.destination === "local-grafana";
|
|
425
1273
|
// Determine if external-dns should be enabled
|
|
426
1274
|
const externalDnsEnabled = config.dns.autoManage && isSupportedDnsProvider(config.dns.provider);
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
1275
|
+
const gcpDiskType = config.infrastructure.nodeArchitecture === "amd64"
|
|
1276
|
+
? "pd-balanced"
|
|
1277
|
+
: "hyperdisk-balanced";
|
|
1278
|
+
// Prefer the live cluster's StorageClass. Provider defaults are only a
|
|
1279
|
+
// fallback for legacy configs that predate capability scanning.
|
|
1280
|
+
const storageClass = config.infrastructure.storageClass ||
|
|
1281
|
+
(config.infrastructure.provider === "aws"
|
|
1282
|
+
? "gp3"
|
|
1283
|
+
: config.infrastructure.provider === "gcp"
|
|
1284
|
+
? gcpDiskType
|
|
1285
|
+
: config.infrastructure.provider === "azure"
|
|
1286
|
+
? "managed-premium"
|
|
1287
|
+
: "gp3");
|
|
1288
|
+
const shouldApplyArm64Toleration = config.infrastructure.arm64TolerationRequired ?? false;
|
|
1289
|
+
const architectureTolerations = shouldApplyArm64Toleration
|
|
1290
|
+
? [
|
|
1291
|
+
{
|
|
1292
|
+
key: "kubernetes.io/arch",
|
|
1293
|
+
operator: "Equal",
|
|
1294
|
+
value: "arm64",
|
|
1295
|
+
effect: "NoSchedule",
|
|
1296
|
+
},
|
|
1297
|
+
]
|
|
1298
|
+
: undefined;
|
|
1299
|
+
const coreScheduling = generateScheduling(architectureTolerations);
|
|
1300
|
+
// Workers always tolerate + softly prefer the optional burst pool
|
|
1301
|
+
// (rulebricks.com/pool=burst). The preference is soft, so clusters without a
|
|
1302
|
+
// burst pool schedule workers on ordinary capacity exactly as before.
|
|
1303
|
+
const workerTolerations = [
|
|
1304
|
+
...(architectureTolerations ?? []),
|
|
1305
|
+
BURST_POOL_TOLERATION,
|
|
445
1306
|
];
|
|
1307
|
+
const operationalDaemonSetTolerations = workerTolerations;
|
|
1308
|
+
const workerScheduling = generateScheduling(workerTolerations, {
|
|
1309
|
+
...generateWorkerPodAntiAffinity(),
|
|
1310
|
+
nodeAffinity: {
|
|
1311
|
+
preferredDuringSchedulingIgnoredDuringExecution: [
|
|
1312
|
+
BURST_POOL_NODE_PREFERENCE,
|
|
1313
|
+
],
|
|
1314
|
+
},
|
|
1315
|
+
});
|
|
1316
|
+
const infrastructurePodLabels = {
|
|
1317
|
+
"rulebricks.com/workload-group": "infrastructure",
|
|
1318
|
+
};
|
|
1319
|
+
const applicationPodLabels = {
|
|
1320
|
+
"rulebricks.com/workload-group": "application",
|
|
1321
|
+
};
|
|
1322
|
+
const productVersion = config.version;
|
|
1323
|
+
// Scheduling priority tiers. The chart creates release-scoped
|
|
1324
|
+
// PriorityClasses (<release>-critical / <release>-burst); stateful
|
|
1325
|
+
// infrastructure references the critical class so it can always preempt
|
|
1326
|
+
// burst workers to reschedule, and workers reference the burst class so
|
|
1327
|
+
// they are strictly the first preemption victims. Subchart values cannot
|
|
1328
|
+
// template release names, so the CLI emits them as literals.
|
|
1329
|
+
const releaseName = getReleaseName(config.name);
|
|
1330
|
+
const criticalPriorityClass = `${releaseName}-critical`;
|
|
1331
|
+
const burstPriorityClass = `${releaseName}-burst`;
|
|
1332
|
+
// Subcharts that don't honor global.imagePullSecrets (keda, strimzi, traefik,
|
|
1333
|
+
// vector) need the pull secret on their own key so their pods can pull the
|
|
1334
|
+
// private docker.io/rulebricks/* images from index.docker.io.
|
|
1335
|
+
const rulebricksPullSecret = [{ name: `${releaseName}-regcred` }];
|
|
1336
|
+
// Registry host for every image. Empty config.imageRegistry => docker.io. When
|
|
1337
|
+
// set, the host is rewritten into global.imageRegistry (which kube-prometheus-stack
|
|
1338
|
+
// and our subcharts honor) and into each of the six Tier-2 charts' own image
|
|
1339
|
+
// keys below, always keeping the rulebricks/<name> path.
|
|
1340
|
+
const reg = config.imageRegistry || DEFAULT_IMAGE_REGISTRY;
|
|
1341
|
+
const clickStackEnabled = isClickStackEnabled(config);
|
|
1342
|
+
const clickStackConfig = config.features.observability?.clickstack;
|
|
1343
|
+
const clickHouseStorageSize = clickStackConfig?.clickHouseStorageSize ?? "100Gi";
|
|
1344
|
+
// Distributed tracing (self-hosted only). Lives under global so the
|
|
1345
|
+
// rulebricks subchart deployments can read it; the collector + traefik are
|
|
1346
|
+
// wired below from the same source.
|
|
1347
|
+
const tracingGlobal = clickStackEnabled ? undefined : generateTracingGlobal(config);
|
|
1348
|
+
// Never let the cluster-autoscaler evict single-replica stateful pods
|
|
1349
|
+
// during node scale-down; an evicted broker/db stalls the whole pipeline.
|
|
1350
|
+
const safeToEvictAnnotations = {
|
|
1351
|
+
"cluster-autoscaler.kubernetes.io/safe-to-evict": "false",
|
|
1352
|
+
};
|
|
446
1353
|
// Build global.supabase configuration
|
|
447
1354
|
const supabaseGlobalConfig = config.database.type === "supabase-cloud"
|
|
448
1355
|
? {
|
|
@@ -452,27 +1359,51 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
452
1359
|
accessToken: config.database.supabaseAccessToken || undefined,
|
|
453
1360
|
projectRef: config.database.supabaseProjectRef || undefined,
|
|
454
1361
|
}
|
|
455
|
-
: {
|
|
456
|
-
jwtSecret
|
|
457
|
-
|
|
458
|
-
|
|
1362
|
+
: (() => {
|
|
1363
|
+
const jwtSecret = config.database.supabaseJwtSecret || "";
|
|
1364
|
+
return {
|
|
1365
|
+
jwtSecret: jwtSecret || undefined,
|
|
1366
|
+
anonKey: jwtSecret ? signSupabaseJwt("anon", jwtSecret) : undefined,
|
|
1367
|
+
serviceKey: jwtSecret
|
|
1368
|
+
? signSupabaseJwt("service_role", jwtSecret)
|
|
1369
|
+
: undefined,
|
|
1370
|
+
};
|
|
1371
|
+
})();
|
|
1372
|
+
// Always emit email configuration so auth pods receive template/subject env
|
|
1373
|
+
// vars regardless of Helm merge order. Custom values take precedence over
|
|
1374
|
+
// built-in defaults when explicitly enabled.
|
|
1375
|
+
const customEmails = config.features.customEmails;
|
|
1376
|
+
if (customEmails?.enabled &&
|
|
1377
|
+
customEmails.subjects &&
|
|
1378
|
+
customEmails.templates) {
|
|
1379
|
+
supabaseGlobalConfig.emails = {
|
|
1380
|
+
subjects: {
|
|
1381
|
+
invite: customEmails.subjects.invite,
|
|
1382
|
+
confirmation: customEmails.subjects.confirmation,
|
|
1383
|
+
recovery: customEmails.subjects.recovery,
|
|
1384
|
+
emailChange: customEmails.subjects.emailChange,
|
|
1385
|
+
},
|
|
1386
|
+
templates: {
|
|
1387
|
+
invite: customEmails.templates.invite,
|
|
1388
|
+
confirmation: customEmails.templates.confirmation,
|
|
1389
|
+
recovery: customEmails.templates.recovery,
|
|
1390
|
+
emailChange: customEmails.templates.emailChange,
|
|
1391
|
+
},
|
|
459
1392
|
};
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
config.features.customEmails.subjects &&
|
|
463
|
-
config.features.customEmails.templates) {
|
|
1393
|
+
}
|
|
1394
|
+
else {
|
|
464
1395
|
supabaseGlobalConfig.emails = {
|
|
465
1396
|
subjects: {
|
|
466
|
-
invite:
|
|
467
|
-
confirmation:
|
|
468
|
-
recovery:
|
|
469
|
-
emailChange:
|
|
1397
|
+
invite: "Join your team on Rulebricks",
|
|
1398
|
+
confirmation: "Confirm Your Email",
|
|
1399
|
+
recovery: "Reset Your Password",
|
|
1400
|
+
emailChange: "Confirm Email Change",
|
|
470
1401
|
},
|
|
471
1402
|
templates: {
|
|
472
|
-
invite:
|
|
473
|
-
confirmation:
|
|
474
|
-
recovery:
|
|
475
|
-
emailChange:
|
|
1403
|
+
invite: "https://prefix-files.s3.us-west-2.amazonaws.com/templates/invite.html",
|
|
1404
|
+
confirmation: "https://prefix-files.s3.us-west-2.amazonaws.com/templates/verify.html",
|
|
1405
|
+
recovery: "https://prefix-files.s3.us-west-2.amazonaws.com/templates/password_change.html",
|
|
1406
|
+
emailChange: "https://prefix-files.s3.us-west-2.amazonaws.com/templates/email_change.html",
|
|
476
1407
|
},
|
|
477
1408
|
};
|
|
478
1409
|
}
|
|
@@ -485,7 +1416,30 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
485
1416
|
email: config.adminEmail,
|
|
486
1417
|
tlsEnabled,
|
|
487
1418
|
licenseKey: config.licenseKey,
|
|
1419
|
+
// Pull secret for the private docker.io/rulebricks/* images. References the
|
|
1420
|
+
// license registry secret <release>-regcred (index.docker.io, authed by the
|
|
1421
|
+
// license PAT). kube-prometheus-stack + cert-manager honor this global value;
|
|
1422
|
+
// keda, traefik, vector and the strimzi operator each get the same secret on
|
|
1423
|
+
// their own key below.
|
|
1424
|
+
imagePullSecrets: [{ name: `${releaseName}-regcred` }],
|
|
1425
|
+
// Single registry-host override (empty => docker.io/rulebricks/*). Honored by
|
|
1426
|
+
// kube-prometheus-stack and our subcharts; the CLI also rewrites the host into
|
|
1427
|
+
// the other Tier-2 charts' native image keys below.
|
|
1428
|
+
...(config.imageRegistry ? { imageRegistry: config.imageRegistry } : {}),
|
|
1429
|
+
// Generated name->sha256 digest map (empty until the helm repo's mirror
|
|
1430
|
+
// pipeline populates IMAGE_DIGESTS). When a name is present the chart image
|
|
1431
|
+
// helper pins @sha256 instead of :tag.
|
|
1432
|
+
imageDigests: IMAGE_DIGESTS,
|
|
1433
|
+
...(productVersion && SEMVER_PATTERN.test(productVersion)
|
|
1434
|
+
? { version: productVersion }
|
|
1435
|
+
: {}),
|
|
488
1436
|
externalDnsEnabled,
|
|
1437
|
+
// Scheduling priority tiers (the chart renders release-scoped
|
|
1438
|
+
// <release>-critical and <release>-burst PriorityClasses).
|
|
1439
|
+
priorityClasses: { enabled: true },
|
|
1440
|
+
clickstack: {
|
|
1441
|
+
enabled: clickStackEnabled,
|
|
1442
|
+
},
|
|
489
1443
|
// SMTP Configuration
|
|
490
1444
|
smtp: {
|
|
491
1445
|
host: config.smtp.host,
|
|
@@ -516,62 +1470,164 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
516
1470
|
: {
|
|
517
1471
|
enabled: false,
|
|
518
1472
|
},
|
|
1473
|
+
storage: config.storage
|
|
1474
|
+
? {
|
|
1475
|
+
// One provider, one identity, one bucket/container. decision-logs and
|
|
1476
|
+
// db-backups are key prefixes under paths.* within it.
|
|
1477
|
+
provider: config.storage.provider,
|
|
1478
|
+
bucket: config.storage.bucket,
|
|
1479
|
+
region: config.storage.region,
|
|
1480
|
+
s3: {
|
|
1481
|
+
iamRoleArn: config.storage.awsIamRoleArn || "",
|
|
1482
|
+
existingSecret: { name: "" },
|
|
1483
|
+
},
|
|
1484
|
+
azure: {
|
|
1485
|
+
authMode: config.storage.cloudAuthMode === "secret"
|
|
1486
|
+
? "connection-string"
|
|
1487
|
+
: "workload-identity",
|
|
1488
|
+
clientId: config.storage.azureBlobClientId || "",
|
|
1489
|
+
tenantId: config.storage.azureBlobTenantId || "",
|
|
1490
|
+
container: config.storage.azureBlobContainer || "",
|
|
1491
|
+
connectionStringSecretRef: config.storage.azureBlobConnectionStringSecretRef || {
|
|
1492
|
+
name: "",
|
|
1493
|
+
key: "",
|
|
1494
|
+
},
|
|
1495
|
+
},
|
|
1496
|
+
gcp: {
|
|
1497
|
+
serviceAccountEmail: config.storage.gcpServiceAccountEmail || "",
|
|
1498
|
+
},
|
|
1499
|
+
paths: {
|
|
1500
|
+
decisionLogs: config.storage.paths?.decisionLogs || "decision-logs",
|
|
1501
|
+
dbBackups: config.storage.paths?.dbBackups || "db-backups",
|
|
1502
|
+
},
|
|
1503
|
+
}
|
|
1504
|
+
: undefined,
|
|
1505
|
+
// Distributed tracing (omitted entirely when disabled).
|
|
1506
|
+
...(tracingGlobal ? { tracing: tracingGlobal } : {}),
|
|
519
1507
|
},
|
|
1508
|
+
clickstack: generateClickStackValues(clickStackEnabled, config, storageClass, infrastructurePodLabels, operationalDaemonSetTolerations),
|
|
1509
|
+
backup: generateBackupValues(config),
|
|
520
1510
|
// =============================================================================
|
|
521
1511
|
// RULEBRICKS APPLICATION STACK
|
|
522
1512
|
// =============================================================================
|
|
523
1513
|
rulebricks: {
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
image: {
|
|
528
|
-
repository: "index.docker.io/rulebricks/app",
|
|
529
|
-
tag: config.appVersion,
|
|
530
|
-
pullPolicy: "IfNotPresent",
|
|
531
|
-
},
|
|
532
|
-
}
|
|
533
|
-
: {}),
|
|
534
|
-
replicaCount: tierConfig.appReplicas,
|
|
535
|
-
resources: tierConfig.appResources,
|
|
536
|
-
tolerations: arm64Tolerations,
|
|
537
|
-
// Logging configuration
|
|
538
|
-
logging: {
|
|
1514
|
+
metrics: {
|
|
1515
|
+
enabled: true,
|
|
1516
|
+
serviceMonitor: {
|
|
539
1517
|
enabled: true,
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
1518
|
+
interval: "30s",
|
|
1519
|
+
scrapeTimeout: "10s",
|
|
1520
|
+
},
|
|
1521
|
+
app: {
|
|
1522
|
+
path: "/api/metrics",
|
|
1523
|
+
},
|
|
1524
|
+
hps: {
|
|
1525
|
+
path: "/metrics",
|
|
1526
|
+
},
|
|
1527
|
+
worker: {
|
|
1528
|
+
path: "/metrics",
|
|
1529
|
+
port: 3000,
|
|
543
1530
|
},
|
|
544
1531
|
},
|
|
1532
|
+
app: {
|
|
1533
|
+
image: {
|
|
1534
|
+
// Split shape: the rulebricks-chart.image helper applies
|
|
1535
|
+
// global.imageRegistry to the host + digest pinning. The host NEVER
|
|
1536
|
+
// goes in repository.
|
|
1537
|
+
registry: reg,
|
|
1538
|
+
repository: IMAGE_REPOSITORIES.app,
|
|
1539
|
+
pullPolicy: "IfNotPresent",
|
|
1540
|
+
},
|
|
1541
|
+
// Replica count and resources fall back to the chart defaults.
|
|
1542
|
+
podLabels: infrastructurePodLabels,
|
|
1543
|
+
...coreScheduling,
|
|
1544
|
+
// Logging configuration (in-cluster auto-discovery or external Kafka)
|
|
1545
|
+
logging: generateAppLogging(config),
|
|
1546
|
+
},
|
|
545
1547
|
// HPS (High Performance Server)
|
|
546
1548
|
hps: {
|
|
547
1549
|
enabled: true,
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
1550
|
+
image: {
|
|
1551
|
+
// Split shape (see app.image): host comes from global.imageRegistry via
|
|
1552
|
+
// the rulebricks-chart.image helper, never baked into repository.
|
|
1553
|
+
registry: reg,
|
|
1554
|
+
repository: IMAGE_REPOSITORIES.hps,
|
|
1555
|
+
pullPolicy: "Always",
|
|
1556
|
+
},
|
|
1557
|
+
// Replica count and resources fall back to the chart defaults.
|
|
1558
|
+
podLabels: applicationPodLabels,
|
|
1559
|
+
...coreScheduling,
|
|
1560
|
+
// Gather-plane autoscaling: HPS parses every chunk response, so its
|
|
1561
|
+
// capacity scales with request rate (load testing showed a fixed
|
|
1562
|
+
// gather plane plateaus throughput while workers idle). Conservative
|
|
1563
|
+
// one-pod-at-a-time scaling - each scale event rebalances the
|
|
1564
|
+
// response consumer group and can time out in-flight requests. Only the
|
|
1565
|
+
// enable flag is set here; min/max and thresholds use the chart
|
|
1566
|
+
// defaults.
|
|
1567
|
+
keda: {
|
|
1568
|
+
enabled: true,
|
|
1569
|
+
},
|
|
1570
|
+
// Warm the hps/worker images onto active worker-capable nodes so burst
|
|
1571
|
+
// scale-outs skip the image pull without targeting shutdown nodes.
|
|
1572
|
+
imagePrepull: {
|
|
1573
|
+
enabled: true,
|
|
1574
|
+
tolerations: operationalDaemonSetTolerations,
|
|
1575
|
+
},
|
|
1576
|
+
extraEnv: [
|
|
1577
|
+
// FLOW_CHUNK_MAX_ITEMS is the #1 throughput dial. Each chunk is one
|
|
1578
|
+
// Kafka round-trip (gather -> solution -> worker -> solution-response
|
|
1579
|
+
// -> gather), so throughput ~= (broker messages/sec) x (payloads per
|
|
1580
|
+
// message). Bigger chunks = fewer messages per solution = less broker
|
|
1581
|
+
// and coordination overhead. Benchmarks: 10 -> 50 gave +27%, and on
|
|
1582
|
+
// small payloads 100 -> 1000 gave another ~1.6x (22k -> 35k sol/s),
|
|
1583
|
+
// until the bottleneck moved off the broker onto worker CPU.
|
|
1584
|
+
// 500 keeps typical bulk requests to 1-2 messages. The byte bound
|
|
1585
|
+
// (CHUNK_MAX_BYTES, default 256 KiB in HPS) caps message size
|
|
1586
|
+
// regardless, so large payloads stay under Kafka's 2 MiB
|
|
1587
|
+
// max.message.bytes. High-throughput, small-payload deployments can
|
|
1588
|
+
// raise this much higher (and CHUNK_MAX_BYTES with it); the only costs
|
|
1589
|
+
// are per-request latency (one worker processes a whole chunk) and the
|
|
1590
|
+
// 2 MiB cap on the larger response message (avg output x chunk size
|
|
1591
|
+
// must stay < 2 MiB, so lower this for output-heavy flows).
|
|
1592
|
+
{ name: "FLOW_CHUNK_MAX_ITEMS", value: "500" },
|
|
1593
|
+
],
|
|
1594
|
+
// Service account (annotated with the MSK IAM role for external Kafka)
|
|
1595
|
+
serviceAccount: generateHpsServiceAccount(config),
|
|
560
1596
|
// HPS Workers with KEDA autoscaling
|
|
561
1597
|
workers: {
|
|
562
1598
|
enabled: true,
|
|
563
|
-
|
|
1599
|
+
// Workers consume the solution topic directly, so under external MSK
|
|
1600
|
+
// IAM they need their own cloud identity - not the shared/default SA.
|
|
1601
|
+
// Same rule as HPS: a dedicated `<release>-hps-worker` SA (no role-arn
|
|
1602
|
+
// annotation) that the CLI's workload-identity step binds to the Kafka
|
|
1603
|
+
// role via Pod Identity.
|
|
1604
|
+
serviceAccount: generateHpsServiceAccount(config),
|
|
1605
|
+
// Partition count of the solution request topic (also exported to
|
|
1606
|
+
// HPS as MAX_WORKERS). Must match kafka.provisioning above; it is
|
|
1607
|
+
// the fleet-concurrency ceiling, NOT a worker count. Replica count
|
|
1608
|
+
// and resources fall back to the chart defaults.
|
|
1609
|
+
solutionPartitions: SOLUTION_TOPIC_PARTITIONS,
|
|
564
1610
|
keda: {
|
|
565
1611
|
enabled: true,
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
1612
|
+
// Poll fast so bursts are detected within seconds; the chart's
|
|
1613
|
+
// ScaledObject defaults add exponential scale-up (double every
|
|
1614
|
+
// 15s) and smooth scale-down (5-min window, -25%/min) behavior.
|
|
1615
|
+
// min/max replica counts fall back to the chart defaults.
|
|
1616
|
+
pollingInterval: 5,
|
|
569
1617
|
cooldownPeriod: 300,
|
|
1618
|
+
// Lag is measured in MESSAGES; with chunked bulk dispatch each
|
|
1619
|
+
// message is a bounded unit of work (~50-150ms), so 50 messages
|
|
1620
|
+
// approximates 2.5-7.5s of backlog for a single worker - one replica
|
|
1621
|
+
// is added per ~5s of fleet backlog, biasing toward early
|
|
1622
|
+
// scale-out for bursty traffic.
|
|
570
1623
|
lagThreshold: 50,
|
|
571
1624
|
cpuThreshold: 25,
|
|
572
1625
|
},
|
|
573
|
-
|
|
574
|
-
|
|
1626
|
+
podLabels: applicationPodLabels,
|
|
1627
|
+
// Burst tier: first preemption victims, so critical infrastructure
|
|
1628
|
+
// can always reschedule during an aggressive scale-out.
|
|
1629
|
+
priorityClassName: burstPriorityClass,
|
|
1630
|
+
...workerScheduling,
|
|
575
1631
|
},
|
|
576
1632
|
},
|
|
577
1633
|
// Ingress configuration
|
|
@@ -580,74 +1636,138 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
580
1636
|
className: "traefik",
|
|
581
1637
|
paths: [{ path: "/", pathType: "Prefix" }],
|
|
582
1638
|
},
|
|
583
|
-
// Redis configuration
|
|
584
|
-
redis:
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
persistence: {
|
|
588
|
-
enabled: true,
|
|
589
|
-
size: tierConfig.redisPersistenceSize,
|
|
590
|
-
storageClass: storageClass,
|
|
591
|
-
},
|
|
592
|
-
},
|
|
1639
|
+
// Redis configuration (in-cluster sizing or external connection settings)
|
|
1640
|
+
redis: generateRedisBlock(config, storageClass, infrastructurePodLabels, coreScheduling),
|
|
1641
|
+
cache: generateCacheObservabilityBlock(config, infrastructurePodLabels),
|
|
1642
|
+
kafkaExporter: generateKafkaExporterBlock(config, infrastructurePodLabels),
|
|
593
1643
|
},
|
|
594
1644
|
// =============================================================================
|
|
595
1645
|
// KAFKA (Message Queue)
|
|
596
1646
|
// =============================================================================
|
|
597
1647
|
kafka: {
|
|
598
|
-
enabled:
|
|
599
|
-
//
|
|
600
|
-
|
|
1648
|
+
enabled: !isExternalKafka(config),
|
|
1649
|
+
// Apache Kafka version (must be one the bundled DHI Strimzi operator
|
|
1650
|
+
// supports; DHI strimzi 1.0.1 ships Kafka 4.2.0).
|
|
1651
|
+
version: "4.2.0",
|
|
1652
|
+
// Single combined controller+broker node (KRaft, no ZooKeeper).
|
|
1653
|
+
replicas: TOPIC_REPLICATION_FACTOR,
|
|
1654
|
+
storage: {
|
|
1655
|
+
size: "20Gi",
|
|
1656
|
+
class: storageClass,
|
|
1657
|
+
},
|
|
1658
|
+
// Critical tier: the broker must always be able to preempt burst workers.
|
|
1659
|
+
priorityClassName: criticalPriorityClass,
|
|
1660
|
+
config: generateKafkaConfig(),
|
|
1661
|
+
jvm: {
|
|
1662
|
+
xms: "1g",
|
|
1663
|
+
xmx: "1g",
|
|
1664
|
+
extraOpts: {
|
|
1665
|
+
UseZGC: "true",
|
|
1666
|
+
AlwaysPreTouch: "true",
|
|
1667
|
+
MaxDirectMemorySize: "256M",
|
|
1668
|
+
},
|
|
1669
|
+
},
|
|
1670
|
+
metrics: {
|
|
601
1671
|
enabled: true,
|
|
1672
|
+
serviceMonitor: { enabled: true },
|
|
602
1673
|
},
|
|
603
|
-
|
|
604
|
-
|
|
1674
|
+
// Topics, reconciled by the Strimzi Topic Operator (KafkaTopic CRs) for the
|
|
1675
|
+
// in-cluster broker, or created by the kafka-topic-provision Job for an
|
|
1676
|
+
// external MSK IAM broker.
|
|
1677
|
+
topics: generateKafkaTopics(config),
|
|
1678
|
+
// When false, the chart never creates topics on an external broker - the
|
|
1679
|
+
// operator manages them (and the workload role needs no CreateTopic).
|
|
1680
|
+
provisioning: {
|
|
1681
|
+
enabled: config.externalServices?.kafka?.external?.provisionTopics ?? true,
|
|
1682
|
+
},
|
|
1683
|
+
},
|
|
1684
|
+
// Strimzi operator: pull secret so the operator pod pulls the private
|
|
1685
|
+
// rulebricks/* image from index.docker.io.
|
|
1686
|
+
"strimzi-kafka-operator": {
|
|
1687
|
+
image: { imagePullSecrets: rulebricksPullSecret },
|
|
1688
|
+
},
|
|
1689
|
+
// =============================================================================
|
|
1690
|
+
// VECTOR KAFKA BRIDGE (AWS MSK IAM token auth)
|
|
1691
|
+
// =============================================================================
|
|
1692
|
+
kafkaBridge: generateKafkaBridge(config),
|
|
1693
|
+
clickhouse: {
|
|
1694
|
+
enabled: true,
|
|
1695
|
+
// Critical tier: single replica must preempt burst workers to
|
|
1696
|
+
// reschedule; never autoscaler-evicted on scale-down.
|
|
1697
|
+
priorityClassName: criticalPriorityClass,
|
|
1698
|
+
podAnnotations: safeToEvictAnnotations,
|
|
1699
|
+
auth: {
|
|
1700
|
+
username: "rulebricks",
|
|
1701
|
+
password: "",
|
|
1702
|
+
existingSecret: '{{ printf "%s-clickhouse-credentials" .Release.Name }}',
|
|
1703
|
+
existingSecretKey: "admin-password",
|
|
605
1704
|
},
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
"auto.create.topics.enable": "true",
|
|
609
|
-
"log.retention.hours": "24",
|
|
610
|
-
"default.replication.factor": String(tierConfig.kafkaReplication),
|
|
611
|
-
"offsets.topic.replication.factor": String(tierConfig.kafkaReplication),
|
|
612
|
-
"num.partitions": String(tierConfig.hpsWorkerReplicas.max), // Match max workers for parallel consumption
|
|
613
|
-
},
|
|
614
|
-
controller: {
|
|
615
|
-
replicaCount: tierConfig.kafkaReplication,
|
|
616
|
-
resources: tierConfig.kafkaResources,
|
|
617
|
-
tolerations: arm64Tolerations,
|
|
618
|
-
persistence: {
|
|
1705
|
+
persistence: clickStackEnabled
|
|
1706
|
+
? {
|
|
619
1707
|
enabled: true,
|
|
620
|
-
size: tierConfig.kafkaStorage,
|
|
621
1708
|
storageClass: storageClass,
|
|
1709
|
+
size: clickHouseStorageSize,
|
|
1710
|
+
}
|
|
1711
|
+
: { enabled: false },
|
|
1712
|
+
resources: clickStackEnabled
|
|
1713
|
+
? {
|
|
1714
|
+
requests: { cpu: "1000m", memory: "4Gi" },
|
|
1715
|
+
limits: { cpu: "4", memory: "12Gi" },
|
|
1716
|
+
}
|
|
1717
|
+
: {
|
|
1718
|
+
requests: { cpu: "500m", memory: "2Gi" },
|
|
1719
|
+
limits: { cpu: "2", memory: "6Gi" },
|
|
622
1720
|
},
|
|
623
|
-
|
|
624
|
-
|
|
1721
|
+
serviceAccount: {
|
|
1722
|
+
create: true,
|
|
1723
|
+
annotations: {},
|
|
625
1724
|
},
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
controller: {
|
|
631
|
-
protocol: "PLAINTEXT",
|
|
632
|
-
},
|
|
633
|
-
interbroker: {
|
|
634
|
-
protocol: "PLAINTEXT",
|
|
1725
|
+
metrics: {
|
|
1726
|
+
enabled: true,
|
|
1727
|
+
serviceMonitor: {
|
|
1728
|
+
enabled: true,
|
|
635
1729
|
},
|
|
636
1730
|
},
|
|
1731
|
+
queryLimits: {
|
|
1732
|
+
maxMemoryUsage: 4294967296,
|
|
1733
|
+
maxThreads: 4,
|
|
1734
|
+
maxExecutionTime: 120,
|
|
1735
|
+
maxRowsToRead: 50000000,
|
|
1736
|
+
readOverflowMode: "break",
|
|
1737
|
+
},
|
|
1738
|
+
otelQueryLimits: {
|
|
1739
|
+
maxMemoryUsage: 4294967296,
|
|
1740
|
+
maxThreads: 8,
|
|
1741
|
+
maxExecutionTime: 120,
|
|
1742
|
+
},
|
|
1743
|
+
otelDatabase: "otel",
|
|
1744
|
+
// config.d / users.d / the decision-log view are rendered by the parent
|
|
1745
|
+
// chart's clickhouse templates (no longer passed as Bitnami subchart values).
|
|
637
1746
|
},
|
|
638
1747
|
// =============================================================================
|
|
639
1748
|
// TRAEFIK (Ingress Controller)
|
|
640
1749
|
// =============================================================================
|
|
641
1750
|
traefik: {
|
|
642
1751
|
enabled: true,
|
|
1752
|
+
// traefik has no global.imageRegistry path: set registry + repository
|
|
1753
|
+
// directly (host = reg, rulebricks/* path).
|
|
1754
|
+
image: {
|
|
1755
|
+
registry: reg,
|
|
1756
|
+
repository: IMAGE_REPOSITORIES.traefik,
|
|
1757
|
+
},
|
|
1758
|
+
deployment: {
|
|
1759
|
+
imagePullSecrets: rulebricksPullSecret,
|
|
1760
|
+
},
|
|
643
1761
|
ingressClass: {
|
|
644
1762
|
name: "traefik",
|
|
645
1763
|
},
|
|
646
|
-
|
|
1764
|
+
...coreScheduling,
|
|
647
1765
|
autoscaling: {
|
|
648
1766
|
enabled: true,
|
|
649
1767
|
minReplicas: 1,
|
|
650
|
-
|
|
1768
|
+
// Headroom for colocated clients pushing multi-hundred-RPS bulk
|
|
1769
|
+
// traffic through the ingress.
|
|
1770
|
+
maxReplicas: 4,
|
|
651
1771
|
},
|
|
652
1772
|
resources: {
|
|
653
1773
|
requests: {
|
|
@@ -670,11 +1790,26 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
670
1790
|
websecure: {
|
|
671
1791
|
port: 8443,
|
|
672
1792
|
exposedPort: 443,
|
|
673
|
-
tls
|
|
674
|
-
|
|
1793
|
+
// traefik 41.x moved per-entrypoint TLS under ports.<name>.http.tls
|
|
1794
|
+
// (the old ports.<name>.tls location is rejected by the chart schema).
|
|
1795
|
+
http: {
|
|
1796
|
+
tls: {
|
|
1797
|
+
enabled: tlsEnabled,
|
|
1798
|
+
},
|
|
675
1799
|
},
|
|
676
1800
|
},
|
|
677
1801
|
},
|
|
1802
|
+
metrics: {
|
|
1803
|
+
prometheus: {
|
|
1804
|
+
enabled: true,
|
|
1805
|
+
serviceMonitor: {
|
|
1806
|
+
enabled: false,
|
|
1807
|
+
},
|
|
1808
|
+
},
|
|
1809
|
+
},
|
|
1810
|
+
// OTLP tracing: ingress becomes the root span and propagates traceparent
|
|
1811
|
+
// to backends. Empty object when tracing is disabled.
|
|
1812
|
+
tracing: generateTraefikTracing(config, releaseName),
|
|
678
1813
|
persistence: {
|
|
679
1814
|
enabled: false,
|
|
680
1815
|
},
|
|
@@ -684,7 +1819,29 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
684
1819
|
// =============================================================================
|
|
685
1820
|
keda: {
|
|
686
1821
|
enabled: true,
|
|
687
|
-
|
|
1822
|
+
imagePullSecrets: rulebricksPullSecret,
|
|
1823
|
+
// keda reads global.image.registry (NOT global.imageRegistry) for the host;
|
|
1824
|
+
// set it plus the rulebricks/* repositories for all three sub-images.
|
|
1825
|
+
global: {
|
|
1826
|
+
image: {
|
|
1827
|
+
registry: reg,
|
|
1828
|
+
},
|
|
1829
|
+
},
|
|
1830
|
+
image: {
|
|
1831
|
+
keda: {
|
|
1832
|
+
registry: reg,
|
|
1833
|
+
repository: IMAGE_REPOSITORIES.keda,
|
|
1834
|
+
},
|
|
1835
|
+
metricsApiServer: {
|
|
1836
|
+
registry: reg,
|
|
1837
|
+
repository: IMAGE_REPOSITORIES.kedaMetricsApiServer,
|
|
1838
|
+
},
|
|
1839
|
+
webhooks: {
|
|
1840
|
+
registry: reg,
|
|
1841
|
+
repository: IMAGE_REPOSITORIES.kedaAdmissionWebhooks,
|
|
1842
|
+
},
|
|
1843
|
+
},
|
|
1844
|
+
...coreScheduling,
|
|
688
1845
|
crds: {
|
|
689
1846
|
install: false, // CRDs managed in parent chart
|
|
690
1847
|
},
|
|
@@ -694,13 +1851,41 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
694
1851
|
// =============================================================================
|
|
695
1852
|
"cert-manager": {
|
|
696
1853
|
enabled: tlsEnabled,
|
|
697
|
-
|
|
698
|
-
|
|
1854
|
+
// CRDs managed in parent chart (cert-manager v1.15+ uses crds.enabled,
|
|
1855
|
+
// not the deprecated installCRDs flag).
|
|
1856
|
+
crds: { enabled: false },
|
|
1857
|
+
// cert-manager prepends image.registry to image.repository, so set both per
|
|
1858
|
+
// component (host = reg, rulebricks/cert-manager-* path).
|
|
1859
|
+
image: {
|
|
1860
|
+
registry: reg,
|
|
1861
|
+
repository: IMAGE_REPOSITORIES.certManagerController,
|
|
1862
|
+
},
|
|
1863
|
+
...coreScheduling,
|
|
699
1864
|
webhook: {
|
|
700
|
-
|
|
1865
|
+
image: {
|
|
1866
|
+
registry: reg,
|
|
1867
|
+
repository: IMAGE_REPOSITORIES.certManagerWebhook,
|
|
1868
|
+
},
|
|
1869
|
+
...coreScheduling,
|
|
701
1870
|
},
|
|
702
1871
|
cainjector: {
|
|
703
|
-
|
|
1872
|
+
image: {
|
|
1873
|
+
registry: reg,
|
|
1874
|
+
repository: IMAGE_REPOSITORIES.certManagerCainjector,
|
|
1875
|
+
},
|
|
1876
|
+
...coreScheduling,
|
|
1877
|
+
},
|
|
1878
|
+
startupapicheck: {
|
|
1879
|
+
image: {
|
|
1880
|
+
registry: reg,
|
|
1881
|
+
repository: IMAGE_REPOSITORIES.certManagerStartupapicheck,
|
|
1882
|
+
},
|
|
1883
|
+
},
|
|
1884
|
+
acmesolver: {
|
|
1885
|
+
image: {
|
|
1886
|
+
registry: reg,
|
|
1887
|
+
repository: IMAGE_REPOSITORIES.certManagerAcmesolver,
|
|
1888
|
+
},
|
|
704
1889
|
},
|
|
705
1890
|
},
|
|
706
1891
|
// Cluster Issuer for Let's Encrypt
|
|
@@ -714,12 +1899,20 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
714
1899
|
// =============================================================================
|
|
715
1900
|
vector: {
|
|
716
1901
|
enabled: true,
|
|
1902
|
+
// vector's image.repository is the FULL path including host (no separate
|
|
1903
|
+
// registry field), so the reg host is prefixed here.
|
|
1904
|
+
image: {
|
|
1905
|
+
repository: `${reg}/${IMAGE_REPOSITORIES.vector}`,
|
|
1906
|
+
pullSecrets: rulebricksPullSecret,
|
|
1907
|
+
},
|
|
717
1908
|
role: "Stateless-Aggregator",
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
tolerations: arm64Tolerations,
|
|
1909
|
+
// Replica count and resources fall back to the chart defaults.
|
|
1910
|
+
...coreScheduling,
|
|
721
1911
|
serviceAccount: generateVectorServiceAccount(config),
|
|
722
1912
|
podLabels: generateVectorPodLabels(config),
|
|
1913
|
+
...(generateVectorExtraContainers(config)
|
|
1914
|
+
? { extraContainers: generateVectorExtraContainers(config) }
|
|
1915
|
+
: {}),
|
|
723
1916
|
service: {
|
|
724
1917
|
enabled: true,
|
|
725
1918
|
ports: [{ name: "api", port: 8686, protocol: "TCP", targetPort: 8686 }],
|
|
@@ -731,90 +1924,262 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
731
1924
|
kafka: {
|
|
732
1925
|
type: "kafka",
|
|
733
1926
|
bootstrap_servers: "${KAFKA_BOOTSTRAP_SERVERS:-rulebricks-kafka:9092}",
|
|
734
|
-
|
|
1927
|
+
// KAFKA_LOG_TOPIC carries the namespace prefix (e.g. com.rulebricks.logs).
|
|
1928
|
+
topics: ["${KAFKA_LOG_TOPIC:-logs}"],
|
|
735
1929
|
group_id: "vector-consumers",
|
|
736
1930
|
auto_offset_reset: "latest",
|
|
1931
|
+
// TLS + SASL driven by env from vector-kafka-env (disabled for
|
|
1932
|
+
// in-cluster Kafka and the kafka-proxy bridge path).
|
|
1933
|
+
tls: { enabled: "${KAFKA_TLS_ENABLED:-false}" },
|
|
1934
|
+
sasl: {
|
|
1935
|
+
enabled: "${KAFKA_SASL_ENABLED:-false}",
|
|
1936
|
+
mechanism: "${KAFKA_SASL_MECHANISM:-PLAIN}",
|
|
1937
|
+
// username/password are only emitted for external Kafka using a
|
|
1938
|
+
// direct PLAIN/SCRAM credential (where vector-kafka-credentials is
|
|
1939
|
+
// populated). Emitting them with an empty default would render as
|
|
1940
|
+
// YAML null and crash Vector at config load; omitting the keys
|
|
1941
|
+
// leaves them unset (valid) whenever SASL is disabled.
|
|
1942
|
+
...(kafkaUsesDirectSasl(config)
|
|
1943
|
+
? {
|
|
1944
|
+
username: "${KAFKA_SASL_USERNAME}",
|
|
1945
|
+
password: "${KAFKA_SASL_PASSWORD}",
|
|
1946
|
+
}
|
|
1947
|
+
: {}),
|
|
1948
|
+
},
|
|
1949
|
+
},
|
|
1950
|
+
},
|
|
1951
|
+
transforms: {
|
|
1952
|
+
normalize_logs: {
|
|
1953
|
+
type: "remap",
|
|
1954
|
+
inputs: ["kafka"],
|
|
1955
|
+
source: VECTOR_NORMALIZE_LOGS_VRL,
|
|
737
1956
|
},
|
|
738
1957
|
},
|
|
739
1958
|
sinks: generateVectorSinks(config),
|
|
740
1959
|
},
|
|
741
1960
|
},
|
|
742
1961
|
// =============================================================================
|
|
1962
|
+
// VECTOR AGENT (Application / container logs -> Elasticsearch)
|
|
1963
|
+
// =============================================================================
|
|
1964
|
+
"vector-agent": clickStackEnabled
|
|
1965
|
+
? { enabled: false }
|
|
1966
|
+
: {
|
|
1967
|
+
...generateVectorAgent(config, infrastructurePodLabels, operationalDaemonSetTolerations),
|
|
1968
|
+
// Full-path repository (see vector above) + pull secret.
|
|
1969
|
+
image: {
|
|
1970
|
+
repository: `${reg}/${IMAGE_REPOSITORIES.vector}`,
|
|
1971
|
+
pullSecrets: rulebricksPullSecret,
|
|
1972
|
+
},
|
|
1973
|
+
},
|
|
1974
|
+
// =============================================================================
|
|
743
1975
|
// SUPABASE (Self-hosted Database)
|
|
744
1976
|
// =============================================================================
|
|
745
1977
|
supabase: {
|
|
746
1978
|
enabled: config.database.type === "self-hosted",
|
|
747
1979
|
...(config.database.type === "self-hosted"
|
|
748
|
-
? {
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
1980
|
+
? (() => {
|
|
1981
|
+
// External managed Postgres (AWS RDS / Azure Flexible Server): the
|
|
1982
|
+
// self-hosted Supabase services run against it instead of the
|
|
1983
|
+
// bundled in-cluster database.
|
|
1984
|
+
const pgExt = config.externalServices?.postgres?.mode === "external"
|
|
1985
|
+
? config.externalServices?.postgres?.external
|
|
1986
|
+
: undefined;
|
|
1987
|
+
return {
|
|
1988
|
+
secret: {
|
|
1989
|
+
db: {
|
|
1990
|
+
username: "postgres",
|
|
1991
|
+
// Shared service-role password (authenticator / auth_admin /
|
|
1992
|
+
// replication_admin). With an external DB the bootstrap hook
|
|
1993
|
+
// sets the roles to this same value.
|
|
1994
|
+
password: config.database.supabaseDbPassword,
|
|
1995
|
+
database: pgExt?.database || "postgres",
|
|
1996
|
+
},
|
|
1997
|
+
dashboard: {
|
|
1998
|
+
username: config.database.supabaseDashboardUser || "supabase",
|
|
1999
|
+
password: config.database.supabaseDashboardPass,
|
|
2000
|
+
},
|
|
2001
|
+
jwt: {
|
|
2002
|
+
secret: config.database.supabaseJwtSecret,
|
|
2003
|
+
},
|
|
2004
|
+
// SECRET_KEY_BASE / DB_ENC_KEY, derived from the JWT secret
|
|
2005
|
+
// (stable across redeploys). The chart no longer ships defaults.
|
|
2006
|
+
realtime: deriveRealtimeSecrets(config.database.supabaseJwtSecret || ""),
|
|
754
2007
|
},
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
2008
|
+
...(pgExt
|
|
2009
|
+
? {
|
|
2010
|
+
// One switch: enabling externalDatabase disables the bundled
|
|
2011
|
+
// Postgres and runs the bootstrap hook to initialize the
|
|
2012
|
+
// managed instance. db.enabled=false is explicit so chart
|
|
2013
|
+
// schema rules keyed off it hold.
|
|
2014
|
+
db: { enabled: false },
|
|
2015
|
+
externalDatabase: {
|
|
2016
|
+
enabled: true,
|
|
2017
|
+
host: pgExt.host ?? "",
|
|
2018
|
+
port: pgExt.port ?? 5432,
|
|
2019
|
+
bootstrap: {
|
|
2020
|
+
enabled: pgExt.bootstrap?.enabled ?? true,
|
|
2021
|
+
masterUsername: pgExt.bootstrap?.masterUsername ?? "postgres",
|
|
2022
|
+
masterPassword: pgExt.bootstrap?.masterPassword ?? "",
|
|
2023
|
+
appRole: pgExt.bootstrap?.appRole ?? "postgres",
|
|
2024
|
+
},
|
|
2025
|
+
},
|
|
2026
|
+
}
|
|
2027
|
+
: {
|
|
2028
|
+
db: {
|
|
2029
|
+
// Explicit so chart schema rules that key off
|
|
2030
|
+
// supabase.db.enabled (e.g. Database Backup Storage
|
|
2031
|
+
// Validation) hold without relying on subchart-default
|
|
2032
|
+
// coalescing.
|
|
2033
|
+
enabled: true,
|
|
2034
|
+
image: {
|
|
2035
|
+
// Split shape: the supabase.image helper applies
|
|
2036
|
+
// global.imageRegistry to the host. Host never in repository.
|
|
2037
|
+
registry: reg,
|
|
2038
|
+
repository: SUPABASE_POSTGRES_IMAGE_REPOSITORY,
|
|
2039
|
+
tag: SUPABASE_POSTGRES_IMAGE_TAG,
|
|
2040
|
+
pullPolicy: "IfNotPresent",
|
|
2041
|
+
},
|
|
2042
|
+
podLabels: infrastructurePodLabels,
|
|
2043
|
+
// Critical tier: the primary datastore must preempt burst
|
|
2044
|
+
// workers to reschedule; never autoscaler-evicted.
|
|
2045
|
+
// Resources and persistence size fall back to chart
|
|
2046
|
+
// defaults.
|
|
2047
|
+
priorityClassName: criticalPriorityClass,
|
|
2048
|
+
podAnnotations: safeToEvictAnnotations,
|
|
2049
|
+
...coreScheduling,
|
|
2050
|
+
persistence: {
|
|
2051
|
+
enabled: true,
|
|
2052
|
+
storageClassName: storageClass,
|
|
2053
|
+
},
|
|
2054
|
+
},
|
|
2055
|
+
}),
|
|
2056
|
+
auth: {
|
|
2057
|
+
// Explicit public URLs so GoTrue never falls back to the
|
|
2058
|
+
// in-cluster Kong service name when global.domain propagation
|
|
2059
|
+
// is lost (e.g. after manual patching or partial upgrades).
|
|
2060
|
+
siteUrl: `https://${config.domain}`,
|
|
2061
|
+
externalUrl: `https://supabase.${config.domain}`,
|
|
2062
|
+
...coreScheduling,
|
|
758
2063
|
},
|
|
759
|
-
|
|
760
|
-
|
|
2064
|
+
rest: {
|
|
2065
|
+
...coreScheduling,
|
|
761
2066
|
},
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
resources: tierConfig.dbResources,
|
|
765
|
-
tolerations: arm64Tolerations,
|
|
766
|
-
persistence: {
|
|
767
|
-
enabled: true,
|
|
768
|
-
size: tierConfig.dbPersistenceSize,
|
|
769
|
-
storageClassName: storageClass,
|
|
2067
|
+
realtime: {
|
|
2068
|
+
...coreScheduling,
|
|
770
2069
|
},
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
tolerations: arm64Tolerations,
|
|
774
|
-
},
|
|
775
|
-
rest: {
|
|
776
|
-
tolerations: arm64Tolerations,
|
|
777
|
-
},
|
|
778
|
-
realtime: {
|
|
779
|
-
tolerations: arm64Tolerations,
|
|
780
|
-
},
|
|
781
|
-
meta: {
|
|
782
|
-
tolerations: arm64Tolerations,
|
|
783
|
-
},
|
|
784
|
-
kong: {
|
|
785
|
-
tolerations: arm64Tolerations,
|
|
786
|
-
ingress: {
|
|
787
|
-
enabled: true,
|
|
788
|
-
className: "traefik",
|
|
789
|
-
annotations: {},
|
|
2070
|
+
meta: {
|
|
2071
|
+
...coreScheduling,
|
|
790
2072
|
},
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
2073
|
+
kong: {
|
|
2074
|
+
...coreScheduling,
|
|
2075
|
+
ingress: {
|
|
2076
|
+
enabled: true,
|
|
2077
|
+
className: "traefik",
|
|
2078
|
+
annotations: {},
|
|
2079
|
+
},
|
|
2080
|
+
},
|
|
2081
|
+
studio: {
|
|
2082
|
+
...coreScheduling,
|
|
2083
|
+
},
|
|
2084
|
+
};
|
|
2085
|
+
})()
|
|
796
2086
|
: {}),
|
|
797
2087
|
},
|
|
798
2088
|
// =============================================================================
|
|
799
2089
|
// MONITORING
|
|
800
2090
|
// =============================================================================
|
|
801
2091
|
monitoring: {
|
|
802
|
-
enabled:
|
|
2092
|
+
enabled: true,
|
|
803
2093
|
},
|
|
804
2094
|
"kube-prometheus-stack": {
|
|
805
|
-
enabled:
|
|
2095
|
+
enabled: true,
|
|
2096
|
+
// kube-prometheus-stack honors the parent global.imageRegistry for the host
|
|
2097
|
+
// automatically; the CLI sets the rulebricks/* repository defaults (and the
|
|
2098
|
+
// reg host explicitly) for every sub-image so a bare helm install also pulls
|
|
2099
|
+
// rulebricks/*.
|
|
806
2100
|
alertmanager: {
|
|
807
2101
|
enabled: false,
|
|
2102
|
+
alertmanagerSpec: {
|
|
2103
|
+
image: {
|
|
2104
|
+
registry: reg,
|
|
2105
|
+
repository: IMAGE_REPOSITORIES.alertmanager,
|
|
2106
|
+
},
|
|
2107
|
+
},
|
|
2108
|
+
},
|
|
2109
|
+
prometheusOperator: {
|
|
2110
|
+
image: {
|
|
2111
|
+
registry: reg,
|
|
2112
|
+
repository: IMAGE_REPOSITORIES.prometheusOperator,
|
|
2113
|
+
},
|
|
2114
|
+
prometheusConfigReloader: {
|
|
2115
|
+
image: {
|
|
2116
|
+
registry: reg,
|
|
2117
|
+
repository: IMAGE_REPOSITORIES.prometheusConfigReloader,
|
|
2118
|
+
},
|
|
2119
|
+
},
|
|
2120
|
+
admissionWebhooks: {
|
|
2121
|
+
patch: {
|
|
2122
|
+
image: {
|
|
2123
|
+
registry: reg,
|
|
2124
|
+
repository: IMAGE_REPOSITORIES.kubeWebhookCertgen,
|
|
2125
|
+
},
|
|
2126
|
+
},
|
|
2127
|
+
},
|
|
2128
|
+
},
|
|
2129
|
+
"kube-state-metrics": {
|
|
2130
|
+
image: {
|
|
2131
|
+
registry: reg,
|
|
2132
|
+
repository: IMAGE_REPOSITORIES.kubeStateMetrics,
|
|
2133
|
+
},
|
|
2134
|
+
},
|
|
2135
|
+
"prometheus-node-exporter": {
|
|
2136
|
+
image: {
|
|
2137
|
+
registry: reg,
|
|
2138
|
+
repository: IMAGE_REPOSITORIES.nodeExporter,
|
|
2139
|
+
},
|
|
808
2140
|
},
|
|
809
2141
|
grafana: {
|
|
810
2142
|
enabled: useLocalGrafana,
|
|
2143
|
+
image: {
|
|
2144
|
+
registry: reg,
|
|
2145
|
+
repository: IMAGE_REPOSITORIES.grafana,
|
|
2146
|
+
},
|
|
2147
|
+
// Dashboard sidecar imports the provisioned Rulebricks dashboards
|
|
2148
|
+
// (ConfigMaps labeled grafana_dashboard="1") when in-cluster Grafana
|
|
2149
|
+
// is enabled.
|
|
2150
|
+
sidecar: {
|
|
2151
|
+
image: {
|
|
2152
|
+
registry: reg,
|
|
2153
|
+
repository: IMAGE_REPOSITORIES.k8sSidecar,
|
|
2154
|
+
},
|
|
2155
|
+
...(useLocalGrafana
|
|
2156
|
+
? {
|
|
2157
|
+
dashboards: {
|
|
2158
|
+
enabled: true,
|
|
2159
|
+
label: "grafana_dashboard",
|
|
2160
|
+
labelValue: "1",
|
|
2161
|
+
searchNamespace: "ALL",
|
|
2162
|
+
folderAnnotation: "grafana_folder",
|
|
2163
|
+
provider: { foldersFromFilesStructure: true },
|
|
2164
|
+
},
|
|
2165
|
+
}
|
|
2166
|
+
: {}),
|
|
2167
|
+
},
|
|
811
2168
|
},
|
|
812
2169
|
prometheus: {
|
|
813
|
-
enabled:
|
|
2170
|
+
enabled: true,
|
|
814
2171
|
serviceAccount: generatePrometheusServiceAccount(config),
|
|
815
2172
|
prometheusSpec: {
|
|
816
2173
|
retention: "30d",
|
|
2174
|
+
image: {
|
|
2175
|
+
registry: reg,
|
|
2176
|
+
repository: IMAGE_REPOSITORIES.prometheus,
|
|
2177
|
+
},
|
|
817
2178
|
podMetadata: generatePrometheusPodMetadata(config),
|
|
2179
|
+
serviceMonitorSelectorNilUsesHelmValues: false,
|
|
2180
|
+
serviceMonitorSelector: {},
|
|
2181
|
+
podMonitorSelectorNilUsesHelmValues: false,
|
|
2182
|
+
podMonitorSelector: {},
|
|
818
2183
|
storageSpec: {
|
|
819
2184
|
volumeClaimTemplate: {
|
|
820
2185
|
spec: {
|
|
@@ -828,7 +2193,9 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
828
2193
|
},
|
|
829
2194
|
},
|
|
830
2195
|
},
|
|
831
|
-
remoteWrite:
|
|
2196
|
+
remoteWrite: [
|
|
2197
|
+
...(clickStackEnabled ? [] : generateRemoteWriteSpec(config)),
|
|
2198
|
+
],
|
|
832
2199
|
},
|
|
833
2200
|
},
|
|
834
2201
|
},
|
|
@@ -836,20 +2203,21 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
836
2203
|
// STORAGE CLASS
|
|
837
2204
|
// =============================================================================
|
|
838
2205
|
storageClass: {
|
|
839
|
-
create:
|
|
2206
|
+
create: false,
|
|
840
2207
|
name: storageClass,
|
|
841
|
-
provisioner: config.infrastructure.
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
2208
|
+
provisioner: config.infrastructure.storageProvisioner ||
|
|
2209
|
+
(config.infrastructure.provider === "aws"
|
|
2210
|
+
? "ebs.csi.aws.com"
|
|
2211
|
+
: config.infrastructure.provider === "gcp"
|
|
2212
|
+
? "pd.csi.storage.gke.io"
|
|
2213
|
+
: config.infrastructure.provider === "azure"
|
|
2214
|
+
? "disk.csi.azure.com"
|
|
2215
|
+
: "ebs.csi.aws.com"),
|
|
848
2216
|
// Parameters for the StorageClass - must include type for disk provisioning
|
|
849
2217
|
parameters: config.infrastructure.provider === "aws"
|
|
850
2218
|
? { type: "gp3" }
|
|
851
2219
|
: config.infrastructure.provider === "gcp"
|
|
852
|
-
? { type:
|
|
2220
|
+
? { type: gcpDiskType }
|
|
853
2221
|
: config.infrastructure.provider === "azure"
|
|
854
2222
|
? { skuName: "Premium_LRS" }
|
|
855
2223
|
: { type: "gp3" },
|
|
@@ -864,7 +2232,13 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
864
2232
|
"external-dns": externalDnsEnabled
|
|
865
2233
|
? {
|
|
866
2234
|
enabled: true,
|
|
867
|
-
|
|
2235
|
+
// external-dns has NO image.registry field: image.repository is the
|
|
2236
|
+
// FULL path including host (reg prefix + rulebricks/external-dns).
|
|
2237
|
+
image: {
|
|
2238
|
+
repository: `${reg}/${IMAGE_REPOSITORIES.externalDns}`,
|
|
2239
|
+
},
|
|
2240
|
+
// external-dns 1.21+ idiom: provider is an object ({name: ...}).
|
|
2241
|
+
provider: { name: getExternalDnsProvider(config.dns.provider) },
|
|
868
2242
|
domainFilters: [config.domain],
|
|
869
2243
|
sources: ["ingress", "service"],
|
|
870
2244
|
policy: "upsert-only",
|
|
@@ -873,6 +2247,105 @@ export async function generateHelmValues(config, options = {}) {
|
|
|
873
2247
|
enabled: false,
|
|
874
2248
|
},
|
|
875
2249
|
};
|
|
2250
|
+
// In k8s secret mode, the CLI creates Kubernetes Secrets and the chart reads
|
|
2251
|
+
// them by reference. Point the chart's secretRef seams at those Secrets and
|
|
2252
|
+
// strip every plaintext secret out of the generated values.
|
|
2253
|
+
if (secretMode === "k8s") {
|
|
2254
|
+
return redactSecretsToRefs(values, config);
|
|
2255
|
+
}
|
|
2256
|
+
return values;
|
|
2257
|
+
}
|
|
2258
|
+
/**
 * Rewrites generated Helm values for k8s secret mode.
 *
 * In this mode the CLI creates Kubernetes Secrets and the chart reads them by
 * reference, so this function points the chart's `*.secretRef` seams at the
 * CLI-created Secret names and removes every inline plaintext secret, so that
 * none are persisted to values.yaml or the Helm release.
 *
 * The `values` object is modified in place (nested `global` / `supabase`
 * objects included) and the same object is returned.
 *
 * @param {object} values - Generated Helm values to rewrite.
 * @param {object} config - Deployment configuration; `config.database.type`,
 *   `config.externalServices.postgres` and `config.smtp` are consulted.
 * @returns {object} The same `values` object, with secrets replaced by refs.
 */
export function redactSecretsToRefs(values, config) {
    const names = deploymentSecretNames(config);
    const global = (values.global ?? {});
    const supabase = (values.supabase ?? {});
    // External managed Postgres only applies to the self-hosted Supabase stack.
    const pgExt = config.database.type === "self-hosted" &&
        config.externalServices?.postgres?.mode === "external"
        ? config.externalServices.postgres.external
        : undefined;
    // Key names inside the database Secret. A fresh object is built per use so
    // the two consumers never share one reference.
    const dbSecretRefKey = () => ({
        host: "host",
        port: "port",
        username: "username",
        password: "password",
        database: "database",
    });
    // App-level consolidated secret: one secretRef supplies every app cred.
    global.secrets = { ...(global.secrets ?? {}), secretRef: names.app };
    // Strip inline app/global secrets (non-secret config like host/from/url stays).
    if (global.smtp) {
        delete global.smtp.user;
        delete global.smtp.pass;
    }
    if (global.supabase) {
        delete global.supabase.jwtSecret;
        delete global.supabase.anonKey;
        delete global.supabase.serviceKey;
        delete global.supabase.accessToken;
    }
    if (global.ai) {
        delete global.ai.openaiApiKey;
    }
    if (global.sso) {
        delete global.sso.clientId;
        delete global.sso.clientSecret;
    }
    delete global.licenseKey;
    // Supabase subchart: replace each inline secret block with a secretRef.
    if (supabase.secret) {
        const dbSecret = { secretRef: names.db };
        if (pgExt) {
            dbSecret.secretRefKey = dbSecretRefKey();
        }
        supabase.secret = {
            db: dbSecret,
            jwt: { secretRef: names.jwt },
            dashboard: { secretRef: names.dashboard },
            realtime: { secretRef: names.realtime },
            // Supabase auth (GoTrue) SMTP — only when SMTP creds are configured;
            // otherwise the global.smtp we just stripped would leave it empty.
            ...(config.smtp?.user || config.smtp?.pass
                ? { smtp: { secretRef: names.smtp } }
                : {}),
        };
    }
    if (pgExt && supabase.externalDatabase) {
        const bootstrap = {
            ...(supabase.externalDatabase.bootstrap ?? {}),
            secretRef: names.dbBootstrap,
        };
        // Master credentials move into the hook Secret in k8s mode. Delete the
        // keys (rather than assigning undefined) so they are truly absent from
        // the values, matching how every other secret above is removed.
        delete bootstrap.masterUsername;
        delete bootstrap.masterPassword;
        supabase.externalDatabase = {
            ...supabase.externalDatabase,
            // New charts read host/port/user/pass/db from this single Secret. Keep
            // externalDatabase.host/port above for older charts that do not yet
            // support host/port secret keys.
            secretRef: names.db,
            secretRefKey: dbSecretRefKey(),
            bootstrap,
        };
    }
    values.global = global;
    values.supabase = supabase;
    return values;
}
|
|
2342
|
+
/**
 * Builds the Helm values for a deployment, validates them, and saves them.
 *
 * Validation runs before anything is saved, so values the chart would reject
 * are never written or deployed.
 *
 * @param {object} config - Deployment configuration; `config.name` identifies
 *   the deployment the values are saved under.
 * @param {object} [options={}] - Options forwarded to `buildHelmValues`.
 * @returns {Promise<void>} Resolves once the values have been saved.
 */
export async function generateHelmValues(config, options = {}) {
    const helmValues = buildHelmValues(config, options);
    // Last-line guardrail: stop here if the chart would reject these values.
    assertValidHelmValues(helmValues);
    await saveHelmValues(config.name, helmValues);
}
|
|
878
2351
|
/**
|