@fjall/components-infrastructure 0.102.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. package/dist/lib/lambda-assets/cert-generator/asset/index.js +17948 -0
  2. package/dist/lib/lambda-assets/cert-generator/asset/package.json +4 -0
  3. package/dist/lib/patterns/aws/clickhouseDatabase.d.ts +37 -0
  4. package/dist/lib/patterns/aws/clickhouseDatabase.js +120 -19
  5. package/dist/lib/patterns/aws/clickhouseTls/index.d.ts +1 -0
  6. package/dist/lib/patterns/aws/clickhouseTls/index.js +1 -0
  7. package/dist/lib/patterns/aws/clickhouseTls/types.d.ts +48 -0
  8. package/dist/lib/resources/aws/database/clickhouseConstants.d.ts +21 -0
  9. package/dist/lib/resources/aws/database/clickhouseConstants.js +21 -0
  10. package/dist/lib/resources/aws/database/clickhouseSecurityGroup.d.ts +2 -0
  11. package/dist/lib/resources/aws/database/clickhouseSecurityGroup.js +2 -0
  12. package/dist/lib/resources/aws/database/clickhouseUserData.d.ts +21 -0
  13. package/dist/lib/resources/aws/database/clickhouseUserData.js +48 -3
  14. package/dist/lib/resources/aws/database/clickhouseXmlRenderer.d.ts +1 -1
  15. package/dist/lib/resources/aws/database/clickhouseXmlRenderer.js +1 -1
  16. package/dist/lib/resources/aws/secrets/index.d.ts +2 -0
  17. package/dist/lib/resources/aws/secrets/index.js +2 -0
  18. package/dist/lib/resources/aws/secrets/tlsCaSecret.d.ts +13 -0
  19. package/dist/lib/resources/aws/secrets/tlsCaSecret.js +15 -0
  20. package/dist/lib/resources/aws/secrets/tlsServerSecret.d.ts +15 -0
  21. package/dist/lib/resources/aws/secrets/tlsServerSecret.js +17 -0
  22. package/dist/lib/resources/aws/utilities/index.d.ts +1 -0
  23. package/dist/lib/resources/aws/utilities/index.js +1 -0
  24. package/dist/lib/resources/aws/utilities/tlsCertGenerator.d.ts +33 -0
  25. package/dist/lib/resources/aws/utilities/tlsCertGenerator.js +67 -0
  26. package/package.json +7 -5
  27. package/dist/lib/config/aws/__t17fixture.js +0 -3
  28. package/dist/lib/config/aws/__t17fixtureType.d.ts +0 -2
  29. package/dist/lib/config/aws/__t17fixtureType.js +0 -1
  30. package/dist/lib/config/aws/eventBus.d.ts +0 -7
  31. package/dist/lib/config/aws/eventBus.js +0 -21
  32. package/dist/lib/config/aws/identityCenterGroupMembership.d.ts +0 -10
  33. package/dist/lib/config/aws/identityCenterGroupMembership.js +0 -102
  34. package/dist/lib/config/aws/securityBaseline.d.ts +0 -15
  35. package/dist/lib/config/aws/securityBaseline.js +0 -27
  36. package/dist/lib/patterns/aws/_eslint_test_tmp/leak.d.ts +0 -1
  37. package/dist/lib/patterns/aws/_eslint_test_tmp/leak.js +0 -4
  38. package/dist/lib/patterns/aws/managedIdentityCenter.d.ts +0 -4
  39. package/dist/lib/patterns/aws/managedIdentityCenter.js +0 -19
  40. package/dist/lib/patterns/aws/subdomainHostedZone.d.ts +0 -9
  41. package/dist/lib/patterns/aws/subdomainHostedZone.js +0 -34
  42. package/dist/lib/resources/aws/analytics/clickhouse.d.ts +0 -15
  43. package/dist/lib/resources/aws/analytics/clickhouse.js +0 -310
  44. package/dist/lib/resources/aws/analytics/clickhouseAlarms.d.ts +0 -49
  45. package/dist/lib/resources/aws/analytics/clickhouseAlarms.js +0 -140
  46. package/dist/lib/resources/aws/analytics/clickhouseConstants.d.ts +0 -73
  47. package/dist/lib/resources/aws/analytics/clickhouseConstants.js +0 -89
  48. package/dist/lib/resources/aws/analytics/clickhouseSecurityGroup.d.ts +0 -13
  49. package/dist/lib/resources/aws/analytics/clickhouseSecurityGroup.js +0 -28
  50. package/dist/lib/resources/aws/analytics/clickhouseTypes.d.ts +0 -59
  51. package/dist/lib/resources/aws/analytics/clickhouseTypes.js +0 -1
  52. package/dist/lib/resources/aws/analytics/clickhouseUserData.d.ts +0 -6
  53. package/dist/lib/resources/aws/analytics/clickhouseUserData.js +0 -299
  54. package/dist/lib/resources/aws/analytics/index.d.ts +0 -4
  55. package/dist/lib/resources/aws/analytics/index.js +0 -2
  56. package/dist/lib/resources/aws/compute/__tmp__/regression-shape.d.ts +0 -2
  57. package/dist/lib/resources/aws/compute/__tmp__/regression-shape.js +0 -11
  58. package/dist/lib/resources/aws/messaging/defaultEventBus.d.ts +0 -7
  59. package/dist/lib/resources/aws/messaging/defaultEventBus.js +0 -21
  60. package/dist/lib/resources/aws/networking/domain.d.ts +0 -13
  61. package/dist/lib/resources/aws/networking/domain.js +0 -100
  62. package/dist/lib/synth_dump.d.ts +0 -1
  63. package/dist/lib/synth_dump.js +0 -42
  64. package/dist/lib/utils/bastionFactory.d.ts +0 -10
  65. package/dist/lib/utils/bastionFactory.js +0 -29
  66. package/dist/lib/utils/constructMap.d.ts +0 -33
  67. package/dist/lib/utils/constructMap.js +0 -154
  68. package/dist/lib/utils/dnsRecords.d.ts +0 -4
  69. package/dist/lib/utils/dnsRecords.js +0 -104
  70. package/dist/lib/{config/aws/__t17fixture.d.ts → patterns/aws/clickhouseTls/types.js} +0 -0
@@ -1,310 +0,0 @@
1
- import { Cluster, Ec2TaskDefinition, NetworkMode, ContainerImage, LogDriver, AsgCapacityProvider, EcsOptimizedImage, Ec2Service, Secret as EcsSecret } from "aws-cdk-lib/aws-ecs";
2
- import { ScheduledEc2Task } from "aws-cdk-lib/aws-ecs-patterns";
3
- import { Schedule } from "aws-cdk-lib/aws-applicationautoscaling";
4
- import { InstanceType, SubnetType, Connections, Port, UserData } from "aws-cdk-lib/aws-ec2";
5
- import { AutoScalingGroup, Monitoring, BlockDeviceVolume, EbsDeviceVolumeType } from "aws-cdk-lib/aws-autoscaling";
6
- import { Duration, Stack } from "aws-cdk-lib";
7
- import { Construct } from "constructs";
8
- import { LogGroup, RetentionDays } from "aws-cdk-lib/aws-logs";
9
- import { S3Bucket } from "../storage/s3.js";
10
- import { Secret } from "../secrets/secret.js";
11
- import { vpcHasNatGateways } from "../../../utils/vpcUtils.js";
12
- import { inferAmiHardwareType } from "../compute/ecsConstants.js";
13
- import { createClickHouseSecurityGroup } from "./clickhouseSecurityGroup.js";
14
- import { generateClickHouseUserData } from "./clickhouseUserData.js";
15
- import { createClickHouseAlarms } from "./clickhouseAlarms.js";
16
- import { CLICKHOUSE_CLUSTER_NAME, DEFAULT_CLICKHOUSE_INSTANCE_TYPE, CLICKHOUSE_IMAGE, CLICKHOUSE_EBS_VOLUME_SIZE_GB, CLICKHOUSE_EBS_IOPS, CLICKHOUSE_EBS_THROUGHPUT_MBPS, CLICKHOUSE_TASK_MEMORY_MIB, CLICKHOUSE_TASK_CPU_UNITS, CLICKHOUSE_HTTP_PORT, CLICKHOUSE_NATIVE_PORT, CLICKHOUSE_PROMETHEUS_PORT, CLICKHOUSE_DATA_MOUNT_PATH, CLICKHOUSE_SECRETS_PREFIX, CLICKHOUSE_SECRET_NAMES, CLICKHOUSE_SECRET_OPTIONS, CLICKHOUSE_HEALTH_CHECK, CLICKHOUSE_EBS_DEVICE_NAME, CLICKHOUSE_CONFIG_SUBDIR, CLICKHOUSE_USERS_SUBDIR, OPTIMISE_FINAL_SCHEDULE, REPLACING_MERGE_TREE_TABLES, OPTIMISE_MV_TABLES, CLICKHOUSE_CLOUDMAP_NAMESPACE, CLICKHOUSE_CLOUDMAP_SERVICE_NAME, OPTIMISE_TASK_MEMORY_MIB, OPTIMISE_TASK_CPU_UNITS, BACKUP_SCHEDULE, BACKUP_TASK_MEMORY_MIB, BACKUP_TASK_CPU_UNITS, BACKUP_RETENTION_DAYS } from "./clickhouseConstants.js";
17
- function createClickHouseSecret(scope, id, secretKey, description) {
18
- return new Secret(scope, id, {
19
- secretName: `${CLICKHOUSE_SECRETS_PREFIX}/${secretKey}`,
20
- description,
21
- generateSecretString: CLICKHOUSE_SECRET_OPTIONS
22
- });
23
- }
24
- /**
25
- * ClickHouse analytics infrastructure.
26
- *
27
- * Creates a single-node ClickHouse instance on ECS EC2 with a dedicated
28
- * gp3 EBS volume for data persistence. Designed for analytical workloads
29
- * (cost aggregation, deployment metrics, audit logs) rather than OLTP.
30
- */
31
- export default class ClickHouse extends Construct {
32
- connections;
33
- outputs;
34
- constructor(scope, id, props) {
35
- super(scope, id);
36
- const contextValue = this.node.tryGetContext("clickhouseInstanceType");
37
- const instanceType = (typeof contextValue === "string" ? contextValue : undefined) ??
38
- props.instanceType ??
39
- DEFAULT_CLICKHOUSE_INSTANCE_TYPE;
40
- // 1. Security group
41
- const securityGroup = createClickHouseSecurityGroup(this, props.vpc, props.webappSecurityGroup);
42
- // 2. Secrets Manager secrets (auto-generated passwords)
43
- const appPasswordSecret = createClickHouseSecret(this, "ClickHouseAppPassword", CLICKHOUSE_SECRET_NAMES.APP_PASSWORD, "ClickHouse application user password");
44
- const auditPasswordSecret = createClickHouseSecret(this, "ClickHouseAuditPassword", CLICKHOUSE_SECRET_NAMES.AUDIT_PASSWORD, "ClickHouse audit user password");
45
- const backupPasswordSecret = createClickHouseSecret(this, "ClickHouseBackupPassword", CLICKHOUSE_SECRET_NAMES.BACKUP_PASSWORD, "ClickHouse backup user password");
46
- const schemaPasswordSecret = createClickHouseSecret(this, "ClickHouseSchemaPassword", CLICKHOUSE_SECRET_NAMES.SCHEMA_PASSWORD, "ClickHouse schema migration user password");
47
- // 3. ECS cluster with Cloud Map namespace for service discovery
48
- const cluster = new Cluster(this, "ClickHouseCluster", {
49
- clusterName: CLICKHOUSE_CLUSTER_NAME,
50
- vpc: props.vpc,
51
- defaultCloudMapNamespace: {
52
- name: CLICKHOUSE_CLOUDMAP_NAMESPACE,
53
- vpc: props.vpc
54
- }
55
- });
56
- // 4. Auto Scaling Group with gp3 EBS volume
57
- const amiHardwareType = inferAmiHardwareType(instanceType);
58
- const hasNat = vpcHasNatGateways(props.vpc);
59
- const subnetType = hasNat
60
- ? SubnetType.PRIVATE_WITH_EGRESS
61
- : SubnetType.PUBLIC;
62
- const userData = UserData.custom(generateClickHouseUserData({
63
- cfAccountId: props.r2Config?.accountId
64
- }));
65
- const asg = new AutoScalingGroup(this, "ClickHouseAsg", {
66
- autoScalingGroupName: `${CLICKHOUSE_CLUSTER_NAME}-asg`,
67
- vpc: props.vpc,
68
- vpcSubnets: {
69
- subnetType
70
- },
71
- securityGroup,
72
- minCapacity: 1,
73
- maxCapacity: 1,
74
- desiredCapacity: 1,
75
- instanceType: new InstanceType(instanceType),
76
- machineImage: EcsOptimizedImage.amazonLinux2023(amiHardwareType),
77
- instanceMonitoring: Monitoring.BASIC,
78
- blockDevices: [
79
- {
80
- deviceName: CLICKHOUSE_EBS_DEVICE_NAME,
81
- volume: BlockDeviceVolume.ebs(CLICKHOUSE_EBS_VOLUME_SIZE_GB, {
82
- volumeType: EbsDeviceVolumeType.GP3,
83
- iops: CLICKHOUSE_EBS_IOPS,
84
- throughput: CLICKHOUSE_EBS_THROUGHPUT_MBPS,
85
- encrypted: true
86
- })
87
- }
88
- ],
89
- userData
90
- });
91
- // 5. Capacity provider
92
- const capacityProvider = new AsgCapacityProvider(this, "ClickHouseCapacityProvider", {
93
- autoScalingGroup: asg,
94
- enableManagedDraining: true,
95
- enableManagedTerminationProtection: false
96
- });
97
- cluster.addAsgCapacityProvider(capacityProvider);
98
- // 6. Task definition with bind mount for EBS volume
99
- const taskDefinition = new Ec2TaskDefinition(this, "ClickHouseTaskDefinition", {
100
- family: CLICKHOUSE_CLUSTER_NAME,
101
- networkMode: NetworkMode.AWS_VPC
102
- });
103
- taskDefinition.addVolume({
104
- name: "clickhouse-data",
105
- host: {
106
- sourcePath: CLICKHOUSE_DATA_MOUNT_PATH
107
- }
108
- });
109
- taskDefinition.addVolume({
110
- name: "clickhouse-config",
111
- host: {
112
- sourcePath: `${CLICKHOUSE_DATA_MOUNT_PATH}/${CLICKHOUSE_CONFIG_SUBDIR}`
113
- }
114
- });
115
- taskDefinition.addVolume({
116
- name: "clickhouse-users",
117
- host: {
118
- sourcePath: `${CLICKHOUSE_DATA_MOUNT_PATH}/${CLICKHOUSE_USERS_SUBDIR}`
119
- }
120
- });
121
- // 7. Container
122
- const container = taskDefinition.addContainer("clickhouse", {
123
- image: ContainerImage.fromRegistry(CLICKHOUSE_IMAGE),
124
- memoryLimitMiB: CLICKHOUSE_TASK_MEMORY_MIB,
125
- cpu: CLICKHOUSE_TASK_CPU_UNITS,
126
- logging: LogDriver.awsLogs({
127
- streamPrefix: "clickhouse",
128
- logRetention: RetentionDays.TWO_WEEKS
129
- }),
130
- healthCheck: {
131
- command: [
132
- "CMD-SHELL",
133
- `curl -f http://localhost:${CLICKHOUSE_HTTP_PORT}/?query=SELECT%201 || exit 1`
134
- ],
135
- interval: Duration.seconds(CLICKHOUSE_HEALTH_CHECK.INTERVAL_SECONDS),
136
- timeout: Duration.seconds(CLICKHOUSE_HEALTH_CHECK.TIMEOUT_SECONDS),
137
- retries: CLICKHOUSE_HEALTH_CHECK.RETRIES,
138
- startPeriod: Duration.seconds(CLICKHOUSE_HEALTH_CHECK.START_PERIOD_SECONDS)
139
- },
140
- secrets: {
141
- CLICKHOUSE_APP_PASSWORD: EcsSecret.fromSecretsManager(appPasswordSecret.secret),
142
- CLICKHOUSE_AUDIT_PASSWORD: EcsSecret.fromSecretsManager(auditPasswordSecret.secret),
143
- ...(props.r2Config
144
- ? {
145
- R2_ACCESS_KEY: EcsSecret.fromSecretsManager(props.r2Config.accessKeySecret),
146
- R2_SECRET_KEY: EcsSecret.fromSecretsManager(props.r2Config.secretKeySecret)
147
- }
148
- : {})
149
- },
150
- portMappings: [
151
- { containerPort: CLICKHOUSE_HTTP_PORT, hostPort: CLICKHOUSE_HTTP_PORT },
152
- {
153
- containerPort: CLICKHOUSE_NATIVE_PORT,
154
- hostPort: CLICKHOUSE_NATIVE_PORT
155
- },
156
- {
157
- containerPort: CLICKHOUSE_PROMETHEUS_PORT,
158
- hostPort: CLICKHOUSE_PROMETHEUS_PORT
159
- }
160
- ]
161
- });
162
- container.addMountPoints({
163
- sourceVolume: "clickhouse-data",
164
- containerPath: "/var/lib/clickhouse",
165
- readOnly: false
166
- }, {
167
- sourceVolume: "clickhouse-config",
168
- containerPath: "/etc/clickhouse-server/config.d",
169
- readOnly: true
170
- }, {
171
- sourceVolume: "clickhouse-users",
172
- containerPath: "/etc/clickhouse-server/users.d",
173
- readOnly: true
174
- });
175
- // 8. ECS service with Cloud Map registration for optimise task discovery
176
- const clickHouseHost = `${CLICKHOUSE_CLOUDMAP_SERVICE_NAME}.${CLICKHOUSE_CLOUDMAP_NAMESPACE}`;
177
- new Ec2Service(this, "ClickHouseService", {
178
- cluster,
179
- taskDefinition,
180
- desiredCount: 1,
181
- capacityProviderStrategies: [
182
- {
183
- capacityProvider: capacityProvider.capacityProviderName,
184
- weight: 1
185
- }
186
- ],
187
- circuitBreaker: { rollback: true },
188
- cloudMapOptions: {
189
- name: CLICKHOUSE_CLOUDMAP_SERVICE_NAME
190
- }
191
- });
192
- // 9. Scheduled OPTIMIZE TABLE FINAL task (deduplicates ReplacingMergeTree tables)
193
- const optimiseQuery = [
194
- ...REPLACING_MERGE_TREE_TABLES.map((table) => `OPTIMIZE TABLE analytics.${table} FINAL`),
195
- ...OPTIMISE_MV_TABLES.map((table) => `OPTIMIZE TABLE analytics.${table}`)
196
- ].join("; ");
197
- new ScheduledEc2Task(this, "ClickHouseOptimiseTask", {
198
- cluster,
199
- schedule: Schedule.expression(OPTIMISE_FINAL_SCHEDULE),
200
- scheduledEc2TaskImageOptions: {
201
- image: ContainerImage.fromRegistry(CLICKHOUSE_IMAGE),
202
- memoryLimitMiB: OPTIMISE_TASK_MEMORY_MIB,
203
- cpu: OPTIMISE_TASK_CPU_UNITS,
204
- command: [
205
- "clickhouse-client",
206
- "--host",
207
- clickHouseHost,
208
- "--port",
209
- String(CLICKHOUSE_NATIVE_PORT),
210
- "--user",
211
- "schema_admin",
212
- "--query",
213
- `${optimiseQuery};`
214
- ],
215
- secrets: {
216
- CLICKHOUSE_PASSWORD: EcsSecret.fromSecretsManager(schemaPasswordSecret.secret)
217
- },
218
- logDriver: LogDriver.awsLogs({
219
- streamPrefix: "clickhouse-optimise",
220
- logRetention: RetentionDays.ONE_WEEK
221
- })
222
- },
223
- securityGroups: [securityGroup],
224
- subnetSelection: {
225
- subnetType
226
- }
227
- });
228
- // 10. S3 bucket for weekly backups
229
- const backupBucket = new S3Bucket(this, "ClickHouseBackupBucket", {
230
- versioned: true,
231
- lifecycleRules: [
232
- {
233
- enabled: true,
234
- expiration: Duration.days(BACKUP_RETENTION_DAYS),
235
- noncurrentVersionExpiration: Duration.days(BACKUP_RETENTION_DAYS)
236
- }
237
- ]
238
- });
239
- // 11. Scheduled weekly backup to S3
240
- const backupDestUrl = `https://${backupBucket.bucketName}.s3.${Stack.of(this).region}.amazonaws.com/`;
241
- const backupTaskLogGroup = new LogGroup(this, "ClickHouseBackupTaskLogGroup", {
242
- retention: RetentionDays.TWO_WEEKS
243
- });
244
- new ScheduledEc2Task(this, "ClickHouseBackupTask", {
245
- cluster,
246
- schedule: Schedule.expression(BACKUP_SCHEDULE),
247
- scheduledEc2TaskImageOptions: {
248
- image: ContainerImage.fromRegistry(CLICKHOUSE_IMAGE),
249
- memoryLimitMiB: BACKUP_TASK_MEMORY_MIB,
250
- cpu: BACKUP_TASK_CPU_UNITS,
251
- command: [
252
- "sh",
253
- "-c",
254
- `STAMP=$(date +%Y%m%d-%H%M%S) && clickhouse-client --host ${clickHouseHost} --port ${CLICKHOUSE_NATIVE_PORT} --user backup_reader --password "$CLICKHOUSE_BACKUP_PASSWORD" --query "BACKUP DATABASE analytics TO S3('${backupDestUrl}weekly-$STAMP/')"`
255
- ],
256
- secrets: {
257
- CLICKHOUSE_BACKUP_PASSWORD: EcsSecret.fromSecretsManager(backupPasswordSecret.secret)
258
- },
259
- logDriver: LogDriver.awsLogs({
260
- streamPrefix: "clickhouse-backup",
261
- logGroup: backupTaskLogGroup
262
- })
263
- },
264
- securityGroups: [securityGroup],
265
- subnetSelection: {
266
- subnetType
267
- }
268
- });
269
- // BACKUP DATABASE TO S3 runs inside the ClickHouse server process on the
270
- // ASG instance, not the ephemeral backup task; the grant must therefore
271
- // attach to the ASG instance role, not the task role.
272
- backupBucket.grantReadWrite(asg.role);
273
- // 12. Grant secret read to execution role
274
- const executionRole = taskDefinition.executionRole;
275
- if (!executionRole) {
276
- throw new Error("ClickHouse task definition has no execution role — cannot grant secret access");
277
- }
278
- appPasswordSecret.secret.grantRead(executionRole);
279
- auditPasswordSecret.secret.grantRead(executionRole);
280
- backupPasswordSecret.secret.grantRead(executionRole);
281
- schemaPasswordSecret.secret.grantRead(executionRole);
282
- if (props.alarmTopic) {
283
- if (!props.webappLogGroup) {
284
- throw new Error("ClickHouse: alarmTopic requires webappLogGroup so the stuck-merge metric filter can be wired.");
285
- }
286
- createClickHouseAlarms({
287
- scope: this,
288
- asg,
289
- alarmTopic: props.alarmTopic,
290
- webappLogGroup: props.webappLogGroup,
291
- backupTaskLogGroup
292
- });
293
- }
294
- // 13. Connections and outputs
295
- this.connections = new Connections({
296
- securityGroups: [securityGroup],
297
- defaultPort: Port.tcp(CLICKHOUSE_HTTP_PORT)
298
- });
299
- this.outputs = {
300
- securityGroup,
301
- backupBucket,
302
- secrets: {
303
- appPassword: appPasswordSecret.secret,
304
- auditPassword: auditPasswordSecret.secret,
305
- backupPassword: backupPasswordSecret.secret,
306
- schemaPassword: schemaPasswordSecret.secret
307
- }
308
- };
309
- }
310
- }
@@ -1,49 +0,0 @@
1
- import { Alarm } from "aws-cdk-lib/aws-cloudwatch";
2
- import type { AutoScalingGroup } from "aws-cdk-lib/aws-autoscaling";
3
- import type { ITopic } from "aws-cdk-lib/aws-sns";
4
- import type { ILogGroup } from "aws-cdk-lib/aws-logs";
5
- import type { Construct } from "constructs";
6
- export interface ClickHouseAlarmThresholds {
7
- /** EC2 host CPU % over 5 min. Default 90. */
8
- cpuThreshold?: number;
9
- /** EC2 host memory % over 5 min (requires CWAgent). Default 80. */
10
- memoryThreshold?: number;
11
- /** EBS root-volume disk % used. Default 70 (warn) — paired with critical at 85. */
12
- diskWarnThreshold?: number;
13
- /** EBS root-volume disk % used. Default 85. */
14
- diskCriticalThreshold?: number;
15
- }
16
- export interface ClickHouseAlarmsProps {
17
- scope: Construct;
18
- asg: AutoScalingGroup;
19
- alarmTopic: ITopic;
20
- /**
21
- * Webapp log group. Required to wire the stuck-merge alarm — `client.ts`
22
- * emits `serverLogger.warn("ClickHouse", "Stuck merge detected")` when
23
- * `system.merges` shows a merge elapsed > 30 min.
24
- */
25
- webappLogGroup: ILogGroup;
26
- /**
27
- * Backup-task log group. Required to wire the backup-failure alarm —
28
- * `BACKUP DATABASE … TO S3(…)` emits `AccessDenied` / `S3Exception` lines
29
- * when the IAM grant or bucket policy is misconfigured (silent before the
30
- * alarm landed; the daily backup task exited non-zero with no signal).
31
- */
32
- backupTaskLogGroup: ILogGroup;
33
- config?: ClickHouseAlarmThresholds;
34
- }
35
- /**
36
- * Single-node ClickHouse posture alarms. Covers host-level CPU + (optional)
37
- * memory and disk via the CloudWatch Agent metric namespace `CWAgent`, plus
38
- * two log-driven alarms:
39
- *
40
- * - **Stuck merges** — `client.ts` polls `system.merges` every 5 min and logs
41
- * `serverLogger.warn("ClickHouse", "Stuck merge detected")` when elapsed
42
- * exceeds 30 min. The metric filter on the webapp log group emits a count
43
- * metric per match; the alarm fires on Sum >= 1 over 5 min × 2 evaluations.
44
- * - **Backup failures** — `AccessDenied` or `S3Exception` from the backup
45
- * task's BACKUP DATABASE TO S3 statement. Closes the silent-failure mode
46
- * that masked the original IAM-grant misconfiguration (see
47
- * `designs/2026-04-27-clickhouse-backup-iam-role.md`).
48
- */
49
- export declare function createClickHouseAlarms(props: ClickHouseAlarmsProps): Alarm[];
@@ -1,140 +0,0 @@
1
- import { Duration } from "aws-cdk-lib";
2
- import { Alarm, ComparisonOperator, TreatMissingData } from "aws-cdk-lib/aws-cloudwatch";
3
- import { SnsAction } from "aws-cdk-lib/aws-cloudwatch-actions";
4
- import { Metric } from "aws-cdk-lib/aws-cloudwatch";
5
- import { FilterPattern, MetricFilter } from "aws-cdk-lib/aws-logs";
6
- import { ALARM_DEFAULTS, registerAlarm, buildAlarmDescription } from "../monitoring/alarmDefaults.js";
7
- const CLICKHOUSE_METRIC_NAMESPACE = "Fjall/ClickHouse";
8
- /**
9
- * Single-node ClickHouse posture alarms. Covers host-level CPU + (optional)
10
- * memory and disk via the CloudWatch Agent metric namespace `CWAgent`, plus
11
- * two log-driven alarms:
12
- *
13
- * - **Stuck merges** — `client.ts` polls `system.merges` every 5 min and logs
14
- * `serverLogger.warn("ClickHouse", "Stuck merge detected")` when elapsed
15
- * exceeds 30 min. The metric filter on the webapp log group emits a count
16
- * metric per match; the alarm fires on Sum >= 1 over 5 min × 2 evaluations.
17
- * - **Backup failures** — `AccessDenied` or `S3Exception` from the backup
18
- * task's BACKUP DATABASE TO S3 statement. Closes the silent-failure mode
19
- * that masked the original IAM-grant misconfiguration (see
20
- * `designs/2026-04-27-clickhouse-backup-iam-role.md`).
21
- */
22
- export function createClickHouseAlarms(props) {
23
- const { scope, asg, alarmTopic, webappLogGroup, backupTaskLogGroup, config = {} } = props;
24
- const alarms = [];
25
- const snsAction = new SnsAction(alarmTopic);
26
- const asgName = asg.autoScalingGroupName;
27
- const cpuAlarm = new Alarm(scope, "ClickHouseCpuAlarm", {
28
- alarmDescription: buildAlarmDescription("ClickHouse host CPU utilisation exceeds threshold", undefined),
29
- metric: new Metric({
30
- namespace: "AWS/EC2",
31
- metricName: "CPUUtilization",
32
- dimensionsMap: { AutoScalingGroupName: asgName },
33
- period: ALARM_DEFAULTS.EVALUATION_PERIOD,
34
- statistic: "Average"
35
- }),
36
- threshold: config.cpuThreshold ?? 90,
37
- evaluationPeriods: 3,
38
- datapointsToAlarm: 2,
39
- comparisonOperator: ComparisonOperator.GREATER_THAN_THRESHOLD,
40
- treatMissingData: TreatMissingData.NOT_BREACHING
41
- });
42
- registerAlarm(cpuAlarm, snsAction, alarms);
43
- const memoryAlarm = new Alarm(scope, "ClickHouseMemoryAlarm", {
44
- alarmDescription: buildAlarmDescription("ClickHouse host memory utilisation exceeds threshold (CWAgent)", undefined),
45
- metric: new Metric({
46
- namespace: "CWAgent",
47
- metricName: "mem_used_percent",
48
- dimensionsMap: { AutoScalingGroupName: asgName },
49
- period: ALARM_DEFAULTS.EVALUATION_PERIOD,
50
- statistic: "Average"
51
- }),
52
- threshold: config.memoryThreshold ?? 80,
53
- evaluationPeriods: 3,
54
- datapointsToAlarm: 2,
55
- comparisonOperator: ComparisonOperator.GREATER_THAN_THRESHOLD,
56
- treatMissingData: TreatMissingData.NOT_BREACHING
57
- });
58
- registerAlarm(memoryAlarm, snsAction, alarms);
59
- const diskWarnAlarm = new Alarm(scope, "ClickHouseDiskWarnAlarm", {
60
- alarmDescription: buildAlarmDescription("ClickHouse data volume above 70% used — plan growth response", undefined),
61
- metric: new Metric({
62
- namespace: "CWAgent",
63
- metricName: "disk_used_percent",
64
- dimensionsMap: { AutoScalingGroupName: asgName },
65
- period: Duration.minutes(15),
66
- statistic: "Average"
67
- }),
68
- threshold: config.diskWarnThreshold ?? 70,
69
- evaluationPeriods: 2,
70
- datapointsToAlarm: 2,
71
- comparisonOperator: ComparisonOperator.GREATER_THAN_THRESHOLD,
72
- treatMissingData: TreatMissingData.NOT_BREACHING
73
- });
74
- registerAlarm(diskWarnAlarm, snsAction, alarms);
75
- const diskCriticalAlarm = new Alarm(scope, "ClickHouseDiskCriticalAlarm", {
76
- alarmDescription: buildAlarmDescription("ClickHouse data volume above 85% used — imminent insert failures", undefined),
77
- metric: new Metric({
78
- namespace: "CWAgent",
79
- metricName: "disk_used_percent",
80
- dimensionsMap: { AutoScalingGroupName: asgName },
81
- period: Duration.minutes(5),
82
- statistic: "Average"
83
- }),
84
- threshold: config.diskCriticalThreshold ?? 85,
85
- evaluationPeriods: 2,
86
- datapointsToAlarm: 2,
87
- comparisonOperator: ComparisonOperator.GREATER_THAN_THRESHOLD,
88
- treatMissingData: TreatMissingData.NOT_BREACHING
89
- });
90
- registerAlarm(diskCriticalAlarm, snsAction, alarms);
91
- const stuckMergeMetricName = "ClickHouseStuckMergeCount";
92
- new MetricFilter(scope, "ClickHouseStuckMergeMetricFilter", {
93
- logGroup: webappLogGroup,
94
- metricNamespace: CLICKHOUSE_METRIC_NAMESPACE,
95
- metricName: stuckMergeMetricName,
96
- filterPattern: FilterPattern.literal('"Stuck merge detected"'),
97
- metricValue: "1",
98
- defaultValue: 0
99
- });
100
- const stuckMergeAlarm = new Alarm(scope, "ClickHouseStuckMergeAlarm", {
101
- alarmDescription: buildAlarmDescription("ClickHouse merge stuck > 30 min — investigate parts pressure or replica health", undefined),
102
- metric: new Metric({
103
- namespace: CLICKHOUSE_METRIC_NAMESPACE,
104
- metricName: stuckMergeMetricName,
105
- period: Duration.minutes(5),
106
- statistic: "Sum"
107
- }),
108
- threshold: 1,
109
- evaluationPeriods: 2,
110
- datapointsToAlarm: 2,
111
- comparisonOperator: ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
112
- treatMissingData: TreatMissingData.NOT_BREACHING
113
- });
114
- registerAlarm(stuckMergeAlarm, snsAction, alarms);
115
- const backupFailureMetricName = "ClickHouseBackupFailureCount";
116
- new MetricFilter(scope, "ClickHouseBackupFailureMetricFilter", {
117
- logGroup: backupTaskLogGroup,
118
- metricNamespace: CLICKHOUSE_METRIC_NAMESPACE,
119
- metricName: backupFailureMetricName,
120
- filterPattern: FilterPattern.anyTerm("AccessDenied", "S3Exception"),
121
- metricValue: "1",
122
- defaultValue: 0
123
- });
124
- const backupFailureAlarm = new Alarm(scope, "ClickHouseBackupFailureAlarm", {
125
- alarmDescription: buildAlarmDescription("ClickHouse BACKUP TO S3 emitted AccessDenied/S3Exception — verify ASG instance role grant on backup bucket", undefined),
126
- metric: new Metric({
127
- namespace: CLICKHOUSE_METRIC_NAMESPACE,
128
- metricName: backupFailureMetricName,
129
- period: Duration.hours(1),
130
- statistic: "Sum"
131
- }),
132
- threshold: 1,
133
- evaluationPeriods: 1,
134
- datapointsToAlarm: 1,
135
- comparisonOperator: ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
136
- treatMissingData: TreatMissingData.NOT_BREACHING
137
- });
138
- registerAlarm(backupFailureAlarm, snsAction, alarms);
139
- return alarms;
140
- }
@@ -1,73 +0,0 @@
1
- /** Cluster/task family name used for ECS resources. */
2
- export declare const CLICKHOUSE_CLUSTER_NAME = "clickhouse-analytics";
3
- /** Default EC2 instance type for ClickHouse (Graviton — best cost/performance). */
4
- export declare const DEFAULT_CLICKHOUSE_INSTANCE_TYPE = "t4g.medium";
5
- /** ClickHouse container image. */
6
- export declare const CLICKHOUSE_IMAGE = "clickhouse/clickhouse-server:26.3-alpine";
7
- /** EBS volume configuration. */
8
- export declare const CLICKHOUSE_EBS_VOLUME_SIZE_GB = 80;
9
- export declare const CLICKHOUSE_EBS_IOPS = 3000;
10
- export declare const CLICKHOUSE_EBS_THROUGHPUT_MBPS = 125;
11
- /** ECS task resource allocation (t4g.medium = 4 GB total). */
12
- export declare const CLICKHOUSE_TASK_MEMORY_MIB = 3072;
13
- export declare const CLICKHOUSE_TASK_CPU_UNITS = 1024;
14
- /** ClickHouse ports. */
15
- export declare const CLICKHOUSE_HTTP_PORT = 8123;
16
- export declare const CLICKHOUSE_NATIVE_PORT = 9000;
17
- export declare const CLICKHOUSE_PROMETHEUS_PORT = 9363;
18
- /** EBS device name for the data volume (must match user data script). */
19
- export declare const CLICKHOUSE_EBS_DEVICE_NAME = "/dev/xvdf";
20
- /** EBS mount path on the EC2 host. */
21
- export declare const CLICKHOUSE_DATA_MOUNT_PATH = "/mnt/clickhouse-data";
22
- /** Secrets Manager path prefix. */
23
- export declare const CLICKHOUSE_SECRETS_PREFIX = "fjall/clickhouse";
24
- /** Secret names (under the prefix). */
25
- export declare const CLICKHOUSE_SECRET_NAMES: {
26
- readonly APP_PASSWORD: "app-password";
27
- readonly AUDIT_PASSWORD: "audit-password";
28
- readonly BACKUP_PASSWORD: "backup-password";
29
- readonly SCHEMA_PASSWORD: "schema-password";
30
- };
31
- /** Shared secret generation options (all ClickHouse users share the same policy). */
32
- export declare const CLICKHOUSE_SECRET_OPTIONS: {
33
- readonly excludePunctuation: true;
34
- readonly passwordLength: 32;
35
- };
36
- /** Health check configuration. */
37
- export declare const CLICKHOUSE_HEALTH_CHECK: {
38
- readonly INTERVAL_SECONDS: 30;
39
- readonly TIMEOUT_SECONDS: 5;
40
- readonly RETRIES: 3;
41
- readonly START_PERIOD_SECONDS: 60;
42
- };
43
- /** OPTIMIZE TABLE FINAL schedule.
44
- * RMT tables carry min_age_to_force_merge_seconds=600 so the engine already merges
45
- * old parts within 10 min; this task is a safety net for MVs (no engine-level setting)
46
- * and for ReplacingMergeTree dedup under skewed write patterns. 6 hours is sufficient. */
47
- export declare const OPTIMISE_FINAL_SCHEDULE = "rate(6 hours)";
48
- /** Tables requiring periodic OPTIMIZE FINAL (ReplacingMergeTree only).
49
- * Keep in sync with REPLACING_MERGE_TREE_TABLES in
50
- * webapp/app/.server/lib/clickhouse/tenantQuery.ts (auto-FINAL). */
51
- export declare const REPLACING_MERGE_TREE_TABLES: readonly ["application_metrics", "cost_records", "log_fingerprints", "insights", "asset_inventory"];
52
- /** Subdirectory on the EBS volume for server config files (must match CDK volume mount). */
53
- export declare const CLICKHOUSE_CONFIG_SUBDIR = "server-config.d";
54
- /** Subdirectory on the EBS volume for users config files (must match CDK volume mount). */
55
- export declare const CLICKHOUSE_USERS_SUBDIR = "server-users.d";
56
- /** Cloud Map namespace for ClickHouse service discovery. */
57
- export declare const CLICKHOUSE_CLOUDMAP_NAMESPACE = "clickhouse.local";
58
- /** Cloud Map service name (resolves to clickhouse.clickhouse.local). */
59
- export declare const CLICKHOUSE_CLOUDMAP_SERVICE_NAME = "clickhouse";
60
- /** Materialised views that benefit from periodic OPTIMIZE to reduce part count at read time.
61
- * These are not ReplacingMergeTree (no dedup needed) but un-merged parts force
62
- * read-time aggregation which degrades query performance. */
63
- export declare const OPTIMISE_MV_TABLES: readonly ["metrics_hourly_mv", "metrics_daily_mv", "response_time_quantiles_hourly_mv", "deployment_duration_quantiles_daily_mv", "log_severity_hourly_mv", "compliance_score_daily_mv", "ai_usage_daily_mv", "finding_daily_aggregate", "insight_pattern_dismissals"];
64
- /** Resource allocation for the lightweight optimise task. */
65
- export declare const OPTIMISE_TASK_MEMORY_MIB = 256;
66
- export declare const OPTIMISE_TASK_CPU_UNITS = 256;
67
- /** Automated backup schedule (daily 03:00 UTC — low-traffic window). */
68
- export declare const BACKUP_SCHEDULE = "cron(0 3 * * ? *)";
69
- /** Resource allocation for the backup task (lightweight — clickhouse-client only). */
70
- export declare const BACKUP_TASK_MEMORY_MIB = 256;
71
- export declare const BACKUP_TASK_CPU_UNITS = 256;
72
- /** Backup object expiration: 14 days (retains 14 daily snapshots). */
73
- export declare const BACKUP_RETENTION_DAYS = 14;
@@ -1,89 +0,0 @@
1
- /** Cluster/task family name used for ECS resources. */
2
- export const CLICKHOUSE_CLUSTER_NAME = "clickhouse-analytics";
3
- /** Default EC2 instance type for ClickHouse (Graviton — best cost/performance). */
4
- export const DEFAULT_CLICKHOUSE_INSTANCE_TYPE = "t4g.medium";
5
- /** ClickHouse container image. */
6
- export const CLICKHOUSE_IMAGE = "clickhouse/clickhouse-server:26.3-alpine";
7
- /** EBS volume configuration. */
8
- export const CLICKHOUSE_EBS_VOLUME_SIZE_GB = 80;
9
- export const CLICKHOUSE_EBS_IOPS = 3000;
10
- export const CLICKHOUSE_EBS_THROUGHPUT_MBPS = 125;
11
- /** ECS task resource allocation (t4g.medium = 4 GB total). */
12
- export const CLICKHOUSE_TASK_MEMORY_MIB = 3072;
13
- export const CLICKHOUSE_TASK_CPU_UNITS = 1024;
14
- /** ClickHouse ports. */
15
- export const CLICKHOUSE_HTTP_PORT = 8123;
16
- export const CLICKHOUSE_NATIVE_PORT = 9000;
17
- export const CLICKHOUSE_PROMETHEUS_PORT = 9363;
18
- /** EBS device name for the data volume (must match user data script). */
19
- export const CLICKHOUSE_EBS_DEVICE_NAME = "/dev/xvdf";
20
- /** EBS mount path on the EC2 host. */
21
- export const CLICKHOUSE_DATA_MOUNT_PATH = "/mnt/clickhouse-data";
22
- /** Secrets Manager path prefix. */
23
- export const CLICKHOUSE_SECRETS_PREFIX = "fjall/clickhouse";
24
- /** Secret names (under the prefix). */
25
- export const CLICKHOUSE_SECRET_NAMES = {
26
- APP_PASSWORD: "app-password",
27
- AUDIT_PASSWORD: "audit-password",
28
- BACKUP_PASSWORD: "backup-password",
29
- SCHEMA_PASSWORD: "schema-password"
30
- };
31
- /** Shared secret generation options (all ClickHouse users share the same policy). */
32
- export const CLICKHOUSE_SECRET_OPTIONS = {
33
- excludePunctuation: true,
34
- passwordLength: 32
35
- };
36
- /** Health check configuration. */
37
- export const CLICKHOUSE_HEALTH_CHECK = {
38
- INTERVAL_SECONDS: 30,
39
- TIMEOUT_SECONDS: 5,
40
- RETRIES: 3,
41
- START_PERIOD_SECONDS: 60
42
- };
43
- /** OPTIMIZE TABLE FINAL schedule.
44
- * RMT tables carry min_age_to_force_merge_seconds=600 so the engine already merges
45
- * old parts within 10 min; this task is a safety net for MVs (no engine-level setting)
46
- * and for ReplacingMergeTree dedup under skewed write patterns. 6 hours is sufficient. */
47
- export const OPTIMISE_FINAL_SCHEDULE = "rate(6 hours)";
48
- /** Tables requiring periodic OPTIMIZE FINAL (ReplacingMergeTree only).
49
- * Keep in sync with REPLACING_MERGE_TREE_TABLES in
50
- * webapp/app/.server/lib/clickhouse/tenantQuery.ts (auto-FINAL). */
51
- export const REPLACING_MERGE_TREE_TABLES = [
52
- "application_metrics",
53
- "cost_records",
54
- "log_fingerprints",
55
- "insights",
56
- "asset_inventory"
57
- ];
58
- /** Subdirectory on the EBS volume for server config files (must match CDK volume mount). */
59
- export const CLICKHOUSE_CONFIG_SUBDIR = "server-config.d";
60
- /** Subdirectory on the EBS volume for users config files (must match CDK volume mount). */
61
- export const CLICKHOUSE_USERS_SUBDIR = "server-users.d";
62
- /** Cloud Map namespace for ClickHouse service discovery. */
63
- export const CLICKHOUSE_CLOUDMAP_NAMESPACE = "clickhouse.local";
64
- /** Cloud Map service name (resolves to clickhouse.clickhouse.local). */
65
- export const CLICKHOUSE_CLOUDMAP_SERVICE_NAME = "clickhouse";
66
- /** Materialised views that benefit from periodic OPTIMIZE to reduce part count at read time.
67
- * These are not ReplacingMergeTree (no dedup needed) but un-merged parts force
68
- * read-time aggregation which degrades query performance. */
69
- export const OPTIMISE_MV_TABLES = [
70
- "metrics_hourly_mv",
71
- "metrics_daily_mv",
72
- "response_time_quantiles_hourly_mv",
73
- "deployment_duration_quantiles_daily_mv",
74
- "log_severity_hourly_mv",
75
- "compliance_score_daily_mv",
76
- "ai_usage_daily_mv",
77
- "finding_daily_aggregate",
78
- "insight_pattern_dismissals"
79
- ];
80
- /** Resource allocation for the lightweight optimise task. */
81
- export const OPTIMISE_TASK_MEMORY_MIB = 256;
82
- export const OPTIMISE_TASK_CPU_UNITS = 256;
83
- /** Automated backup schedule (daily 03:00 UTC — low-traffic window). */
84
- export const BACKUP_SCHEDULE = "cron(0 3 * * ? *)";
85
- /** Resource allocation for the backup task (lightweight — clickhouse-client only). */
86
- export const BACKUP_TASK_MEMORY_MIB = 256;
87
- export const BACKUP_TASK_CPU_UNITS = 256;
88
- /** Backup object expiration: 14 days (retains 14 daily snapshots). */
89
- export const BACKUP_RETENTION_DAYS = 14;