@fjall/components-infrastructure 0.95.0 → 0.99.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/lib/app.d.ts +90 -107
- package/dist/lib/app.js +149 -139
- package/dist/lib/config/aws/__t17fixture.d.ts +1 -0
- package/dist/lib/config/aws/__t17fixture.js +3 -0
- package/dist/lib/config/aws/__t17fixtureType.d.ts +2 -0
- package/dist/lib/config/aws/__t17fixtureType.js +1 -0
- package/dist/lib/config/aws/alarmTopic.js +8 -4
- package/dist/lib/config/aws/cloudTrail.js +1 -1
- package/dist/lib/config/aws/disasterRecovery.js +11 -16
- package/dist/lib/config/aws/ecrDefaultImage.d.ts +0 -1
- package/dist/lib/config/aws/ecrDefaultImage.js +13 -23
- package/dist/lib/config/aws/identityCenter.d.ts +10 -3
- package/dist/lib/config/aws/identityCenter.js +101 -37
- package/dist/lib/config/aws/identityCenterGroupMembership.js +8 -2
- package/dist/lib/config/aws/identityCenterMembership.d.ts +11 -0
- package/dist/lib/config/aws/identityCenterMembership.js +61 -0
- package/dist/lib/config/aws/index.d.ts +1 -1
- package/dist/lib/config/aws/index.js +1 -1
- package/dist/lib/config/aws/ipam.js +6 -11
- package/dist/lib/config/aws/oidcConnector.js +5 -1
- package/dist/lib/config/aws/scpPreset.js +4 -1
- package/dist/lib/patterns/aws/_eslint_test_tmp/leak.d.ts +1 -0
- package/dist/lib/patterns/aws/_eslint_test_tmp/leak.js +4 -0
- package/dist/lib/patterns/aws/account.js +7 -8
- package/dist/lib/patterns/aws/apexDomainPattern.js +10 -10
- package/dist/lib/patterns/aws/bastionFactory.d.ts +10 -0
- package/dist/lib/patterns/aws/bastionFactory.js +29 -0
- package/dist/lib/patterns/aws/buildkite.d.ts +2 -2
- package/dist/lib/patterns/aws/buildkite.js +51 -97
- package/dist/lib/patterns/aws/cdn.js +1 -1
- package/dist/lib/patterns/aws/clickhouseDatabase.d.ts +172 -0
- package/dist/lib/patterns/aws/clickhouseDatabase.js +600 -0
- package/dist/lib/patterns/aws/compute.d.ts +4 -6
- package/dist/lib/patterns/aws/compute.js +7 -13
- package/dist/lib/patterns/aws/computeEcs.d.ts +95 -396
- package/dist/lib/patterns/aws/computeEcs.js +880 -46
- package/dist/lib/patterns/aws/computeEcsTypes.d.ts +889 -0
- package/dist/lib/patterns/aws/computeEcsTypes.js +12 -0
- package/dist/lib/patterns/aws/computeLambda.d.ts +0 -5
- package/dist/lib/patterns/aws/computeLambda.js +1 -2
- package/dist/lib/patterns/aws/database.d.ts +50 -8
- package/dist/lib/patterns/aws/database.js +183 -27
- package/dist/lib/patterns/aws/domain.js +8 -7
- package/dist/lib/patterns/aws/index.d.ts +3 -0
- package/dist/lib/patterns/aws/index.js +3 -0
- package/dist/lib/patterns/aws/interfaces/compute.d.ts +13 -1
- package/dist/lib/patterns/aws/interfaces/connector.d.ts +1 -1
- package/dist/lib/patterns/aws/interfaces/connector.js +1 -1
- package/dist/lib/patterns/aws/interfaces/database.d.ts +187 -8
- package/dist/lib/patterns/aws/interfaces/database.js +17 -3
- package/dist/lib/patterns/aws/interfaces/index.d.ts +4 -2
- package/dist/lib/patterns/aws/interfaces/index.js +4 -2
- package/dist/lib/patterns/aws/interfaces/messaging.d.ts +7 -0
- package/dist/lib/patterns/aws/interfaces/migrationContributor.d.ts +47 -0
- package/dist/lib/patterns/aws/interfaces/migrationContributor.js +9 -0
- package/dist/lib/patterns/aws/interfaces/vpcPeer.d.ts +7 -0
- package/dist/lib/patterns/aws/interfaces/vpcPeer.js +1 -0
- package/dist/lib/patterns/aws/messaging.d.ts +66 -10
- package/dist/lib/patterns/aws/messaging.js +115 -20
- package/dist/lib/patterns/aws/network.js +16 -7
- package/dist/lib/patterns/aws/organisation.d.ts +4 -0
- package/dist/lib/patterns/aws/organisation.js +24 -5
- package/dist/lib/patterns/aws/storage.d.ts +1 -2
- package/dist/lib/patterns/aws/storage.js +3 -2
- package/dist/lib/patterns/aws/vpcPeer.d.ts +34 -0
- package/dist/lib/patterns/aws/vpcPeer.js +38 -0
- package/dist/lib/patterns/aws/vpcPeerAccepter.d.ts +29 -0
- package/dist/lib/patterns/aws/vpcPeerAccepter.js +196 -0
- package/dist/lib/resources/aws/analytics/clickhouse.js +25 -7
- package/dist/lib/resources/aws/analytics/clickhouseAlarms.d.ts +49 -0
- package/dist/lib/resources/aws/analytics/clickhouseAlarms.js +140 -0
- package/dist/lib/resources/aws/analytics/clickhouseConstants.d.ts +4 -4
- package/dist/lib/resources/aws/analytics/clickhouseConstants.js +6 -4
- package/dist/lib/resources/aws/analytics/clickhouseTypes.d.ts +12 -0
- package/dist/lib/resources/aws/analytics/clickhouseUserData.d.ts +1 -0
- package/dist/lib/resources/aws/analytics/clickhouseUserData.js +56 -5
- package/dist/lib/resources/aws/analytics/index.d.ts +2 -0
- package/dist/lib/resources/aws/analytics/index.js +1 -0
- package/dist/lib/resources/aws/base/awsStack.js +4 -2
- package/dist/lib/resources/aws/compute/__tmp__/regression-shape.d.ts +2 -0
- package/dist/lib/resources/aws/compute/__tmp__/regression-shape.js +11 -0
- package/dist/lib/resources/aws/compute/asgInlineLifecycleHook.d.ts +52 -0
- package/dist/lib/resources/aws/compute/asgInlineLifecycleHook.js +60 -0
- package/dist/lib/resources/aws/compute/blockDeviceVolume.d.ts +8 -0
- package/dist/lib/resources/aws/compute/blockDeviceVolume.js +10 -0
- package/dist/lib/resources/aws/compute/ec2.d.ts +132 -12
- package/dist/lib/resources/aws/compute/ec2.js +163 -23
- package/dist/lib/resources/aws/compute/ec2GracefulTerminationHandler.d.ts +41 -0
- package/dist/lib/resources/aws/compute/ec2GracefulTerminationHandler.js +194 -0
- package/dist/lib/resources/aws/compute/ec2GracefulTerminationLambda.source.cjs +458 -0
- package/dist/lib/resources/aws/compute/ecs.d.ts +27 -1
- package/dist/lib/resources/aws/compute/ecs.js +42 -2
- package/dist/lib/resources/aws/compute/ecsConstants.d.ts +9 -0
- package/dist/lib/resources/aws/compute/ecsConstants.js +16 -0
- package/dist/lib/resources/aws/compute/ecsImages.js +32 -20
- package/dist/lib/resources/aws/compute/ecsLifecycleHookMigration.d.ts +96 -0
- package/dist/lib/resources/aws/compute/ecsLifecycleHookMigration.js +113 -0
- package/dist/lib/resources/aws/compute/ecsNetworking.d.ts +2 -1
- package/dist/lib/resources/aws/compute/ecsNetworking.js +18 -6
- package/dist/lib/resources/aws/compute/ecsRemoteConnections.d.ts +38 -0
- package/dist/lib/resources/aws/compute/ecsRemoteConnections.js +80 -0
- package/dist/lib/resources/aws/compute/ecsServiceFactory.d.ts +13 -4
- package/dist/lib/resources/aws/compute/ecsServiceFactory.js +155 -33
- package/dist/lib/resources/aws/compute/ecsTaskDefinition.d.ts +31 -1
- package/dist/lib/resources/aws/compute/ecsTaskDefinition.js +110 -6
- package/dist/lib/resources/aws/compute/ecsTypes.d.ts +180 -13
- package/dist/lib/resources/aws/compute/ecsValidation.d.ts +9 -0
- package/dist/lib/resources/aws/compute/ecsValidation.js +63 -0
- package/dist/lib/resources/aws/compute/index.d.ts +2 -0
- package/dist/lib/resources/aws/compute/index.js +2 -0
- package/dist/lib/resources/aws/compute/lambda.d.ts +7 -13
- package/dist/lib/resources/aws/compute/lambda.js +30 -38
- package/dist/lib/resources/aws/compute/lifecycleHookLambda.source.cjs +192 -0
- package/dist/lib/resources/aws/compute/persistentDataVolume.d.ts +104 -0
- package/dist/lib/resources/aws/compute/persistentDataVolume.js +245 -0
- package/dist/lib/resources/aws/compute/persistentDataVolumeLambda.source.cjs +398 -0
- package/dist/lib/resources/aws/compute/samApplication.d.ts +15 -0
- package/dist/lib/resources/aws/compute/samApplication.js +27 -0
- package/dist/lib/resources/aws/database/clickhouseConstants.d.ts +159 -0
- package/dist/lib/resources/aws/database/clickhouseConstants.js +181 -0
- package/dist/lib/resources/aws/database/clickhouseSchemas.d.ts +71 -0
- package/dist/lib/resources/aws/database/clickhouseSchemas.js +157 -0
- package/dist/lib/resources/aws/database/clickhouseSecurityGroup.d.ts +14 -0
- package/dist/lib/resources/aws/database/clickhouseSecurityGroup.js +23 -0
- package/dist/lib/resources/aws/database/clickhouseUserData.d.ts +69 -0
- package/dist/lib/resources/aws/database/clickhouseUserData.js +371 -0
- package/dist/lib/resources/aws/database/clickhouseXmlRenderer.d.ts +56 -0
- package/dist/lib/resources/aws/database/clickhouseXmlRenderer.js +112 -0
- package/dist/lib/resources/aws/database/rdsAurora.d.ts +8 -1
- package/dist/lib/resources/aws/database/rdsAurora.js +42 -32
- package/dist/lib/resources/aws/database/rdsAuroraGlobal.d.ts +15 -2
- package/dist/lib/resources/aws/database/rdsAuroraGlobal.js +39 -43
- package/dist/lib/resources/aws/database/rdsDefaults.d.ts +6 -0
- package/dist/lib/resources/aws/database/rdsDefaults.js +7 -1
- package/dist/lib/resources/aws/database/rdsHelpers.d.ts +3 -3
- package/dist/lib/resources/aws/database/rdsHelpers.js +1 -0
- package/dist/lib/resources/aws/database/rdsInstance.d.ts +8 -1
- package/dist/lib/resources/aws/database/rdsInstance.js +51 -34
- package/dist/lib/resources/aws/database/rdsProxyOutput.d.ts +1 -1
- package/dist/lib/resources/aws/database/rdsProxyOutput.js +1 -1
- package/dist/lib/resources/aws/iam/delegationRole.js +12 -5
- package/dist/lib/resources/aws/iam/identityCenter/groupMembership.d.ts +9 -0
- package/dist/lib/resources/aws/iam/identityCenter/groupMembership.js +12 -0
- package/dist/lib/resources/aws/iam/identityCenter/index.d.ts +1 -0
- package/dist/lib/resources/aws/iam/identityCenter/index.js +1 -0
- package/dist/lib/resources/aws/iam/identityCenter/permissionSet.d.ts +1 -0
- package/dist/lib/resources/aws/iam/identityCenter/permissionSet.js +1 -0
- package/dist/lib/resources/aws/logging/logGroup.d.ts +0 -8
- package/dist/lib/resources/aws/logging/logGroup.js +0 -11
- package/dist/lib/resources/aws/messaging/defaultEventBus.d.ts +7 -0
- package/dist/lib/resources/aws/messaging/defaultEventBus.js +21 -0
- package/dist/lib/resources/aws/messaging/eventBridgeRule.d.ts +96 -0
- package/dist/lib/resources/aws/messaging/eventBridgeRule.js +110 -0
- package/dist/lib/resources/aws/messaging/eventTargets.d.ts +84 -0
- package/dist/lib/resources/aws/messaging/eventTargets.js +152 -0
- package/dist/lib/resources/aws/messaging/eventbridge.d.ts +25 -2
- package/dist/lib/resources/aws/messaging/eventbridge.js +22 -10
- package/dist/lib/resources/aws/messaging/index.d.ts +5 -0
- package/dist/lib/resources/aws/messaging/index.js +2 -0
- package/dist/lib/resources/aws/messaging/schedule.d.ts +118 -0
- package/dist/lib/resources/aws/messaging/schedule.js +64 -0
- package/dist/lib/resources/aws/messaging/sns.d.ts +2 -1
- package/dist/lib/resources/aws/messaging/sqs.d.ts +2 -1
- package/dist/lib/resources/aws/messaging/subscription.d.ts +112 -0
- package/dist/lib/resources/aws/messaging/subscription.js +67 -0
- package/dist/lib/resources/aws/messaging/utils.d.ts +6 -0
- package/dist/lib/resources/aws/messaging/utils.js +10 -0
- package/dist/lib/resources/aws/monitoring/clickhouseAlarms.d.ts +60 -0
- package/dist/lib/resources/aws/monitoring/clickhouseAlarms.js +139 -0
- package/dist/lib/resources/aws/monitoring/index.d.ts +2 -0
- package/dist/lib/resources/aws/monitoring/index.js +2 -0
- package/dist/lib/resources/aws/monitoring/scheduleAlarms.d.ts +47 -0
- package/dist/lib/resources/aws/monitoring/scheduleAlarms.js +106 -0
- package/dist/lib/resources/aws/networking/crossAccountDelegationRecord.js +6 -3
- package/dist/lib/resources/aws/networking/crossAccountReturnRoutes.d.ts +40 -0
- package/dist/lib/resources/aws/networking/crossAccountReturnRoutes.js +158 -0
- package/dist/lib/resources/aws/networking/dnsRecord/dnsRecordBase.js +7 -4
- package/dist/lib/resources/aws/networking/domainCertificate.d.ts +2 -2
- package/dist/lib/resources/aws/networking/domainCertificate.js +6 -3
- package/dist/lib/resources/aws/networking/hostedZone.js +6 -4
- package/dist/lib/resources/aws/networking/index.d.ts +3 -0
- package/dist/lib/resources/aws/networking/index.js +3 -0
- package/dist/lib/resources/aws/networking/serviceDiscovery.d.ts +96 -0
- package/dist/lib/resources/aws/networking/serviceDiscovery.js +96 -0
- package/dist/lib/resources/aws/networking/vpc.d.ts +4 -1
- package/dist/lib/resources/aws/networking/vpc.js +10 -3
- package/dist/lib/resources/aws/networking/vpcPeeringAccepterRole.d.ts +18 -0
- package/dist/lib/resources/aws/networking/vpcPeeringAccepterRole.js +61 -0
- package/dist/lib/resources/aws/networking/vpcPeeringConnection.d.ts +49 -0
- package/dist/lib/resources/aws/networking/vpcPeeringConnection.js +106 -0
- package/dist/lib/resources/aws/organisation/costAllocationTagActivator.d.ts +16 -5
- package/dist/lib/resources/aws/organisation/costAllocationTagActivator.js +17 -3
- package/dist/lib/resources/aws/organisation/index.d.ts +1 -1
- package/dist/lib/resources/aws/organisation/organisationPolicy.d.ts +2 -0
- package/dist/lib/resources/aws/organisation/organisationPolicy.js +3 -2
- package/dist/lib/resources/aws/secrets/secret.d.ts +7 -0
- package/dist/lib/resources/aws/secrets/secret.js +4 -3
- package/dist/lib/resources/aws/storage/bucketDeployment.d.ts +16 -0
- package/dist/lib/resources/aws/storage/bucketDeployment.js +17 -0
- package/dist/lib/resources/aws/storage/ecr.js +5 -5
- package/dist/lib/resources/aws/storage/index.d.ts +1 -0
- package/dist/lib/resources/aws/storage/index.js +1 -0
- package/dist/lib/resources/aws/storage/s3.js +10 -3
- package/dist/lib/resources/aws/utilities/customResource.js +18 -9
- package/dist/lib/synth_dump.d.ts +1 -0
- package/dist/lib/synth_dump.js +42 -0
- package/dist/lib/utils/bastionFactory.d.ts +10 -0
- package/dist/lib/utils/bastionFactory.js +29 -0
- package/dist/lib/utils/capitaliseString.d.ts +1 -1
- package/dist/lib/utils/capitaliseString.js +1 -1
- package/dist/lib/utils/cdkContext.d.ts +10 -0
- package/dist/lib/utils/cdkContext.js +13 -0
- package/dist/lib/utils/connections.d.ts +7 -1
- package/dist/lib/utils/connections.js +21 -0
- package/dist/lib/utils/connector.d.ts +30 -2
- package/dist/lib/utils/connector.js +6 -1
- package/dist/lib/utils/costAllocationTags.d.ts +15 -0
- package/dist/lib/utils/costAllocationTags.js +16 -0
- package/dist/lib/utils/databaseTypes.d.ts +14 -0
- package/dist/lib/utils/getConfig.d.ts +2 -0
- package/dist/lib/utils/getConfig.js +2 -0
- package/dist/lib/utils/index.d.ts +4 -0
- package/dist/lib/utils/index.js +4 -0
- package/dist/lib/utils/manifestWriter.d.ts +6 -89
- package/dist/lib/utils/manifestWriter.js +36 -23
- package/dist/lib/utils/migrationVersionResolvers.d.ts +2 -0
- package/dist/lib/utils/migrationVersionResolvers.js +2 -0
- package/dist/lib/utils/orgConfigParser.js +2 -1
- package/dist/lib/utils/resolveAlertsTopic.d.ts +14 -0
- package/dist/lib/utils/resolveAlertsTopic.js +30 -0
- package/dist/lib/utils/validationLogger.js +6 -3
- package/dist/lib/utils/vpcPeerInterface.d.ts +22 -0
- package/dist/lib/utils/vpcPeerInterface.js +1 -0
- package/package.json +22 -18
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
import { Annotations, Stack } from "aws-cdk-lib";
|
|
2
|
+
import { Peer, Port, SubnetType } from "aws-cdk-lib/aws-ec2";
|
|
3
|
+
import { CfnResourcePolicy, StringListParameter, StringParameter } from "aws-cdk-lib/aws-ssm";
|
|
4
|
+
import { CIDR_REGEX, VALIDATION_PATTERNS } from "@fjall/generator";
|
|
5
|
+
import { buildSsmPrefix, DEFAULT_ORG_ID, resolveOrgId } from "../../utils/cdkContext.js";
|
|
6
|
+
import { VpcPeeringAccepterRole } from "../../resources/aws/networking/vpcPeeringAccepterRole.js";
|
|
7
|
+
import { isDatabase, isRelationalDatabase } from "./interfaces/database.js";
|
|
8
|
+
import { isCompute, isEcsCompute } from "./interfaces/compute.js";
|
|
9
|
+
/**
 * Factory for the accepter side of a VPC peering arrangement.
 *
 * `build` does no work itself; it returns a closure that is later invoked with
 * the app and the construct scope to create the resources.
 */
export class VpcPeerAccepterFactory {
    /**
     * Creates a deferred builder for the accepter role and its SSM metadata.
     *
     * @param id    Construct id for the accepter role; also used as the prefix
     *              for the ids of the SSM parameters and read policies.
     * @param props Reads `requesterAccountIds`, `localVpcName`,
     *              `costAllocationEnvironment`, `costAllocationDomain`,
     *              `publishToSsm` (defaults to true) and `exposedResources`
     *              (defaults to an empty list).
     * @returns A function `(app, scope) => VpcPeeringAccepterRole`.
     * @throws Error (when the returned function runs) if `requesterAccountIds`
     *         is empty or contains a value that fails
     *         `VALIDATION_PATTERNS.AWS_ACCOUNT_ID`.
     */
    static build(id, props) {
        return (app, scope) => {
            if (props.requesterAccountIds.length === 0) {
                throw new Error("VpcPeerAccepterFactory requires at least one requester account ID.");
            }
            for (const accountId of props.requesterAccountIds) {
                if (!VALIDATION_PATTERNS.AWS_ACCOUNT_ID.test(accountId)) {
                    throw new Error(`Invalid requester account ID "${accountId}". Must be a 12-digit AWS account ID.`);
                }
            }
            const localVpc = app.getVpc(props.localVpcName);
            const accepterRole = new VpcPeeringAccepterRole(scope, id, {
                requesterAccountIds: props.requesterAccountIds,
                localVpc,
                costAllocationEnvironment: props.costAllocationEnvironment,
                costAllocationDomain: props.costAllocationDomain
            });
            const orgId = resolveOrgId(app.node, DEFAULT_ORG_ID);
            const ssmPrefix = buildSsmPrefix(orgId, app.getName());
            const publishVpcMetadata = props.publishToSsm ?? true;
            if (publishVpcMetadata) {
                const vpcIdParam = new StringParameter(scope, `${id}VpcIdParam`, {
                    parameterName: `${ssmPrefix}/vpc-id`,
                    stringValue: localVpc.vpcId
                });
                const vpcCidrParam = new StringParameter(scope, `${id}VpcCidrParam`, {
                    parameterName: `${ssmPrefix}/vpc-cidr`,
                    stringValue: localVpc.vpcCidrBlock
                });
                const roleArnParam = new StringParameter(scope, `${id}PeeringRoleArnParam`, {
                    parameterName: `${ssmPrefix}/peering-role-arn`,
                    stringValue: accepterRole.roleArn
                });
                const privateSubnets = localVpc.selectSubnets({
                    subnetType: SubnetType.PRIVATE_WITH_EGRESS
                });
                // Several subnets can share one route table; publish each ID only
                // once (first-seen order) so the list never carries duplicates.
                const routeTableIds = [
                    ...new Set(privateSubnets.subnets.map((s) => s.routeTable.routeTableId))
                ];
                const routeTableIdsParam = new StringListParameter(scope, `${id}RouteTableIdsParam`, {
                    parameterName: `${ssmPrefix}/route-table-ids`,
                    stringListValue: routeTableIds
                });
                // The array order determines the numeric suffix of each read policy id.
                attachCrossAccountReadPolicy(scope, id, props.requesterAccountIds, [
                    vpcIdParam,
                    vpcCidrParam,
                    roleArnParam,
                    routeTableIdsParam
                ]);
            }
            applyExposedResources(scope, id, ssmPrefix, props.requesterAccountIds, props.exposedResources ?? []);
            return accepterRole;
        };
    }
}
|
|
62
|
+
/**
 * Validates and applies every `exposedResources` entry in order.
 *
 * For each entry: rejects a repeated `name`, rejects any CIDR that fails
 * `CIDR_REGEX`, resolves the entry via `resolveExposedResource`, and — when
 * that yields parameters — attaches a cross-account read policy to them in
 * the scope the resolver returned.
 *
 * @param accepterScope       Scope forwarded to `resolveExposedResource`.
 * @param id                  Prefix for generated construct ids.
 * @param ssmPrefix           Base SSM path; entries go under `<ssmPrefix>/resources/<name>`.
 * @param requesterAccountIds Account ids granted read access.
 * @param entries             Exposed-resource entries to process.
 * @throws Error on a duplicate name or malformed CIDR.
 */
function applyExposedResources(accepterScope, id, ssmPrefix, requesterAccountIds, entries) {
    const usedNames = new Set();
    entries.forEach((entry) => {
        // A Set only grows when the value is new, so an unchanged size means a duplicate.
        const sizeBefore = usedNames.size;
        usedNames.add(entry.name);
        if (usedNames.size === sizeBefore) {
            throw new Error(`Duplicate exposedResource name '${entry.name}' — each exposed resource must have a unique name.`);
        }
        const cidrs = entry.allowedFromCidrs;
        const firstBad = cidrs.findIndex((candidate) => !CIDR_REGEX.test(candidate));
        if (firstBad !== -1) {
            const cidr = cidrs[firstBad];
            throw new Error(`exposedResource '${entry.name}' has malformed CIDR '${cidr}' — expected dotted-quad/prefix-length form (e.g. 10.0.0.0/16).`);
        }
        const resourcePrefix = `${ssmPrefix}/resources/${entry.name}`;
        const constructPrefix = `${id}Exposed${entry.name}`;
        const resolved = resolveExposedResource(accepterScope, constructPrefix, resourcePrefix, entry);
        if (resolved.params.length > 0) {
            attachCrossAccountReadPolicy(resolved.scope, constructPrefix, requesterAccountIds, resolved.params);
        }
    });
}
|
|
82
|
+
/**
 * Dispatches one exposed-resource entry to the ECS or database handler.
 *
 * The presence of a `serviceName` key selects the ECS variant; otherwise the
 * entry is handled as a relational database. The entry's resource is checked
 * with the matching type guards before anything is created.
 *
 * @param accepterScope   Not used by this function.
 * @param constructPrefix Prefix for construct ids created by the handler.
 * @param resourcePrefix  SSM path prefix for this entry's parameters.
 * @param entry           The exposed-resource entry.
 * @returns `{ params, scope }` — the parameters the handler created and the
 *          stack (from `stackOfNode`) they were created in.
 * @throws Error if the resource does not pass the guards for its variant.
 */
function resolveExposedResource(accepterScope, constructPrefix, resourcePrefix, entry) {
    const target = entry.resource;
    const isEcsVariant = "serviceName" in entry;
    if (isEcsVariant) {
        const looksLikeEcs = isCompute(target) && isEcsCompute(target);
        if (!looksLikeEcs) {
            throw new Error(`exposedResource '${entry.name}' carries 'serviceName' (ECS variant) but its resource is not IEcsCompute.`);
        }
    }
    else {
        const looksRelational = isDatabase(target) && isRelationalDatabase(target);
        if (!looksRelational) {
            throw new Error(`exposedResource '${entry.name}' has no 'serviceName' (database variant) but its resource is not IRelationalDatabase.`);
        }
    }
    const scope = stackOfNode(target.node);
    const params = isEcsVariant
        ? applyEcsServiceExposure(scope, constructPrefix, resourcePrefix, entry)
        : applyDatabaseExposure(scope, constructPrefix, resourcePrefix, { ...entry, resource: target });
    return { params, scope };
}
|
|
103
|
+
/**
 * Opens TCP ingress to a database for each allowed CIDR and publishes its
 * connection details as SSM parameters.
 *
 * @param scope           Scope for annotations and the created parameters.
 * @param constructPrefix Prefix for the parameter construct ids.
 * @param resourcePrefix  SSM path prefix for the parameters.
 * @param entry           Reads `name`, `resource`, `allowedFromCidrs`, `access`.
 * @returns The parameters from `publishExposedResourceParams`, or an empty
 *          array (after a warning annotation) when no CIDRs are allowed.
 * @throws Error if the parsed host port is not an integer in 1-65535.
 */
function applyDatabaseExposure(scope, constructPrefix, resourcePrefix, entry) {
    const { resource: database, allowedFromCidrs: cidrs } = entry;
    // NOTE(review): parseInt yields NaN for any non-numeric string, which falls
    // into the error below — confirm getHostPort() always returns a concrete
    // numeric string here.
    const port = parseInt(database.getHostPort(), 10);
    const portInRange = Number.isInteger(port) && port >= 1 && port <= 65535;
    if (!portInRange) {
        throw new Error(`exposedResource '${entry.name}' resolved an out-of-range port from getHostPort() (must be 1-65535); cannot open ingress.`);
    }
    if (cidrs.length === 0) {
        Annotations.of(scope).addWarning(`exposedResource '${entry.name}' has no allowed CIDRs; consumers will not be able to reach it.`);
        return [];
    }
    cidrs.forEach((cidr) => {
        database.connections.allowFrom(Peer.ipv4(cidr), Port.tcp(port), `fjall:peer:${cidr}`);
    });
    const published = {
        kind: "relational-db",
        endpoint: database.getHostEndpoint(),
        port: String(port),
        access: entry.access
    };
    return publishExposedResourceParams(scope, constructPrefix, resourcePrefix, published);
}
|
|
122
|
+
/**
 * Opens TCP ingress to an ECS compute resource's load balancer for each
 * allowed CIDR and publishes its DNS name and port as SSM parameters.
 *
 * Port selection: `entry.port` if set, otherwise the value of
 * `getPrimaryListenerPort()`, otherwise 443. An info annotation is added when
 * the listener port was used.
 *
 * @param scope           Scope for annotations and the created parameters.
 * @param constructPrefix Prefix for the parameter construct ids.
 * @param resourcePrefix  SSM path prefix for the parameters.
 * @param entry           Reads `name`, `serviceName`, `resource`, `port`,
 *                        `allowedFromCidrs`.
 * @returns The parameters from `publishExposedResourceParams`, or an empty
 *          array (after a warning annotation) when no CIDRs are allowed.
 * @throws Error if `getLoadBalancer()` returns a falsy value.
 */
function applyEcsServiceExposure(scope, constructPrefix, resourcePrefix, entry) {
    const alb = entry.resource.getLoadBalancer();
    if (!alb) {
        throw new Error(`exposedResource '${entry.name}' targets ECS service '${entry.serviceName}' but the cluster has no load balancer (cluster.loadBalancer: false). Enable an internal ALB to expose this service.`);
    }
    // The listener is only consulted when the caller did not pin a port.
    let resolvedListenerPort;
    if (entry.port === undefined) {
        resolvedListenerPort = entry.resource.getPrimaryListenerPort();
    }
    const port = entry.port ?? resolvedListenerPort ?? 443;
    if (entry.port === undefined && resolvedListenerPort !== undefined) {
        Annotations.of(scope).addInfo(`exposedResource '${entry.name}' defaulted to listener port ${resolvedListenerPort}`);
    }
    const cidrs = entry.allowedFromCidrs;
    if (cidrs.length === 0) {
        Annotations.of(scope).addWarning(`exposedResource '${entry.name}' has no allowed CIDRs; consumers will not be able to reach it.`);
        return [];
    }
    cidrs.forEach((cidr) => {
        alb.connections.allowFrom(Peer.ipv4(cidr), Port.tcp(port), `fjall:peer:${cidr}`);
    });
    const published = {
        kind: "ecs-service",
        endpoint: alb.loadBalancerDnsName,
        port: String(port)
    };
    return publishExposedResourceParams(scope, constructPrefix, resourcePrefix, published);
}
|
|
147
|
+
/**
 * Creates the SSM string parameters that describe one exposed resource.
 *
 * Always creates `kind`, `endpoint` and `port` under `resourcePrefix`; an
 * `access` parameter is added only when `values.access` is not undefined.
 *
 * @param scope           Scope the parameters are created in.
 * @param constructPrefix Prefix for the parameter construct ids.
 * @param resourcePrefix  SSM path prefix for the parameter names.
 * @param values          Object with `kind`, `endpoint`, `port` and optional `access`.
 * @returns The created parameters in the order kind, endpoint, port[, access].
 */
function publishExposedResourceParams(scope, constructPrefix, resourcePrefix, values) {
    const publish = (constructId, parameterName, stringValue) => new StringParameter(scope, constructId, { parameterName, stringValue });
    // Array elements are evaluated left to right, so creation order matches return order.
    const created = [
        publish(`${constructPrefix}KindParam`, `${resourcePrefix}/kind`, values.kind),
        publish(`${constructPrefix}EndpointParam`, `${resourcePrefix}/endpoint`, values.endpoint),
        publish(`${constructPrefix}PortParam`, `${resourcePrefix}/port`, values.port)
    ];
    if (values.access !== undefined) {
        created.push(publish(`${constructPrefix}AccessParam`, `${resourcePrefix}/access`, values.access));
    }
    return created;
}
|
|
170
|
+
/**
 * Attaches one SSM resource policy per parameter, allowing the root principal
 * of each requester account to call `ssm:GetParameter` on that parameter.
 *
 * Policy construct ids are `<id>ReadPolicy<index>`, where `index` is the
 * parameter's position in `parameters`.
 *
 * NOTE(review): AWS documents cross-account parameter sharing as requiring
 * advanced-tier parameters; the tier of the parameters passed in is set by
 * the callers — confirm it before relying on these policies.
 *
 * @param scope               Scope the policies are created in; also supplies the partition.
 * @param id                  Prefix for the policy construct ids.
 * @param requesterAccountIds Account ids to grant read access to.
 * @param parameters          Parameters to attach a policy to.
 */
function attachCrossAccountReadPolicy(scope, id, requesterAccountIds, parameters) {
    const { partition } = Stack.of(scope);
    const principals = requesterAccountIds.map((accountId) => `arn:${partition}:iam::${accountId}:root`);
    parameters.forEach((parameter, index) => {
        const statement = {
            Effect: "Allow",
            Principal: { AWS: principals },
            Action: ["ssm:GetParameter"],
            Resource: parameter.parameterArn
        };
        new CfnResourcePolicy(scope, `${id}ReadPolicy${index}`, {
            resourceArn: parameter.parameterArn,
            policy: {
                Version: "2012-10-17",
                Statement: [statement]
            }
        });
    });
}
|
|
190
|
+
/**
 * Finds the CDK Stack that a construct node belongs to.
 *
 * Returns the first entry of `node.scopes` for which `Stack.isStack` is true.
 * NOTE(review): if `node.scopes` is ordered root-first, a construct inside a
 * nested stack resolves to the outermost stack — confirm that is intended.
 *
 * @param node Construct node whose `scopes` are searched.
 * @returns The first matching stack.
 * @throws Error if none of the scopes is a Stack.
 */
function stackOfNode(node) {
    for (const candidate of node.scopes) {
        if (Stack.isStack(candidate)) {
            return candidate;
        }
    }
    throw new Error("exposedResource resource is not bound to a CDK Stack — cannot derive scope for SSM publishing.");
}
|
|
@@ -5,13 +5,14 @@ import { InstanceType, SubnetType, Connections, Port, UserData } from "aws-cdk-l
|
|
|
5
5
|
import { AutoScalingGroup, Monitoring, BlockDeviceVolume, EbsDeviceVolumeType } from "aws-cdk-lib/aws-autoscaling";
|
|
6
6
|
import { Duration, Stack } from "aws-cdk-lib";
|
|
7
7
|
import { Construct } from "constructs";
|
|
8
|
-
import { RetentionDays } from "aws-cdk-lib/aws-logs";
|
|
8
|
+
import { LogGroup, RetentionDays } from "aws-cdk-lib/aws-logs";
|
|
9
9
|
import { S3Bucket } from "../storage/s3.js";
|
|
10
10
|
import { Secret } from "../secrets/secret.js";
|
|
11
11
|
import { vpcHasNatGateways } from "../../../utils/vpcUtils.js";
|
|
12
12
|
import { inferAmiHardwareType } from "../compute/ecsConstants.js";
|
|
13
13
|
import { createClickHouseSecurityGroup } from "./clickhouseSecurityGroup.js";
|
|
14
14
|
import { generateClickHouseUserData } from "./clickhouseUserData.js";
|
|
15
|
+
import { createClickHouseAlarms } from "./clickhouseAlarms.js";
|
|
15
16
|
import { CLICKHOUSE_CLUSTER_NAME, DEFAULT_CLICKHOUSE_INSTANCE_TYPE, CLICKHOUSE_IMAGE, CLICKHOUSE_EBS_VOLUME_SIZE_GB, CLICKHOUSE_EBS_IOPS, CLICKHOUSE_EBS_THROUGHPUT_MBPS, CLICKHOUSE_TASK_MEMORY_MIB, CLICKHOUSE_TASK_CPU_UNITS, CLICKHOUSE_HTTP_PORT, CLICKHOUSE_NATIVE_PORT, CLICKHOUSE_PROMETHEUS_PORT, CLICKHOUSE_DATA_MOUNT_PATH, CLICKHOUSE_SECRETS_PREFIX, CLICKHOUSE_SECRET_NAMES, CLICKHOUSE_SECRET_OPTIONS, CLICKHOUSE_HEALTH_CHECK, CLICKHOUSE_EBS_DEVICE_NAME, CLICKHOUSE_CONFIG_SUBDIR, CLICKHOUSE_USERS_SUBDIR, OPTIMISE_FINAL_SCHEDULE, REPLACING_MERGE_TREE_TABLES, OPTIMISE_MV_TABLES, CLICKHOUSE_CLOUDMAP_NAMESPACE, CLICKHOUSE_CLOUDMAP_SERVICE_NAME, OPTIMISE_TASK_MEMORY_MIB, OPTIMISE_TASK_CPU_UNITS, BACKUP_SCHEDULE, BACKUP_TASK_MEMORY_MIB, BACKUP_TASK_CPU_UNITS, BACKUP_RETENTION_DAYS } from "./clickhouseConstants.js";
|
|
16
17
|
function createClickHouseSecret(scope, id, secretKey, description) {
|
|
17
18
|
return new Secret(scope, id, {
|
|
@@ -237,7 +238,10 @@ export default class ClickHouse extends Construct {
|
|
|
237
238
|
});
|
|
238
239
|
// 11. Scheduled weekly backup to S3
|
|
239
240
|
const backupDestUrl = `https://${backupBucket.bucketName}.s3.${Stack.of(this).region}.amazonaws.com/`;
|
|
240
|
-
const
|
|
241
|
+
const backupTaskLogGroup = new LogGroup(this, "ClickHouseBackupTaskLogGroup", {
|
|
242
|
+
retention: RetentionDays.TWO_WEEKS
|
|
243
|
+
});
|
|
244
|
+
new ScheduledEc2Task(this, "ClickHouseBackupTask", {
|
|
241
245
|
cluster,
|
|
242
246
|
schedule: Schedule.expression(BACKUP_SCHEDULE),
|
|
243
247
|
scheduledEc2TaskImageOptions: {
|
|
@@ -254,7 +258,7 @@ export default class ClickHouse extends Construct {
|
|
|
254
258
|
},
|
|
255
259
|
logDriver: LogDriver.awsLogs({
|
|
256
260
|
streamPrefix: "clickhouse-backup",
|
|
257
|
-
|
|
261
|
+
logGroup: backupTaskLogGroup
|
|
258
262
|
})
|
|
259
263
|
},
|
|
260
264
|
securityGroups: [securityGroup],
|
|
@@ -262,9 +266,11 @@ export default class ClickHouse extends Construct {
|
|
|
262
266
|
subnetType
|
|
263
267
|
}
|
|
264
268
|
});
|
|
265
|
-
//
|
|
266
|
-
|
|
267
|
-
//
|
|
269
|
+
// BACKUP DATABASE TO S3 runs inside the ClickHouse server process on the
|
|
270
|
+
// ASG instance, not the ephemeral backup task; the grant must therefore
|
|
271
|
+
// attach to the ASG instance role, not the task role.
|
|
272
|
+
backupBucket.grantReadWrite(asg.role);
|
|
273
|
+
// 12. Grant secret read to execution role
|
|
268
274
|
const executionRole = taskDefinition.executionRole;
|
|
269
275
|
if (!executionRole) {
|
|
270
276
|
throw new Error("ClickHouse task definition has no execution role — cannot grant secret access");
|
|
@@ -273,7 +279,19 @@ export default class ClickHouse extends Construct {
|
|
|
273
279
|
auditPasswordSecret.secret.grantRead(executionRole);
|
|
274
280
|
backupPasswordSecret.secret.grantRead(executionRole);
|
|
275
281
|
schemaPasswordSecret.secret.grantRead(executionRole);
|
|
276
|
-
|
|
282
|
+
if (props.alarmTopic) {
|
|
283
|
+
if (!props.webappLogGroup) {
|
|
284
|
+
throw new Error("ClickHouse: alarmTopic requires webappLogGroup so the stuck-merge metric filter can be wired.");
|
|
285
|
+
}
|
|
286
|
+
createClickHouseAlarms({
|
|
287
|
+
scope: this,
|
|
288
|
+
asg,
|
|
289
|
+
alarmTopic: props.alarmTopic,
|
|
290
|
+
webappLogGroup: props.webappLogGroup,
|
|
291
|
+
backupTaskLogGroup
|
|
292
|
+
});
|
|
293
|
+
}
|
|
294
|
+
// 13. Connections and outputs
|
|
277
295
|
this.connections = new Connections({
|
|
278
296
|
securityGroups: [securityGroup],
|
|
279
297
|
defaultPort: Port.tcp(CLICKHOUSE_HTTP_PORT)
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { Alarm } from "aws-cdk-lib/aws-cloudwatch";
|
|
2
|
+
import type { AutoScalingGroup } from "aws-cdk-lib/aws-autoscaling";
|
|
3
|
+
import type { ITopic } from "aws-cdk-lib/aws-sns";
|
|
4
|
+
import type { ILogGroup } from "aws-cdk-lib/aws-logs";
|
|
5
|
+
import type { Construct } from "constructs";
|
|
6
|
+
export interface ClickHouseAlarmThresholds {
|
|
7
|
+
/** EC2 host CPU % over 5 min. Default 90. */
|
|
8
|
+
cpuThreshold?: number;
|
|
9
|
+
/** EC2 host memory % over 5 min (requires CWAgent). Default 80. */
|
|
10
|
+
memoryThreshold?: number;
|
|
11
|
+
/** EBS root-volume disk % used. Default 70 (warn) — paired with critical at 85. */
|
|
12
|
+
diskWarnThreshold?: number;
|
|
13
|
+
/** EBS root-volume disk % used. Default 85. */
|
|
14
|
+
diskCriticalThreshold?: number;
|
|
15
|
+
}
|
|
16
|
+
export interface ClickHouseAlarmsProps {
|
|
17
|
+
scope: Construct;
|
|
18
|
+
asg: AutoScalingGroup;
|
|
19
|
+
alarmTopic: ITopic;
|
|
20
|
+
/**
|
|
21
|
+
* Webapp log group. Required to wire the stuck-merge alarm — `client.ts`
|
|
22
|
+
* emits `serverLogger.warn("ClickHouse", "Stuck merge detected")` when
|
|
23
|
+
* `system.merges` shows a merge elapsed > 30 min.
|
|
24
|
+
*/
|
|
25
|
+
webappLogGroup: ILogGroup;
|
|
26
|
+
/**
|
|
27
|
+
* Backup-task log group. Required to wire the backup-failure alarm —
|
|
28
|
+
* `BACKUP DATABASE … TO S3(…)` emits `AccessDenied` / `S3Exception` lines
|
|
29
|
+
* when the IAM grant or bucket policy is misconfigured (silent before the
|
|
30
|
+
* alarm landed; the scheduled backup task exited non-zero with no signal).
|
|
31
|
+
*/
|
|
32
|
+
backupTaskLogGroup: ILogGroup;
|
|
33
|
+
config?: ClickHouseAlarmThresholds;
|
|
34
|
+
}
|
|
35
|
+
/**
 * Builds the posture alarms for a single-node ClickHouse host.
 *
 * Host-level coverage:
 * - CPU utilisation from `AWS/EC2`.
 * - Memory and disk utilisation from the CloudWatch Agent namespace `CWAgent`
 *   (disk has a warn alarm and a critical alarm).
 *
 * Log-driven coverage:
 * - **Stuck merges** — `client.ts` polls `system.merges` every 5 min and logs
 *   `serverLogger.warn("ClickHouse", "Stuck merge detected")` once a merge has
 *   been running for more than 30 min. A metric filter on the webapp log
 *   group counts the matches; the alarm fires on Sum >= 1 over
 *   5 min × 2 evaluations.
 * - **Backup failures** — `AccessDenied` or `S3Exception` printed by the
 *   backup task's BACKUP DATABASE TO S3 statement. Closes the silent-failure
 *   mode that masked the original IAM-grant misconfiguration (see
 *   `designs/2026-04-27-clickhouse-backup-iam-role.md`).
 *
 * @param props Scope, Auto Scaling group, alarm topic, log groups and
 *   optional threshold overrides.
 * @returns The alarms that were created.
 */
export declare function createClickHouseAlarms(props: ClickHouseAlarmsProps): Alarm[];
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import { Duration } from "aws-cdk-lib";
|
|
2
|
+
import { Alarm, ComparisonOperator, TreatMissingData } from "aws-cdk-lib/aws-cloudwatch";
|
|
3
|
+
import { SnsAction } from "aws-cdk-lib/aws-cloudwatch-actions";
|
|
4
|
+
import { Metric } from "aws-cdk-lib/aws-cloudwatch";
|
|
5
|
+
import { FilterPattern, MetricFilter } from "aws-cdk-lib/aws-logs";
|
|
6
|
+
import { ALARM_DEFAULTS, registerAlarm, buildAlarmDescription } from "../monitoring/alarmDefaults.js";
|
|
7
|
+
const CLICKHOUSE_METRIC_NAMESPACE = "Fjall/ClickHouse";
|
|
8
|
+
/**
 * Single-node ClickHouse posture alarms.
 *
 * Creates six alarms under `props.scope`, each registered with an SNS action
 * that targets `props.alarmTopic`:
 *
 * - **CPU** — `AWS/EC2` `CPUUtilization`, threshold `config.cpuThreshold ?? 90`.
 * - **Memory** — `CWAgent` `mem_used_percent`, threshold
 *   `config.memoryThreshold ?? 80`.
 * - **Disk (warn)** — `CWAgent` `disk_used_percent`, threshold
 *   `config.diskWarnThreshold ?? 70`, 15-minute periods.
 * - **Disk (critical)** — `CWAgent` `disk_used_percent`, threshold
 *   `config.diskCriticalThreshold ?? 85`, 5-minute periods.
 * - **Stuck merges** — `client.ts` polls `system.merges` every 5 min and logs
 *   `serverLogger.warn("ClickHouse", "Stuck merge detected")` when elapsed
 *   exceeds 30 min. The metric filter on the webapp log group emits a count
 *   metric per match; the alarm fires on Sum >= 1 over 5 min × 2 evaluations.
 * - **Backup failures** — `AccessDenied` or `S3Exception` from the backup
 *   task's BACKUP DATABASE TO S3 statement. Closes the silent-failure mode
 *   that masked the original IAM-grant misconfiguration (see
 *   `designs/2026-04-27-clickhouse-backup-iam-role.md`).
 *
 * Every alarm treats missing data as not breaching, so a metric that is never
 * published (for example when the CloudWatch Agent is not running) keeps its
 * alarm out of ALARM rather than raising it.
 *
 * @param props Alarm inputs: `scope`, `asg`, `alarmTopic`, `webappLogGroup`,
 *   `backupTaskLogGroup` and optional `config` threshold overrides.
 * @returns The array handed to `registerAlarm` for each alarm (presumably
 *   filled with the six alarms in creation order — confirm against
 *   `alarmDefaults.js`).
 */
export function createClickHouseAlarms(props) {
    const { scope, asg, alarmTopic, webappLogGroup, backupTaskLogGroup, config = {} } = props;
    const alarms = [];
    const snsAction = new SnsAction(alarmTopic);
    const asgName = asg.autoScalingGroupName;
    // Resolve the disk thresholds once so that each alarm description reports
    // the value actually enforced. The text used to hard-code 70% / 85% and
    // became wrong as soon as a caller overrode the threshold. With the
    // defaults the generated descriptions are unchanged.
    const diskWarnThreshold = config.diskWarnThreshold ?? 70;
    const diskCriticalThreshold = config.diskCriticalThreshold ?? 85;
    const cpuAlarm = new Alarm(scope, "ClickHouseCpuAlarm", {
        alarmDescription: buildAlarmDescription("ClickHouse host CPU utilisation exceeds threshold", undefined),
        metric: new Metric({
            namespace: "AWS/EC2",
            metricName: "CPUUtilization",
            dimensionsMap: { AutoScalingGroupName: asgName },
            period: ALARM_DEFAULTS.EVALUATION_PERIOD,
            statistic: "Average"
        }),
        threshold: config.cpuThreshold ?? 90,
        evaluationPeriods: 3,
        datapointsToAlarm: 2,
        comparisonOperator: ComparisonOperator.GREATER_THAN_THRESHOLD,
        treatMissingData: TreatMissingData.NOT_BREACHING
    });
    registerAlarm(cpuAlarm, snsAction, alarms);
    // NOTE(review): the CWAgent metrics below are looked up with only the
    // AutoScalingGroupName dimension. The agent normally publishes
    // disk_used_percent with extra dimensions (path/device/fstype), so this
    // presumably relies on the agent config aggregating by
    // AutoScalingGroupName — confirm, because a dimension mismatch combined
    // with NOT_BREACHING would leave these alarms silently inert.
    const memoryAlarm = new Alarm(scope, "ClickHouseMemoryAlarm", {
        alarmDescription: buildAlarmDescription("ClickHouse host memory utilisation exceeds threshold (CWAgent)", undefined),
        metric: new Metric({
            namespace: "CWAgent",
            metricName: "mem_used_percent",
            dimensionsMap: { AutoScalingGroupName: asgName },
            period: ALARM_DEFAULTS.EVALUATION_PERIOD,
            statistic: "Average"
        }),
        threshold: config.memoryThreshold ?? 80,
        evaluationPeriods: 3,
        datapointsToAlarm: 2,
        comparisonOperator: ComparisonOperator.GREATER_THAN_THRESHOLD,
        treatMissingData: TreatMissingData.NOT_BREACHING
    });
    registerAlarm(memoryAlarm, snsAction, alarms);
    // Warn tier: slower 15-minute periods, meant as a capacity-planning signal.
    const diskWarnAlarm = new Alarm(scope, "ClickHouseDiskWarnAlarm", {
        alarmDescription: buildAlarmDescription(`ClickHouse data volume above ${diskWarnThreshold}% used — plan growth response`, undefined),
        metric: new Metric({
            namespace: "CWAgent",
            metricName: "disk_used_percent",
            dimensionsMap: { AutoScalingGroupName: asgName },
            period: Duration.minutes(15),
            statistic: "Average"
        }),
        threshold: diskWarnThreshold,
        evaluationPeriods: 2,
        datapointsToAlarm: 2,
        comparisonOperator: ComparisonOperator.GREATER_THAN_THRESHOLD,
        treatMissingData: TreatMissingData.NOT_BREACHING
    });
    registerAlarm(diskWarnAlarm, snsAction, alarms);
    // Critical tier: same metric on 5-minute periods for a faster reaction.
    const diskCriticalAlarm = new Alarm(scope, "ClickHouseDiskCriticalAlarm", {
        alarmDescription: buildAlarmDescription(`ClickHouse data volume above ${diskCriticalThreshold}% used — imminent insert failures`, undefined),
        metric: new Metric({
            namespace: "CWAgent",
            metricName: "disk_used_percent",
            dimensionsMap: { AutoScalingGroupName: asgName },
            period: Duration.minutes(5),
            statistic: "Average"
        }),
        threshold: diskCriticalThreshold,
        evaluationPeriods: 2,
        datapointsToAlarm: 2,
        comparisonOperator: ComparisonOperator.GREATER_THAN_THRESHOLD,
        treatMissingData: TreatMissingData.NOT_BREACHING
    });
    registerAlarm(diskCriticalAlarm, snsAction, alarms);
    // Stuck merges: count "Stuck merge detected" lines in the webapp log group.
    const stuckMergeMetricName = "ClickHouseStuckMergeCount";
    new MetricFilter(scope, "ClickHouseStuckMergeMetricFilter", {
        logGroup: webappLogGroup,
        metricNamespace: CLICKHOUSE_METRIC_NAMESPACE,
        metricName: stuckMergeMetricName,
        filterPattern: FilterPattern.literal('"Stuck merge detected"'),
        metricValue: "1",
        defaultValue: 0
    });
    // NOTE(review): this needs a match in two consecutive 5-minute periods
    // while the poller itself runs every 5 min, so poll jitter could leave one
    // period empty and delay the alarm — confirm that is acceptable.
    const stuckMergeAlarm = new Alarm(scope, "ClickHouseStuckMergeAlarm", {
        alarmDescription: buildAlarmDescription("ClickHouse merge stuck > 30 min — investigate parts pressure or replica health", undefined),
        metric: new Metric({
            namespace: CLICKHOUSE_METRIC_NAMESPACE,
            metricName: stuckMergeMetricName,
            period: Duration.minutes(5),
            statistic: "Sum"
        }),
        threshold: 1,
        evaluationPeriods: 2,
        datapointsToAlarm: 2,
        comparisonOperator: ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
        treatMissingData: TreatMissingData.NOT_BREACHING
    });
    registerAlarm(stuckMergeAlarm, snsAction, alarms);
    // Backup failures: any AccessDenied / S3Exception line in the backup-task
    // log group. A single match within the hour is enough to alarm.
    const backupFailureMetricName = "ClickHouseBackupFailureCount";
    new MetricFilter(scope, "ClickHouseBackupFailureMetricFilter", {
        logGroup: backupTaskLogGroup,
        metricNamespace: CLICKHOUSE_METRIC_NAMESPACE,
        metricName: backupFailureMetricName,
        filterPattern: FilterPattern.anyTerm("AccessDenied", "S3Exception"),
        metricValue: "1",
        defaultValue: 0
    });
    const backupFailureAlarm = new Alarm(scope, "ClickHouseBackupFailureAlarm", {
        alarmDescription: buildAlarmDescription("ClickHouse BACKUP TO S3 emitted AccessDenied/S3Exception — verify ASG instance role grant on backup bucket", undefined),
        metric: new Metric({
            namespace: CLICKHOUSE_METRIC_NAMESPACE,
            metricName: backupFailureMetricName,
            period: Duration.hours(1),
            statistic: "Sum"
        }),
        threshold: 1,
        evaluationPeriods: 1,
        datapointsToAlarm: 1,
        comparisonOperator: ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
        treatMissingData: TreatMissingData.NOT_BREACHING
    });
    registerAlarm(backupFailureAlarm, snsAction, alarms);
    return alarms;
}
|
|
@@ -60,14 +60,14 @@ export declare const CLICKHOUSE_CLOUDMAP_SERVICE_NAME = "clickhouse";
|
|
|
60
60
|
/** Materialised views that benefit from periodic OPTIMIZE to reduce part count at read time.
|
|
61
61
|
* These are not ReplacingMergeTree (no dedup needed) but un-merged parts force
|
|
62
62
|
* read-time aggregation which degrades query performance. */
|
|
63
|
-
export declare const OPTIMISE_MV_TABLES: readonly ["metrics_hourly_mv", "metrics_daily_mv", "response_time_quantiles_hourly_mv", "deployment_duration_quantiles_daily_mv", "log_severity_hourly_mv", "compliance_score_daily_mv", "ai_usage_daily_mv"];
|
|
63
|
+
export declare const OPTIMISE_MV_TABLES: readonly ["metrics_hourly_mv", "metrics_daily_mv", "response_time_quantiles_hourly_mv", "deployment_duration_quantiles_daily_mv", "log_severity_hourly_mv", "compliance_score_daily_mv", "ai_usage_daily_mv", "finding_daily_aggregate", "insight_pattern_dismissals"];
|
|
64
64
|
/** Resource allocation for the lightweight optimise task. */
|
|
65
65
|
export declare const OPTIMISE_TASK_MEMORY_MIB = 256;
|
|
66
66
|
export declare const OPTIMISE_TASK_CPU_UNITS = 256;
|
|
67
|
-
/** Automated backup schedule (
|
|
68
|
-
export declare const BACKUP_SCHEDULE = "cron(0 3
|
|
67
|
+
/** Automated backup schedule (daily 03:00 UTC — low-traffic window). */
|
|
68
|
+
export declare const BACKUP_SCHEDULE = "cron(0 3 * * ? *)";
|
|
69
69
|
/** Resource allocation for the backup task (lightweight — clickhouse-client only). */
|
|
70
70
|
export declare const BACKUP_TASK_MEMORY_MIB = 256;
|
|
71
71
|
export declare const BACKUP_TASK_CPU_UNITS = 256;
|
|
72
|
-
/** Backup object expiration: 14 days (retains
|
|
72
|
+
/** Backup object expiration: 14 days (retains 14 daily snapshots). */
|
|
73
73
|
export declare const BACKUP_RETENTION_DAYS = 14;
|
|
@@ -73,15 +73,17 @@ export const OPTIMISE_MV_TABLES = [
|
|
|
73
73
|
"deployment_duration_quantiles_daily_mv",
|
|
74
74
|
"log_severity_hourly_mv",
|
|
75
75
|
"compliance_score_daily_mv",
|
|
76
|
-
"ai_usage_daily_mv"
|
|
76
|
+
"ai_usage_daily_mv",
|
|
77
|
+
"finding_daily_aggregate",
|
|
78
|
+
"insight_pattern_dismissals"
|
|
77
79
|
];
|
|
78
80
|
/** Resource allocation for the lightweight optimise task. */
|
|
79
81
|
export const OPTIMISE_TASK_MEMORY_MIB = 256;
|
|
80
82
|
export const OPTIMISE_TASK_CPU_UNITS = 256;
|
|
81
|
-
/** Automated backup schedule (
|
|
82
|
-
export const BACKUP_SCHEDULE = "cron(0 3
|
|
83
|
+
/** Automated backup schedule (daily 03:00 UTC — low-traffic window). */
|
|
84
|
+
export const BACKUP_SCHEDULE = "cron(0 3 * * ? *)";
|
|
83
85
|
/** Resource allocation for the backup task (lightweight — clickhouse-client only). */
|
|
84
86
|
export const BACKUP_TASK_MEMORY_MIB = 256;
|
|
85
87
|
export const BACKUP_TASK_CPU_UNITS = 256;
|
|
86
|
-
/** Backup object expiration: 14 days (retains
|
|
88
|
+
/** Backup object expiration: 14 days (retains 14 daily snapshots). */
|
|
87
89
|
export const BACKUP_RETENTION_DAYS = 14;
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import type { IVpc, ISecurityGroup } from "aws-cdk-lib/aws-ec2";
|
|
2
|
+
import type { ILogGroup } from "aws-cdk-lib/aws-logs";
|
|
2
3
|
import type { IBucket } from "aws-cdk-lib/aws-s3";
|
|
3
4
|
import type { ISecret } from "aws-cdk-lib/aws-secretsmanager";
|
|
5
|
+
import type { ITopic } from "aws-cdk-lib/aws-sns";
|
|
4
6
|
/** Props for the ClickHouse CDK construct. */
|
|
5
7
|
export interface ClickHouseProps {
|
|
6
8
|
/** VPC to deploy into. */
|
|
@@ -21,6 +23,16 @@ export interface ClickHouseProps {
|
|
|
21
23
|
* If omitted, tiered storage is disabled (local-only).
|
|
22
24
|
*/
|
|
23
25
|
r2Config?: ClickHouseR2Config;
|
|
26
|
+
/**
|
|
27
|
+
* SNS topic for CloudWatch alarms (CPU, memory, disk, stuck merges).
|
|
28
|
+
* If omitted, posture alarms are not created.
|
|
29
|
+
*/
|
|
30
|
+
alarmTopic?: ITopic;
|
|
31
|
+
/**
|
|
32
|
+
* Webapp log group, required when `alarmTopic` is set so the stuck-merge
|
|
33
|
+
* metric filter can read the structured warning emitted by `client.ts`.
|
|
34
|
+
*/
|
|
35
|
+
webappLogGroup?: ILogGroup;
|
|
24
36
|
}
|
|
25
37
|
/** Cloudflare R2 configuration for tiered storage and backups. */
|
|
26
38
|
export interface ClickHouseR2Config {
|
|
@@ -2,4 +2,5 @@ export interface ClickHouseUserDataOptions {
|
|
|
2
2
|
/** Cloudflare account ID for R2 cold storage. If omitted, local-only storage is used. */
|
|
3
3
|
cfAccountId?: string;
|
|
4
4
|
}
|
|
5
|
+
export declare const USERS_CONFIG_XML = "<clickhouse>\n <users>\n <default>\n <networks>\n <ip>127.0.0.1</ip>\n <ip>::1</ip>\n </networks>\n </default>\n </users>\n <profiles>\n <default>\n <optimize_move_to_prewhere>1</optimize_move_to_prewhere>\n <!-- ALTER TABLE ... MODIFY TTL on a 30-day-partitioned table would otherwise\n trigger an immediate full-table rewrite (default = 1). On the t4g.medium\n box that's a merge-pool starvation event. Keep TTL changes lazy: parts\n re-evaluate TTL on their next natural merge, no forced rewrite. -->\n <materialize_ttl_after_modify>0</materialize_ttl_after_modify>\n </default>\n <app_writer>\n <max_threads>2</max_threads>\n <max_insert_threads>1</max_insert_threads>\n <max_concurrent_queries_for_user>4</max_concurrent_queries_for_user>\n <log_queries_min_query_duration_ms>100</log_queries_min_query_duration_ms>\n <optimize_move_to_prewhere>1</optimize_move_to_prewhere>\n <use_query_condition_cache>1</use_query_condition_cache>\n <!-- Re-enable skip indexes under FINAL (tenantQuery auto-FINALs RMT tables;\n default disables idx_aws_account, idx_application, idx_dedup, idx_fingerprint). -->\n <use_skip_indexes_if_final>1</use_skip_indexes_if_final>\n <!-- Tenant-isolation guards (ClickHouse PR #91065 fix). Belt-and-braces with the\n per-user SQL SETTINGS in 002-users.sql \u2014 keep both so `CREATE OR REPLACE USER`\n cannot regress this. Without these flags, FINAL queries on un-merged\n ReplacingMergeTree parts can leak across tenants. -->\n <apply_row_policy_after_final>1</apply_row_policy_after_final>\n <apply_prewhere_after_final>1</apply_prewhere_after_final>\n <do_not_merge_across_partitions_select_final>1</do_not_merge_across_partitions_select_final>\n <async_insert>1</async_insert>\n <wait_for_async_insert>1</wait_for_async_insert>\n <async_insert_max_data_size>10000000</async_insert_max_data_size>\n <!-- Adaptive batching: tune flush window between 50 ms (low-latency rare inserts)\n and 2 s (absorbs bursts). 
A single fixed value is silently overridden by the\n adaptive algorithm. -->\n <async_insert_busy_timeout_min_ms>50</async_insert_busy_timeout_min_ms>\n <async_insert_busy_timeout_max_ms>2000</async_insert_busy_timeout_max_ms>\n <async_insert_use_adaptive_busy_timeout>1</async_insert_use_adaptive_busy_timeout>\n <!-- Server-side deduplication of async inserts. Latent retry safety net:\n if a producer retries the same insert window (network hiccup, lambda re-run,\n SQS redelivery), the second attempt collapses against the first. As of CH 26.1\n this also propagates end-to-end through dependent materialised views \u2014 without\n it, a retried insert could double-count in metrics_hourly_mv / log_severity_hourly_mv\n even if the base table dedups. CH pin is 26.3 so the propagation fix is in. -->\n <async_insert_deduplicate>1</async_insert_deduplicate>\n <input_format_parallel_parsing>0</input_format_parallel_parsing>\n <output_format_parallel_formatting>0</output_format_parallel_formatting>\n <!-- Lazy materialisation (CH 25.4+): for `SELECT * ... LIMIT N` shapes the planner\n reads only the columns needed to evaluate ORDER BY / WHERE, then fetches the\n remaining columns for the surviving N rows. Order-of-magnitude I/O reduction\n on dashboard queries (e.g. getLatestMetrics LIMIT 1 BY application_id). -->\n <query_plan_optimize_lazy_materialization>1</query_plan_optimize_lazy_materialization>\n <!-- Per-query memory cap (overrides server-wide max_memory_usage of 1 GB\n to give app_writer 2 GB headroom). Belt-and-braces with the inline\n SETTINGS in 002-users.sql so neither layer can drift alone. -->\n <max_memory_usage>2000000000</max_memory_usage>\n <max_memory_usage_for_user>2684354560</max_memory_usage_for_user>\n <max_bytes_before_external_sort>536870912</max_bytes_before_external_sort>\n <max_bytes_before_external_group_by>536870912</max_bytes_before_external_group_by>\n <!-- Per-query caps. 
Belt-and-braces with the inline SETTINGS in\n 002-users.sql so `CREATE OR REPLACE USER` cannot regress the bound. -->\n <max_execution_time>30</max_execution_time>\n <max_rows_to_read>10000000</max_rows_to_read>\n </app_writer>\n <audit_writer>\n <max_threads>1</max_threads>\n <max_insert_threads>1</max_insert_threads>\n <max_concurrent_queries_for_user>2</max_concurrent_queries_for_user>\n <max_memory_usage>500000000</max_memory_usage>\n <max_execution_time>10</max_execution_time>\n <async_insert>1</async_insert>\n <wait_for_async_insert>1</wait_for_async_insert>\n </audit_writer>\n <backup_reader>\n <max_threads>2</max_threads>\n <max_concurrent_queries_for_user>1</max_concurrent_queries_for_user>\n <max_memory_usage>1000000000</max_memory_usage>\n <max_execution_time>3600</max_execution_time>\n </backup_reader>\n <schema_admin>\n <max_threads>2</max_threads>\n <max_concurrent_queries_for_user>1</max_concurrent_queries_for_user>\n <max_memory_usage>1000000000</max_memory_usage>\n <max_execution_time>1800</max_execution_time>\n </schema_admin>\n </profiles>\n <quotas>\n <tenant_default>\n <interval>\n <duration>3600</duration>\n <queries>1000</queries>\n <result_rows>10000000</result_rows>\n </interval>\n </tenant_default>\n </quotas>\n</clickhouse>";
|
|
5
6
|
export declare function generateClickHouseUserData(options?: ClickHouseUserDataOptions): string;
|
|
@@ -96,7 +96,6 @@ function generateServerConfigXml(cfAccountId) {
|
|
|
96
96
|
</merge_tree>
|
|
97
97
|
<http_port>${CLICKHOUSE_HTTP_PORT}</http_port>
|
|
98
98
|
<custom_settings_prefixes>current_</custom_settings_prefixes>
|
|
99
|
-
<allow_experimental_full_text_index>1</allow_experimental_full_text_index>
|
|
100
99
|
<!-- HTTP keep-alive window. Must exceed @clickhouse/client idle_socket_ttl (15 s)
|
|
101
100
|
so the client always closes the socket first. Prevents ECONNRESET on reuse. -->
|
|
102
101
|
<keep_alive_timeout>30</keep_alive_timeout>
|
|
@@ -145,7 +144,7 @@ ${storageBlock}
|
|
|
145
144
|
<processors_profile_log remove="1"/>
|
|
146
145
|
</clickhouse>`;
|
|
147
146
|
}
|
|
148
|
-
const USERS_CONFIG_XML = `<clickhouse>
|
|
147
|
+
export const USERS_CONFIG_XML = `<clickhouse>
|
|
149
148
|
<users>
|
|
150
149
|
<default>
|
|
151
150
|
<networks>
|
|
@@ -157,15 +156,29 @@ const USERS_CONFIG_XML = `<clickhouse>
|
|
|
157
156
|
<profiles>
|
|
158
157
|
<default>
|
|
159
158
|
<optimize_move_to_prewhere>1</optimize_move_to_prewhere>
|
|
159
|
+
<!-- ALTER TABLE ... MODIFY TTL on a 30-day-partitioned table would otherwise
|
|
160
|
+
trigger an immediate full-table rewrite (default = 1). On the t4g.medium
|
|
161
|
+
box that's a merge-pool starvation event. Keep TTL changes lazy: parts
|
|
162
|
+
re-evaluate TTL on their next natural merge, no forced rewrite. -->
|
|
163
|
+
<materialize_ttl_after_modify>0</materialize_ttl_after_modify>
|
|
160
164
|
</default>
|
|
161
165
|
<app_writer>
|
|
162
166
|
<max_threads>2</max_threads>
|
|
163
167
|
<max_insert_threads>1</max_insert_threads>
|
|
168
|
+
<max_concurrent_queries_for_user>4</max_concurrent_queries_for_user>
|
|
169
|
+
<log_queries_min_query_duration_ms>100</log_queries_min_query_duration_ms>
|
|
164
170
|
<optimize_move_to_prewhere>1</optimize_move_to_prewhere>
|
|
165
171
|
<use_query_condition_cache>1</use_query_condition_cache>
|
|
166
172
|
<!-- Re-enable skip indexes under FINAL (tenantQuery auto-FINALs RMT tables;
|
|
167
173
|
default disables idx_aws_account, idx_application, idx_dedup, idx_fingerprint). -->
|
|
168
174
|
<use_skip_indexes_if_final>1</use_skip_indexes_if_final>
|
|
175
|
+
<!-- Tenant-isolation guards (ClickHouse PR #91065 fix). Belt-and-braces with the
|
|
176
|
+
per-user SQL SETTINGS in 002-users.sql — keep both so \`CREATE OR REPLACE USER\`
|
|
177
|
+
cannot regress this. Without these flags, FINAL queries on un-merged
|
|
178
|
+
ReplacingMergeTree parts can leak across tenants. -->
|
|
179
|
+
<apply_row_policy_after_final>1</apply_row_policy_after_final>
|
|
180
|
+
<apply_prewhere_after_final>1</apply_prewhere_after_final>
|
|
181
|
+
<do_not_merge_across_partitions_select_final>1</do_not_merge_across_partitions_select_final>
|
|
169
182
|
<async_insert>1</async_insert>
|
|
170
183
|
<wait_for_async_insert>1</wait_for_async_insert>
|
|
171
184
|
<async_insert_max_data_size>10000000</async_insert_max_data_size>
|
|
@@ -175,15 +188,53 @@ const USERS_CONFIG_XML = `<clickhouse>
|
|
|
175
188
|
<async_insert_busy_timeout_min_ms>50</async_insert_busy_timeout_min_ms>
|
|
176
189
|
<async_insert_busy_timeout_max_ms>2000</async_insert_busy_timeout_max_ms>
|
|
177
190
|
<async_insert_use_adaptive_busy_timeout>1</async_insert_use_adaptive_busy_timeout>
|
|
191
|
+
<!-- Server-side deduplication of async inserts. Latent retry safety net:
|
|
192
|
+
if a producer retries the same insert window (network hiccup, lambda re-run,
|
|
193
|
+
SQS redelivery), the second attempt collapses against the first. As of CH 26.1
|
|
194
|
+
this also propagates end-to-end through dependent materialised views — without
|
|
195
|
+
it, a retried insert could double-count in metrics_hourly_mv / log_severity_hourly_mv
|
|
196
|
+
even if the base table dedups. CH pin is 26.3 so the propagation fix is in. -->
|
|
197
|
+
<async_insert_deduplicate>1</async_insert_deduplicate>
|
|
178
198
|
<input_format_parallel_parsing>0</input_format_parallel_parsing>
|
|
179
199
|
<output_format_parallel_formatting>0</output_format_parallel_formatting>
|
|
200
|
+
<!-- Lazy materialisation (CH 25.4+): for \`SELECT * ... LIMIT N\` shapes the planner
|
|
201
|
+
reads only the columns needed to evaluate ORDER BY / WHERE, then fetches the
|
|
202
|
+
remaining columns for the surviving N rows. Order-of-magnitude I/O reduction
|
|
203
|
+
on dashboard queries (e.g. getLatestMetrics LIMIT 1 BY application_id). -->
|
|
204
|
+
<query_plan_optimize_lazy_materialization>1</query_plan_optimize_lazy_materialization>
|
|
205
|
+
<!-- Per-query memory cap (overrides server-wide max_memory_usage of 1 GB
|
|
206
|
+
to give app_writer 2 GB headroom). Belt-and-braces with the inline
|
|
207
|
+
SETTINGS in 002-users.sql so neither layer can drift alone. -->
|
|
208
|
+
<max_memory_usage>2000000000</max_memory_usage>
|
|
180
209
|
<max_memory_usage_for_user>2684354560</max_memory_usage_for_user>
|
|
181
210
|
<max_bytes_before_external_sort>536870912</max_bytes_before_external_sort>
|
|
182
211
|
<max_bytes_before_external_group_by>536870912</max_bytes_before_external_group_by>
|
|
212
|
+
<!-- Per-query caps. Belt-and-braces with the inline SETTINGS in
|
|
213
|
+
002-users.sql so \`CREATE OR REPLACE USER\` cannot regress the bound. -->
|
|
214
|
+
<max_execution_time>30</max_execution_time>
|
|
215
|
+
<max_rows_to_read>10000000</max_rows_to_read>
|
|
183
216
|
</app_writer>
|
|
184
|
-
<
|
|
185
|
-
<
|
|
186
|
-
|
|
217
|
+
<audit_writer>
|
|
218
|
+
<max_threads>1</max_threads>
|
|
219
|
+
<max_insert_threads>1</max_insert_threads>
|
|
220
|
+
<max_concurrent_queries_for_user>2</max_concurrent_queries_for_user>
|
|
221
|
+
<max_memory_usage>500000000</max_memory_usage>
|
|
222
|
+
<max_execution_time>10</max_execution_time>
|
|
223
|
+
<async_insert>1</async_insert>
|
|
224
|
+
<wait_for_async_insert>1</wait_for_async_insert>
|
|
225
|
+
</audit_writer>
|
|
226
|
+
<backup_reader>
|
|
227
|
+
<max_threads>2</max_threads>
|
|
228
|
+
<max_concurrent_queries_for_user>1</max_concurrent_queries_for_user>
|
|
229
|
+
<max_memory_usage>1000000000</max_memory_usage>
|
|
230
|
+
<max_execution_time>3600</max_execution_time>
|
|
231
|
+
</backup_reader>
|
|
232
|
+
<schema_admin>
|
|
233
|
+
<max_threads>2</max_threads>
|
|
234
|
+
<max_concurrent_queries_for_user>1</max_concurrent_queries_for_user>
|
|
235
|
+
<max_memory_usage>1000000000</max_memory_usage>
|
|
236
|
+
<max_execution_time>1800</max_execution_time>
|
|
237
|
+
</schema_admin>
|
|
187
238
|
</profiles>
|
|
188
239
|
<quotas>
|
|
189
240
|
<tenant_default>
|