@fjall/components-infrastructure 2.17.0 → 2.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/lib/lambda-assets/cert-generator/asset/index.js +14 -1
- package/dist/lib/patterns/aws/clickhouseDatabase.d.ts +17 -0
- package/dist/lib/patterns/aws/clickhouseDatabase.js +16 -0
- package/dist/lib/patterns/aws/compute.d.ts +2 -2
- package/dist/lib/patterns/aws/computeEcs.d.ts +12 -1
- package/dist/lib/patterns/aws/computeEcs.js +57 -0
- package/dist/lib/patterns/aws/computeEcsTypes.d.ts +51 -1
- package/dist/lib/patterns/aws/database.d.ts +19 -1
- package/dist/lib/patterns/aws/database.js +21 -2
- package/dist/lib/resources/aws/compute/ecs.js +14 -3
- package/dist/lib/resources/aws/compute/ecsConstants.d.ts +2 -0
- package/dist/lib/resources/aws/compute/ecsConstants.js +4 -0
- package/dist/lib/resources/aws/compute/ecsTaskDefinition.d.ts +2 -0
- package/dist/lib/resources/aws/compute/ecsTaskDefinition.js +13 -4
- package/dist/lib/resources/aws/compute/ecsTypes.d.ts +17 -1
- package/dist/lib/resources/aws/compute/ecsValidation.d.ts +7 -0
- package/dist/lib/resources/aws/compute/ecsValidation.js +10 -0
- package/dist/lib/resources/aws/compute/lambda.js +20 -2
- package/dist/lib/resources/aws/database/rdsInstance.d.ts +19 -0
- package/dist/lib/resources/aws/database/rdsInstance.js +13 -1
- package/dist/lib/resources/aws/monitoring/clickhouseAlarms.d.ts +10 -15
- package/dist/lib/resources/aws/monitoring/clickhouseAlarms.js +34 -56
- package/dist/lib/resources/aws/monitoring/ecsAlarms.js +5 -1
- package/dist/lib/resources/aws/monitoring/index.d.ts +2 -0
- package/dist/lib/resources/aws/monitoring/index.js +2 -0
- package/dist/lib/resources/aws/monitoring/logPatternAlarms.d.ts +55 -0
- package/dist/lib/resources/aws/monitoring/logPatternAlarms.js +74 -0
- package/dist/lib/resources/aws/monitoring/metricNamespaces.d.ts +13 -0
- package/dist/lib/resources/aws/monitoring/metricNamespaces.js +12 -0
- package/package.json +5 -7
|
@@ -17897,6 +17897,15 @@ ${body}
|
|
|
17897
17897
|
-----END PRIVATE KEY-----
|
|
17898
17898
|
`;
|
|
17899
17899
|
}
|
|
17900
|
+
function certPhysicalResourceId(event, caSecretArn, serverSecretArn) {
|
|
17901
|
+
if (event.RequestType === "Update" && event.PhysicalResourceId) {
|
|
17902
|
+
return event.PhysicalResourceId;
|
|
17903
|
+
}
|
|
17904
|
+
const digest = createHash("sha256").update(`${caSecretArn}
|
|
17905
|
+
${serverSecretArn}`).digest("hex").slice(0, 16);
|
|
17906
|
+
return `tls-cert-${digest}`;
|
|
17907
|
+
}
|
|
17908
|
+
exports.certPhysicalResourceId = certPhysicalResourceId;
|
|
17900
17909
|
exports.handler = async (event) => {
|
|
17901
17910
|
if (event.RequestType === "Delete") {
|
|
17902
17911
|
const props2 = event.ResourceProperties || {};
|
|
@@ -17964,7 +17973,11 @@ exports.handler = async (event) => {
|
|
|
17964
17973
|
throw err;
|
|
17965
17974
|
}
|
|
17966
17975
|
return {
|
|
17967
|
-
PhysicalResourceId:
|
|
17976
|
+
PhysicalResourceId: certPhysicalResourceId(
|
|
17977
|
+
event,
|
|
17978
|
+
caSecretArn,
|
|
17979
|
+
serverSecretArn
|
|
17980
|
+
),
|
|
17968
17981
|
Data: { CaCertSha256: caCertSha256 }
|
|
17969
17982
|
};
|
|
17970
17983
|
};
|
|
@@ -5,6 +5,7 @@ import { Construct } from "constructs";
|
|
|
5
5
|
import { Secret, type SecretImport } from "../../resources/aws/secrets/secret.js";
|
|
6
6
|
import { type ClickHouseSchemaAdmin, type ProfileSpec } from "../../resources/aws/database/clickhouseSchemas.js";
|
|
7
7
|
import { type ISecret } from "aws-cdk-lib/aws-secretsmanager";
|
|
8
|
+
import { type ITopic } from "aws-cdk-lib/aws-sns";
|
|
8
9
|
import type { ClickHouseTlsOptions } from "./clickhouseTls/index.js";
|
|
9
10
|
import { type ClickHouseMigrationsConfig, type IClickHouseDatabase } from "./interfaces/database.js";
|
|
10
11
|
import { type ISecurityGroupConnector } from "./interfaces/connector.js";
|
|
@@ -115,6 +116,22 @@ export interface ClickHouseDatabaseProps {
|
|
|
115
116
|
* See ./clickhouseTls/types.ts.
|
|
116
117
|
*/
|
|
117
118
|
tls?: ClickHouseTlsOptions;
|
|
119
|
+
/**
|
|
120
|
+
* Ops alarm SNS topic for the ClickHouse host-posture alarms (CPU / memory /
|
|
121
|
+
* disk warn+critical) and — when `backupSchedule` is enabled — the
|
|
122
|
+
* backup-failure alarm. Accepts an `ITopic`, an `arn:` string, or the
|
|
123
|
+
* `"import:<ExportName>"` form the generator scaffolds onto production apps
|
|
124
|
+
* (`"import:SharedAlarmTopicArn"`). Forwarded automatically by
|
|
125
|
+
* `DatabaseFactory.build` from `BaseDatabaseProps.alertsTopic`. Omitted (the
|
|
126
|
+
* default) → no host alarms, matching the dormant pre-dogfood behaviour.
|
|
127
|
+
*
|
|
128
|
+
* Resolved internally via `resolveAlertsTopic`; the construct owns the
|
|
129
|
+
* `instanceRole` / `asgName` / `backupTaskLogGroup` the alarms need, so no
|
|
130
|
+
* caller wiring is required. The stuck-merge alarm is NOT declared here —
|
|
131
|
+
* `"Stuck merge detected"` is emitted by the webapp app process, so it lives
|
|
132
|
+
* on the app service's declarative `logAlarms` instead.
|
|
133
|
+
*/
|
|
134
|
+
alertsTopic?: ITopic | string;
|
|
118
135
|
}
|
|
119
136
|
/**
|
|
120
137
|
* ClickHouse analytics database wrapper implementing IClickHouseDatabase.
|
|
@@ -18,6 +18,8 @@ import { LogGroup } from "../../resources/aws/logging/logGroup.js";
|
|
|
18
18
|
import { createClickHouseSecurityGroup } from "../../resources/aws/database/clickhouseSecurityGroup.js";
|
|
19
19
|
import { buildClickHouseEntrypointWrapper, buildClickHouseUserData, generateUsersConfigXml } from "../../resources/aws/database/clickhouseUserData.js";
|
|
20
20
|
import { toPascalCase } from "../../utils/capitaliseString.js";
|
|
21
|
+
import { resolveAlertsTopic } from "../../utils/resolveAlertsTopic.js";
|
|
22
|
+
import { createClickHouseAlarms } from "../../resources/aws/monitoring/index.js";
|
|
21
23
|
import { ClickHouseSchemaAdminSchema, ManagedPasswordNameSchema, ProfileSpecSchema, ClickHouseDefaultProfiles, PROFILE_NAME_PATTERN } from "../../resources/aws/database/clickhouseSchemas.js";
|
|
22
24
|
import { inferAmiHardwareType } from "../../resources/aws/compute/ecsConstants.js";
|
|
23
25
|
import { CLICKHOUSE_DATABASE_NAME, DEFAULT_CLICKHOUSE_INSTANCE_TYPE, CLICKHOUSE_IMAGE, CLICKHOUSE_EBS_VOLUME_SIZE_GB, CLICKHOUSE_EBS_IOPS, CLICKHOUSE_EBS_THROUGHPUT_MBPS, CLICKHOUSE_TASK_MEMORY_MIB, CLICKHOUSE_HTTP_PORT, CLICKHOUSE_HTTPS_PORT, CLICKHOUSE_NATIVE_PORT, CLICKHOUSE_TCP_SECURE_PORT, CLICKHOUSE_TLS_CERT_MOUNT_PATH, CLICKHOUSE_PROMETHEUS_PORT, CLICKHOUSE_DATA_MOUNT_PATH, CLICKHOUSE_SECRET_OPTIONS, CLICKHOUSE_SERVER_ROLE_TAG, clickHouseUserSecretName, CLICKHOUSE_HEALTH_CHECK, CLICKHOUSE_STOP_TIMEOUT_SECONDS, CLICKHOUSE_EBS_DEVICE_NAME, CLICKHOUSE_CONFIG_SUBDIR, CLICKHOUSE_USERS_SUBDIR, userPasswordEnvName, OPTIMISE_FINAL_SCHEDULE, REPLACING_MERGE_TREE_TABLES, OPTIMISE_MV_TABLES, CLICKHOUSE_CLOUDMAP_SERVICE_NAME, CLICKHOUSE_SERVER_CONTAINER_NAME, OPTIMISE_TASK_MEMORY_MIB, OPTIMISE_TASK_CPU_UNITS, BACKUP_SCHEDULE, BACKUP_TASK_MEMORY_MIB, BACKUP_TASK_CPU_UNITS, BACKUP_RETENTION_DAYS } from "../../resources/aws/database/clickhouseConstants.js";
|
|
@@ -480,6 +482,20 @@ export class ClickHouseDatabase extends Construct {
|
|
|
480
482
|
tlsCaSecret.grantRead(instanceRole);
|
|
481
483
|
if (tlsServerSecret !== undefined)
|
|
482
484
|
tlsServerSecret.grantRead(instanceRole);
|
|
485
|
+
// asgName is the CloudWatch dimension the host metrics key off, so without
|
|
486
|
+
// it the alarms cannot be built; no topic is the default, so skip silently
|
|
487
|
+
// rather than throw.
|
|
488
|
+
const resolvedAlertsTopic = resolveAlertsTopic(this, "AlertsTopic", props.alertsTopic);
|
|
489
|
+
const asgName = ecsCompute.getAutoScalingGroupName();
|
|
490
|
+
if (resolvedAlertsTopic !== undefined && asgName !== undefined) {
|
|
491
|
+
createClickHouseAlarms({
|
|
492
|
+
scope: this,
|
|
493
|
+
instanceRole,
|
|
494
|
+
asgName,
|
|
495
|
+
alarmTopic: resolvedAlertsTopic,
|
|
496
|
+
...(backupTaskLogGroup !== undefined && { backupTaskLogGroup })
|
|
497
|
+
});
|
|
498
|
+
}
|
|
483
499
|
const adminSecretName = clickHouseUserSecretName(schemaAdmin.name);
|
|
484
500
|
// Password via CLICKHOUSE_CLIENT_PASSWORD env, not --password on argv
|
|
485
501
|
// (argv → /proc/<pid>/cmdline). `jq -r .password` on an empty pipeline
|
|
@@ -3,10 +3,10 @@ import { type Construct } from "constructs";
|
|
|
3
3
|
import { type SecretImport } from "../../resources/aws/secrets/index.js";
|
|
4
4
|
import { type ComputeType, type IEcsCompute, type ILambdaCompute, type IEc2Compute, type AnyCompute, isCompute, isEcsCompute, isLambdaCompute, isEc2Compute } from "./interfaces/compute.js";
|
|
5
5
|
import type App from "../../app.js";
|
|
6
|
-
import { EcsCompute, type EcsComputeProps, type EcsServiceConfig, type EcsContainerConfig, type EcsScalingConfig, type EcsClusterConfig, type EcsRoutingConfig, type EcsCapacityProviderConfig, type ContainerDependency, type EcsMigrationsConfig, ECS_CAPACITY_PROVIDER_CONFIG, getEcsCapacityProviderConfig, ScalingType, type EcsCapacityProvider, type Ec2CapacityConfig, validateEcsProps, buildContainerConfigs, expandMigrationsSugar, type ResolvedScalingConfig, resolveScalingConfig } from "./computeEcs.js";
|
|
6
|
+
import { EcsCompute, type EcsComputeProps, type EcsServiceConfig, type ServiceLogAlarm, type EcsContainerConfig, type EcsScalingConfig, type EcsClusterConfig, type EcsRoutingConfig, type EcsCapacityProviderConfig, type ContainerDependency, type EcsMigrationsConfig, ECS_CAPACITY_PROVIDER_CONFIG, getEcsCapacityProviderConfig, ScalingType, type EcsCapacityProvider, type Ec2CapacityConfig, validateEcsProps, buildContainerConfigs, expandMigrationsSugar, type ResolvedScalingConfig, resolveScalingConfig } from "./computeEcs.js";
|
|
7
7
|
import { LambdaCompute, type LambdaComputeProps, type ContainerLambdaProps, type CodeLambdaProps, type FunctionUrlConfig, type ResolvedLambdaDeployment, resolveLambdaDeployment, Architecture, HttpMethod, InvokeMode, type FunctionUrlCorsOptions } from "./computeLambda.js";
|
|
8
8
|
import { Ec2Compute, type Ec2ComputeProps, type SshConfig } from "./computeEc2.js";
|
|
9
|
-
export { EcsCompute, type EcsComputeProps, type EcsServiceConfig, type EcsContainerConfig, type EcsScalingConfig, type EcsClusterConfig, type EcsRoutingConfig, type EcsCapacityProviderConfig, ECS_CAPACITY_PROVIDER_CONFIG, getEcsCapacityProviderConfig, ScalingType, type EcsCapacityProvider, type Ec2CapacityConfig, type ContainerDependency, type EcsMigrationsConfig, validateEcsProps, buildContainerConfigs, expandMigrationsSugar, type ResolvedScalingConfig, resolveScalingConfig, LambdaCompute, type LambdaComputeProps, type ContainerLambdaProps, type CodeLambdaProps, type FunctionUrlConfig, type ResolvedLambdaDeployment, resolveLambdaDeployment, Architecture, HttpMethod, InvokeMode, type FunctionUrlCorsOptions, Ec2Compute, type Ec2ComputeProps, type SshConfig };
|
|
9
|
+
export { EcsCompute, type EcsComputeProps, type EcsServiceConfig, type ServiceLogAlarm, type EcsContainerConfig, type EcsScalingConfig, type EcsClusterConfig, type EcsRoutingConfig, type EcsCapacityProviderConfig, ECS_CAPACITY_PROVIDER_CONFIG, getEcsCapacityProviderConfig, ScalingType, type EcsCapacityProvider, type Ec2CapacityConfig, type ContainerDependency, type EcsMigrationsConfig, validateEcsProps, buildContainerConfigs, expandMigrationsSugar, type ResolvedScalingConfig, resolveScalingConfig, LambdaCompute, type LambdaComputeProps, type ContainerLambdaProps, type CodeLambdaProps, type FunctionUrlConfig, type ResolvedLambdaDeployment, resolveLambdaDeployment, Architecture, HttpMethod, InvokeMode, type FunctionUrlCorsOptions, Ec2Compute, type Ec2ComputeProps, type SshConfig };
|
|
10
10
|
export type { ComputeType } from "./interfaces/compute.js";
|
|
11
11
|
/**
|
|
12
12
|
* Configuration defaults for each compute type.
|
|
@@ -7,7 +7,7 @@ import { type IEcsCompute } from "./interfaces/compute.js";
|
|
|
7
7
|
import { type SecretImport } from "../../resources/aws/secrets/index.js";
|
|
8
8
|
import EcsCluster, { type EcsClusterProps } from "../../resources/aws/compute/ecs.js";
|
|
9
9
|
export { ScalingType } from "./computeEcsTypes.js";
|
|
10
|
-
export type { EcsCapacityProvider, Ec2CapacityConfig, RemoteConnectionSpec, EcsCapacityProviderConfig, EcsContainerConfig, ContainerDependency, ContainerVolume, EcsScheduledTaskConfig, EcsLifecycleHookMigrationsConfig, EcsPostDeployMigrationsConfig, EcsHookMigrationsConfig, EcsMigrationsConfig, EcsMigrationsMode, EcsCircuitBreakerConfig, EcsScalingConfig, EcsClusterConfig, EcsRoutingConfig, EcsServiceConfig, EcsComputeProps } from "./computeEcsTypes.js";
|
|
10
|
+
export type { EcsCapacityProvider, Ec2CapacityConfig, RemoteConnectionSpec, EcsCapacityProviderConfig, EcsContainerConfig, ContainerDependency, ContainerVolume, EcsScheduledTaskConfig, EcsLifecycleHookMigrationsConfig, EcsPostDeployMigrationsConfig, EcsHookMigrationsConfig, EcsMigrationsConfig, EcsMigrationsMode, EcsCircuitBreakerConfig, EcsScalingConfig, EcsClusterConfig, EcsRoutingConfig, EcsServiceConfig, ServiceLogAlarm, EcsComputeProps } from "./computeEcsTypes.js";
|
|
11
11
|
import { ScalingType, type EcsCapacityProviderConfig, type EcsCapacityProvider, type EcsContainerConfig, type EcsScalingConfig, type EcsServiceConfig, type EcsComputeProps, type QueueScalingConfig } from "./computeEcsTypes.js";
|
|
12
12
|
export declare const ECS_CAPACITY_PROVIDER_CONFIG: Record<EcsCapacityProvider, EcsCapacityProviderConfig>;
|
|
13
13
|
export declare function getEcsCapacityProviderConfig(provider: EcsCapacityProvider): EcsCapacityProviderConfig;
|
|
@@ -124,6 +124,17 @@ export declare class EcsCompute extends Construct implements IEcsCompute {
|
|
|
124
124
|
* migrate container injected by `expandMigrationsSugar`.
|
|
125
125
|
*/
|
|
126
126
|
private wireLifecycleHookMigrations;
|
|
127
|
+
/**
|
|
128
|
+
* Wire a native CloudFormation `DependsOn` from each service that declares
|
|
129
|
+
* `awaitMigrationsFrom` to the named migrate-owning service, so CFN does not
|
|
130
|
+
* roll out the dependent service until the target's ECS deployment — including
|
|
131
|
+
* its PRE_SCALE_UP migrate hook — has completed. Prevents the RDS-IAM auth /
|
|
132
|
+
* schema-mismatch race when a DB-connecting worker starts before the app's
|
|
133
|
+
* migrations have applied, and enables a single-pass greenfield deploy.
|
|
134
|
+
* Shape (target exists, declares hook migrations, no self / cycle) is already
|
|
135
|
+
* guaranteed by `validateEcsProps`; the undefined guards are synth invariants.
|
|
136
|
+
*/
|
|
137
|
+
private wireServiceMigrationDependencies;
|
|
127
138
|
/**
|
|
128
139
|
* Synthesise a dedicated migration task definition for a lifecycle-hook
|
|
129
140
|
* migration when `separateTaskDef` is set. Creates the migration's own
|
|
@@ -154,6 +154,39 @@ export function validateEcsProps(props) {
|
|
|
154
154
|
validateSeparateTaskDef(service.name, service.migrations.separateTaskDef);
|
|
155
155
|
}
|
|
156
156
|
}
|
|
157
|
+
// Validate awaitMigrationsFrom cross-service migration ordering.
|
|
158
|
+
const serviceByName = new Map(props.services.map((s) => [s.name, s]));
|
|
159
|
+
for (const service of props.services) {
|
|
160
|
+
const target = service.awaitMigrationsFrom;
|
|
161
|
+
if (target === undefined)
|
|
162
|
+
continue;
|
|
163
|
+
if (target === service.name) {
|
|
164
|
+
throw new Error(`Service '${service.name}': awaitMigrationsFrom cannot reference its own migrations.`);
|
|
165
|
+
}
|
|
166
|
+
const targetService = serviceByName.get(target);
|
|
167
|
+
if (targetService === undefined) {
|
|
168
|
+
throw new Error(`Service '${service.name}': awaitMigrationsFrom names unknown service '${target}'. ` +
|
|
169
|
+
"It must name another service in the same cluster.");
|
|
170
|
+
}
|
|
171
|
+
if (targetService.migrations === undefined ||
|
|
172
|
+
!isHookMigrations(targetService.migrations)) {
|
|
173
|
+
throw new Error(`Service '${service.name}': awaitMigrationsFrom names service '${target}', which does not ` +
|
|
174
|
+
"declare lifecycle-hook or post-deploy migrations. Only those modes have a single " +
|
|
175
|
+
"completion point to await (init-container migrations run per-replica).");
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
// Detect cycles in the awaitMigrationsFrom graph (each service awaits <=1 other).
|
|
179
|
+
for (const service of props.services) {
|
|
180
|
+
const seen = new Set();
|
|
181
|
+
let current = service.name;
|
|
182
|
+
while (current !== undefined) {
|
|
183
|
+
if (seen.has(current)) {
|
|
184
|
+
throw new Error(`Service '${service.name}': awaitMigrationsFrom forms a dependency cycle.`);
|
|
185
|
+
}
|
|
186
|
+
seen.add(current);
|
|
187
|
+
current = serviceByName.get(current)?.awaitMigrationsFrom;
|
|
188
|
+
}
|
|
189
|
+
}
|
|
157
190
|
if (props.cluster?.directAccess === true) {
|
|
158
191
|
const hasEc2Service = props.services.some((s) => s.capacityProvider === "EC2");
|
|
159
192
|
if (!hasEc2Service) {
|
|
@@ -722,6 +755,8 @@ export class EcsCompute extends Construct {
|
|
|
722
755
|
ssmSecretsPath: service.ssmSecretsPath,
|
|
723
756
|
docker: service.docker,
|
|
724
757
|
alarms: service.alarms,
|
|
758
|
+
logAlarms: service.logAlarms,
|
|
759
|
+
logMetricNamespace: service.logMetricNamespace,
|
|
725
760
|
circuitBreaker: service.circuitBreaker,
|
|
726
761
|
...(service.deployment !== undefined && {
|
|
727
762
|
deployment: service.deployment
|
|
@@ -763,6 +798,7 @@ export class EcsCompute extends Construct {
|
|
|
763
798
|
this.ecsCluster = new EcsCluster(this, `${id}Ecs`, ecsProps);
|
|
764
799
|
this.connections = this.ecsCluster.connections;
|
|
765
800
|
this.wireLifecycleHookMigrations(props.services);
|
|
801
|
+
this.wireServiceMigrationDependencies(props.services);
|
|
766
802
|
this.materialiseScheduledTasks(id, props);
|
|
767
803
|
}
|
|
768
804
|
/**
|
|
@@ -985,6 +1021,27 @@ export class EcsCompute extends Construct {
|
|
|
985
1021
|
});
|
|
986
1022
|
}
|
|
987
1023
|
}
|
|
1024
|
+
/**
|
|
1025
|
+
* Wire a native CloudFormation `DependsOn` from each service that declares
|
|
1026
|
+
* `awaitMigrationsFrom` to the named migrate-owning service, so CFN does not
|
|
1027
|
+
* roll out the dependent service until the target's ECS deployment — including
|
|
1028
|
+
* its PRE_SCALE_UP migrate hook — has completed. Prevents the RDS-IAM auth /
|
|
1029
|
+
* schema-mismatch race when a DB-connecting worker starts before the app's
|
|
1030
|
+
* migrations have applied, and enables a single-pass greenfield deploy.
|
|
1031
|
+
* Shape (target exists, declares hook migrations, no self / cycle) is already
|
|
1032
|
+
* guaranteed by `validateEcsProps`; the undefined guards are synth invariants.
|
|
1033
|
+
*/
|
|
1034
|
+
wireServiceMigrationDependencies(services) {
|
|
1035
|
+
for (const svcConfig of services) {
|
|
1036
|
+
if (svcConfig.awaitMigrationsFrom === undefined)
|
|
1037
|
+
continue;
|
|
1038
|
+
const dependent = this.ecsCluster.getService(svcConfig.name);
|
|
1039
|
+
const dependency = this.ecsCluster.getService(svcConfig.awaitMigrationsFrom);
|
|
1040
|
+
if (dependent !== undefined && dependency !== undefined) {
|
|
1041
|
+
dependent.node.addDependency(dependency);
|
|
1042
|
+
}
|
|
1043
|
+
}
|
|
1044
|
+
}
|
|
988
1045
|
/**
|
|
989
1046
|
* Synthesise a dedicated migration task definition for a lifecycle-hook
|
|
990
1047
|
* migration when `separateTaskDef` is set. Creates the migration's own
|
|
@@ -8,7 +8,7 @@ import { type ConnectionSpec } from "./interfaces/connector.js";
|
|
|
8
8
|
import { type RemoteConnectionSpec } from "../../resources/aws/compute/ecsRemoteConnections.js";
|
|
9
9
|
import { type EcsRoutingConfig, type EcsContainerDependency } from "../../resources/aws/compute/ecsTypes.js";
|
|
10
10
|
import { ScalingType, type DomainConfig, type EcsCapacityProvider, type Ec2CapacityConfig, type QueueScalingConfig } from "../../resources/aws/compute/ecs.js";
|
|
11
|
-
import type { EcsServiceAlarmThresholds } from "../../resources/aws/monitoring/index.js";
|
|
11
|
+
import type { EcsServiceAlarmThresholds, LogPatternAlarmSpec } from "../../resources/aws/monitoring/index.js";
|
|
12
12
|
import { type SecretImport } from "../../resources/aws/secrets/index.js";
|
|
13
13
|
import type { DockerBuild } from "@fjall/util/manifest/schemas";
|
|
14
14
|
export type { RemoteConnectionSpec };
|
|
@@ -39,6 +39,13 @@ export interface EcsCapacityProviderConfig {
|
|
|
39
39
|
* @see https://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_ContainerDependency.html
|
|
40
40
|
*/
|
|
41
41
|
export type ContainerDependency = EcsContainerDependency;
|
|
42
|
+
/**
|
|
43
|
+
* A log-pattern alarm declared on a service: a CloudWatch metric filter over the
|
|
44
|
+
* service's log group plus the alarm that fires on a match. Public-facing alias
|
|
45
|
+
* for the canonical resource-layer `LogPatternAlarmSpec`, re-exported so factory
|
|
46
|
+
* consumers can declare `logAlarms` from the patterns barrel.
|
|
47
|
+
*/
|
|
48
|
+
export type ServiceLogAlarm = LogPatternAlarmSpec;
|
|
42
49
|
/**
|
|
43
50
|
* Configuration for a container in an ECS task.
|
|
44
51
|
*
|
|
@@ -729,6 +736,20 @@ export interface EcsServiceConfig {
|
|
|
729
736
|
* - object: override specific thresholds
|
|
730
737
|
*/
|
|
731
738
|
alarms?: EcsServiceAlarmThresholds | false;
|
|
739
|
+
/**
|
|
740
|
+
* Log-pattern alarms for this service. Each entry creates a CloudWatch metric
|
|
741
|
+
* filter over the service's log group plus an alarm wired to the cluster's
|
|
742
|
+
* `alertsTopic` — so it materialises only when `alertsTopic` is set and
|
|
743
|
+
* `alarms !== false`. Declarative: describe the match with `literal` or
|
|
744
|
+
* `anyTerms`.
|
|
745
|
+
*/
|
|
746
|
+
logAlarms?: ServiceLogAlarm[];
|
|
747
|
+
/**
|
|
748
|
+
* Default CloudWatch metric namespace for this service's `logAlarms`. Each
|
|
749
|
+
* entry may override it per-alarm (e.g. `Fjall/WebApp` for app alarms,
|
|
750
|
+
* `Fjall/ClickHouse` for a relocated stuck-merge alarm on the same service).
|
|
751
|
+
*/
|
|
752
|
+
logMetricNamespace?: string;
|
|
732
753
|
/**
|
|
733
754
|
* Run an init container before any other container in this service starts.
|
|
734
755
|
* Synthesises a non-essential container with the given migration command,
|
|
@@ -739,6 +760,35 @@ export interface EcsServiceConfig {
|
|
|
739
760
|
* migrations: { command: ["npx", "payload", "migrate"] }
|
|
740
761
|
*/
|
|
741
762
|
migrations?: EcsMigrationsConfig;
|
|
763
|
+
/**
|
|
764
|
+
* Names another service in the same cluster whose **lifecycle-hook** (or
|
|
765
|
+
* post-deploy) migrations must finish before this service rolls out.
|
|
766
|
+
*
|
|
767
|
+
* Implemented as a native CloudFormation `DependsOn`: CFN will not begin
|
|
768
|
+
* creating or updating this service until the named service's ECS deployment
|
|
769
|
+
* — including its PRE_SCALE_UP migrate hook — has completed. Use it for
|
|
770
|
+
* DB-connecting workers that must not start until the migrate-owning service
|
|
771
|
+
* has applied role / grant / schema migrations; without it, a worker can
|
|
772
|
+
* attempt RDS-IAM auth before the migrate task has run `GRANT rds_iam` (the
|
|
773
|
+
* race that aborts a migration-bearing deploy). Also enables a single-pass
|
|
774
|
+
* greenfield deploy (app migrates, then workers come up, in one `cdk deploy`).
|
|
775
|
+
*
|
|
776
|
+
* Works for EC2-capacity services — it is a resource-ordering edge, not a
|
|
777
|
+
* worker-side lifecycle hook, so the FARGATE-only restriction on hook
|
|
778
|
+
* migrations does not apply here.
|
|
779
|
+
*
|
|
780
|
+
* The named service MUST declare `migrations` in `lifecycle-hook` or
|
|
781
|
+
* `post-deploy` mode — init-container migrations run per-replica and have no
|
|
782
|
+
* single completion point to await. Validated at synth (target exists,
|
|
783
|
+
* declares hook migrations, no self-reference, no cycle).
|
|
784
|
+
*
|
|
785
|
+
* Escape hatch for orderings this field can't express: drop to the L2 API,
|
|
786
|
+
* `cluster.getService("x")?.node.addDependency(cluster.getService("y")!)`.
|
|
787
|
+
*
|
|
788
|
+
* @example
|
|
789
|
+
* awaitMigrationsFrom: "app"
|
|
790
|
+
*/
|
|
791
|
+
awaitMigrationsFrom?: string;
|
|
742
792
|
/**
|
|
743
793
|
* Deployment circuit breaker policy. Omit for the safe default
|
|
744
794
|
* `{ enable: true, rollback: true }` — failed deployments automatically
|
|
@@ -131,6 +131,14 @@ export interface InstanceDatabaseProps extends BaseDatabaseProps {
|
|
|
131
131
|
credentials?: CredentialsConfig;
|
|
132
132
|
encryption?: EncryptionConfig;
|
|
133
133
|
publiclyAccessible?: boolean;
|
|
134
|
+
/**
|
|
135
|
+
* Enable RDS IAM database authentication (opt-in; default off). Instance
|
|
136
|
+
* databases only. When true, IAM principals granted via
|
|
137
|
+
* {@link RelationalDatabase.grantIamConnect} connect with short-lived
|
|
138
|
+
* `rds-db:connect` tokens instead of a stored password. See ADR
|
|
139
|
+
* decisions/2026-06-17-rls-role-auth-and-launch-gating.md.
|
|
140
|
+
*/
|
|
141
|
+
iamAuthentication?: boolean;
|
|
134
142
|
/** ARN or identifier of DB instance snapshot to restore from */
|
|
135
143
|
snapshotIdentifier?: string;
|
|
136
144
|
/** Username from the snapshot (required when restoring from snapshot to reset password) */
|
|
@@ -247,7 +255,7 @@ export declare class RelationalDatabase extends Construct implements IRelational
|
|
|
247
255
|
private database;
|
|
248
256
|
constructor(scope: Construct, id: string, props: IRelationalDatabaseProps);
|
|
249
257
|
private resolveAlertsTopic;
|
|
250
|
-
addDatabase
|
|
258
|
+
private addDatabase;
|
|
251
259
|
private addAurora;
|
|
252
260
|
private addAuroraGlobal;
|
|
253
261
|
private addRdsInstance;
|
|
@@ -278,6 +286,16 @@ export declare class RelationalDatabase extends Construct implements IRelational
|
|
|
278
286
|
* @param grantee The connectable principal to grant connect permissions to
|
|
279
287
|
*/
|
|
280
288
|
grantConnect(grantee: IConnectable): void;
|
|
289
|
+
/**
|
|
290
|
+
* Grant an IAM principal permission to connect as `dbUsername` via RDS IAM
|
|
291
|
+
* database authentication. Instance databases only — Aurora uses a different
|
|
292
|
+
* mechanism, so this throws for non-Instance databases. Requires the instance
|
|
293
|
+
* to be created with `iamAuthentication: true`. The L2 `grantConnect` scopes
|
|
294
|
+
* `rds-db:connect` to the exact `dbuser:<resourceId>/<dbUsername>` ARN — never
|
|
295
|
+
* a bare wildcard. See ADR
|
|
296
|
+
* decisions/2026-06-17-rls-role-auth-and-launch-gating.md.
|
|
297
|
+
*/
|
|
298
|
+
grantIamConnect(grantee: IGrantable, dbUsername: string): Grant;
|
|
281
299
|
}
|
|
282
300
|
export { ClickHouseDatabase, type ClickHouseDatabaseProps };
|
|
283
301
|
export { ClickHouseDefaultProfiles, ClickHouseSchemaAdminSchema, ManagedPasswordNameSchema, ProfileSpecSchema, type ClickHouseSchemaAdmin, type ManagedPasswordName, type ProfileSpec } from "../../resources/aws/database/clickhouseSchemas.js";
|
|
@@ -118,7 +118,7 @@ function validateRelationalDatabaseProps(props) {
|
|
|
118
118
|
"Specify the AWS region where the primary cluster will be created.");
|
|
119
119
|
}
|
|
120
120
|
// Instance-only options on Aurora/GlobalAurora
|
|
121
|
-
warnIfPropertiesIgnored(props, ["readReplica", "multiAz", "allocatedStorage"], "Instance", "Instance database");
|
|
121
|
+
warnIfPropertiesIgnored(props, ["readReplica", "multiAz", "allocatedStorage", "iamAuthentication"], "Instance", "Instance database");
|
|
122
122
|
// Aurora-only options on Instance
|
|
123
123
|
warnIfPropertiesIgnored(props, ["readers", "writer", "backupRetention"], ["Aurora", "GlobalAurora"], "Aurora database");
|
|
124
124
|
// GlobalAurora-only options on non-global databases
|
|
@@ -328,6 +328,8 @@ export class RelationalDatabase extends Construct {
|
|
|
328
328
|
return resolveAlertsTopicShared(this, "AlertsTopic", alertsTopic);
|
|
329
329
|
}
|
|
330
330
|
addDatabase(props) {
|
|
331
|
+
// Re-asserted here (the constructor already validates) to narrow
|
|
332
|
+
// `props.vpc` to `IVpc` for the type-specific add* dispatch below.
|
|
331
333
|
validateRelationalDatabaseProps(props);
|
|
332
334
|
switch (props.type) {
|
|
333
335
|
case "Aurora":
|
|
@@ -440,7 +442,8 @@ export class RelationalDatabase extends Construct {
|
|
|
440
442
|
snapshotUsername: props.snapshotUsername,
|
|
441
443
|
alertsTopic: this.resolveAlertsTopic(props.alertsTopic),
|
|
442
444
|
alarms: props.alarms,
|
|
443
|
-
applicationId: props.applicationId
|
|
445
|
+
applicationId: props.applicationId,
|
|
446
|
+
iamAuthentication: props.iamAuthentication
|
|
444
447
|
});
|
|
445
448
|
this.connections = this.database.connections;
|
|
446
449
|
this.addDatabaseOutputs(props);
|
|
@@ -595,6 +598,22 @@ export class RelationalDatabase extends Construct {
|
|
|
595
598
|
grantConnect(grantee) {
|
|
596
599
|
this.connections.allowDefaultPortFrom(grantee);
|
|
597
600
|
}
|
|
601
|
+
/**
|
|
602
|
+
* Grant an IAM principal permission to connect as `dbUsername` via RDS IAM
|
|
603
|
+
* database authentication. Instance databases only — Aurora uses a different
|
|
604
|
+
* mechanism, so this throws for non-Instance databases. Requires the instance
|
|
605
|
+
* to be created with `iamAuthentication: true`. The L2 `grantConnect` scopes
|
|
606
|
+
* `rds-db:connect` to the exact `dbuser:<resourceId>/<dbUsername>` ARN — never
|
|
607
|
+
* a bare wildcard. See ADR
|
|
608
|
+
* decisions/2026-06-17-rls-role-auth-and-launch-gating.md.
|
|
609
|
+
*/
|
|
610
|
+
grantIamConnect(grantee, dbUsername) {
|
|
611
|
+
if (!(this.database instanceof RdsInstance)) {
|
|
612
|
+
throw new Error(`grantIamConnect is only supported for Instance databases; ` +
|
|
613
|
+
`'${this._databaseName}' is a ${this.databaseType} database.`);
|
|
614
|
+
}
|
|
615
|
+
return this.database.grantIamConnect(grantee, dbUsername);
|
|
616
|
+
}
|
|
598
617
|
}
|
|
599
618
|
export { ClickHouseDatabase };
|
|
600
619
|
export { ClickHouseDefaultProfiles, ClickHouseSchemaAdminSchema, ManagedPasswordNameSchema, ProfileSpecSchema } from "../../resources/aws/database/clickhouseSchemas.js";
|
|
@@ -4,7 +4,7 @@ import { Construct } from "constructs";
|
|
|
4
4
|
import { CfnOutput, Aspects } from "aws-cdk-lib";
|
|
5
5
|
import { processConnections } from "../../../utils/connections.js";
|
|
6
6
|
import { toPascalCase } from "../../../utils/capitaliseString.js";
|
|
7
|
-
import { createEcsServiceAlarms } from "../monitoring/index.js";
|
|
7
|
+
import { createEcsServiceAlarms, createLogPatternAlarms } from "../monitoring/index.js";
|
|
8
8
|
// Extracted modules
|
|
9
9
|
import { CapacityProviderDependencyAspect } from "./ecsCapacityProviderAspect.js";
|
|
10
10
|
import { validateEcsClusterProps } from "./ecsValidation.js";
|
|
@@ -210,7 +210,7 @@ export default class EcsCluster extends Construct {
|
|
|
210
210
|
const executionRole = createExecutionRole(this.ctx, serviceName);
|
|
211
211
|
const taskRole = createTaskRole(this.ctx, serviceName, serviceProps);
|
|
212
212
|
const taskDefinition = createTaskDefinition(this.ctx, serviceName, serviceProps, executionRole, taskRole);
|
|
213
|
-
const { containers, primaryContainer } = addContainersToTask(this.ctx, serviceName, serviceProps, taskDefinition);
|
|
213
|
+
const { containers, primaryContainer, logGroup } = addContainersToTask(this.ctx, serviceName, serviceProps, taskDefinition);
|
|
214
214
|
const service = createService(this.ctx, serviceName, serviceProps, taskDefinition, this.asgState);
|
|
215
215
|
let targetGroup;
|
|
216
216
|
if (!this.loadBalancerDisabled &&
|
|
@@ -230,7 +230,8 @@ export default class EcsCluster extends Construct {
|
|
|
230
230
|
containers,
|
|
231
231
|
primaryContainer,
|
|
232
232
|
targetGroup,
|
|
233
|
-
scalingPolicy
|
|
233
|
+
scalingPolicy,
|
|
234
|
+
logGroup
|
|
234
235
|
});
|
|
235
236
|
if (serviceProps.connections && serviceProps.connections.length > 0) {
|
|
236
237
|
try {
|
|
@@ -253,6 +254,16 @@ export default class EcsCluster extends Construct {
|
|
|
253
254
|
alarmTopic: this.props.alertsTopic,
|
|
254
255
|
applicationId: this.props.applicationId
|
|
255
256
|
});
|
|
257
|
+
if (serviceProps.logAlarms && serviceProps.logAlarms.length > 0) {
|
|
258
|
+
createLogPatternAlarms({
|
|
259
|
+
scope: this,
|
|
260
|
+
logGroup,
|
|
261
|
+
alarmTopic: this.props.alertsTopic,
|
|
262
|
+
metricNamespace: serviceProps.logMetricNamespace,
|
|
263
|
+
specs: serviceProps.logAlarms,
|
|
264
|
+
applicationId: this.props.applicationId
|
|
265
|
+
});
|
|
266
|
+
}
|
|
256
267
|
}
|
|
257
268
|
}
|
|
258
269
|
setupConnections(props) {
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import { AmiHardwareType } from "aws-cdk-lib/aws-ecs";
|
|
2
|
+
import { RetentionDays } from "aws-cdk-lib/aws-logs";
|
|
2
3
|
export declare const DEFAULT_EC2_INSTANCE_TYPE = "t4g.micro";
|
|
3
4
|
export declare const DEFAULT_WARM_POOL_MIN_SIZE = 1;
|
|
4
5
|
export declare const DEFAULT_WARM_POOL_REUSE_ON_SCALE_IN = false;
|
|
5
6
|
export declare const DEFAULT_LOG_RETENTION_DAYS = 14;
|
|
7
|
+
export declare const DEFAULT_LOG_RETENTION = RetentionDays.TWO_WEEKS;
|
|
6
8
|
export declare const DEFAULT_FARGATE_CPU = 256;
|
|
7
9
|
export declare const DEFAULT_FARGATE_MEMORY_MIB = 512;
|
|
8
10
|
export declare const DEFAULT_EC2_CONTAINER_MEMORY_MIB = 1024;
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { AmiHardwareType } from "aws-cdk-lib/aws-ecs";
|
|
2
|
+
import { RetentionDays } from "aws-cdk-lib/aws-logs";
|
|
2
3
|
// Canonical source: @fjall/generator schemas/constants.ts — keep in sync
|
|
3
4
|
export const DEFAULT_EC2_INSTANCE_TYPE = "t4g.micro";
|
|
4
5
|
export const DEFAULT_WARM_POOL_MIN_SIZE = 1;
|
|
@@ -8,6 +9,9 @@ export const DEFAULT_WARM_POOL_MIN_SIZE = 1;
|
|
|
8
9
|
export const DEFAULT_WARM_POOL_REUSE_ON_SCALE_IN = false;
|
|
9
10
|
// 14 days balances cost against retaining enough history for post-mortem debugging
|
|
10
11
|
export const DEFAULT_LOG_RETENTION_DAYS = 14;
|
|
12
|
+
// The RetentionDays enum the explicit service LogGroup needs. Coupled to
|
|
13
|
+
// DEFAULT_LOG_RETENTION_DAYS (TWO_WEEKS === 14) — the two must move together.
|
|
14
|
+
export const DEFAULT_LOG_RETENTION = RetentionDays.TWO_WEEKS;
|
|
11
15
|
// Smallest valid (cpu, memory) pair on the Fargate matrix — must move together.
|
|
12
16
|
export const DEFAULT_FARGATE_CPU = 256;
|
|
13
17
|
export const DEFAULT_FARGATE_MEMORY_MIB = 512;
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { FargateTaskDefinition, Ec2TaskDefinition, NetworkMode, type ContainerDefinition } from "aws-cdk-lib/aws-ecs";
|
|
2
2
|
import type { Construct } from "constructs";
|
|
3
|
+
import type { ILogGroup } from "aws-cdk-lib/aws-logs";
|
|
3
4
|
import { type Role } from "aws-cdk-lib/aws-iam";
|
|
4
5
|
import type { EcsConstructContext } from "./ecsContext.js";
|
|
5
6
|
import type { EcsClusterProps, EcsServiceProps, EcsCapacityProvider } from "./ecsTypes.js";
|
|
@@ -52,4 +53,5 @@ export declare function createMigrationTaskDefinition(scope: Construct, id: stri
|
|
|
52
53
|
export declare function addContainersToTask(ctx: EcsConstructContext, serviceName: string, serviceProps: EcsServiceProps, taskDefinition: FargateTaskDefinition | Ec2TaskDefinition): {
|
|
53
54
|
containers: ContainerDefinition[];
|
|
54
55
|
primaryContainer?: ContainerDefinition;
|
|
56
|
+
logGroup: ILogGroup;
|
|
55
57
|
};
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import { AwsLogDriver, ContainerDependencyCondition, FargateTaskDefinition, Ec2TaskDefinition, NetworkMode, CpuArchitecture, OperatingSystemFamily } from "aws-cdk-lib/aws-ecs";
|
|
2
|
-
import { Duration } from "aws-cdk-lib";
|
|
2
|
+
import { Duration, RemovalPolicy } from "aws-cdk-lib";
|
|
3
3
|
import { Secret as EcsSecret } from "aws-cdk-lib/aws-ecs";
|
|
4
4
|
import { StringParameter } from "aws-cdk-lib/aws-ssm";
|
|
5
5
|
import { resolveOrgId } from "../../../utils/cdkContext.js";
|
|
6
6
|
import { validateSsmPathComponent } from "./ecsValidation.js";
|
|
7
|
-
import {
|
|
7
|
+
import { DEFAULT_LOG_RETENTION, DEFAULT_FARGATE_CPU, DEFAULT_FARGATE_MEMORY_MIB, DEFAULT_EC2_CONTAINER_MEMORY_MIB } from "./ecsConstants.js";
|
|
8
|
+
import { LogGroup } from "../logging/logGroup.js";
|
|
8
9
|
import { getContainerImage } from "./ecsImages.js";
|
|
9
10
|
import { resolveRemoteConnections } from "./ecsRemoteConnections.js";
|
|
10
11
|
import { resolveImportedSecret } from "../secrets/index.js";
|
|
@@ -124,6 +125,14 @@ export function addContainersToTask(ctx, serviceName, serviceProps, taskDefiniti
|
|
|
124
125
|
const orgId = resolveOrgId(ctx.scope.node);
|
|
125
126
|
const remoteEnvByService = resolveRemoteConnections([serviceProps], ctx.scope, orgId);
|
|
126
127
|
const remoteEnv = remoteEnvByService[serviceName] ?? {};
|
|
128
|
+
// Explicit so the cluster can attach metric filters (createLogPatternAlarms).
|
|
129
|
+
// No logGroupName: CDK auto-names from the logical ID so it cannot collide
|
|
130
|
+
// with the orphaned-retained old group during the implicit→explicit replace.
|
|
131
|
+
// RETAIN (never DESTROY): preserves fail-closed evidence and supports rollback.
|
|
132
|
+
const logGroup = new LogGroup(ctx.scope, `${ctx.props.clusterName}${serviceName}LogGroup`, {
|
|
133
|
+
retention: DEFAULT_LOG_RETENTION,
|
|
134
|
+
removalPolicy: RemovalPolicy.RETAIN
|
|
135
|
+
});
|
|
127
136
|
for (const containerConfig of serviceProps.containers) {
|
|
128
137
|
const image = getContainerImage(ctx, serviceName, containerConfig, serviceProps);
|
|
129
138
|
const isFirstWithPort = !primaryContainer && containerConfig.port !== undefined;
|
|
@@ -156,7 +165,7 @@ export function addContainersToTask(ctx, serviceName, serviceProps, taskDefiniti
|
|
|
156
165
|
containerName: containerConfig.name,
|
|
157
166
|
logging: new AwsLogDriver({
|
|
158
167
|
streamPrefix: `/ecs/${ctx.props.clusterName}/${serviceName}`,
|
|
159
|
-
|
|
168
|
+
logGroup
|
|
160
169
|
}),
|
|
161
170
|
// remoteEnv (cross-app `${PREFIX}_HOST/_PORT`) intentionally overrides user
|
|
162
171
|
// values — a stale manual setting must not mask the resolved peer.
|
|
@@ -250,5 +259,5 @@ export function addContainersToTask(ctx, serviceName, serviceProps, taskDefiniti
|
|
|
250
259
|
});
|
|
251
260
|
}
|
|
252
261
|
}
|
|
253
|
-
return { containers, primaryContainer };
|
|
262
|
+
return { containers, primaryContainer, logGroup };
|
|
254
263
|
}
|
|
@@ -19,7 +19,8 @@ import { type RemoteConnectionSpec } from "./ecsRemoteConnections.js";
|
|
|
19
19
|
import { type SecretImport } from "../secrets/index.js";
|
|
20
20
|
import type { ManagedDomainExports } from "../../../utils/domainTypes.js";
|
|
21
21
|
import type { ITopic } from "aws-cdk-lib/aws-sns";
|
|
22
|
-
import type {
|
|
22
|
+
import type { ILogGroup } from "aws-cdk-lib/aws-logs";
|
|
23
|
+
import type { EcsServiceAlarmThresholds, LogPatternAlarmSpec } from "../monitoring/index.js";
|
|
23
24
|
import { type Ec2InstancePersistentDataVolumeConfig } from "./ec2.js";
|
|
24
25
|
export declare enum Protocol {
|
|
25
26
|
HTTP = 0,
|
|
@@ -437,6 +438,19 @@ export interface EcsServiceProps {
|
|
|
437
438
|
* - object: override specific thresholds
|
|
438
439
|
*/
|
|
439
440
|
alarms?: EcsServiceAlarmThresholds | false;
|
|
441
|
+
/**
|
|
442
|
+
* Log-pattern alarms for this service's log group. Each spec creates a
|
|
443
|
+
* CloudWatch metric filter + alarm (see `createLogPatternAlarms`). Materialised
|
|
444
|
+
* only when the cluster carries `alertsTopic` and `alarms !== false` — the same
|
|
445
|
+
* gate as the per-service metric alarms.
|
|
446
|
+
*/
|
|
447
|
+
logAlarms?: LogPatternAlarmSpec[];
|
|
448
|
+
/**
|
|
449
|
+
* Default CloudWatch metric namespace for this service's `logAlarms`. Each
|
|
450
|
+
* spec may override it per-entry, so one service can emit alarms in different
|
|
451
|
+
* namespaces (e.g. `Fjall/WebApp` for RLS, `Fjall/ClickHouse` for stuck-merge).
|
|
452
|
+
*/
|
|
453
|
+
logMetricNamespace?: string;
|
|
440
454
|
/**
|
|
441
455
|
* Deployment circuit breaker policy.
|
|
442
456
|
* - undefined (default): `{ enable: true, rollback: true }`
|
|
@@ -543,4 +557,6 @@ export interface ServiceData {
|
|
|
543
557
|
primaryContainer?: ContainerDefinition;
|
|
544
558
|
targetGroup?: IApplicationTargetGroup;
|
|
545
559
|
scalingPolicy?: TargetTrackingScalingPolicy | StepScalingPolicy;
|
|
560
|
+
/** Explicit log group shared by all of the service's containers. */
|
|
561
|
+
logGroup: ILogGroup;
|
|
546
562
|
}
|
|
@@ -12,6 +12,13 @@ import type { EcsClusterProps } from "./ecsTypes.js";
|
|
|
12
12
|
* layer consumers never see a `migrations` field, so duplicating the
|
|
13
13
|
* validation here would be unreachable.
|
|
14
14
|
*
|
|
15
|
+
* Same applies to `service.awaitMigrationsFrom`: it is a patterns-layer
|
|
16
|
+
* cross-service ordering knob, resolved into a `node.addDependency(...)` edge
|
|
17
|
+
* (`wireServiceMigrationDependencies` in `computeEcs.ts`) BEFORE reaching the
|
|
18
|
+
* resources layer. It is not a field on `EcsServiceProps`, so a direct
|
|
19
|
+
* `new EcsCluster(...)` consumer cannot pass it — there is no resources-layer
|
|
20
|
+
* code path to validate.
|
|
21
|
+
*
|
|
15
22
|
* @param props - The cluster props to validate
|
|
16
23
|
* @throws Error if validation fails
|
|
17
24
|
*/
|
|
@@ -13,6 +13,13 @@ import { ScalingType } from "./ecsTypes.js";
|
|
|
13
13
|
* layer consumers never see a `migrations` field, so duplicating the
|
|
14
14
|
* validation here would be unreachable.
|
|
15
15
|
*
|
|
16
|
+
* Same applies to `service.awaitMigrationsFrom`: it is a patterns-layer
|
|
17
|
+
* cross-service ordering knob, resolved into a `node.addDependency(...)` edge
|
|
18
|
+
* (`wireServiceMigrationDependencies` in `computeEcs.ts`) BEFORE reaching the
|
|
19
|
+
* resources layer. It is not a field on `EcsServiceProps`, so a direct
|
|
20
|
+
* `new EcsCluster(...)` consumer cannot pass it — there is no resources-layer
|
|
21
|
+
* code path to validate.
|
|
22
|
+
*
|
|
16
23
|
* @param props - The cluster props to validate
|
|
17
24
|
* @throws Error if validation fails
|
|
18
25
|
*/
|
|
@@ -94,6 +101,9 @@ export function validateEcsClusterProps(props) {
|
|
|
94
101
|
if (max !== undefined && (max < 100 || max > 200)) {
|
|
95
102
|
throw new Error(`Service '${service.name}': deployment.maxHealthyPercent must be between 100 and 200 (got ${max}).`);
|
|
96
103
|
}
|
|
104
|
+
if (min !== undefined && max !== undefined && min > max) {
|
|
105
|
+
throw new Error(`Service '${service.name}': deployment.minHealthyPercent (${min}) must be <= maxHealthyPercent (${max}).`);
|
|
106
|
+
}
|
|
97
107
|
if (min === 100 && max === 100) {
|
|
98
108
|
throw new Error(`Service '${service.name}': deployment.minHealthyPercent and maxHealthyPercent cannot both be 100 ` +
|
|
99
109
|
"(no capacity to drain or expand — deploys would never roll forward).");
|
|
@@ -3,12 +3,12 @@ import { SingletonFunction as singletonFunction, Function, Code, Architecture, F
|
|
|
3
3
|
import { FactName } from "aws-cdk-lib/region-info";
|
|
4
4
|
import path from "node:path";
|
|
5
5
|
import { fileURLToPath } from "node:url";
|
|
6
|
+
import { createHash } from "node:crypto";
|
|
6
7
|
import { SqsEventSource, DynamoEventSource, S3EventSource } from "aws-cdk-lib/aws-lambda-event-sources";
|
|
7
8
|
import { EventType } from "aws-cdk-lib/aws-s3";
|
|
8
9
|
import { PolicyStatement, Effect } from "aws-cdk-lib/aws-iam";
|
|
9
10
|
import { RetentionDays } from "aws-cdk-lib/aws-logs";
|
|
10
11
|
import { LogGroup } from "../logging/logGroup.js";
|
|
11
|
-
import { v4 as uuid } from "uuid";
|
|
12
12
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
13
13
|
import { resolveImportedSecret } from "../secrets/index.js";
|
|
14
14
|
import { toPascalCase } from "../../../utils/capitaliseString.js";
|
|
@@ -65,11 +65,29 @@ const SECRETS_EXTENSION = {
|
|
|
65
65
|
* mis-tune alarms relative to runtime behaviour.
|
|
66
66
|
*/
|
|
67
67
|
const LAMBDA_DEFAULT_TIMEOUT_SECONDS = 300;
|
|
68
|
+
/**
|
|
69
|
+
* Stable, deterministic uuid for a SingletonFunction so its logical ID
|
|
70
|
+
* (`SingletonLambda${uuid-without-dashes}`) does not drift across synths. A
|
|
71
|
+
* random default would recreate the singleton — and re-run any custom resource
|
|
72
|
+
* fronted by it — on every deploy.
|
|
73
|
+
*/
|
|
74
|
+
function deriveStableSingletonUuid(scope, id) {
    // Hash "<scope path>/<id>" so the same (scope, id) pair always yields the
    // same value.
    const hasher = createHash("sha256");
    hasher.update(`${scope.node.path}/${id}`);
    const hex = hasher.digest("hex");
    // Cut the first 32 hex characters into the 8-4-4-4-12 uuid grouping.
    const boundaries = [0, 8, 12, 16, 20, 32];
    const groups = [];
    for (let i = 1; i < boundaries.length; i += 1) {
        groups.push(hex.slice(boundaries[i - 1], boundaries[i]));
    }
    return groups.join("-");
}
|
|
68
86
|
export class SingletonFunction extends singletonFunction {
|
|
69
87
|
constructor(scope, id, props) {
|
|
70
88
|
super(scope, id, {
|
|
71
89
|
...props,
|
|
72
|
-
uuid: props.uuid ??
|
|
90
|
+
uuid: props.uuid ?? deriveStableSingletonUuid(scope, id),
|
|
73
91
|
timeout: Duration.seconds(props.timeout ?? LAMBDA_DEFAULT_TIMEOUT_SECONDS),
|
|
74
92
|
description: props.lambdaDescription ?? `${id} singleton lambda`,
|
|
75
93
|
runtime: props.runtime,
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { Duration } from "aws-cdk-lib";
|
|
2
2
|
import { Connections, type IConnectable, type IVpc } from "aws-cdk-lib/aws-ec2";
|
|
3
3
|
import { type IInstanceEngine } from "aws-cdk-lib/aws-rds";
|
|
4
|
+
import { type IGrantable, type Grant } from "aws-cdk-lib/aws-iam";
|
|
4
5
|
import { Construct } from "constructs";
|
|
5
6
|
import { SecurityGroup } from "../networking/securityGroup.js";
|
|
6
7
|
import { Secret } from "../secrets/index.js";
|
|
@@ -31,6 +32,15 @@ interface RdsProps {
|
|
|
31
32
|
encryption?: EncryptionConfig;
|
|
32
33
|
publiclyAccessible?: boolean;
|
|
33
34
|
deletionProtection?: boolean;
|
|
35
|
+
/**
|
|
36
|
+
* Enable RDS IAM database authentication on the instance. Opt-in; defaults to
|
|
37
|
+
* off (undefined → CDK omits the property, leaving existing consumers' synth
|
|
38
|
+
* unchanged). When true, IAM principals granted via {@link grantIamConnect}
|
|
39
|
+
* connect with short-lived `rds-db:connect` tokens instead of a stored
|
|
40
|
+
* password — password auth keeps working in parallel. See ADR
|
|
41
|
+
* decisions/2026-06-17-rls-role-auth-and-launch-gating.md.
|
|
42
|
+
*/
|
|
43
|
+
iamAuthentication?: boolean;
|
|
34
44
|
/** ARN or identifier of DB instance snapshot to restore from */
|
|
35
45
|
snapshotIdentifier?: string;
|
|
36
46
|
/** Username from the snapshot (required when restoring from snapshot to reset password) */
|
|
@@ -70,6 +80,15 @@ export declare class RdsInstance extends Construct implements IConnectable {
|
|
|
70
80
|
}>;
|
|
71
81
|
getDatabaseName(): string;
|
|
72
82
|
getConnectionString(): string;
|
|
83
|
+
/**
|
|
84
|
+
* Grant an IAM principal permission to connect as `dbUsername` via RDS IAM
|
|
85
|
+
* database authentication. Requires the instance to be created with
|
|
86
|
+
* `iamAuthentication: true`. Delegates to the L2 `grantConnect`, which scopes
|
|
87
|
+
* `rds-db:connect` to the exact `dbuser:<dbiResourceId>/<dbUsername>` ARN —
|
|
88
|
+
* never a bare wildcard. See ADR
|
|
89
|
+
* decisions/2026-06-17-rls-role-auth-and-launch-gating.md.
|
|
90
|
+
*/
|
|
91
|
+
grantIamConnect(grantee: IGrantable, dbUsername: string): Grant;
|
|
73
92
|
static build(id: string, props: RdsProps): (sb: StackBuilder) => Construct;
|
|
74
93
|
}
|
|
75
94
|
export {};
|
|
@@ -136,7 +136,8 @@ export class RdsInstance extends Construct {
|
|
|
136
136
|
deletionProtection: props.deletionProtection ?? true,
|
|
137
137
|
preferredMaintenanceWindow: props.preferredMaintenanceWindow ??
|
|
138
138
|
RDS_DEFAULTS.PREFERRED_MAINTENANCE_WINDOW,
|
|
139
|
-
publiclyAccessible: props.publiclyAccessible ?? false
|
|
139
|
+
publiclyAccessible: props.publiclyAccessible ?? false,
|
|
140
|
+
iamAuthentication: props.iamAuthentication
|
|
140
141
|
};
|
|
141
142
|
if (props.snapshotIdentifier) {
|
|
142
143
|
// Create from snapshot
|
|
@@ -328,6 +329,17 @@ exports.handler = async (event) => {
|
|
|
328
329
|
getConnectionString() {
|
|
329
330
|
return `${this.engineConfig.family}://${this.getHostEndpoint()}:${this.getHostPort()}/${this.getDatabaseName()}`;
|
|
330
331
|
}
|
|
332
|
+
/**
|
|
333
|
+
* Grant an IAM principal permission to connect as `dbUsername` via RDS IAM
|
|
334
|
+
* database authentication. Requires the instance to be created with
|
|
335
|
+
* `iamAuthentication: true`. Delegates to the L2 `grantConnect`, which scopes
|
|
336
|
+
* `rds-db:connect` to the exact `dbuser:<dbiResourceId>/<dbUsername>` ARN —
|
|
337
|
+
* never a bare wildcard. See ADR
|
|
338
|
+
* decisions/2026-06-17-rls-role-auth-and-launch-gating.md.
|
|
339
|
+
*/
|
|
340
|
+
grantIamConnect(grantee, dbUsername) {
|
|
341
|
+
return this.database.grantConnect(grantee, dbUsername);
|
|
342
|
+
}
|
|
331
343
|
static build(id, props) {
|
|
332
344
|
return (sb) => {
|
|
333
345
|
const newProps = {
|
|
@@ -29,32 +29,27 @@ export interface ClickHouseAlarmsProps {
|
|
|
29
29
|
asgName: string;
|
|
30
30
|
alarmTopic: ITopic;
|
|
31
31
|
/**
|
|
32
|
-
*
|
|
33
|
-
* emits `serverLogger.warn("ClickHouse", "Stuck merge detected")` when
|
|
34
|
-
* `system.merges` shows a merge elapsed > 30 min.
|
|
35
|
-
*/
|
|
36
|
-
webappLogGroup: ILogGroup;
|
|
37
|
-
/**
|
|
38
|
-
* Backup-task log group. Required to wire the backup-failure alarm —
|
|
32
|
+
* Backup-task log group. When present, wires the backup-failure alarm —
|
|
39
33
|
* `BACKUP DATABASE … TO S3(…)` emits `AccessDenied` / `S3Exception` lines
|
|
40
34
|
* when the IAM grant or bucket policy is misconfigured (silent before the
|
|
41
35
|
* alarm landed; the daily backup task exited non-zero with no signal).
|
|
36
|
+
* Omitted when `backupSchedule: false` — no backup task, no log group.
|
|
42
37
|
*/
|
|
43
|
-
backupTaskLogGroup
|
|
38
|
+
backupTaskLogGroup?: ILogGroup;
|
|
44
39
|
config?: ClickHouseAlarmThresholds;
|
|
45
40
|
}
|
|
46
41
|
/**
|
|
47
|
-
* Single-node ClickHouse posture alarms. Covers host-level CPU + (optional)
|
|
48
|
-
* memory and disk via the CloudWatch Agent metric namespace `CWAgent`, plus
|
|
49
|
-
*
|
|
42
|
+
* Single-node ClickHouse host-posture alarms. Covers host-level CPU + (optional)
|
|
43
|
+
* memory and disk via the CloudWatch Agent metric namespace `CWAgent`, plus the
|
|
44
|
+
* backup-failure log alarm when a backup-task log group is supplied:
|
|
50
45
|
*
|
|
51
|
-
* - **Stuck merges** — `client.ts` polls `system.merges` every 5 min and logs
|
|
52
|
-
* `serverLogger.warn("ClickHouse", "Stuck merge detected")` when elapsed
|
|
53
|
-
* exceeds 30 min. The metric filter on the webapp log group emits a count
|
|
54
|
-
* metric per match; the alarm fires on Sum >= 1 over 5 min × 2 evaluations.
|
|
55
46
|
* - **Backup failures** — `AccessDenied` or `S3Exception` from the backup
|
|
56
47
|
* task's BACKUP DATABASE TO S3 statement. Closes the silent-failure mode
|
|
57
48
|
* that masked the original IAM-grant misconfiguration (see
|
|
58
49
|
* `designs/2026-04-27-clickhouse-backup-iam-role.md`).
|
|
50
|
+
*
|
|
51
|
+
* The stuck-merge alarm — a `"Stuck merge detected"` line emitted by the webapp
|
|
52
|
+
* app process, not this construct — lives on the app service's declarative
|
|
53
|
+
* `logAlarms` instead; it is an app-log alarm, not a database-host concern.
|
|
59
54
|
*/
|
|
60
55
|
export declare function createClickHouseAlarms(props: ClickHouseAlarmsProps): Alarm[];
|
|
@@ -4,23 +4,23 @@ import { SnsAction } from "aws-cdk-lib/aws-cloudwatch-actions";
|
|
|
4
4
|
import { Metric } from "aws-cdk-lib/aws-cloudwatch";
|
|
5
5
|
import { FilterPattern, MetricFilter } from "aws-cdk-lib/aws-logs";
|
|
6
6
|
import { ALARM_DEFAULTS, registerAlarm, buildAlarmDescription } from "./alarmDefaults.js";
|
|
7
|
-
|
|
7
|
+
import { METRIC_NAMESPACE } from "./metricNamespaces.js";
|
|
8
8
|
/**
|
|
9
|
-
* Single-node ClickHouse posture alarms. Covers host-level CPU + (optional)
|
|
10
|
-
* memory and disk via the CloudWatch Agent metric namespace `CWAgent`, plus
|
|
11
|
-
*
|
|
9
|
+
* Single-node ClickHouse host-posture alarms. Covers host-level CPU + (optional)
|
|
10
|
+
* memory and disk via the CloudWatch Agent metric namespace `CWAgent`, plus the
|
|
11
|
+
* backup-failure log alarm when a backup-task log group is supplied:
|
|
12
12
|
*
|
|
13
|
-
* - **Stuck merges** — `client.ts` polls `system.merges` every 5 min and logs
|
|
14
|
-
* `serverLogger.warn("ClickHouse", "Stuck merge detected")` when elapsed
|
|
15
|
-
* exceeds 30 min. The metric filter on the webapp log group emits a count
|
|
16
|
-
* metric per match; the alarm fires on Sum >= 1 over 5 min × 2 evaluations.
|
|
17
13
|
* - **Backup failures** — `AccessDenied` or `S3Exception` from the backup
|
|
18
14
|
* task's BACKUP DATABASE TO S3 statement. Closes the silent-failure mode
|
|
19
15
|
* that masked the original IAM-grant misconfiguration (see
|
|
20
16
|
* `designs/2026-04-27-clickhouse-backup-iam-role.md`).
|
|
17
|
+
*
|
|
18
|
+
* The stuck-merge alarm — a `"Stuck merge detected"` line emitted by the webapp
|
|
19
|
+
* app process, not this construct — lives on the app service's declarative
|
|
20
|
+
* `logAlarms` instead; it is an app-log alarm, not a database-host concern.
|
|
21
21
|
*/
|
|
22
22
|
export function createClickHouseAlarms(props) {
|
|
23
|
-
const { scope, instanceRole, asgName, alarmTopic,
|
|
23
|
+
const { scope, instanceRole, asgName, alarmTopic, backupTaskLogGroup, config = {} } = props;
|
|
24
24
|
const alarms = [];
|
|
25
25
|
const snsAction = new SnsAction(alarmTopic);
|
|
26
26
|
const cpuAlarm = new Alarm(scope, "ClickHouseCpuAlarm", {
|
|
@@ -87,53 +87,31 @@ export function createClickHouseAlarms(props) {
|
|
|
87
87
|
treatMissingData: TreatMissingData.NOT_BREACHING
|
|
88
88
|
});
|
|
89
89
|
registerAlarm(diskCriticalAlarm, snsAction, alarms);
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
filterPattern: FilterPattern.literal('"Stuck merge detected"'),
|
|
96
|
-
metricValue: "1",
|
|
97
|
-
defaultValue: 0
|
|
98
|
-
});
|
|
99
|
-
const stuckMergeAlarm = new Alarm(scope, "ClickHouseStuckMergeAlarm", {
|
|
100
|
-
alarmDescription: buildAlarmDescription("ClickHouse merge stuck > 30 min — investigate parts pressure or replica health", undefined),
|
|
101
|
-
metric: new Metric({
|
|
102
|
-
namespace: CLICKHOUSE_METRIC_NAMESPACE,
|
|
103
|
-
metricName: stuckMergeMetricName,
|
|
104
|
-
period: Duration.minutes(5),
|
|
105
|
-
statistic: "Sum"
|
|
106
|
-
}),
|
|
107
|
-
threshold: 1,
|
|
108
|
-
evaluationPeriods: 2,
|
|
109
|
-
datapointsToAlarm: 2,
|
|
110
|
-
comparisonOperator: ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
|
|
111
|
-
treatMissingData: TreatMissingData.NOT_BREACHING
|
|
112
|
-
});
|
|
113
|
-
registerAlarm(stuckMergeAlarm, snsAction, alarms);
|
|
114
|
-
const backupFailureMetricName = "ClickHouseBackupFailureCount";
|
|
115
|
-
new MetricFilter(scope, "ClickHouseBackupFailureMetricFilter", {
|
|
116
|
-
logGroup: backupTaskLogGroup,
|
|
117
|
-
metricNamespace: CLICKHOUSE_METRIC_NAMESPACE,
|
|
118
|
-
metricName: backupFailureMetricName,
|
|
119
|
-
filterPattern: FilterPattern.anyTerm("AccessDenied", "S3Exception"),
|
|
120
|
-
metricValue: "1",
|
|
121
|
-
defaultValue: 0
|
|
122
|
-
});
|
|
123
|
-
const backupFailureAlarm = new Alarm(scope, "ClickHouseBackupFailureAlarm", {
|
|
124
|
-
alarmDescription: buildAlarmDescription(`ClickHouse BACKUP TO S3 emitted AccessDenied/S3Exception — verify instance role '${instanceRole.roleName}' grant on backup bucket`, undefined),
|
|
125
|
-
metric: new Metric({
|
|
126
|
-
namespace: CLICKHOUSE_METRIC_NAMESPACE,
|
|
90
|
+
if (backupTaskLogGroup !== undefined) {
|
|
91
|
+
const backupFailureMetricName = "ClickHouseBackupFailureCount";
|
|
92
|
+
new MetricFilter(scope, "ClickHouseBackupFailureMetricFilter", {
|
|
93
|
+
logGroup: backupTaskLogGroup,
|
|
94
|
+
metricNamespace: METRIC_NAMESPACE.CLICKHOUSE,
|
|
127
95
|
metricName: backupFailureMetricName,
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
96
|
+
filterPattern: FilterPattern.anyTerm("AccessDenied", "S3Exception"),
|
|
97
|
+
metricValue: "1",
|
|
98
|
+
defaultValue: 0
|
|
99
|
+
});
|
|
100
|
+
const backupFailureAlarm = new Alarm(scope, "ClickHouseBackupFailureAlarm", {
|
|
101
|
+
alarmDescription: buildAlarmDescription(`ClickHouse BACKUP TO S3 emitted AccessDenied/S3Exception — verify instance role '${instanceRole.roleName}' grant on backup bucket`, undefined),
|
|
102
|
+
metric: new Metric({
|
|
103
|
+
namespace: METRIC_NAMESPACE.CLICKHOUSE,
|
|
104
|
+
metricName: backupFailureMetricName,
|
|
105
|
+
period: Duration.hours(1),
|
|
106
|
+
statistic: "Sum"
|
|
107
|
+
}),
|
|
108
|
+
threshold: 1,
|
|
109
|
+
evaluationPeriods: 1,
|
|
110
|
+
datapointsToAlarm: 1,
|
|
111
|
+
comparisonOperator: ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
|
|
112
|
+
treatMissingData: TreatMissingData.NOT_BREACHING
|
|
113
|
+
});
|
|
114
|
+
registerAlarm(backupFailureAlarm, snsAction, alarms);
|
|
115
|
+
}
|
|
138
116
|
return alarms;
|
|
139
117
|
}
|
|
@@ -43,7 +43,11 @@ export function createEcsServiceAlarms(props) {
|
|
|
43
43
|
evaluationPeriods: 2,
|
|
44
44
|
datapointsToAlarm: 2,
|
|
45
45
|
comparisonOperator: ComparisonOperator.LESS_THAN_THRESHOLD,
|
|
46
|
-
|
|
46
|
+
// RunningTaskCount (Container Insights) is sparse, so missing data must
|
|
47
|
+
// not breach — otherwise metric gaps false-alarm healthy services and
|
|
48
|
+
// scaled-to-zero workers alarm permanently. A real count < threshold
|
|
49
|
+
// still fires.
|
|
50
|
+
treatMissingData: TreatMissingData.NOT_BREACHING
|
|
47
51
|
});
|
|
48
52
|
registerAlarm(runningTasksAlarm, snsAction, alarms);
|
|
49
53
|
}
|
|
@@ -4,3 +4,5 @@ export { createRdsAlarms, type RdsAlarmThresholds, type RdsAlarmsProps } from ".
|
|
|
4
4
|
export { createLambdaAlarms, type LambdaAlarmThresholds, type LambdaAlarmsProps } from "./lambdaAlarms.js";
|
|
5
5
|
export { createScheduleAlarms, type ScheduleAlarmThresholds, type CreateScheduleAlarmsProps } from "./scheduleAlarms.js";
|
|
6
6
|
export { createClickHouseAlarms, type ClickHouseAlarmThresholds, type ClickHouseAlarmsProps } from "./clickhouseAlarms.js";
|
|
7
|
+
export { createLogPatternAlarms, type LogPatternAlarmSpec, type LogPatternAlarmsProps } from "./logPatternAlarms.js";
|
|
8
|
+
export { METRIC_NAMESPACE, type MetricNamespace } from "./metricNamespaces.js";
|
|
@@ -4,3 +4,5 @@ export { createRdsAlarms } from "./rdsAlarms.js";
|
|
|
4
4
|
export { createLambdaAlarms } from "./lambdaAlarms.js";
|
|
5
5
|
export { createScheduleAlarms } from "./scheduleAlarms.js";
|
|
6
6
|
export { createClickHouseAlarms } from "./clickhouseAlarms.js";
|
|
7
|
+
export { createLogPatternAlarms } from "./logPatternAlarms.js";
|
|
8
|
+
export { METRIC_NAMESPACE } from "./metricNamespaces.js";
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import { Duration } from "aws-cdk-lib";
|
|
2
|
+
import { Alarm } from "aws-cdk-lib/aws-cloudwatch";
|
|
3
|
+
import { type ILogGroup } from "aws-cdk-lib/aws-logs";
|
|
4
|
+
import type { ITopic } from "aws-cdk-lib/aws-sns";
|
|
5
|
+
import type { Construct } from "constructs";
|
|
6
|
+
/**
|
|
7
|
+
* One log-pattern alarm: a CloudWatch metric filter over a log group plus the
|
|
8
|
+
* alarm that fires when the pattern is matched. Declarative — callers describe
|
|
9
|
+
* the match with plain strings; the raw CDK `FilterPattern` is built inside the
|
|
10
|
+
* primitive, so the spec never leaks raw CDK at the call site.
|
|
11
|
+
*
|
|
12
|
+
* Exactly one of `literal` / `anyTerms` must be set.
|
|
13
|
+
*/
|
|
14
|
+
export interface LogPatternAlarmSpec {
|
|
15
|
+
/** Stable logical-ID stem; the MetricFilter + Alarm derive their construct IDs from it. */
|
|
16
|
+
idStem: string;
|
|
17
|
+
/** CloudWatch metric name emitted by the filter and read by the alarm. */
|
|
18
|
+
metricName: string;
|
|
19
|
+
/** Responder-facing alarm description (the applicationId tag is appended automatically). */
|
|
20
|
+
description: string;
|
|
21
|
+
/** Match this exact CloudWatch Logs filter pattern (e.g. `'"Stuck merge detected"'`). */
|
|
22
|
+
literal?: string;
|
|
23
|
+
/** Match if ANY of these terms appears (CloudWatch Logs OR semantics). */
|
|
24
|
+
anyTerms?: string[];
|
|
25
|
+
/** Metric namespace for this spec; falls back to the props-level default. */
|
|
26
|
+
metricNamespace?: string;
|
|
27
|
+
/** Sum-over-period threshold the alarm fires at. Default 1 (first match). */
|
|
28
|
+
threshold?: number;
|
|
29
|
+
/** Metric period. Default 1 minute (fast fail-closed detection). */
|
|
30
|
+
period?: Duration;
|
|
31
|
+
/** Consecutive periods evaluated. Default 1. */
|
|
32
|
+
evaluationPeriods?: number;
|
|
33
|
+
/** Datapoints within the window that must breach. Default = evaluationPeriods. */
|
|
34
|
+
datapointsToAlarm?: number;
|
|
35
|
+
}
|
|
36
|
+
export interface LogPatternAlarmsProps {
|
|
37
|
+
scope: Construct;
|
|
38
|
+
/** Log group the metric filters attach to. */
|
|
39
|
+
logGroup: ILogGroup;
|
|
40
|
+
/** SNS topic alarms notify on both ALARM and OK transitions. */
|
|
41
|
+
alarmTopic: ITopic;
|
|
42
|
+
/** Default metric namespace for specs that do not override it per-entry. */
|
|
43
|
+
metricNamespace?: string;
|
|
44
|
+
/** The alarms to create. */
|
|
45
|
+
specs: LogPatternAlarmSpec[];
|
|
46
|
+
/** Application ID for webhook-to-application alarm mapping. */
|
|
47
|
+
applicationId?: string;
|
|
48
|
+
}
|
|
49
|
+
/**
|
|
50
|
+
* Create CloudWatch log-pattern alarms — a MetricFilter + Alarm per spec —
|
|
51
|
+
* wired to the SNS topic. Sibling to `createEcsServiceAlarms`: same conventions
|
|
52
|
+
* (`registerAlarm` for ALARM+OK actions, applicationId tagging via
|
|
53
|
+
* `tagAlarmsWithApplicationId`). Reusable across every factory consumer.
|
|
54
|
+
*/
|
|
55
|
+
export declare function createLogPatternAlarms(props: LogPatternAlarmsProps): Alarm[];
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import { Duration } from "aws-cdk-lib";
|
|
2
|
+
import { Alarm, ComparisonOperator, Metric, TreatMissingData } from "aws-cdk-lib/aws-cloudwatch";
|
|
3
|
+
import { SnsAction } from "aws-cdk-lib/aws-cloudwatch-actions";
|
|
4
|
+
import { FilterPattern, MetricFilter } from "aws-cdk-lib/aws-logs";
|
|
5
|
+
import { buildAlarmDescription, registerAlarm, tagAlarmsWithApplicationId } from "./alarmDefaults.js";
|
|
6
|
+
/**
|
|
7
|
+
* Validate a spec carries exactly one matcher, a usable id stem, and a
|
|
8
|
+
* resolvable namespace; return the built filter pattern + resolved namespace.
|
|
9
|
+
* Throws (not a `Result`) — this is synth-time CDK construction, where a
|
|
10
|
+
* malformed spec must fail the synth loudly rather than emit a dead alarm.
|
|
11
|
+
*/
|
|
12
|
+
function resolveSpec(spec, defaultNamespace) {
    // spec: one log-pattern alarm spec (reads idStem, literal, anyTerms,
    //       metricNamespace).
    // defaultNamespace: fallback used when the spec carries no namespace.
    // Returns { filterPattern, namespace }; throws Error on any invalid spec.
    //
    // Check the type as well as the content: a missing or non-string idStem
    // must surface as this validation error, not as a TypeError from `.trim()`.
    if (typeof spec.idStem !== "string" || spec.idStem.trim() === "") {
        throw new Error("logPatternAlarm spec requires a non-empty idStem");
    }
    const { idStem, literal, anyTerms } = spec;
    const hasLiteral = literal !== undefined;
    const hasAnyTerms = anyTerms !== undefined;
    // Equal flags mean either both matchers or neither were supplied.
    if (hasLiteral === hasAnyTerms) {
        throw new Error(`logPatternAlarm spec '${idStem}' must set exactly one of 'literal' or 'anyTerms'`);
    }
    if (hasAnyTerms && anyTerms.length === 0) {
        throw new Error(`logPatternAlarm spec '${idStem}' has an empty 'anyTerms' array`);
    }
    // The per-spec namespace wins; otherwise fall back to the shared default.
    const namespace = spec.metricNamespace ?? defaultNamespace;
    // The typeof guard also covers a null or otherwise non-string namespace,
    // which would previously have crashed on `.trim()`.
    if (typeof namespace !== "string" || namespace.trim() === "") {
        throw new Error(`logPatternAlarm spec '${idStem}' has no metric namespace — set spec.metricNamespace or props.metricNamespace`);
    }
    // Exactly one matcher is set at this point, so hasLiteral selects it.
    const filterPattern = hasLiteral
        ? FilterPattern.literal(literal)
        : FilterPattern.anyTerm(...anyTerms);
    return { filterPattern, namespace };
}
|
|
35
|
+
/**
 * Build CloudWatch log-pattern alarms: for every entry in `props.specs`, one
 * MetricFilter on the log group plus one Alarm on the metric it publishes,
 * wired to the SNS topic. Sibling to `createEcsServiceAlarms` and follows the
 * same conventions (`registerAlarm` for ALARM+OK actions, applicationId
 * tagging via `tagAlarmsWithApplicationId`).
 *
 * @param props - Reads `scope`, `logGroup`, `alarmTopic`, `metricNamespace`,
 *   `specs` and `applicationId`.
 * @returns The array handed to `registerAlarm` for every alarm (presumably
 *   populated there — see alarmDefaults.js).
 * @throws {Error} When `resolveSpec` rejects a spec.
 */
export function createLogPatternAlarms(props) {
    const { scope, logGroup, alarmTopic: topic, metricNamespace: fallbackNamespace, specs, applicationId } = props;
    const created = [];
    // One action instance is shared by all alarms created in this call.
    const topicAction = new SnsAction(topic);
    for (const spec of specs) {
        const resolved = resolveSpec(spec, fallbackNamespace);
        const filterProps = {
            logGroup,
            metricNamespace: resolved.namespace,
            metricName: spec.metricName,
            filterPattern: resolved.filterPattern,
            metricValue: "1",
            defaultValue: 0
        };
        new MetricFilter(scope, `${spec.idStem}MetricFilter`, filterProps);
        const periods = spec.evaluationPeriods ?? 1;
        const alarmDescription = buildAlarmDescription(spec.description, applicationId);
        // The alarm reads the same namespace/metricName the filter publishes to.
        const metric = new Metric({
            namespace: resolved.namespace,
            metricName: spec.metricName,
            period: spec.period ?? Duration.minutes(1),
            statistic: "Sum"
        });
        const alarm = new Alarm(scope, `${spec.idStem}Alarm`, {
            alarmDescription,
            metric,
            threshold: spec.threshold ?? 1,
            evaluationPeriods: periods,
            // Unless overridden, every evaluated period must breach.
            datapointsToAlarm: spec.datapointsToAlarm ?? periods,
            comparisonOperator: ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
            treatMissingData: TreatMissingData.NOT_BREACHING
        });
        registerAlarm(alarm, topicAction, created);
    }
    tagAlarmsWithApplicationId(created, applicationId);
    return created;
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
 * Canonical CloudWatch metric namespaces for Fjall — the single place they
 * are defined.
 *
 * Log-pattern metric filters and the alarms built on them read from these
 * namespaces. The webapp app service declares RLS alarms under `WEBAPP` and
 * relocates the ClickHouse stuck-merge alarm under `CLICKHOUSE` (the log line
 * is app-emitted, so it is an app-log alarm carrying a database-domain
 * namespace).
 */
export declare const METRIC_NAMESPACE: Readonly<{
    CLICKHOUSE: "Fjall/ClickHouse";
    WEBAPP: "Fjall/WebApp";
}>;
|
|
13
|
+
/** Union of the namespace string literals held in `METRIC_NAMESPACE`. */
export type MetricNamespace = (typeof METRIC_NAMESPACE)[keyof typeof METRIC_NAMESPACE];
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
 * Single source of truth for Fjall CloudWatch metric namespaces.
 *
 * Log-pattern metric filters and their alarms read from these namespaces. The
 * webapp app service declares RLS alarms under `WEBAPP` and relocates the
 * ClickHouse stuck-merge alarm under `CLICKHOUSE` (the log line is app-emitted,
 * so it is an app-log alarm carrying a database-domain namespace).
 *
 * Frozen so the runtime object matches its `readonly` type declaration: a
 * consumer cannot reassign a namespace and silently split filters and alarms
 * across different namespaces.
 */
export const METRIC_NAMESPACE = Object.freeze({
    CLICKHOUSE: "Fjall/ClickHouse",
    WEBAPP: "Fjall/WebApp"
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fjall/components-infrastructure",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.18.0",
|
|
4
4
|
"license": "SEE LICENSE IN LICENSE",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -53,7 +53,6 @@
|
|
|
53
53
|
"@peculiar/x509": "1.14.0",
|
|
54
54
|
"@types/aws-lambda": "^8.10.161",
|
|
55
55
|
"@types/node": "^25.6.0",
|
|
56
|
-
"@types/uuid": "^11.0.0",
|
|
57
56
|
"@typescript-eslint/eslint-plugin": "^8.59.1",
|
|
58
57
|
"@typescript-eslint/parser": "^8.59.1",
|
|
59
58
|
"eslint": "^10.2.1",
|
|
@@ -63,10 +62,9 @@
|
|
|
63
62
|
},
|
|
64
63
|
"dependencies": {
|
|
65
64
|
"@aws-sdk/client-organizations": "^3.1038.0",
|
|
66
|
-
"@fjall/generator": "^2.
|
|
67
|
-
"@fjall/util": "^2.
|
|
68
|
-
"constructs": "^10.0.0"
|
|
69
|
-
"uuid": "^14.0.0"
|
|
65
|
+
"@fjall/generator": "^2.18.0",
|
|
66
|
+
"@fjall/util": "^2.18.0",
|
|
67
|
+
"constructs": "^10.0.0"
|
|
70
68
|
},
|
|
71
69
|
"overrides": {
|
|
72
70
|
"@smithy/core": "2.5.5"
|
|
@@ -79,5 +77,5 @@
|
|
|
79
77
|
"engines": {
|
|
80
78
|
"node": ">=18.0.0"
|
|
81
79
|
},
|
|
82
|
-
"gitHead": "
|
|
80
|
+
"gitHead": "37008ca5469398c42a09e6babc8cc4192ab938b2"
|
|
83
81
|
}
|