@cdklabs/multi-az-observability 0.0.1-alpha.6 → 0.0.1-alpha.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.jsii +2826 -1008
- package/.jsii.tabl.json +1 -0
- package/API.md +5406 -1585
- package/README.md +176 -152
- package/lib/alarmsandrules/AvailabilityAndLatencyAlarmsAndRules.d.ts +18 -14
- package/lib/alarmsandrules/AvailabilityAndLatencyAlarmsAndRules.js +98 -60
- package/lib/alarmsandrules/BaseOperationRegionalAlarmsAndRules.d.ts +3 -3
- package/lib/alarmsandrules/BaseOperationRegionalAlarmsAndRules.js +2 -2
- package/lib/alarmsandrules/BaseOperationZonalAlarmsAndRules.d.ts +6 -15
- package/lib/alarmsandrules/BaseOperationZonalAlarmsAndRules.js +2 -10
- package/lib/alarmsandrules/CanaryOperationZonalAlarmsAndRules.d.ts +11 -3
- package/lib/alarmsandrules/CanaryOperationZonalAlarmsAndRules.js +29 -13
- package/lib/alarmsandrules/IBaseOperationZonalAlarmsAndRules.d.ts +0 -8
- package/lib/alarmsandrules/IBaseOperationZonalAlarmsAndRules.js +1 -1
- package/lib/alarmsandrules/IOperationAlarmsAndRules.d.ts +26 -23
- package/lib/alarmsandrules/IOperationAlarmsAndRules.js +1 -1
- package/lib/alarmsandrules/IServerSideOperationRegionalAlarmsAndRules.js +1 -1
- package/lib/alarmsandrules/IServiceAlarmsAndRules.d.ts +19 -15
- package/lib/alarmsandrules/IServiceAlarmsAndRules.js +1 -1
- package/lib/alarmsandrules/OperationAlarmsAndRules.d.ts +21 -18
- package/lib/alarmsandrules/OperationAlarmsAndRules.js +60 -72
- package/lib/alarmsandrules/ServerSideOperationRegionalAlarmsAndRules.d.ts +5 -5
- package/lib/alarmsandrules/ServerSideOperationRegionalAlarmsAndRules.js +2 -3
- package/lib/alarmsandrules/ServerSideOperationZonalAlarmsAndRules.d.ts +21 -13
- package/lib/alarmsandrules/ServerSideOperationZonalAlarmsAndRules.js +48 -29
- package/lib/alarmsandrules/ServiceAlarmsAndRules.d.ts +19 -15
- package/lib/alarmsandrules/ServiceAlarmsAndRules.js +34 -136
- package/lib/alarmsandrules/props/BaseOperationRegionalAlarmsAndRulesProps.d.ts +4 -3
- package/lib/alarmsandrules/props/BaseOperationRegionalAlarmsAndRulesProps.js +1 -1
- package/lib/alarmsandrules/props/BaseOperationZonalAlarmsAndRulesProps.d.ts +6 -44
- package/lib/alarmsandrules/props/BaseOperationZonalAlarmsAndRulesProps.js +1 -1
- package/lib/alarmsandrules/props/OperationAlarmsAndRulesProps.d.ts +39 -7
- package/lib/alarmsandrules/props/OperationAlarmsAndRulesProps.js +1 -1
- package/lib/azmapper/AvailabilityZoneMapper.js +8 -9
- package/lib/basic_observability/BasicServiceDashboard.d.ts +0 -3
- package/lib/basic_observability/BasicServiceDashboard.js +24 -112
- package/lib/basic_observability/BasicServiceMultiAZObservability.d.ts +8 -9
- package/lib/basic_observability/BasicServiceMultiAZObservability.js +97 -349
- package/lib/basic_observability/props/ApplicationLoadBalancerDetectionProps.d.ts +66 -0
- package/lib/basic_observability/props/ApplicationLoadBalancerDetectionProps.js +3 -0
- package/lib/basic_observability/props/BasicServiceDashboardProps.d.ts +27 -13
- package/lib/basic_observability/props/BasicServiceDashboardProps.js +1 -1
- package/lib/basic_observability/props/BasicServiceMultiAZObservabilityProps.d.ts +10 -44
- package/lib/basic_observability/props/BasicServiceMultiAZObservabilityProps.js +1 -1
- package/lib/basic_observability/props/NatGatewayDetectionProps.d.ts +31 -0
- package/lib/basic_observability/props/NatGatewayDetectionProps.js +3 -0
- package/lib/canaries/CanaryFunction.js +14 -13
- package/lib/canaries/CanaryTest.js +4 -4
- package/lib/canaries/src/canary.zip +0 -0
- package/lib/dashboards/ContributorInsightsWidget.d.ts +1 -1
- package/lib/dashboards/ContributorInsightsWidget.js +13 -17
- package/lib/dashboards/OperationAvailabilityAndLatencyDashboard.d.ts +2 -6
- package/lib/dashboards/OperationAvailabilityAndLatencyDashboard.js +436 -510
- package/lib/dashboards/ServiceAvailabilityAndLatencyDashboard.d.ts +2 -10
- package/lib/dashboards/ServiceAvailabilityAndLatencyDashboard.js +420 -416
- package/lib/dashboards/props/OperationAvailabilityAndLatencyDashboardProps.d.ts +3 -66
- package/lib/dashboards/props/OperationAvailabilityAndLatencyDashboardProps.js +1 -1
- package/lib/dashboards/props/OperationAvailabilityWidgetProps.d.ts +7 -2
- package/lib/dashboards/props/OperationAvailabilityWidgetProps.js +1 -1
- package/lib/dashboards/props/OperationLatencyWidgetProps.d.ts +7 -2
- package/lib/dashboards/props/OperationLatencyWidgetProps.js +1 -1
- package/lib/dashboards/props/ServiceAvailabilityAndLatencyDashboardProps.d.ts +8 -8
- package/lib/dashboards/props/ServiceAvailabilityAndLatencyDashboardProps.js +1 -1
- package/lib/index.d.ts +56 -25
- package/lib/index.js +37 -19
- package/lib/metrics/ApplicationLoadBalancerMetrics.d.ts +79 -2
- package/lib/metrics/ApplicationLoadBalancerMetrics.js +883 -26
- package/lib/metrics/AvailabilityAndLatencyMetrics.d.ts +23 -0
- package/lib/metrics/AvailabilityAndLatencyMetrics.js +125 -31
- package/lib/metrics/NatGatewayMetrics.d.ts +113 -0
- package/lib/metrics/NatGatewayMetrics.js +357 -0
- package/lib/metrics/RegionalAvailabilityMetrics.js +9 -10
- package/lib/metrics/RegionalLatencyMetrics.d.ts +1 -1
- package/lib/metrics/RegionalLatencyMetrics.js +27 -20
- package/lib/metrics/ZonalAvailabilityMetrics.d.ts +2 -8
- package/lib/metrics/ZonalAvailabilityMetrics.js +13 -28
- package/lib/metrics/ZonalLatencyMetrics.d.ts +2 -1
- package/lib/metrics/ZonalLatencyMetrics.js +33 -23
- package/lib/metrics/props/AvailabilityAndLatencyMetricProps.d.ts +4 -0
- package/lib/metrics/props/AvailabilityAndLatencyMetricProps.js +1 -1
- package/lib/metrics/props/LatencyMetricProps.d.ts +6 -0
- package/lib/metrics/props/LatencyMetricProps.js +1 -1
- package/lib/metrics/props/ZonalAvailabilityMetricProps.d.ts +4 -0
- package/lib/metrics/props/ZonalAvailabilityMetricProps.js +1 -1
- package/lib/metrics/props/ZonalLatencyMetricProps.d.ts +4 -0
- package/lib/metrics/props/ZonalLatencyMetricProps.js +1 -1
- package/lib/monitoring/src/monitoring-layer.zip +0 -0
- package/lib/outlier-detection/ApplicationLoadBalancerAvailabilityOutlierAlgorithm.d.ts +10 -0
- package/lib/outlier-detection/ApplicationLoadBalancerAvailabilityOutlierAlgorithm.js +15 -0
- package/lib/{basic_observability/props/ApplicationLoadBalancerLatencyOutlierCalculation.d.ts → outlier-detection/ApplicationLoadBalancerLatencyOutlierAlgorithm.d.ts} +1 -1
- package/lib/outlier-detection/ApplicationLoadBalancerLatencyOutlierAlgorithm.js +23 -0
- package/lib/outlier-detection/LatencyOutlierMetricAggregation.d.ts +22 -0
- package/lib/outlier-detection/LatencyOutlierMetricAggregation.js +27 -0
- package/lib/outlier-detection/OutlierDetectionFunction.js +7 -6
- package/lib/outlier-detection/PacketLossOutlierAlgorithm.d.ts +10 -0
- package/lib/outlier-detection/PacketLossOutlierAlgorithm.js +15 -0
- package/lib/outlier-detection/src/outlier-detection.zip +0 -0
- package/lib/outlier-detection/src/scipy-layer.zip +0 -0
- package/lib/services/CanaryMetrics.d.ts +4 -3
- package/lib/services/CanaryMetrics.js +3 -4
- package/lib/services/CanaryTestAvailabilityMetricsOverride.d.ts +21 -0
- package/lib/services/CanaryTestAvailabilityMetricsOverride.js +23 -0
- package/lib/services/CanaryTestLatencyMetricsOverride.d.ts +15 -0
- package/lib/services/CanaryTestLatencyMetricsOverride.js +20 -0
- package/lib/services/CanaryTestMetricsOverride.d.ts +1 -13
- package/lib/services/CanaryTestMetricsOverride.js +2 -4
- package/lib/services/ContributorInsightRuleDetails.js +1 -1
- package/lib/services/ICanaryMetrics.d.ts +4 -3
- package/lib/services/ICanaryMetrics.js +1 -1
- package/lib/services/ICanaryTestAvailabilityMetricsOverride.d.ts +23 -0
- package/lib/services/ICanaryTestAvailabilityMetricsOverride.js +3 -0
- package/lib/services/ICanaryTestLatencyMetricsOverride.d.ts +13 -0
- package/lib/services/ICanaryTestLatencyMetricsOverride.js +3 -0
- package/lib/services/ICanaryTestMetricsOverride.d.ts +0 -12
- package/lib/services/ICanaryTestMetricsOverride.js +1 -1
- package/lib/services/IInstrumentedServiceMultiAZObservability.d.ts +13 -3
- package/lib/services/IInstrumentedServiceMultiAZObservability.js +1 -1
- package/lib/services/IOperation.d.ts +8 -6
- package/lib/services/IOperation.js +1 -1
- package/lib/services/IOperationAvailabilityMetricDetails.d.ts +18 -0
- package/lib/services/IOperationAvailabilityMetricDetails.js +3 -0
- package/lib/services/IOperationLatencyMetricDetails.d.ts +12 -0
- package/lib/services/IOperationLatencyMetricDetails.js +3 -0
- package/lib/services/IOperationMetricDetails.d.ts +0 -12
- package/lib/services/IOperationMetricDetails.js +1 -1
- package/lib/services/IService.d.ts +18 -4
- package/lib/services/IService.js +1 -1
- package/lib/services/IServiceAvailabilityMetricDetails.d.ts +18 -0
- package/lib/services/IServiceAvailabilityMetricDetails.js +3 -0
- package/lib/services/IServiceLatencyMetricDetails.d.ts +12 -0
- package/lib/services/IServiceLatencyMetricDetails.js +3 -0
- package/lib/services/IServiceMetricDetails.d.ts +0 -12
- package/lib/services/IServiceMetricDetails.js +1 -1
- package/lib/services/InstrumentedServiceMultiAZObservability.d.ts +15 -3
- package/lib/services/InstrumentedServiceMultiAZObservability.js +252 -228
- package/lib/services/Operation.d.ts +8 -6
- package/lib/services/Operation.js +4 -3
- package/lib/services/OperationAvailabilityMetricDetails.d.ts +22 -0
- package/lib/services/OperationAvailabilityMetricDetails.js +24 -0
- package/lib/services/OperationLatencyMetricDetails.d.ts +16 -0
- package/lib/services/OperationLatencyMetricDetails.js +21 -0
- package/lib/services/OperationMetricDetails.d.ts +1 -13
- package/lib/services/OperationMetricDetails.js +2 -8
- package/lib/services/Service.d.ts +18 -4
- package/lib/services/Service.js +4 -2
- package/lib/services/ServiceAvailabilityMetricDetails.d.ts +21 -0
- package/lib/services/ServiceAvailabilityMetricDetails.js +20 -0
- package/lib/services/ServiceLatencyMetricDetails.d.ts +15 -0
- package/lib/services/ServiceLatencyMetricDetails.js +19 -0
- package/lib/services/ServiceMetricDetails.d.ts +1 -13
- package/lib/services/ServiceMetricDetails.js +2 -4
- package/lib/services/props/CanaryMetricProps.d.ts +4 -3
- package/lib/services/props/CanaryMetricProps.js +1 -1
- package/lib/services/props/CanaryTestAvailabilityMetricsOverrideProps.d.ts +22 -0
- package/lib/services/props/CanaryTestAvailabilityMetricsOverrideProps.js +3 -0
- package/lib/services/props/CanaryTestLatencyMetricsOverrideProps.d.ts +14 -0
- package/lib/services/props/CanaryTestLatencyMetricsOverrideProps.js +3 -0
- package/lib/services/props/CanaryTestMetricsOverrideProps.d.ts +0 -16
- package/lib/services/props/CanaryTestMetricsOverrideProps.js +1 -1
- package/lib/services/props/InstrumentedServiceMultiAZObservabilityProps.d.ts +50 -5
- package/lib/services/props/InstrumentedServiceMultiAZObservabilityProps.js +1 -1
- package/lib/services/props/MetricDimensions.js +1 -1
- package/lib/services/props/OperationAvailabilityMetricDetailsProps.d.ts +22 -0
- package/lib/services/props/OperationAvailabilityMetricDetailsProps.js +3 -0
- package/lib/services/props/OperationLatencyMetricDetailsProps.d.ts +14 -0
- package/lib/services/props/OperationLatencyMetricDetailsProps.js +3 -0
- package/lib/services/props/OperationMetricDetailsProps.d.ts +0 -16
- package/lib/services/props/OperationMetricDetailsProps.js +1 -1
- package/lib/services/props/OperationProps.d.ts +4 -3
- package/lib/services/props/OperationProps.js +1 -1
- package/lib/services/props/ServiceAvailabilityMetricDetailsProps.d.ts +18 -0
- package/lib/services/props/ServiceAvailabilityMetricDetailsProps.js +3 -0
- package/lib/services/props/ServiceLatencyMetricDetailsProps.d.ts +12 -0
- package/lib/services/props/ServiceLatencyMetricDetailsProps.js +3 -0
- package/lib/services/props/ServiceMetricDetailsProps.d.ts +0 -12
- package/lib/services/props/ServiceMetricDetailsProps.js +1 -1
- package/lib/services/props/ServiceProps.d.ts +18 -4
- package/lib/services/props/ServiceProps.js +1 -1
- package/lib/utilities/MetricsHelper.d.ts +17 -9
- package/lib/utilities/MetricsHelper.js +34 -10
- package/lib/utilities/MinimumUnhealthyTargets.d.ts +25 -0
- package/lib/utilities/MinimumUnhealthyTargets.js +3 -0
- package/package.json +14 -14
- package/rosetta/default.ts-fixture +72 -0
- package/rosetta/service.ts-fixture +144 -0
- package/lib/basic_observability/props/ApplicationLoadBalancerLatencyOutlierCalculation.js +0 -23
- package/lib/services/IBasicServiceMultiAZObservability.d.ts +0 -45
- package/lib/services/IBasicServiceMultiAZObservability.js +0 -3
package/README.md
CHANGED
|
@@ -1,175 +1,199 @@
|
|
|
1
|
+
  
|
|
2
|
+
|
|
1
3
|
# multi-az-observability
|
|
2
4
|
This is a CDK construct for multi-AZ observability to help detect single-AZ impairments. This is currently an `alpha` version, but is being used in the AWS [Advanced Multi-AZ Resilience Patterns](https://catalog.workshops.aws/multi-az-gray-failures/en-US) workshop.
|
|
3
5
|
|
|
4
|
-
There is a lot of available information to think through and combine to provide signals about single-AZ impact. To simplify the setup and use reasonable defaults, this construct (available in TypeScript, Go, Python,
|
|
6
|
+
There is a lot of available information to think through and combine to provide signals about single-AZ impact. To simplify the setup and use reasonable defaults, this construct (available in [TypeScript](https://www.npmjs.com/package/@cdklabs/multi-az-observability), [Go](https://github.com/cdklabs/cdk-multi-az-observability-go), [Python](https://pypi.org/project/cdklabs.multi-az-observability/), [.NET](https://www.nuget.org/packages/Cdklabs.MultiAZObservability), and [Java](https://central.sonatype.com/artifact/io.github.cdklabs/cdk-multi-az-observability)) sets up the necessary observability. To use the CDK construct, you first define your service like this:
|
|
5
7
|
|
|
6
|
-
```
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
8
|
+
```typescript
|
|
9
|
+
let service: IService = new Service({
|
|
10
|
+
serviceName: 'test',
|
|
11
|
+
availabilityZoneNames: vpc.availabilityZones,
|
|
12
|
+
baseUrl: 'http://www.example.com',
|
|
13
|
+
faultCountThreshold: 25,
|
|
14
|
+
period: Duration.seconds(60),
|
|
15
|
+
loadBalancer: loadBalancer,
|
|
16
|
+
targetGroups: [ targetGroup1, targetGroup2 ],
|
|
17
|
+
defaultAvailabilityMetricDetails: new ServiceAvailabilityMetricDetails({
|
|
18
|
+
metricNamespace: 'front-end/metrics',
|
|
19
|
+
successMetricNames: ['Success'],
|
|
20
|
+
faultMetricNames: ['Fault', 'Error'],
|
|
21
|
+
alarmStatistic: 'Sum',
|
|
22
|
+
unit: Unit.COUNT,
|
|
23
|
+
period: Duration.seconds(60),
|
|
24
|
+
evaluationPeriods: 5,
|
|
25
|
+
datapointsToAlarm: 3,
|
|
26
|
+
successAlarmThreshold: 99.9,
|
|
27
|
+
faultAlarmThreshold: 0.1,
|
|
28
|
+
graphedFaultStatistics: ['Sum'],
|
|
29
|
+
graphedSuccessStatistics: ['Sum'],
|
|
27
30
|
}),
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
Unit = Unit.MILLISECONDS,
|
|
31
|
+
defaultLatencyMetricDetails: new ServiceLatencyMetricDetails({
|
|
32
|
+
metricNamespace: 'front-end/metrics',
|
|
33
|
+
successMetricNames: ['SuccessLatency'],
|
|
34
|
+
faultMetricNames: ['FaultLatency'],
|
|
35
|
+
alarmStatistic: 'p99',
|
|
36
|
+
unit: Unit.MILLISECONDS,
|
|
37
|
+
period: Duration.seconds(60),
|
|
38
|
+
evaluationPeriods: 5,
|
|
39
|
+
datapointsToAlarm: 3,
|
|
40
|
+
successAlarmThreshold: Duration.millis(150),
|
|
41
|
+
graphedFaultStatistics: ['p99'],
|
|
42
|
+
graphedSuccessStatistics: ['p50', 'p99', 'tm99'],
|
|
41
43
|
}),
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
44
|
+
defaultContributorInsightRuleDetails: new ContributorInsightRuleDetails({
|
|
45
|
+
successLatencyMetricJsonPath: '$.SuccessLatency',
|
|
46
|
+
faultMetricJsonPath: '$.Faults',
|
|
47
|
+
operationNameJsonPath: '$.Operation',
|
|
48
|
+
instanceIdJsonPath: '$.InstanceId',
|
|
49
|
+
availabilityZoneIdJsonPath: '$.AZ-ID',
|
|
50
|
+
logGroups: [logGroup],
|
|
49
51
|
}),
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
52
|
+
canaryTestProps: {
|
|
53
|
+
requestCount: 10,
|
|
54
|
+
schedule: 'rate(1 minute)',
|
|
55
|
+
loadBalancer: loadBalancer,
|
|
56
|
+
networkConfiguration: {
|
|
57
|
+
vpc: vpc,
|
|
58
|
+
subnetSelection: { subnetType: SubnetType.PRIVATE_ISOLATED },
|
|
59
|
+
},
|
|
60
|
+
},
|
|
61
|
+
minimumUnhealthyTargets: {
|
|
62
|
+
percentage: 0.1
|
|
58
63
|
}
|
|
59
|
-
});
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
}
|
|
64
|
+
});
|
|
65
|
+
|
|
66
|
+
let rideOperation: Operation = {
|
|
67
|
+
operationName: 'ride',
|
|
68
|
+
service: service,
|
|
69
|
+
path: '/ride',
|
|
70
|
+
critical: true,
|
|
71
|
+
httpMethods: ['GET'],
|
|
72
|
+
serverSideContributorInsightRuleDetails: new ContributorInsightRuleDetails({
|
|
73
|
+
logGroups: [logGroup],
|
|
74
|
+
successLatencyMetricJsonPath: '$.SuccessLatency',
|
|
75
|
+
faultMetricJsonPath: '$.Faults',
|
|
76
|
+
operationNameJsonPath: '$.Operation',
|
|
77
|
+
instanceIdJsonPath: '$.InstanceId',
|
|
78
|
+
availabilityZoneIdJsonPath: '$.AZ-ID',
|
|
79
|
+
}),
|
|
80
|
+
serverSideAvailabilityMetricDetails: new OperationAvailabilityMetricDetails(
|
|
81
|
+
{
|
|
82
|
+
operationName: 'ride',
|
|
83
|
+
metricDimensions: new MetricDimensions(
|
|
84
|
+
{ Operation: 'ride' },
|
|
85
|
+
'AZ-ID',
|
|
86
|
+
'Region',
|
|
87
|
+
),
|
|
88
|
+
},
|
|
89
|
+
service.defaultAvailabilityMetricDetails,
|
|
90
|
+
),
|
|
91
|
+
serverSideLatencyMetricDetails: new OperationLatencyMetricDetails(
|
|
92
|
+
{
|
|
93
|
+
operationName: 'ride',
|
|
94
|
+
metricDimensions: new MetricDimensions(
|
|
95
|
+
{ Operation: 'ride' },
|
|
96
|
+
'AZ-ID',
|
|
97
|
+
'Region',
|
|
98
|
+
),
|
|
99
|
+
},
|
|
100
|
+
service.defaultLatencyMetricDetails,
|
|
101
|
+
),
|
|
102
|
+
};
|
|
103
|
+
|
|
104
|
+
let payOperation: Operation = {
|
|
105
|
+
operationName: 'pay',
|
|
106
|
+
service: service,
|
|
107
|
+
path: '/pay',
|
|
108
|
+
critical: true,
|
|
109
|
+
httpMethods: ['GET'],
|
|
110
|
+
serverSideContributorInsightRuleDetails: new ContributorInsightRuleDetails({
|
|
111
|
+
logGroups: [logGroup],
|
|
112
|
+
successLatencyMetricJsonPath: '$.SuccessLatency',
|
|
113
|
+
faultMetricJsonPath: '$.Faults',
|
|
114
|
+
operationNameJsonPath: '$.Operation',
|
|
115
|
+
instanceIdJsonPath: '$.InstanceId',
|
|
116
|
+
availabilityZoneIdJsonPath: '$.AZ-ID',
|
|
117
|
+
}),
|
|
118
|
+
serverSideAvailabilityMetricDetails: new OperationAvailabilityMetricDetails(
|
|
119
|
+
{
|
|
120
|
+
operationName: 'pay',
|
|
121
|
+
metricDimensions: new MetricDimensions(
|
|
122
|
+
{ Operation: 'pay' },
|
|
123
|
+
'AZ-ID',
|
|
124
|
+
'Region',
|
|
125
|
+
),
|
|
126
|
+
},
|
|
127
|
+
service.defaultAvailabilityMetricDetails,
|
|
128
|
+
),
|
|
129
|
+
serverSideLatencyMetricDetails: new OperationLatencyMetricDetails(
|
|
130
|
+
{
|
|
131
|
+
operationName: 'pay',
|
|
132
|
+
metricDimensions: new MetricDimensions(
|
|
133
|
+
{ Operation: 'pay' },
|
|
134
|
+
'AZ-ID',
|
|
135
|
+
'Region',
|
|
136
|
+
),
|
|
137
|
+
},
|
|
138
|
+
service.defaultLatencyMetricDetails,
|
|
139
|
+
),
|
|
140
|
+
};
|
|
141
|
+
|
|
142
|
+
service.addOperation(rideOperation);
|
|
143
|
+
service.addOperation(payOperation);
|
|
136
144
|
```
|
|
137
145
|
|
|
138
146
|
Then you provide that service definition to the CDK construct.
|
|
139
147
|
|
|
140
|
-
```
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
OutlierDetectionAlgorithm = OutlierDetectionAlgorithm.STATIC
|
|
148
|
+
```typescript fixture=service
|
|
149
|
+
new InstrumentedServiceMultiAZObservability(stack, 'MAZObservability', {
|
|
150
|
+
createDashboards: true,
|
|
151
|
+
service: service,
|
|
152
|
+
interval: Duration.minutes(60)
|
|
146
153
|
});
|
|
147
154
|
```
|
|
148
155
|
|
|
149
|
-
You define some characteristics of the service, default values for metrics and alarms, and then add operations as well as any overrides for default values that you need. The construct can also automatically create synthetic canaries that test each operation with a very simple HTTP check, or you can configure your own synthetics and just tell the construct about the metric details and optionally log files. This creates metrics, alarms, and dashboards that can be used to detect single-AZ impact.
|
|
156
|
+
You define some characteristics of the service, default values for metrics and alarms, and then add operations as well as any overrides for default values that you need. The construct can also automatically create synthetic canaries that test each operation with a very simple HTTP check, or you can configure your own synthetics and just tell the construct about the metric details and optionally log files. This creates metrics, alarms, and dashboards that can be used to detect single-AZ impact. You can access these alarms from the `multiAvailabilityZoneObservability` object and use them in your CDK project to start automation, send SNS notifications, or incorporate them into your own dashboards.
|
|
150
157
|
|
|
151
|
-
If you don't have service specific logs and custom metrics with per-AZ dimensions, you can still use the construct to evaluate ALB and NAT Gateway metrics to find single AZ
|
|
158
|
+
If you don't have service-specific logs and custom metrics with per-AZ dimensions, you can still use the construct to evaluate ALB and/or NAT Gateway metrics to find single-AZ impairments.
|
|
152
159
|
|
|
153
|
-
```
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
{
|
|
158
|
-
|
|
159
|
-
|
|
160
|
+
```typescript
|
|
161
|
+
new BasicServiceMultiAZObservability(stack, 'MAZObservability', {
|
|
162
|
+
applicationLoadBalancerProps: {
|
|
163
|
+
albTargetGroupMap: [
|
|
164
|
+
{
|
|
165
|
+
applicationLoadBalancer: new ApplicationLoadBalancer(stack, 'alb', {
|
|
166
|
+
vpc: vpc,
|
|
167
|
+
crossZoneEnabled: true,
|
|
168
|
+
}),
|
|
169
|
+
targetGroups: [
|
|
170
|
+
targetGroup1,
|
|
171
|
+
targetGroup2
|
|
172
|
+
]
|
|
173
|
+
}
|
|
174
|
+
],
|
|
175
|
+
faultCountPercentThreshold: 1,
|
|
176
|
+
latencyStatistic: Stats.percentile(99),
|
|
177
|
+
latencyThreshold: Duration.millis(200),
|
|
178
|
+
latencyOutlierAlgorithm: ApplicationLoadBalancerLatencyOutlierAlgorithm.STATIC,
|
|
179
|
+
latencyOutlierThreshold: 45
|
|
180
|
+
},
|
|
181
|
+
natGatewayProps: {
|
|
182
|
+
natGateways: {
|
|
183
|
+
"us-east-1a": [ natGateway1 ],
|
|
184
|
+
"us-east-1b": [ natGateway2 ],
|
|
185
|
+
"us-east-1c": [ natGateway3 ]
|
|
186
|
+
},
|
|
187
|
+
packetLossPercentThreshold: 0.01
|
|
160
188
|
},
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
Period = Duration.Seconds(60), // The period for metric evaluation
|
|
167
|
-
Interval = Duration.Minutes(60) // The interval for the dashboards
|
|
168
|
-
EvaluationPeriods = 5,
|
|
169
|
-
DatapointsToAlarm = 3
|
|
189
|
+
serviceName: 'test',
|
|
190
|
+
period: Duration.seconds(60),
|
|
191
|
+
createDashboard: true,
|
|
192
|
+
evaluationPeriods: 5,
|
|
193
|
+
datapointsToAlarm: 3,
|
|
170
194
|
});
|
|
171
195
|
```
|
|
172
196
|
|
|
173
197
|
If you provide a load balancer, the construct assumes it is deployed in each AZ of the VPC the load balancer is associated with and will look for HTTP metrics using those AZs as dimensions.
|
|
174
198
|
|
|
175
|
-
Both options support running workloads on EC2, ECS, Lambda, and EKS.
|
|
199
|
+
Both options support running workloads on EC2, ECS, Lambda, and EKS.
|
|
@@ -9,6 +9,10 @@ import { IContributorInsightRuleDetails } from '../services/IContributorInsightR
|
|
|
9
9
|
import { IOperation } from '../services/IOperation';
|
|
10
10
|
import { IOperationMetricDetails } from '../services/IOperationMetricDetails';
|
|
11
11
|
import { OutlierDetectionAlgorithm } from '../utilities/OutlierDetectionAlgorithm';
|
|
12
|
+
import { IOperationAvailabilityMetricDetails } from '../services/IOperationAvailabilityMetricDetails';
|
|
13
|
+
import { IOperationLatencyMetricDetails } from '../services/IOperationLatencyMetricDetails';
|
|
14
|
+
import { LatencyOutlierMetricAggregation } from '../outlier-detection/LatencyOutlierMetricAggregation';
|
|
15
|
+
import { MinimumUnhealthyTargets } from '../utilities/MinimumUnhealthyTargets';
|
|
12
16
|
/**
|
|
13
17
|
* Class used to create availability and latency alarms and Contributor Insight rules
|
|
14
18
|
*/
|
|
@@ -22,7 +26,7 @@ export declare class AvailabilityAndLatencyAlarmsAndRules {
|
|
|
22
26
|
* @param counter
|
|
23
27
|
* @returns
|
|
24
28
|
*/
|
|
25
|
-
static createZonalAvailabilityAlarm(scope: Construct, metricDetails:
|
|
29
|
+
static createZonalAvailabilityAlarm(scope: Construct, metricDetails: IOperationAvailabilityMetricDetails, availabilityZone: string, availabilityZoneId: string, counter: number, nameSuffix?: string): IAlarm;
|
|
26
30
|
/**
|
|
27
31
|
* Creates a zonal latency alarm
|
|
28
32
|
* @param scope
|
|
@@ -32,7 +36,7 @@ export declare class AvailabilityAndLatencyAlarmsAndRules {
|
|
|
32
36
|
* @param counter
|
|
33
37
|
* @returns
|
|
34
38
|
*/
|
|
35
|
-
static createZonalLatencyAlarm(scope: Construct, metricDetails:
|
|
39
|
+
static createZonalLatencyAlarm(scope: Construct, metricDetails: IOperationLatencyMetricDetails, availabilityZone: string, availabilityZoneId: string, counter: number, nameSuffix?: string): IAlarm;
|
|
36
40
|
/**
|
|
37
41
|
* Creates a composite alarm when either latency or availability is breached in the Availability Zone
|
|
38
42
|
* @param scope
|
|
@@ -55,7 +59,7 @@ export declare class AvailabilityAndLatencyAlarmsAndRules {
|
|
|
55
59
|
* @param outlierThreshold
|
|
56
60
|
* @returns
|
|
57
61
|
*/
|
|
58
|
-
static createZonalFaultRateStaticOutlierAlarm(scope: Construct, metricDetails: IOperationMetricDetails, availabilityZoneId: string, counter: number, outlierThreshold: number, nameSuffix?: string): IAlarm;
|
|
62
|
+
static createZonalFaultRateStaticOutlierAlarm(scope: Construct, metricDetails: IOperationMetricDetails, availabilityZone: string, availabilityZoneId: string, counter: number, outlierThreshold: number, nameSuffix?: string): IAlarm;
|
|
59
63
|
/**
|
|
60
64
|
* An alarm that compares error rate in this AZ to the overall region error based only on metric data.
|
|
61
65
|
* This is different for canaries because the metrics they test at the regional level are different
|
|
@@ -69,15 +73,15 @@ export declare class AvailabilityAndLatencyAlarmsAndRules {
|
|
|
69
73
|
* @param outlierThreshold
|
|
70
74
|
* @returns
|
|
71
75
|
*/
|
|
72
|
-
static createZonalFaultRateStaticOutlierAlarmForCanaries(scope: Construct, metricDetails: IOperationMetricDetails, availabilityZoneId: string,
|
|
76
|
+
static createZonalFaultRateStaticOutlierAlarmForCanaries(scope: Construct, metricDetails: IOperationMetricDetails, availabilityZone: string, availabilityZoneId: string, availabilityZoneIds: string[], counter: number, outlierThreshold: number, nameSuffix?: string): IAlarm;
|
|
73
77
|
static createZonalFaultRateOutlierAlarm(scope: IConstruct, metricDetails: IOperationMetricDetails, availabilityZoneId: string, allAvailabilityZoneIds: string[], outlierThreshold: number, outlierDetectionFunction: IFunction, outlierDetectionAlgorithm: OutlierDetectionAlgorithm, counter: number, nameSuffix?: string): IAlarm;
|
|
74
78
|
static createZonalFaultRateOutlierAlarmForAlb(scope: IConstruct, loadBalancers: IApplicationLoadBalancer[], availabilityZoneId: string, outlierThreshold: number, outlierDetectionFunction: IFunction, outlierDetectionAlgorithm: OutlierDetectionAlgorithm, azMapper: IAvailabilityZoneMapper, counter: number, evaluationPeriods: number, datapointsToAlarm: number, period: Duration, nameSuffix?: string): IAlarm;
|
|
75
79
|
static createZonalFaultRateOutlierAlarmForNatGW(scope: IConstruct, natGateways: {
|
|
76
80
|
[key: string]: CfnNatGateway[];
|
|
77
81
|
}, availabilityZoneId: string, outlierThreshold: number, outlierDetectionFunction: IFunction, outlierDetectionAlgorithm: OutlierDetectionAlgorithm, azMapper: IAvailabilityZoneMapper, counter: number, evaluationPeriods: number, datapointsToAlarm: number, period: Duration, nameSuffix?: string): IAlarm;
|
|
78
|
-
static createZonalHighLatencyOutlierAlarm(scope: IConstruct, metricDetails:
|
|
79
|
-
static createZonalHighLatencyStaticOutlierAlarm(scope: Construct, metricDetails:
|
|
80
|
-
static createZonalHighLatencyStaticOutlierAlarmForCanaries(scope: Construct, metricDetails:
|
|
82
|
+
static createZonalHighLatencyOutlierAlarm(scope: IConstruct, metricDetails: IOperationLatencyMetricDetails, availabilityZoneId: string, allAvailabilityZoneIds: string[], outlierThreshold: number, outlierDetectionFunction: IFunction, outlierMetric: LatencyOutlierMetricAggregation, outlierDetectionAlgorithm: OutlierDetectionAlgorithm, counter: number, nameSuffix?: string): IAlarm;
|
|
83
|
+
static createZonalHighLatencyStaticOutlierAlarm(scope: Construct, metricDetails: IOperationLatencyMetricDetails, availabilityZone: string, availabilityZoneId: string, counter: number, outlierThreshold: number, nameSuffix?: string): IAlarm;
|
|
84
|
+
static createZonalHighLatencyStaticOutlierAlarmForCanaries(scope: Construct, metricDetails: IOperationLatencyMetricDetails, availabilityZone: string, availabilityZoneId: string, availabilityZones: string[], counter: number, outlierThreshold: number, nameSuffix?: string): IAlarm;
|
|
81
85
|
/**
|
|
82
86
|
* An insight rule that calculates how many instances are responding to requests in
|
|
83
87
|
* the specified AZ. Only useful for server-side metrics since the canary doesn't record instance id metrics.
|
|
@@ -120,7 +124,7 @@ export declare class AvailabilityAndLatencyAlarmsAndRules {
|
|
|
120
124
|
* @param counter
|
|
121
125
|
* @returns
|
|
122
126
|
*/
|
|
123
|
-
static createServerSideInstanceHighLatencyContributorsInThisAZRule(scope: Construct, metricDetails:
|
|
127
|
+
static createServerSideInstanceHighLatencyContributorsInThisAZRule(scope: Construct, metricDetails: IOperationLatencyMetricDetails, availabilityZoneId: string, ruleDetails: IContributorInsightRuleDetails, counter: number, nameSuffix?: string): CfnInsightRule;
|
|
124
128
|
/**
|
|
125
129
|
* An alarm that indicates some percentage of the instances in this AZ are producing errors. Only
|
|
126
130
|
* useful for server-side metrics since the canary doesn't record instance id metrics.
|
|
@@ -134,7 +138,7 @@ export declare class AvailabilityAndLatencyAlarmsAndRules {
|
|
|
134
138
|
* @param instancesHandlingRequestsInThisAZ
|
|
135
139
|
* @returns
|
|
136
140
|
*/
|
|
137
|
-
static createServerSideZonalMoreThanOneInstanceProducingFaultsAlarm(scope: Construct, metricDetails: IOperationMetricDetails, availabilityZoneId: string, counter: number,
|
|
141
|
+
static createServerSideZonalMoreThanOneInstanceProducingFaultsAlarm(scope: Construct, metricDetails: IOperationMetricDetails, availabilityZoneId: string, counter: number, instanceFaultRateContributorsInThisAZ: CfnInsightRule, instancesHandlingRequestsInThisAZ: CfnInsightRule, minimumUnhealthyTargets?: MinimumUnhealthyTargets, nameSuffix?: string): IAlarm;
|
|
138
142
|
/**
|
|
139
143
|
* An alarm indicating more than some percentage of instances in this AZ
|
|
140
144
|
* are contributing to high latency. Only useful for server-side metrics since
|
|
@@ -149,7 +153,7 @@ export declare class AvailabilityAndLatencyAlarmsAndRules {
|
|
|
149
153
|
* @param instancesHandlingRequestsInThisAZ
|
|
150
154
|
* @returns
|
|
151
155
|
*/
|
|
152
|
-
static createServerSideZonalMoreThanOneInstanceProducingHighLatencyAlarm(scope: Construct, metricDetails: IOperationMetricDetails, availabilityZoneId: string, counter: number,
|
|
156
|
+
static createServerSideZonalMoreThanOneInstanceProducingHighLatencyAlarm(scope: Construct, metricDetails: IOperationMetricDetails, availabilityZoneId: string, counter: number, instanceHighLatencyContributorsInThisAZ: CfnInsightRule, instancesHandlingRequestsInThisAZ: CfnInsightRule, minimumUnhealthyTargets?: MinimumUnhealthyTargets, nameSuffix?: string): IAlarm;
|
|
153
157
|
/**
|
|
154
158
|
* An alarm that indicates this AZ as an outlier
|
|
155
159
|
* for availability or latency. This does not ensure that the errors
|
|
@@ -204,7 +208,7 @@ export declare class AvailabilityAndLatencyAlarmsAndRules {
|
|
|
204
208
|
* @param counter
|
|
205
209
|
* @returns
|
|
206
210
|
*/
|
|
207
|
-
static createRegionalAvailabilityAlarm(scope: Construct, metricDetails:
|
|
211
|
+
static createRegionalAvailabilityAlarm(scope: Construct, metricDetails: IOperationAvailabilityMetricDetails, nameSuffix: string): IAlarm;
|
|
208
212
|
/**
|
|
209
213
|
* Creates a regional latency alarm for the operation
|
|
210
214
|
* @param scope
|
|
@@ -213,7 +217,7 @@ export declare class AvailabilityAndLatencyAlarmsAndRules {
|
|
|
213
217
|
* @param counter
|
|
214
218
|
* @returns
|
|
215
219
|
*/
|
|
216
|
-
static createRegionalLatencyAlarm(scope: Construct, metricDetails:
|
|
220
|
+
static createRegionalLatencyAlarm(scope: Construct, metricDetails: IOperationLatencyMetricDetails, nameSuffix: string): IAlarm;
|
|
217
221
|
/**
|
|
218
222
|
* A composite alarm combining latency and availability alarms for this operation in the region
|
|
219
223
|
* as measured from either the server side or canary
|
|
@@ -224,7 +228,7 @@ export declare class AvailabilityAndLatencyAlarmsAndRules {
|
|
|
224
228
|
* @param regionalLatencyAlarm
|
|
225
229
|
* @returns
|
|
226
230
|
*/
|
|
227
|
-
static
|
|
228
|
-
static createRegionalInstanceContributorsToHighLatency(scope: Construct, metricDetails:
|
|
231
|
+
static createRegionalAvailabilityOrLatencyImpactAlarm(scope: Construct, operationName: string, nameSuffix: string, regionalAvailabilityAlarm: IAlarm, regionalLatencyAlarm: IAlarm): IAlarm;
|
|
232
|
+
static createRegionalInstanceContributorsToHighLatency(scope: Construct, metricDetails: IOperationLatencyMetricDetails, ruleDetails: IContributorInsightRuleDetails): CfnInsightRule;
|
|
229
233
|
static createRegionalInstanceContributorsToFaults(scope: Construct, metricDetails: IOperationMetricDetails, ruleDetails: IContributorInsightRuleDetails): CfnInsightRule;
|
|
230
234
|
}
|