@liflig/cdk 3.12.0 → 3.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/open-telemetry/otel-collector-task-metrics-config.yaml +310 -0
- package/lib/build-artifacts/index.d.ts +1 -1
- package/lib/build-artifacts/index.js +5 -1
- package/lib/ecs/index.d.ts +2 -0
- package/lib/ecs/index.js +2 -1
- package/lib/ecs/open-telemetry.d.ts +98 -0
- package/lib/ecs/open-telemetry.js +211 -0
- package/package.json +4 -4
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
## Validate using https://www.otelbin.io/#distro=adot%7E&distroVersion=v0.43.1%7E .
|
|
2
|
+
##
|
|
3
|
+
## Original copyright is Apache 2.0 to AWS (aws-observability on GitHub).
|
|
4
|
+
## https://github.com/aws-observability/aws-otel-collector/blob/14833d4af543da709c77cf9dc6827351dbd529b1/config/ecs/container-insights/otel-task-metrics-config.yaml
|
|
5
|
+
##
|
|
6
|
+
## This is a modified version of `./etc/ecs/container-insights/otel-task-metrics-config.yaml`
|
|
7
|
+
## where we add `resource_to_telemetry_conversion` so the otel resource `service.name`
|
|
8
|
+
## can be added to the metric. This is useful because you can then filter metrics by service,
|
|
9
|
+
## instead of grouping e.g. all services' memory usage under the same metric.
|
|
10
|
+
##
|
|
11
|
+
## Search for `# THIS IS WHAT LIFLIG ADDED` to find our modifications.
|
|
12
|
+
##
|
|
13
|
+
##
|
|
14
|
+
## @see https://aws-otel.github.io/docs/setup/ecs/config-through-ssm
|
|
15
|
+
extensions:
|
|
16
|
+
health_check:
|
|
17
|
+
|
|
18
|
+
## These ports are also set in `taskDefinition.defaultContainer` as
|
|
19
|
+
## `OTEL_EXPORTER_OTLP_ENDPOINT` and so on.
|
|
20
|
+
## Changes must happen in both places.
|
|
21
|
+
receivers:
|
|
22
|
+
otlp:
|
|
23
|
+
protocols:
|
|
24
|
+
grpc:
|
|
25
|
+
endpoint: 0.0.0.0:4317
|
|
26
|
+
http:
|
|
27
|
+
endpoint: 0.0.0.0:4318
|
|
28
|
+
awsxray:
|
|
29
|
+
endpoint: 0.0.0.0:2000
|
|
30
|
+
transport: udp
|
|
31
|
+
awsecscontainermetrics:
|
|
32
|
+
|
|
33
|
+
processors:
|
|
34
|
+
batch/traces:
|
|
35
|
+
timeout: 1s
|
|
36
|
+
send_batch_size: 50
|
|
37
|
+
batch/metrics:
|
|
38
|
+
timeout: 60s
|
|
39
|
+
filter:
|
|
40
|
+
metrics:
|
|
41
|
+
include:
|
|
42
|
+
match_type: strict
|
|
43
|
+
metric_names:
|
|
44
|
+
- ecs.task.memory.reserved
|
|
45
|
+
- ecs.task.memory.utilized
|
|
46
|
+
- ecs.task.cpu.reserved
|
|
47
|
+
- ecs.task.cpu.utilized
|
|
48
|
+
- ecs.task.network.rate.rx
|
|
49
|
+
- ecs.task.network.rate.tx
|
|
50
|
+
- ecs.task.storage.read_bytes
|
|
51
|
+
- ecs.task.storage.write_bytes
|
|
52
|
+
- container.duration
|
|
53
|
+
metricstransform:
|
|
54
|
+
transforms:
|
|
55
|
+
- include: ecs.task.memory.utilized
|
|
56
|
+
action: update
|
|
57
|
+
new_name: MemoryUtilized
|
|
58
|
+
- include: ecs.task.memory.reserved
|
|
59
|
+
action: update
|
|
60
|
+
new_name: MemoryReserved
|
|
61
|
+
- include: ecs.task.cpu.utilized
|
|
62
|
+
action: update
|
|
63
|
+
new_name: CpuUtilized
|
|
64
|
+
- include: ecs.task.cpu.reserved
|
|
65
|
+
action: update
|
|
66
|
+
new_name: CpuReserved
|
|
67
|
+
- include: ecs.task.network.rate.rx
|
|
68
|
+
action: update
|
|
69
|
+
new_name: NetworkRxBytes
|
|
70
|
+
- include: ecs.task.network.rate.tx
|
|
71
|
+
action: update
|
|
72
|
+
new_name: NetworkTxBytes
|
|
73
|
+
- include: ecs.task.storage.read_bytes
|
|
74
|
+
action: update
|
|
75
|
+
new_name: StorageReadBytes
|
|
76
|
+
- include: ecs.task.storage.write_bytes
|
|
77
|
+
action: update
|
|
78
|
+
new_name: StorageWriteBytes
|
|
79
|
+
## THIS IS WHAT LIFLIG ADDED
|
|
80
|
+
filter/application:
|
|
81
|
+
error_mode: ignore
|
|
82
|
+
metrics:
|
|
83
|
+
metric:
|
|
84
|
+
- 'resource.attributes["http.target"] == "/health"'
|
|
85
|
+
- 'resource.attributes["net.peer.name"] == "169.254.170.2"' # ECS task metadata/credentials endpoint (not the EC2 IMDS at 169.254.169.254)
|
|
86
|
+
filter/traces:
|
|
87
|
+
error_mode: ignore
|
|
88
|
+
traces:
|
|
89
|
+
span:
|
|
90
|
+
- 'attributes["http.route"] == "health"'
|
|
91
|
+
- 'attributes["http.url"] == "/health"'
|
|
92
|
+
- 'name == "GET health"'
|
|
93
|
+
attributes/application:
|
|
94
|
+
actions:
|
|
95
|
+
# HTTP attributes add a lot of high cardinality, and X-Ray creates one metric per distinct value. Expensive.
|
|
96
|
+
- key: http.url
|
|
97
|
+
action: delete
|
|
98
|
+
- key: http.target
|
|
99
|
+
action: delete
|
|
100
|
+
- key: http.client_ip
|
|
101
|
+
action: delete
|
|
102
|
+
- key: http.response_content_length
|
|
103
|
+
action: delete
|
|
104
|
+
- key: http.request_content_length
|
|
105
|
+
action: delete
|
|
106
|
+
- key: net.host.name
|
|
107
|
+
action: delete
|
|
108
|
+
- key: net.sock.peer.addr
|
|
109
|
+
action: delete
|
|
110
|
+
- key: net.sock.peer.port
|
|
111
|
+
action: delete
|
|
112
|
+
- key: net.sock.host.addr
|
|
113
|
+
action: delete
|
|
114
|
+
- key: net.sock.host.port
|
|
115
|
+
action: delete
|
|
116
|
+
- key: user_agent.original
|
|
117
|
+
action: delete
|
|
118
|
+
resource/application:
|
|
119
|
+
attributes:
|
|
120
|
+
- key: cloud.provider
|
|
121
|
+
action: delete
|
|
122
|
+
- key: host.arch
|
|
123
|
+
action: delete
|
|
124
|
+
- key: aws.ecs.container.image.id
|
|
125
|
+
action: delete
|
|
126
|
+
- key: aws.ecs.task.arn
|
|
127
|
+
action: delete
|
|
128
|
+
- key: aws.log.stream.names
|
|
129
|
+
action: delete
|
|
130
|
+
- key: aws.log.stream.arns
|
|
131
|
+
action: delete
|
|
132
|
+
- key: cloud.platform
|
|
133
|
+
action: delete
|
|
134
|
+
- key: container.name
|
|
135
|
+
action: delete
|
|
136
|
+
- key: process.executable.path
|
|
137
|
+
action: delete
|
|
138
|
+
- key: process.runtime.version
|
|
139
|
+
action: delete
|
|
140
|
+
- key: telemetry.auto.version
|
|
141
|
+
action: delete
|
|
142
|
+
- key: telemetry.sdk.name
|
|
143
|
+
action: delete
|
|
144
|
+
- key: container.id
|
|
145
|
+
action: delete
|
|
146
|
+
- key: container.image.tag
|
|
147
|
+
action: delete
|
|
148
|
+
- key: process.runtime.name
|
|
149
|
+
action: delete
|
|
150
|
+
- key: service.namespace
|
|
151
|
+
action: delete
|
|
152
|
+
- key: telemetry.sdk.version
|
|
153
|
+
action: delete
|
|
154
|
+
- key: aws.ecs.task.family
|
|
155
|
+
action: delete
|
|
156
|
+
- key: aws.ecs.task.revision
|
|
157
|
+
action: delete
|
|
158
|
+
- key: aws.log.group.arns
|
|
159
|
+
action: delete
|
|
160
|
+
- key: container.image.name
|
|
161
|
+
action: delete
|
|
162
|
+
- key: os.description
|
|
163
|
+
action: delete
|
|
164
|
+
- key: os.type
|
|
165
|
+
action: delete
|
|
166
|
+
- key: process.runtime.description
|
|
167
|
+
action: delete
|
|
168
|
+
- key: version
|
|
169
|
+
action: delete
|
|
170
|
+
- key: aws.ecs.launchtype
|
|
171
|
+
action: delete
|
|
172
|
+
- key: aws.log.group.names
|
|
173
|
+
action: delete
|
|
174
|
+
- key: host.name
|
|
175
|
+
action: delete
|
|
176
|
+
- key: process.pid
|
|
177
|
+
action: delete
|
|
178
|
+
- key: telemetry.sdk.language
|
|
179
|
+
action: delete
|
|
180
|
+
- key: aws.ecs.container.arn
|
|
181
|
+
action: delete
|
|
182
|
+
- key: service.version
|
|
183
|
+
action: delete
|
|
184
|
+
- key: process.command_args
|
|
185
|
+
action: delete
|
|
186
|
+
- key: service
|
|
187
|
+
action: delete
|
|
188
|
+
## END OF LIFLIG CHANGES
|
|
189
|
+
resource:
|
|
190
|
+
attributes:
|
|
191
|
+
- key: ClusterName
|
|
192
|
+
from_attribute: aws.ecs.cluster.name
|
|
193
|
+
action: insert
|
|
194
|
+
- key: aws.ecs.cluster.name
|
|
195
|
+
action: delete
|
|
196
|
+
- key: ServiceName
|
|
197
|
+
from_attribute: aws.ecs.service.name
|
|
198
|
+
action: insert
|
|
199
|
+
- key: aws.ecs.service.name
|
|
200
|
+
action: delete
|
|
201
|
+
## THIS IS WHAT LIFLIG ADDED (bunch of sporadic removals)
|
|
202
|
+
- key: aws.ecs.task.id
|
|
203
|
+
action: delete
|
|
204
|
+
- key: TaskDefinitionFamily
|
|
205
|
+
from_attribute: aws.ecs.task.family
|
|
206
|
+
action: insert
|
|
207
|
+
- key: aws.ecs.task.family
|
|
208
|
+
action: delete
|
|
209
|
+
- key: aws.ecs.task.arn
|
|
210
|
+
action: delete
|
|
211
|
+
- key: aws.ecs.docker.name
|
|
212
|
+
action: delete
|
|
213
|
+
- key: aws.ecs.task.version
|
|
214
|
+
action: delete
|
|
215
|
+
- key: aws.ecs.task.pull_started_at
|
|
216
|
+
action: delete
|
|
217
|
+
- key: aws.ecs.task.pull_stopped_at
|
|
218
|
+
action: delete
|
|
219
|
+
- key: AvailabilityZone
|
|
220
|
+
from_attribute: cloud.zone
|
|
221
|
+
action: insert
|
|
222
|
+
- key: cloud.zone
|
|
223
|
+
action: delete
|
|
224
|
+
- key: aws.ecs.task.launch_type
|
|
225
|
+
action: delete
|
|
226
|
+
- key: Region
|
|
227
|
+
from_attribute: cloud.region
|
|
228
|
+
action: insert
|
|
229
|
+
- key: cloud.region
|
|
230
|
+
action: delete
|
|
231
|
+
- key: AccountId
|
|
232
|
+
from_attribute: cloud.account.id
|
|
233
|
+
action: insert
|
|
234
|
+
- key: cloud.account.id
|
|
235
|
+
action: delete
|
|
236
|
+
- key: container.id
|
|
237
|
+
action: delete
|
|
238
|
+
- key: container.name
|
|
239
|
+
action: delete
|
|
240
|
+
- key: container.image.name
|
|
241
|
+
action: delete
|
|
242
|
+
- key: aws.ecs.container.image.id
|
|
243
|
+
action: delete
|
|
244
|
+
- key: aws.ecs.container.exit_code
|
|
245
|
+
action: delete
|
|
246
|
+
- key: aws.ecs.container.created_at
|
|
247
|
+
action: delete
|
|
248
|
+
- key: aws.ecs.container.started_at
|
|
249
|
+
action: delete
|
|
250
|
+
- key: aws.ecs.container.finished_at
|
|
251
|
+
action: delete
|
|
252
|
+
- key: container.image.tag
|
|
253
|
+
action: delete
|
|
254
|
+
## END OF LIFLIG CHANGES
|
|
255
|
+
|
|
256
|
+
exporters:
|
|
257
|
+
awsxray:
|
|
258
|
+
## THIS IS WHAT LIFLIG ADDED
|
|
259
|
+
indexed_attributes: ["otel.resource.service.name"] # Max 50 attributes
|
|
260
|
+
## END OF LIFLIG CHANGES
|
|
261
|
+
awsemf/application:
|
|
262
|
+
namespace: ECS/AWSOTel/Application
|
|
263
|
+
log_group_name: '/aws/ecs/application/metrics'
|
|
264
|
+
## THIS IS WHAT LIFLIG ADDED
|
|
265
|
+
## Config docs at https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/exporter/awsemfexporter/README.md
|
|
266
|
+
## log_retention is the number of days to keep the EMF metric logs.
|
|
267
|
+
log_retention: 365
|
|
268
|
+
resource_to_telemetry_conversion:
|
|
269
|
+
enabled: true
|
|
270
|
+
dimension_rollup_option: NoDimensionRollup
|
|
271
|
+
## No rollup means we keep all the dimensions on the metric, instead of merging similar metrics to one in CloudWatch
|
|
272
|
+
## END OF LIFLIG CHANGES
|
|
273
|
+
awsemf/performance:
|
|
274
|
+
namespace: ECS/ContainerInsights
|
|
275
|
+
log_group_name: '/aws/ecs/containerinsights/{ClusterName}/performance'
|
|
276
|
+
log_stream_name: '{TaskId}'
|
|
277
|
+
resource_to_telemetry_conversion:
|
|
278
|
+
enabled: true
|
|
279
|
+
dimension_rollup_option: NoDimensionRollup
|
|
280
|
+
metric_declarations:
|
|
281
|
+
- dimensions: [ [ ClusterName ], [ ClusterName, TaskDefinitionFamily ] ]
|
|
282
|
+
metric_name_selectors:
|
|
283
|
+
- MemoryUtilized
|
|
284
|
+
- MemoryReserved
|
|
285
|
+
- CpuUtilized
|
|
286
|
+
- CpuReserved
|
|
287
|
+
- NetworkRxBytes
|
|
288
|
+
- NetworkTxBytes
|
|
289
|
+
- StorageReadBytes
|
|
290
|
+
- StorageWriteBytes
|
|
291
|
+
- metric_name_selectors: [container.*]
|
|
292
|
+
|
|
293
|
+
service:
|
|
294
|
+
pipelines:
|
|
295
|
+
traces:
|
|
296
|
+
receivers: [otlp,awsxray]
|
|
297
|
+
## THIS IS WHAT LIFLIG ADDED
|
|
298
|
+
processors: [filter/traces, batch/traces]
|
|
299
|
+
exporters: [awsxray]
|
|
300
|
+
metrics/application:
|
|
301
|
+
receivers: [otlp]
|
|
302
|
+
processors: [filter/application, resource/application, attributes/application, batch/metrics]
|
|
303
|
+
exporters: [awsemf/application]
|
|
304
|
+
## END OF LIFLIG CHANGES
|
|
305
|
+
metrics/performance:
|
|
306
|
+
receivers: [awsecscontainermetrics ]
|
|
307
|
+
processors: [filter, metricstransform, resource]
|
|
308
|
+
exporters: [ awsemf/performance ]
|
|
309
|
+
|
|
310
|
+
extensions: [health_check]
|
|
@@ -24,7 +24,7 @@ interface Props {
|
|
|
24
24
|
/**
|
|
25
25
|
* The lifecycle rules to apply to images stored in the ECR repository.
|
|
26
26
|
*
|
|
27
|
-
* @default - Expire images after 180 days
|
|
27
|
+
* @default - Expire untagged images after 10 days and any image older than 180 days
|
|
28
28
|
*/
|
|
29
29
|
ecrRepositoryLifecycleRules?: ecr.LifecycleRule[];
|
|
30
30
|
/**
|
|
@@ -90,6 +90,10 @@ export class BuildArtifacts extends constructs.Construct {
|
|
|
90
90
|
const ecrRepo = new ecr.Repository(this, "EcrRepository", {
|
|
91
91
|
repositoryName: ecrRepositoryName,
|
|
92
92
|
lifecycleRules: props.ecrRepositoryLifecycleRules || [
|
|
93
|
+
{
|
|
94
|
+
maxImageAge: cdk.Duration.days(10),
|
|
95
|
+
tagStatus: ecr.TagStatus.UNTAGGED,
|
|
96
|
+
},
|
|
93
97
|
{
|
|
94
98
|
maxImageAge: cdk.Duration.days(180),
|
|
95
99
|
tagStatus: ecr.TagStatus.ANY,
|
|
@@ -181,4 +185,4 @@ export class BuildArtifacts extends constructs.Construct {
|
|
|
181
185
|
}
|
|
182
186
|
}
|
|
183
187
|
}
|
|
184
|
-
//# sourceMappingURL=data:application/json;base64,
|
|
188
|
+
//# sourceMappingURL=data:application/json;base64,
|
package/lib/ecs/index.d.ts
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
export { Cluster } from "./cluster";
|
|
2
2
|
export { FargateService } from "./fargate-service";
|
|
3
3
|
export { ListenerRule } from "./listener-rule";
|
|
4
|
+
export { OpenTelemetryCollectors } from "./open-telemetry";
|
|
4
5
|
export type { ClusterProps } from "./cluster";
|
|
5
6
|
export type { FargateServiceProps } from "./fargate-service";
|
|
6
7
|
export type { ListenerRuleProps } from "./listener-rule";
|
|
8
|
+
export type { OpenTelemetryCollectorsProps } from "./open-telemetry";
|
package/lib/ecs/index.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
export { Cluster } from "./cluster";
|
|
2
2
|
export { FargateService } from "./fargate-service";
|
|
3
3
|
export { ListenerRule } from "./listener-rule";
|
|
4
|
-
|
|
4
|
+
export { OpenTelemetryCollectors } from "./open-telemetry";
|
|
5
|
+
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiaW5kZXguanMiLCJzb3VyY2VSb290IjoiIiwic291cmNlcyI6WyIuLi8uLi9zcmMvZWNzL2luZGV4LnRzIl0sIm5hbWVzIjpbXSwibWFwcGluZ3MiOiJBQUFBLE9BQU8sRUFBRSxPQUFPLEVBQUUsTUFBTSxXQUFXLENBQUE7QUFDbkMsT0FBTyxFQUFFLGNBQWMsRUFBRSxNQUFNLG1CQUFtQixDQUFBO0FBQ2xELE9BQU8sRUFBRSxZQUFZLEVBQUUsTUFBTSxpQkFBaUIsQ0FBQTtBQUM5QyxPQUFPLEVBQUUsdUJBQXVCLEVBQUUsTUFBTSxrQkFBa0IsQ0FBQSIsInNvdXJjZXNDb250ZW50IjpbImV4cG9ydCB7IENsdXN0ZXIgfSBmcm9tIFwiLi9jbHVzdGVyXCJcbmV4cG9ydCB7IEZhcmdhdGVTZXJ2aWNlIH0gZnJvbSBcIi4vZmFyZ2F0ZS1zZXJ2aWNlXCJcbmV4cG9ydCB7IExpc3RlbmVyUnVsZSB9IGZyb20gXCIuL2xpc3RlbmVyLXJ1bGVcIlxuZXhwb3J0IHsgT3BlblRlbGVtZXRyeUNvbGxlY3RvcnMgfSBmcm9tIFwiLi9vcGVuLXRlbGVtZXRyeVwiXG5leHBvcnQgdHlwZSB7IENsdXN0ZXJQcm9wcyB9IGZyb20gXCIuL2NsdXN0ZXJcIlxuZXhwb3J0IHR5cGUgeyBGYXJnYXRlU2VydmljZVByb3BzIH0gZnJvbSBcIi4vZmFyZ2F0ZS1zZXJ2aWNlXCJcbmV4cG9ydCB0eXBlIHsgTGlzdGVuZXJSdWxlUHJvcHMgfSBmcm9tIFwiLi9saXN0ZW5lci1ydWxlXCJcbmV4cG9ydCB0eXBlIHsgT3BlblRlbGVtZXRyeUNvbGxlY3RvcnNQcm9wcyB9IGZyb20gXCIuL29wZW4tdGVsZW1ldHJ5XCJcbiJdfQ==
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import * as constructs from "constructs";
|
|
2
|
+
import * as ecs from "aws-cdk-lib/aws-ecs";
|
|
3
|
+
import { RetentionDays } from "aws-cdk-lib/aws-logs";
|
|
4
|
+
import { FargateService } from "./fargate-service";
|
|
5
|
+
export interface OpenTelemetryCollectorsProps {
|
|
6
|
+
service: FargateService;
|
|
7
|
+
/** @default RetentionDays.SIX_MONTHS (6 months) */
|
|
8
|
+
logRetention?: RetentionDays;
|
|
9
|
+
/** @default "amazon/aws-otel-collector:v0.43.1" */
|
|
10
|
+
dockerImage?: string;
|
|
11
|
+
/** Should be kept as `undefined` unless you know what you are doing.
|
|
12
|
+
* This is the YAML config for the OpenTelemetry collector sidecar.
|
|
13
|
+
*
|
|
14
|
+
* An example of a config can be found at https://github.com/aws-observability/aws-otel-collector/blob/0ae198c7e7b8c43bcc8715f54e52c879c04407b6/config/ecs/container-insights/otel-task-metrics-config.yaml
|
|
15
|
+
*
|
|
16
|
+
* @default the bundled `assets/open-telemetry/otel-collector-task-metrics-config.yaml`, which is tuned for AWS and strips known high-cardinality attributes (such as those containing IP addresses and ports)
|
|
17
|
+
*/
|
|
18
|
+
awsOtelConfig?: string;
|
|
19
|
+
/** Overrides for the sidecar container.
|
|
20
|
+
* You do not need to specify this.
|
|
21
|
+
*
|
|
22
|
+
* Defaults:
|
|
23
|
+
* - cpu: 32 units
|
|
24
|
+
* - memory reservation: 24 MiB
|
|
25
|
+
* - memory limit: 256 MiB
|
|
26
|
+
*/
|
|
27
|
+
containerProps?: SidecarContainerProps;
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* Methods to enable collection of Open Telemetry (otel) data of a {@link FargateService}
|
|
31
|
+
* using a docker container with an otel agent.
|
|
32
|
+
*
|
|
33
|
+
*
|
|
34
|
+
* An example of a Java auto-instrumentation agent in Docker can be found
|
|
35
|
+
* [in liflig-rest-baseline Dockerfile](https://github.com/capralifecycle/liflig-rest-service-baseline/blob/a29b5a472c982aa7ce04d09d0e7cfdc92a6cc977/docker/Dockerfile#L9-L29).
|
|
36
|
+
*
|
|
37
|
+
* The agent must be configured to output metrics to a collector.
|
|
38
|
+
* That collector is what this construct provides.
|
|
39
|
+
* Usually, the agent is specified in the Dockerfile or as a dependency/library,
|
|
40
|
+
* and configured in the Dockerfile or in the application source code.
|
|
41
|
+
*
|
|
42
|
+
* Use this construct on a {@link FargateService} by constructing a new instance of {@link OpenTelemetryCollectors}
|
|
43
|
+
* and calling the {@link addOpenTelemetryCollectorSidecar} method on it.
|
|
44
|
+
*
|
|
45
|
+
* ```ts
|
|
46
|
+
* const service = new FargateService(...);
|
|
47
|
+
*
|
|
48
|
+
* new OpenTelemetryCollectors(this, "OtelSidecar", { service }).addOpenTelemetryCollectorSidecar()
|
|
49
|
+
* ```
|
|
50
|
+
*
|
|
51
|
+
* The sidecar exposes these ports to your service:
|
|
52
|
+
* - udp 2000 : AWS XRay
|
|
53
|
+
* - tcp 4317 : OpenTelemetry collection GRPC
|
|
54
|
+
* - tcp 4318 : OpenTelemetry collection HTTP
|
|
55
|
+
*
|
|
56
|
+
* ---
|
|
57
|
+
*
|
|
58
|
+
* You can also disable the OpenTelemetry instrumentation agent
|
|
59
|
+
* for Java-based services,
|
|
60
|
+
* by setting the appropriate environment variable with {@link disableOpenTelemetryJavaAgent}:
|
|
61
|
+
* ```ts
|
|
62
|
+
* const service = new FargateService(...);
|
|
63
|
+
*
|
|
64
|
+
* OpenTelemetryCollectors.disableOpenTelemetryJavaAgent(service)
|
|
65
|
+
* ```
|
|
66
|
+
*
|
|
67
|
+
* @see OpenTelemetryCollectors.addOpenTelemetryCollectorSidecar
|
|
68
|
+
*/
|
|
69
|
+
export declare class OpenTelemetryCollectors extends constructs.Construct {
|
|
70
|
+
private readonly props;
|
|
71
|
+
constructor(scope: constructs.Construct, id: string, props: OpenTelemetryCollectorsProps);
|
|
72
|
+
/**
|
|
73
|
+
* The OpenTelemetry Java agent may run by default in the Docker image.
|
|
74
|
+
* This method will tell the agent to disable itself.
|
|
75
|
+
*
|
|
76
|
+
* You might want to do this to avoid overhead or error logs from failed
|
|
77
|
+
* connection attempts to the otel collector.
|
|
78
|
+
*/
|
|
79
|
+
disableOpenTelemetryJavaAgent(): void;
|
|
80
|
+
/**
|
|
81
|
+
* The OpenTelemetry Java agent may run by default in the Docker image.
|
|
82
|
+
* This method will tell the agent to disable itself.
|
|
83
|
+
*
|
|
84
|
+
* You might want to do this to avoid overhead or error logs from failed
|
|
85
|
+
* connection attempts to the otel collector.
|
|
86
|
+
* @param service
|
|
87
|
+
*/
|
|
88
|
+
static disableOpenTelemetryJavaAgent(service: FargateService): void;
|
|
89
|
+
/**
|
|
90
|
+
* Adds a sidecar with an AWS Distro OpenTelemetry Collector.
|
|
91
|
+
* https://aws-otel.github.io/docs/setup/ecs
|
|
92
|
+
*
|
|
93
|
+
* You also need to add either the Java SDK for OTel or a Java agent,
|
|
94
|
+
* to capture telemetry and send to this collector.
|
|
95
|
+
*/
|
|
96
|
+
addOpenTelemetryCollectorSidecar(): void;
|
|
97
|
+
}
|
|
98
|
+
export type SidecarContainerProps = Pick<ecs.ContainerDefinitionProps, "cpu" | "memoryReservationMiB" | "memoryLimitMiB">;
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
import * as constructs from "constructs";
|
|
2
|
+
import * as ecs from "aws-cdk-lib/aws-ecs";
|
|
3
|
+
import * as iam from "aws-cdk-lib/aws-iam";
|
|
4
|
+
import * as logs from "aws-cdk-lib/aws-logs";
|
|
5
|
+
import { RetentionDays } from "aws-cdk-lib/aws-logs";
|
|
6
|
+
import { RemovalPolicy } from "aws-cdk-lib";
|
|
7
|
+
import { readFileSync } from "fs";
|
|
8
|
+
import { fileURLToPath } from "url";
|
|
9
|
+
import * as path from "path";
|
|
10
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
11
|
+
const __dirname = path.dirname(__filename);
|
|
12
|
+
/**
|
|
13
|
+
* Methods to enable collection of Open Telemetry (otel) data of a {@link FargateService}
|
|
14
|
+
* using a docker container with an otel agent.
|
|
15
|
+
*
|
|
16
|
+
*
|
|
17
|
+
* An example of a Java auto-instrumentation agent in Docker can be found
|
|
18
|
+
* [in liflig-rest-baseline Dockerfile](https://github.com/capralifecycle/liflig-rest-service-baseline/blob/a29b5a472c982aa7ce04d09d0e7cfdc92a6cc977/docker/Dockerfile#L9-L29).
|
|
19
|
+
*
|
|
20
|
+
* The agent must be configured to output metrics to a collector.
|
|
21
|
+
* That collector is what this construct provides.
|
|
22
|
+
* Usually, the agent is specified in the Dockerfile or as a dependency/library,
|
|
23
|
+
* and configured in the Dockerfile or in the application source code.
|
|
24
|
+
*
|
|
25
|
+
* Use this construct on a {@link FargateService} by constructing a new instance of {@link OpenTelemetryCollectors}
|
|
26
|
+
* and calling the {@link addOpenTelemetryCollectorSidecar} method on it.
|
|
27
|
+
*
|
|
28
|
+
* ```ts
|
|
29
|
+
* const service = new FargateService(...);
|
|
30
|
+
*
|
|
31
|
+
* new OpenTelemetryCollectors(this, "OtelSidecar", { service }).addOpenTelemetryCollectorSidecar()
|
|
32
|
+
* ```
|
|
33
|
+
*
|
|
34
|
+
* The sidecar exposes these ports to your service:
|
|
35
|
+
* - udp 2000 : AWS XRay
|
|
36
|
+
* - tcp 4317 : OpenTelemetry collection GRPC
|
|
37
|
+
* - tcp 4318 : OpenTelemetry collection HTTP
|
|
38
|
+
*
|
|
39
|
+
* ---
|
|
40
|
+
*
|
|
41
|
+
* You can also disable the OpenTelemetry instrumentation agent
|
|
42
|
+
* for Java-based services,
|
|
43
|
+
* by setting the appropriate environment variable with {@link disableOpenTelemetryJavaAgent}:
|
|
44
|
+
* ```ts
|
|
45
|
+
* const service = new FargateService(...);
|
|
46
|
+
*
|
|
47
|
+
* OpenTelemetryCollectors.disableOpenTelemetryJavaAgent(service)
|
|
48
|
+
* ```
|
|
49
|
+
*
|
|
50
|
+
* @see OpenTelemetryCollectors.addOpenTelemetryCollectorSidecar
|
|
51
|
+
*/
|
|
52
|
+
export class OpenTelemetryCollectors extends constructs.Construct {
|
|
53
|
+
props;
|
|
54
|
+
constructor(scope, id, props) {
|
|
55
|
+
super(scope, id);
|
|
56
|
+
this.props = props;
|
|
57
|
+
}
|
|
58
|
+
/**
|
|
59
|
+
* The OpenTelemetry Java agent may run by default in the Docker image.
|
|
60
|
+
* This method will tell the agent to disable itself.
|
|
61
|
+
*
|
|
62
|
+
* You might want to do this to avoid overhead or error logs from failed
|
|
63
|
+
* connection attempts to the otel collector.
|
|
64
|
+
*/
|
|
65
|
+
disableOpenTelemetryJavaAgent() {
|
|
66
|
+
OpenTelemetryCollectors.disableOpenTelemetryJavaAgent(this.props.service);
|
|
67
|
+
}
|
|
68
|
+
/**
|
|
69
|
+
* The OpenTelemetry Java agent may run by default in the Docker image.
|
|
70
|
+
* This method will tell the agent to disable itself.
|
|
71
|
+
*
|
|
72
|
+
* You might want to do this to avoid overhead or error logs from failed
|
|
73
|
+
* connection attempts to the otel collector.
|
|
74
|
+
* @param service
|
|
75
|
+
*/
|
|
76
|
+
static disableOpenTelemetryJavaAgent(service) {
|
|
77
|
+
service.taskDefinition.defaultContainer?.addEnvironment("OTEL_JAVAAGENT_ENABLED", "false");
|
|
78
|
+
}
|
|
79
|
+
/**
|
|
80
|
+
* Adds a sidecar with an AWS Distro OpenTelemetry Collector.
|
|
81
|
+
* https://aws-otel.github.io/docs/setup/ecs
|
|
82
|
+
*
|
|
83
|
+
* You also need to add either the Java SDK for OTel or a Java agent,
|
|
84
|
+
* to capture telemetry and send to this collector.
|
|
85
|
+
*/
|
|
86
|
+
addOpenTelemetryCollectorSidecar() {
|
|
87
|
+
new OpenTelemetryPolicies(this, "OpenTelemetryPolicies", {
|
|
88
|
+
taskDefinition: this.props.service.taskDefinition,
|
|
89
|
+
});
|
|
90
|
+
this.props.service.taskDefinition.addExtension(new OpenTelemetryCollectorSidecar(this, this.props.logRetention, this.props.dockerImage, this.props.awsOtelConfig, this.props.containerProps));
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* Adds a sidecar with an AWS Distro OpenTelemetry Collector.
|
|
95
|
+
* https://aws-otel.github.io/docs/setup/ecs
|
|
96
|
+
*
|
|
97
|
+
* You also need to add either the Java SDK for OTel or a Java agent,
|
|
98
|
+
* to capture telemetry and send to this collector.
|
|
99
|
+
*/
|
|
100
|
+
class OpenTelemetryCollectorSidecar {
|
|
101
|
+
construct;
|
|
102
|
+
logRetention;
|
|
103
|
+
dockerImage;
|
|
104
|
+
awsOtelConfig;
|
|
105
|
+
containerProps;
|
|
106
|
+
constructor(construct, logRetention, dockerImage, awsOtelConfig, containerProps) {
|
|
107
|
+
this.construct = construct;
|
|
108
|
+
this.logRetention = logRetention;
|
|
109
|
+
this.dockerImage = dockerImage;
|
|
110
|
+
this.awsOtelConfig = awsOtelConfig;
|
|
111
|
+
this.containerProps = containerProps;
|
|
112
|
+
}
|
|
113
|
+
extend(taskDefinition) {
|
|
114
|
+
if (taskDefinition.networkMode !== ecs.NetworkMode.AWS_VPC) {
|
|
115
|
+
throw new Error("Task NetworkMode must be AWS_VPC: " + taskDefinition.networkMode);
|
|
116
|
+
}
|
|
117
|
+
const commands = {
|
|
118
|
+
metricsAndTraces: "--config=/etc/ecs/ecs-default-config.yaml",
|
|
119
|
+
metricsAndTracesAndContainerResources: "--config=/etc/ecs/container-insights/otel-task-metrics-config.yaml",
|
|
120
|
+
};
|
|
121
|
+
const logGroup = new logs.LogGroup(this.construct, "CollectorLogGroup", {
|
|
122
|
+
retention: this.logRetention ?? RetentionDays.SIX_MONTHS,
|
|
123
|
+
removalPolicy: RemovalPolicy.DESTROY,
|
|
124
|
+
});
|
|
125
|
+
const sidecarImage = this.dockerImage ?? "amazon/aws-otel-collector:v0.43.1";
|
|
126
|
+
const sidecar = taskDefinition.addContainer("aws-opentelemetry-collector", {
|
|
127
|
+
cpu: this.containerProps?.cpu ?? 32,
|
|
128
|
+
memoryReservationMiB: this.containerProps?.memoryReservationMiB ?? 24,
|
|
129
|
+
memoryLimitMiB: this.containerProps?.memoryLimitMiB ?? 256,
|
|
130
|
+
image: ecs.ContainerImage.fromRegistry(sidecarImage),
|
|
131
|
+
command: [commands.metricsAndTracesAndContainerResources], // This is not used when the AOT_CONFIG_CONTENT is set!
|
|
132
|
+
environment: {
|
|
133
|
+
// You can alternatively create an SSM parameter with the config, and pass it to the `secrets` option
|
|
134
|
+
AOT_CONFIG_CONTENT: this.awsOtelConfig ?? awsOtelCustomConfigYaml,
|
|
135
|
+
},
|
|
136
|
+
logging: ecs.LogDrivers.awsLogs({
|
|
137
|
+
logGroup: logGroup,
|
|
138
|
+
streamPrefix: "ecs",
|
|
139
|
+
}),
|
|
140
|
+
});
|
|
141
|
+
// A dependency should be added to all containers that export metrics.
|
|
142
|
+
// We are currently assuming that this is only the default container
|
|
143
|
+
taskDefinition.defaultContainer?.addContainerDependencies({
|
|
144
|
+
container: sidecar,
|
|
145
|
+
condition: ecs.ContainerDependencyCondition.START,
|
|
146
|
+
});
|
|
147
|
+
/*
|
|
148
|
+
* aws-otel-collector exposes these ports, and more:
|
|
149
|
+
* - udp 2000 : AWS XRay
|
|
150
|
+
* - tcp 4317 : OpenTelemetry collection GRPC
|
|
151
|
+
* - tcp 4318 : OpenTelemetry collection HTTP
|
|
152
|
+
*
|
|
153
|
+
* These are defined in the yaml config for the collector.
|
|
154
|
+
*/
|
|
155
|
+
taskDefinition.defaultContainer?.addEnvironment("AWS_XRAY_DAEMON_ADDRESS", "http://localhost:2000");
|
|
156
|
+
taskDefinition.defaultContainer?.addEnvironment("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4317");
|
|
157
|
+
taskDefinition.defaultContainer?.addEnvironment("OTEL_JAVAAGENT_ENABLED", "true");
|
|
158
|
+
if (!taskDefinition.isFargateCompatible) {
|
|
159
|
+
// This extension is made for the FargateService, so this is a requirement.
|
|
160
|
+
throw new Error("This task definition can not be ran on fargate! " +
|
|
161
|
+
taskDefinition.node.id);
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
/**
|
|
166
|
+
* Grants the sidecar permissions to create logs, metrics and XRay traces by extending the task roles.
|
|
167
|
+
*/
|
|
168
|
+
class OpenTelemetryPolicies extends constructs.Construct {
|
|
169
|
+
constructor(scope, id, props) {
|
|
170
|
+
super(scope, id);
|
|
171
|
+
const awsDistroOpenTelemetryPolicyStatement = new iam.PolicyStatement({
|
|
172
|
+
effect: iam.Effect.ALLOW,
|
|
173
|
+
resources: ["*"],
|
|
174
|
+
actions: [
|
|
175
|
+
"logs:PutLogEvents",
|
|
176
|
+
"logs:CreateLogGroup",
|
|
177
|
+
"logs:CreateLogStream",
|
|
178
|
+
"logs:DescribeLogStreams",
|
|
179
|
+
"logs:DescribeLogGroups",
|
|
180
|
+
"logs:PutRetentionPolicy",
|
|
181
|
+
"xray:PutTraceSegments",
|
|
182
|
+
"xray:PutTelemetryRecords",
|
|
183
|
+
"xray:GetSamplingRules",
|
|
184
|
+
"xray:GetSamplingTargets",
|
|
185
|
+
"xray:GetSamplingStatisticSummaries",
|
|
186
|
+
"cloudwatch:PutMetricData",
|
|
187
|
+
"ec2:DescribeVolumes",
|
|
188
|
+
"ec2:DescribeTags",
|
|
189
|
+
"ssm:GetParameters",
|
|
190
|
+
],
|
|
191
|
+
});
|
|
192
|
+
props.taskDefinition.addToTaskRolePolicy(awsDistroOpenTelemetryPolicyStatement);
|
|
193
|
+
props.taskDefinition.addToExecutionRolePolicy(awsDistroOpenTelemetryPolicyStatement);
|
|
194
|
+
props.taskDefinition.executionRole?.addManagedPolicy(iam.ManagedPolicy.fromAwsManagedPolicyName("CloudWatchLogsFullAccess"));
|
|
195
|
+
props.taskDefinition.executionRole?.addManagedPolicy(iam.ManagedPolicy.fromAwsManagedPolicyName("AmazonSSMReadOnlyAccess"));
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
/**
|
|
199
|
+
* This is a modified version of `./etc/ecs/container-insights/otel-task-metrics-config.yaml`
|
|
200
|
+
* where we add `resource_to_telemetry_conversion` so the otel resource `service.name`
|
|
201
|
+
* can be added to the metric. This is useful because you can then filter metrics by service,
|
|
202
|
+
* instead of grouping e.g. all services' memory usage under the same metric.
|
|
203
|
+
*
|
|
204
|
+
* @see https://aws-otel.github.io/docs/setup/ecs/config-through-ssm
|
|
205
|
+
* @see https://aws-otel.github.io/docs/getting-started/cloudwatch-metrics#cloudwatch-emf-exporter-awsemf
|
|
206
|
+
*/
|
|
207
|
+
const awsOtelCustomConfigYaml = readFileSync(path.resolve(__dirname, "..", "..", "assets", "open-telemetry", "otel-collector-task-metrics-config.yaml"), "utf-8")
|
|
208
|
+
.split("\n")
|
|
209
|
+
.filter((line) => !/^\s*##/.test(line)) // Skip comments starting with ##
|
|
210
|
+
.join("\n");
|
|
211
|
+
//# sourceMappingURL=data:application/json;base64,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@liflig/cdk",
|
|
3
|
-
"version": "3.12.0",
|
|
3
|
+
"version": "3.13.0",
|
|
4
4
|
"description": "CDK library for Liflig",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"repository": {
|
|
@@ -55,7 +55,7 @@
|
|
|
55
55
|
"@commitlint/cli": "19.8.1",
|
|
56
56
|
"@commitlint/config-conventional": "19.8.1",
|
|
57
57
|
"@eslint/eslintrc": "3.3.1",
|
|
58
|
-
"@eslint/js": "9.
|
|
58
|
+
"@eslint/js": "9.32.0",
|
|
59
59
|
"@types/aws-lambda": "8.10.152",
|
|
60
60
|
"@types/jest": "30.0.0",
|
|
61
61
|
"@types/node": "24.1.0",
|
|
@@ -65,7 +65,7 @@
|
|
|
65
65
|
"aws-cdk-lib": "2.207.0",
|
|
66
66
|
"constructs": "10.4.2",
|
|
67
67
|
"esbuild": "0.25.8",
|
|
68
|
-
"eslint": "9.
|
|
68
|
+
"eslint": "9.32.0",
|
|
69
69
|
"eslint-config-prettier": "10.1.8",
|
|
70
70
|
"eslint-plugin-prettier": "5.5.3",
|
|
71
71
|
"husky": "9.1.7",
|
|
@@ -75,7 +75,7 @@
|
|
|
75
75
|
"semantic-release": "24.2.7",
|
|
76
76
|
"ts-jest": "29.4.0",
|
|
77
77
|
"tsx": "4.20.3",
|
|
78
|
-
"typedoc": "0.28.
|
|
78
|
+
"typedoc": "0.28.8",
|
|
79
79
|
"typescript": "5.8.3"
|
|
80
80
|
},
|
|
81
81
|
"dependencies": {
|