@aws/ml-container-creator 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +88 -86
- package/config/bootstrap-stack.json +211 -0
- package/config/parameter-schema.json +88 -0
- package/infra/ci-harness/bin/ci-harness.ts +26 -0
- package/infra/ci-harness/buildspec.yml +352 -0
- package/infra/ci-harness/cdk.json +27 -0
- package/infra/ci-harness/lambda/scanner/index.ts +199 -0
- package/infra/ci-harness/lib/ci-harness-stack.ts +609 -0
- package/infra/ci-harness/package-lock.json +3979 -0
- package/infra/ci-harness/package.json +32 -0
- package/infra/ci-harness/tsconfig.json +38 -0
- package/package.json +13 -3
- package/src/app.js +318 -318
- package/src/copy-tpl.js +19 -19
- package/src/lib/asset-manager.js +74 -74
- package/src/lib/aws-profile-parser.js +45 -45
- package/src/lib/bootstrap-command-handler.js +560 -547
- package/src/lib/bootstrap-config.js +45 -45
- package/src/lib/ci-register-helpers.js +19 -19
- package/src/lib/ci-report-helpers.js +37 -37
- package/src/lib/ci-stage-helpers.js +49 -49
- package/src/lib/comment-generator.js +4 -4
- package/src/lib/config-manager.js +105 -105
- package/src/lib/deployment-config-resolver.js +10 -10
- package/src/lib/deployment-registry.js +153 -153
- package/src/lib/engine-prefix-resolver.js +8 -8
- package/src/lib/key-value-parser.js +6 -6
- package/src/lib/manifest-cli.js +108 -108
- package/src/lib/prompt-runner.js +224 -224
- package/src/lib/prompts.js +121 -121
- package/src/lib/registry-command-handler.js +174 -174
- package/src/lib/registry-loader.js +52 -52
- package/src/lib/sensitive-redactor.js +9 -9
- package/src/lib/template-engine.js +1 -1
- package/src/lib/template-manager.js +62 -62
- package/src/prompt-adapter.js +18 -18
|
@@ -0,0 +1,609 @@
|
|
|
1
|
+
import * as cdk from 'aws-cdk-lib';
|
|
2
|
+
import * as dynamodb from 'aws-cdk-lib/aws-dynamodb';
|
|
3
|
+
import * as events from 'aws-cdk-lib/aws-events';
|
|
4
|
+
import * as targets from 'aws-cdk-lib/aws-events-targets';
|
|
5
|
+
import * as iam from 'aws-cdk-lib/aws-iam';
|
|
6
|
+
import * as lambda from 'aws-cdk-lib/aws-lambda';
|
|
7
|
+
import { NodejsFunction } from 'aws-cdk-lib/aws-lambda-nodejs';
|
|
8
|
+
import * as logs from 'aws-cdk-lib/aws-logs';
|
|
9
|
+
import * as sns from 'aws-cdk-lib/aws-sns';
|
|
10
|
+
import * as codebuild from 'aws-cdk-lib/aws-codebuild';
|
|
11
|
+
import * as sfn from 'aws-cdk-lib/aws-stepfunctions';
|
|
12
|
+
import { Construct } from 'constructs';
|
|
13
|
+
import * as path from 'path';
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* MlccCiHarnessStack defines the CI Integration Harness infrastructure
|
|
17
|
+
* for automated lifecycle testing of ML Container Creator generated projects.
|
|
18
|
+
*
|
|
19
|
+
* Resources:
|
|
20
|
+
* - DynamoDB table (CI_Table) with GSI
|
|
21
|
+
* - Lambda function (Scanner) — starts Step Functions executions directly
|
|
22
|
+
* - EventBridge scheduled rule
|
|
23
|
+
* - Step Functions state machine (CI_Orchestrator)
|
|
24
|
+
* - CodeBuild project (CI_CodeBuild_Project)
|
|
25
|
+
* - CloudWatch log group (no CloudWatch alarms are defined in this stack yet)
|
|
26
|
+
* - IAM roles for the Scanner, Orchestrator, and CodeBuild executor (the CodeBuild role is granted service-wide wildcard actions)
|
|
27
|
+
* - SNS topic for alarm notifications
|
|
28
|
+
*/
|
|
29
|
+
export class MlccCiHarnessStack extends cdk.Stack {
|
|
30
|
+
/** DynamoDB table storing CI test configurations and results */
|
|
31
|
+
public readonly ciTable: dynamodb.Table;
|
|
32
|
+
|
|
33
|
+
/** SNS topic for alarm notifications */
|
|
34
|
+
public readonly ciDlqNotificationsTopic: sns.Topic;
|
|
35
|
+
|
|
36
|
+
/** CloudWatch log group for all CI harness components */
|
|
37
|
+
public readonly ciLogGroup: logs.LogGroup;
|
|
38
|
+
|
|
39
|
+
/** Scanner Lambda function that queries for stale CI records */
|
|
40
|
+
public readonly scannerFunction: NodejsFunction;
|
|
41
|
+
|
|
42
|
+
/** EventBridge rule that triggers the Scanner Lambda hourly */
|
|
43
|
+
public readonly scannerScheduleRule: events.Rule;
|
|
44
|
+
|
|
45
|
+
/** Step Functions state machine that orchestrates CI test executions */
|
|
46
|
+
public readonly ciOrchestrator: sfn.StateMachine;
|
|
47
|
+
|
|
48
|
+
/** IAM role for the Step Functions orchestrator */
|
|
49
|
+
public readonly orchestratorRole: iam.Role;
|
|
50
|
+
|
|
51
|
+
/** CodeBuild project that executes the full lifecycle stages */
|
|
52
|
+
public readonly ciCodeBuildProject: codebuild.Project;
|
|
53
|
+
|
|
54
|
+
constructor(scope: Construct, id: string, props?: cdk.StackProps) {
|
|
55
|
+
super(scope, id, props);
|
|
56
|
+
|
|
57
|
+
// Stack-level tags: added on the stack scope so they are applied to all resources in it.
const stackTags = cdk.Tags.of(this);
const tagPairs: ReadonlyArray<readonly [string, string]> = [
  ['mlcc:managed-by', 'ml-container-creator'],
  ['mlcc:created-by', 'bootstrap-ci'],
  ['mlcc:version', '0.1.0'],
];
for (const [tagKey, tagValue] of tagPairs) {
  stackTags.add(tagKey, tagValue);
}
|
|
61
|
+
|
|
62
|
+
// Stack parameters
|
|
63
|
+
const maxConcurrency = new cdk.CfnParameter(this, 'MaxConcurrency', {
|
|
64
|
+
type: 'Number',
|
|
65
|
+
default: 1,
|
|
66
|
+
description: 'Maximum number of parallel CodeBuild executions',
|
|
67
|
+
minValue: 1,
|
|
68
|
+
maxValue: 10,
|
|
69
|
+
});
|
|
70
|
+
|
|
71
|
+
const codebuildComputeType = new cdk.CfnParameter(this, 'CodeBuildComputeType', {
|
|
72
|
+
type: 'String',
|
|
73
|
+
default: 'BUILD_GENERAL1_MEDIUM',
|
|
74
|
+
description: 'CodeBuild compute type for CI executor',
|
|
75
|
+
allowedValues: [
|
|
76
|
+
'BUILD_GENERAL1_SMALL',
|
|
77
|
+
'BUILD_GENERAL1_MEDIUM',
|
|
78
|
+
'BUILD_GENERAL1_LARGE',
|
|
79
|
+
],
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
// SNS topic for alarm notifications
|
|
83
|
+
this.ciDlqNotificationsTopic = new sns.Topic(this, 'CiDlqNotificationsTopic', {
|
|
84
|
+
topicName: 'mlcc-ci-dlq-notifications',
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
// CloudWatch Log Group for all CI harness components
|
|
88
|
+
this.ciLogGroup = new logs.LogGroup(this, 'CiLogGroup', {
|
|
89
|
+
logGroupName: 'ml-container-creator-ci',
|
|
90
|
+
retention: logs.RetentionDays.THREE_MONTHS,
|
|
91
|
+
removalPolicy: cdk.RemovalPolicy.DESTROY,
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
// DynamoDB CI Table
|
|
95
|
+
this.ciTable = new dynamodb.Table(this, 'CiTable', {
|
|
96
|
+
tableName: 'mlcc-ci-table',
|
|
97
|
+
partitionKey: {
|
|
98
|
+
name: 'configId',
|
|
99
|
+
type: dynamodb.AttributeType.STRING,
|
|
100
|
+
},
|
|
101
|
+
billingMode: dynamodb.BillingMode.PAY_PER_REQUEST,
|
|
102
|
+
pointInTimeRecovery: true,
|
|
103
|
+
removalPolicy: cdk.RemovalPolicy.DESTROY,
|
|
104
|
+
});
|
|
105
|
+
|
|
106
|
+
// GSI for Scanner Lambda to query by testStatus and sort by lastTestTimestamp
|
|
107
|
+
this.ciTable.addGlobalSecondaryIndex({
|
|
108
|
+
indexName: 'testStatus-lastTestTimestamp-index',
|
|
109
|
+
partitionKey: {
|
|
110
|
+
name: 'testStatus',
|
|
111
|
+
type: dynamodb.AttributeType.STRING,
|
|
112
|
+
},
|
|
113
|
+
sortKey: {
|
|
114
|
+
name: 'lastTestTimestamp',
|
|
115
|
+
type: dynamodb.AttributeType.STRING,
|
|
116
|
+
},
|
|
117
|
+
});
|
|
118
|
+
|
|
119
|
+
// Scanner Lambda IAM role with least-privilege permissions
|
|
120
|
+
const scannerRole = new iam.Role(this, 'ScannerRole', {
|
|
121
|
+
roleName: 'mlcc-ci-scanner-role',
|
|
122
|
+
assumedBy: new iam.ServicePrincipal('lambda.amazonaws.com'),
|
|
123
|
+
description: 'IAM role for the MLCC CI Scanner Lambda function',
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
// DynamoDB:Query on CI_Table and its GSI
|
|
127
|
+
scannerRole.addToPolicy(new iam.PolicyStatement({
|
|
128
|
+
effect: iam.Effect.ALLOW,
|
|
129
|
+
actions: ['dynamodb:Query'],
|
|
130
|
+
resources: [
|
|
131
|
+
this.ciTable.tableArn,
|
|
132
|
+
`${this.ciTable.tableArn}/index/testStatus-lastTestTimestamp-index`,
|
|
133
|
+
],
|
|
134
|
+
}));
|
|
135
|
+
|
|
136
|
+
// Logs:CreateLogStream and PutLogEvents for the Scanner Lambda on the shared CI log group.
// LogGroup.logGroupArn already ends in ":*", so this single resource covers every log
// stream in the group. Do not append ":log-stream:<prefix>/*" to it: the result would
// contain ":*:log-stream:" and match no real log-stream ARN.
scannerRole.addToPolicy(new iam.PolicyStatement({
  effect: iam.Effect.ALLOW,
  actions: [
    'logs:CreateLogStream',
    'logs:PutLogEvents',
  ],
  resources: [this.ciLogGroup.logGroupArn],
}));
|
|
148
|
+
|
|
149
|
+
// Scanner Lambda function
|
|
150
|
+
this.scannerFunction = new NodejsFunction(this, 'ScannerFunction', {
|
|
151
|
+
functionName: 'mlcc-ci-scanner',
|
|
152
|
+
runtime: lambda.Runtime.NODEJS_20_X,
|
|
153
|
+
memorySize: 256,
|
|
154
|
+
timeout: cdk.Duration.seconds(60),
|
|
155
|
+
entry: path.join(__dirname, '..', 'lambda', 'scanner', 'index.ts'),
|
|
156
|
+
handler: 'handler',
|
|
157
|
+
role: scannerRole,
|
|
158
|
+
environment: {
|
|
159
|
+
CI_TABLE_NAME: this.ciTable.tableName,
|
|
160
|
+
GSI_NAME: 'testStatus-lastTestTimestamp-index',
|
|
161
|
+
},
|
|
162
|
+
logGroup: this.ciLogGroup,
|
|
163
|
+
loggingFormat: lambda.LoggingFormat.TEXT,
|
|
164
|
+
bundling: {
|
|
165
|
+
minify: true,
|
|
166
|
+
sourceMap: true,
|
|
167
|
+
},
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
// EventBridge scheduled rule — triggers Scanner Lambda every hour
|
|
171
|
+
this.scannerScheduleRule = new events.Rule(this, 'ScannerScheduleRule', {
|
|
172
|
+
ruleName: 'mlcc-ci-scanner-schedule',
|
|
173
|
+
description: 'Triggers the MLCC CI Scanner Lambda every hour to find stale test records',
|
|
174
|
+
schedule: events.Schedule.rate(cdk.Duration.hours(1)),
|
|
175
|
+
});
|
|
176
|
+
|
|
177
|
+
this.scannerScheduleRule.addTarget(new targets.LambdaFunction(this.scannerFunction));
|
|
178
|
+
|
|
179
|
+
// Step Functions Orchestrator IAM role
|
|
180
|
+
// Permissions for DynamoDB UpdateItem, Logs, and CodeBuild are defined here.
|
|
181
|
+
this.orchestratorRole = new iam.Role(this, 'OrchestratorRole', {
|
|
182
|
+
roleName: 'mlcc-ci-orchestrator-role',
|
|
183
|
+
assumedBy: new iam.ServicePrincipal('states.amazonaws.com'),
|
|
184
|
+
description: 'IAM role for the MLCC CI Orchestrator Step Functions state machine',
|
|
185
|
+
});
|
|
186
|
+
|
|
187
|
+
// DynamoDB:UpdateItem on CI_Table for UpdateResults states
|
|
188
|
+
this.orchestratorRole.addToPolicy(new iam.PolicyStatement({
|
|
189
|
+
effect: iam.Effect.ALLOW,
|
|
190
|
+
actions: ['dynamodb:UpdateItem'],
|
|
191
|
+
resources: [this.ciTable.tableArn],
|
|
192
|
+
}));
|
|
193
|
+
|
|
194
|
+
// Logs permissions for state machine execution logging
|
|
195
|
+
this.orchestratorRole.addToPolicy(new iam.PolicyStatement({
|
|
196
|
+
effect: iam.Effect.ALLOW,
|
|
197
|
+
actions: [
|
|
198
|
+
'logs:CreateLogDelivery',
|
|
199
|
+
'logs:GetLogDelivery',
|
|
200
|
+
'logs:UpdateLogDelivery',
|
|
201
|
+
'logs:DeleteLogDelivery',
|
|
202
|
+
'logs:ListLogDeliveries',
|
|
203
|
+
'logs:PutResourcePolicy',
|
|
204
|
+
'logs:DescribeResourcePolicies',
|
|
205
|
+
'logs:DescribeLogGroups',
|
|
206
|
+
'logs:PutLogEvents',
|
|
207
|
+
'logs:CreateLogStream',
|
|
208
|
+
],
|
|
209
|
+
resources: ['*'],
|
|
210
|
+
}));
|
|
211
|
+
|
|
212
|
+
// State machine definition using CustomState for SDK integrations
|
|
213
|
+
// Input: { configId, configJson, buildStrategy }
|
|
214
|
+
|
|
215
|
+
// RecordStartTime: capture the execution start timestamp
|
|
216
|
+
const recordStartTime = new sfn.Pass(this, 'RecordStartTime', {
|
|
217
|
+
parameters: {
|
|
218
|
+
'configId.$': '$.configId',
|
|
219
|
+
'configJson.$': '$.configJson',
|
|
220
|
+
'buildStrategy.$': '$.buildStrategy',
|
|
221
|
+
'startTime.$': '$$.State.EnteredTime',
|
|
222
|
+
},
|
|
223
|
+
});
|
|
224
|
+
|
|
225
|
+
// StartCodeBuild: Start a CodeBuild build with environment variables
|
|
226
|
+
// ProjectName is the literal name of the CodeBuild project created later in this
// constructor ('mlcc-ci-executor'); keep the two values in sync.
|
|
228
|
+
const startCodeBuild = new sfn.CustomState(this, 'StartCodeBuild', {
|
|
229
|
+
stateJson: {
|
|
230
|
+
Type: 'Task',
|
|
231
|
+
Resource: 'arn:aws:states:::codebuild:startBuild',
|
|
232
|
+
Parameters: {
|
|
233
|
+
ProjectName: 'mlcc-ci-executor',
|
|
234
|
+
EnvironmentVariablesOverride: [
|
|
235
|
+
{
|
|
236
|
+
Name: 'CONFIG_ID',
|
|
237
|
+
'Value.$': '$.configId',
|
|
238
|
+
Type: 'PLAINTEXT',
|
|
239
|
+
},
|
|
240
|
+
{
|
|
241
|
+
Name: 'CONFIG_JSON',
|
|
242
|
+
'Value.$': '$.configJson',
|
|
243
|
+
Type: 'PLAINTEXT',
|
|
244
|
+
},
|
|
245
|
+
{
|
|
246
|
+
Name: 'BUILD_STRATEGY',
|
|
247
|
+
'Value.$': '$.buildStrategy',
|
|
248
|
+
Type: 'PLAINTEXT',
|
|
249
|
+
},
|
|
250
|
+
{
|
|
251
|
+
Name: 'CI_TABLE_NAME',
|
|
252
|
+
Value: this.ciTable.tableName,
|
|
253
|
+
Type: 'PLAINTEXT',
|
|
254
|
+
},
|
|
255
|
+
{
|
|
256
|
+
Name: 'CI_LOG_GROUP',
|
|
257
|
+
Value: this.ciLogGroup.logGroupName,
|
|
258
|
+
Type: 'PLAINTEXT',
|
|
259
|
+
},
|
|
260
|
+
],
|
|
261
|
+
},
|
|
262
|
+
ResultPath: '$.buildResult',
|
|
263
|
+
},
|
|
264
|
+
});
|
|
265
|
+
|
|
266
|
+
// WaitForBuild: Wait 30 seconds before polling build status
|
|
267
|
+
const waitForBuild = new sfn.Wait(this, 'WaitForBuild', {
|
|
268
|
+
time: sfn.WaitTime.duration(cdk.Duration.seconds(30)),
|
|
269
|
+
});
|
|
270
|
+
|
|
271
|
+
// PollBuildStatus: BatchGetBuilds to check current build status
|
|
272
|
+
const pollBuildStatus = new sfn.CustomState(this, 'PollBuildStatus', {
|
|
273
|
+
stateJson: {
|
|
274
|
+
Type: 'Task',
|
|
275
|
+
Resource: 'arn:aws:states:::aws-sdk:codebuild:batchGetBuilds',
|
|
276
|
+
Parameters: {
|
|
277
|
+
'Ids.$': 'States.Array($.buildResult.Build.Id)',
|
|
278
|
+
},
|
|
279
|
+
ResultPath: '$.pollResult',
|
|
280
|
+
},
|
|
281
|
+
});
|
|
282
|
+
|
|
283
|
+
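// CheckTimestamp: Copy the polled build status to $.buildStatus and record the current time (no elapsed-time calculation happens here; a timeout is detected via CodeBuild's TIMED_OUT status)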
|
|
284
|
+
const checkTimestamp = new sfn.Pass(this, 'CheckTimestamp', {
|
|
285
|
+
parameters: {
|
|
286
|
+
'configId.$': '$.configId',
|
|
287
|
+
'configJson.$': '$.configJson',
|
|
288
|
+
'buildStrategy.$': '$.buildStrategy',
|
|
289
|
+
'startTime.$': '$.startTime',
|
|
290
|
+
'buildResult.$': '$.buildResult',
|
|
291
|
+
'pollResult.$': '$.pollResult',
|
|
292
|
+
'buildStatus.$': '$.pollResult.Builds[0].BuildStatus',
|
|
293
|
+
'currentTime.$': '$$.State.EnteredTime',
|
|
294
|
+
},
|
|
295
|
+
});
|
|
296
|
+
|
|
297
|
+
// HandleTimeout: Set failure status when build exceeds 90 minutes
|
|
298
|
+
const handleTimeout = new sfn.Pass(this, 'HandleTimeout', {
|
|
299
|
+
parameters: {
|
|
300
|
+
'configId.$': '$.configId',
|
|
301
|
+
'configJson.$': '$.configJson',
|
|
302
|
+
'buildStrategy.$': '$.buildStrategy',
|
|
303
|
+
'startTime.$': '$.startTime',
|
|
304
|
+
'testStatus': 'fail-build',
|
|
305
|
+
'errorMessage': 'CodeBuild execution timed out after 90 minutes',
|
|
306
|
+
},
|
|
307
|
+
});
|
|
308
|
+
|
|
309
|
+
// SetBuildCompleteResult: Narrow the state payload for builds that reached a terminal status (successful or not) before UpdateResults runs
|
|
310
|
+
const setSuccessResult = new sfn.Pass(this, 'SetBuildCompleteResult', {
|
|
311
|
+
parameters: {
|
|
312
|
+
'configId.$': '$.configId',
|
|
313
|
+
'configJson.$': '$.configJson',
|
|
314
|
+
'buildStrategy.$': '$.buildStrategy',
|
|
315
|
+
'startTime.$': '$.startTime',
|
|
316
|
+
'buildStatus.$': '$.buildStatus',
|
|
317
|
+
'pollResult.$': '$.pollResult',
|
|
318
|
+
},
|
|
319
|
+
});
|
|
320
|
+
|
|
321
|
+
// CheckBuildStatus: Branch on the CodeBuild status that CheckTimestamp copied to $.buildStatus.
//   - SUCCEEDED / FAILED / FAULT / STOPPED → SetBuildCompleteResult (build is finished)
//   - TIMED_OUT                            → HandleTimeout
//   - anything else (e.g. IN_PROGRESS)     → WaitForBuild (poll again)
// FAULT is a terminal CodeBuild status. It needs its own branch: without one it falls
// through to the default and the execution keeps polling a build that will never change.
const checkBuildStatus = new sfn.Choice(this, 'CheckBuildStatus');
for (const finishedStatus of ['SUCCEEDED', 'FAILED', 'FAULT', 'STOPPED']) {
  checkBuildStatus.when(
    sfn.Condition.stringEquals('$.buildStatus', finishedStatus),
    setSuccessResult,
  );
}
checkBuildStatus
  .when(
    sfn.Condition.stringEquals('$.buildStatus', 'TIMED_OUT'),
    handleTimeout,
  )
  .otherwise(waitForBuild);
|
|
340
|
+
|
|
341
|
+
// UpdateResults: DynamoDB UpdateItem with final test results
|
|
342
|
+
// For builds that completed (success or failure), the CodeBuild buildspec
|
|
343
|
+
// writes detailed stageResults and testStatus to DynamoDB in its Update stage.
|
|
344
|
+
// This state records orchestrator-level metadata as a fallback — if CodeBuild's
|
|
345
|
+
// post_build phase failed to write, this ensures the record is updated.
|
|
346
|
+
const updateResults = new sfn.CustomState(this, 'UpdateResults', {
|
|
347
|
+
stateJson: {
|
|
348
|
+
Type: 'Task',
|
|
349
|
+
Resource: 'arn:aws:states:::dynamodb:updateItem',
|
|
350
|
+
Parameters: {
|
|
351
|
+
TableName: this.ciTable.tableName,
|
|
352
|
+
Key: {
|
|
353
|
+
configId: { 'S.$': '$.configId' },
|
|
354
|
+
},
|
|
355
|
+
UpdateExpression: 'SET lastTestTimestamp = :ts, errorMessage = :err',
|
|
356
|
+
ExpressionAttributeValues: {
|
|
357
|
+
':ts': { 'S.$': '$$.State.EnteredTime' },
|
|
358
|
+
':err': {
|
|
359
|
+
'S.$': "States.Format('Build completed with status: {}', $.buildStatus)",
|
|
360
|
+
},
|
|
361
|
+
},
|
|
362
|
+
},
|
|
363
|
+
ResultPath: '$.updateResult',
|
|
364
|
+
Retry: [
|
|
365
|
+
{
|
|
366
|
+
ErrorEquals: ['States.ALL'],
|
|
367
|
+
IntervalSeconds: 2,
|
|
368
|
+
MaxAttempts: 3,
|
|
369
|
+
BackoffRate: 2.0,
|
|
370
|
+
},
|
|
371
|
+
],
|
|
372
|
+
End: true,
|
|
373
|
+
},
|
|
374
|
+
});
|
|
375
|
+
|
|
376
|
+
// UpdateResultsFromTimeout: DynamoDB UpdateItem for timed-out builds
|
|
377
|
+
const updateResultsFromTimeout = new sfn.CustomState(this, 'UpdateResultsFromTimeout', {
|
|
378
|
+
stateJson: {
|
|
379
|
+
Type: 'Task',
|
|
380
|
+
Resource: 'arn:aws:states:::dynamodb:updateItem',
|
|
381
|
+
Parameters: {
|
|
382
|
+
TableName: this.ciTable.tableName,
|
|
383
|
+
Key: {
|
|
384
|
+
configId: { 'S.$': '$.configId' },
|
|
385
|
+
},
|
|
386
|
+
UpdateExpression: 'SET testStatus = :status, lastTestTimestamp = :ts, errorMessage = :err',
|
|
387
|
+
ExpressionAttributeValues: {
|
|
388
|
+
':status': { 'S.$': '$.testStatus' },
|
|
389
|
+
':ts': { 'S.$': '$$.State.EnteredTime' },
|
|
390
|
+
':err': { 'S.$': '$.errorMessage' },
|
|
391
|
+
},
|
|
392
|
+
},
|
|
393
|
+
ResultPath: '$.updateResult',
|
|
394
|
+
Retry: [
|
|
395
|
+
{
|
|
396
|
+
ErrorEquals: ['States.ALL'],
|
|
397
|
+
IntervalSeconds: 2,
|
|
398
|
+
MaxAttempts: 3,
|
|
399
|
+
BackoffRate: 2.0,
|
|
400
|
+
},
|
|
401
|
+
],
|
|
402
|
+
End: true,
|
|
403
|
+
},
|
|
404
|
+
});
|
|
405
|
+
|
|
406
|
+
// Wire up the state machine chain
|
|
407
|
+
// RecordStartTime → StartCodeBuild → WaitForBuild → PollBuildStatus → CheckTimestamp → CheckBuildStatus
|
|
408
|
+
// CheckBuildStatus branches:
|
|
409
|
+
// - SUCCEEDED/FAILED/STOPPED → SetBuildCompleteResult → UpdateResults
|
|
410
|
+
// - TIMED_OUT → HandleTimeout → UpdateResultsFromTimeout
|
|
411
|
+
// - IN_PROGRESS (otherwise) → WaitForBuild (loop)
|
|
412
|
+
recordStartTime.next(startCodeBuild);
|
|
413
|
+
startCodeBuild.next(waitForBuild);
|
|
414
|
+
waitForBuild.next(pollBuildStatus);
|
|
415
|
+
pollBuildStatus.next(checkTimestamp);
|
|
416
|
+
checkTimestamp.next(checkBuildStatus);
|
|
417
|
+
setSuccessResult.next(updateResults);
|
|
418
|
+
handleTimeout.next(updateResultsFromTimeout);
|
|
419
|
+
|
|
420
|
+
// Create the state machine
|
|
421
|
+
this.ciOrchestrator = new sfn.StateMachine(this, 'CiOrchestrator', {
|
|
422
|
+
stateMachineName: 'mlcc-ci-orchestrator',
|
|
423
|
+
stateMachineType: sfn.StateMachineType.STANDARD,
|
|
424
|
+
definitionBody: sfn.DefinitionBody.fromChainable(recordStartTime),
|
|
425
|
+
role: this.orchestratorRole,
|
|
426
|
+
logs: {
|
|
427
|
+
destination: this.ciLogGroup,
|
|
428
|
+
level: sfn.LogLevel.ALL,
|
|
429
|
+
includeExecutionData: true,
|
|
430
|
+
},
|
|
431
|
+
tracingEnabled: true,
|
|
432
|
+
});
|
|
433
|
+
|
|
434
|
+
// Grant Scanner Lambda permission to start Step Functions executions directly
|
|
435
|
+
scannerRole.addToPolicy(new iam.PolicyStatement({
|
|
436
|
+
effect: iam.Effect.ALLOW,
|
|
437
|
+
actions: ['states:StartExecution'],
|
|
438
|
+
resources: [this.ciOrchestrator.stateMachineArn],
|
|
439
|
+
}));
|
|
440
|
+
|
|
441
|
+
// Add STATE_MACHINE_ARN env var to Scanner Lambda (defined after state machine)
|
|
442
|
+
this.scannerFunction.addEnvironment('STATE_MACHINE_ARN', this.ciOrchestrator.stateMachineArn);
|
|
443
|
+
|
|
444
|
+
// CodeBuild IAM role with permissions for lifecycle execution
|
|
445
|
+
const codebuildRole = new iam.Role(this, 'CodeBuildRole', {
|
|
446
|
+
roleName: 'mlcc-ci-codebuild-role',
|
|
447
|
+
assumedBy: new iam.ServicePrincipal('codebuild.amazonaws.com'),
|
|
448
|
+
description: 'IAM role for the MLCC CI CodeBuild executor project',
|
|
449
|
+
});
|
|
450
|
+
|
|
451
|
+
// DynamoDB:UpdateItem on CI_Table for writing stage results
|
|
452
|
+
codebuildRole.addToPolicy(new iam.PolicyStatement({
|
|
453
|
+
effect: iam.Effect.ALLOW,
|
|
454
|
+
actions: ['dynamodb:UpdateItem'],
|
|
455
|
+
resources: [this.ciTable.tableArn],
|
|
456
|
+
}));
|
|
457
|
+
|
|
458
|
+
// ECR:* for building and pushing container images
|
|
459
|
+
codebuildRole.addToPolicy(new iam.PolicyStatement({
|
|
460
|
+
effect: iam.Effect.ALLOW,
|
|
461
|
+
actions: ['ecr:*'],
|
|
462
|
+
resources: ['*'],
|
|
463
|
+
}));
|
|
464
|
+
|
|
465
|
+
// CodeBuild:* for creating and managing per-project build projects
|
|
466
|
+
codebuildRole.addToPolicy(new iam.PolicyStatement({
|
|
467
|
+
effect: iam.Effect.ALLOW,
|
|
468
|
+
actions: ['codebuild:*'],
|
|
469
|
+
resources: ['*'],
|
|
470
|
+
}));
|
|
471
|
+
|
|
472
|
+
// SageMaker:* for deploying and testing endpoints
|
|
473
|
+
codebuildRole.addToPolicy(new iam.PolicyStatement({
|
|
474
|
+
effect: iam.Effect.ALLOW,
|
|
475
|
+
actions: ['sagemaker:*'],
|
|
476
|
+
resources: ['*'],
|
|
477
|
+
}));
|
|
478
|
+
|
|
479
|
+
// S3:* for model artifact storage
|
|
480
|
+
codebuildRole.addToPolicy(new iam.PolicyStatement({
|
|
481
|
+
effect: iam.Effect.ALLOW,
|
|
482
|
+
actions: ['s3:*'],
|
|
483
|
+
resources: ['*'],
|
|
484
|
+
}));
|
|
485
|
+
|
|
486
|
+
// Logs:* for writing build logs to the CI log group
|
|
487
|
+
codebuildRole.addToPolicy(new iam.PolicyStatement({
|
|
488
|
+
effect: iam.Effect.ALLOW,
|
|
489
|
+
actions: ['logs:*'],
|
|
490
|
+
resources: ['*'],
|
|
491
|
+
}));
|
|
492
|
+
|
|
493
|
+
// IAM permissions for creating per-project CodeBuild service roles and passing roles
|
|
494
|
+
codebuildRole.addToPolicy(new iam.PolicyStatement({
|
|
495
|
+
effect: iam.Effect.ALLOW,
|
|
496
|
+
actions: [
|
|
497
|
+
'iam:CreateRole',
|
|
498
|
+
'iam:GetRole',
|
|
499
|
+
'iam:PutRolePolicy',
|
|
500
|
+
'iam:PassRole',
|
|
501
|
+
'iam:TagRole',
|
|
502
|
+
'iam:DeleteRole',
|
|
503
|
+
'iam:DeleteRolePolicy',
|
|
504
|
+
],
|
|
505
|
+
resources: ['*'],
|
|
506
|
+
}));
|
|
507
|
+
|
|
508
|
+
// CodeBuild project: mlcc-ci-executor
// Runs the lifecycle stages Generate → Build → Deploy_Test → Teardown → Update in one build.
// The project has no source configured, so the buildspec is defined inline here.
// Once a source is configured, this can switch to
// BuildSpec.fromSourceFilename('buildspec.yml').
//
// The compute type comes from the CodeBuildComputeType stack parameter (default
// BUILD_GENERAL1_MEDIUM), so the value chosen at deploy time is actually applied
// instead of being ignored in favour of a hard-coded size.
this.ciCodeBuildProject = new codebuild.Project(this, 'CiCodeBuildProject', {
  projectName: 'mlcc-ci-executor',
  description: 'MLCC CI executor — full lifecycle testing with AWS CLI v2',
  role: codebuildRole,
  environment: {
    buildImage: codebuild.LinuxBuildImage.fromCodeBuildImageId(
      'aws/codebuild/amazonlinux2-x86_64-standard:5.0'
    ),
    // valueAsString is a deploy-time token; the parameter's allowedValues restrict it
    // to valid ComputeType strings, which is why the assertion is safe here.
    computeType: codebuildComputeType.valueAsString as codebuild.ComputeType,
    // Privileged mode is enabled; the Build stage has a "docker-in-docker" strategy.
    privileged: true,
  },
  buildSpec: codebuild.BuildSpec.fromObject({
    version: '0.2',
    env: {
      variables: {
        // Empty defaults; the StartCodeBuild state overrides these per execution.
        CI_TABLE_NAME: '',
        CI_LOG_GROUP: '',
        CONFIG_ID: '',
        CONFIG_JSON: '',
        BUILD_STRATEGY: 'codebuild-submit',
        ROLE_ARN: `arn:aws:iam::${this.account}:role/mlcc-sagemaker-execution-role`,
      },
    },
    phases: {
      install: {
        'runtime-versions': {
          nodejs: 22,
        },
        commands: [
          'echo "=== MLCC CI Harness - Install Phase ==="',
          // Install AWS CLI v2 (CodeBuild standard image has v1 which lacks newer SageMaker waiters)
          'curl -s "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o /tmp/awscliv2.zip && unzip -q /tmp/awscliv2.zip -d /tmp && /tmp/aws/install --update && rm -rf /tmp/aws /tmp/awscliv2.zip',
          'npm install -g @aws/ml-container-creator',
          // Shell state shared by later phases: start time, first failing stage, per-stage status.
          'BUILD_START_TIME=$(date +%s)',
          'FIRST_FAILURE=""',
          'GENERATE_STATUS="skip"',
          'VALIDATE_STATUS="skip"',
          'BUILD_STATUS_VAR="skip"',
          'DEPLOY_TEST_STATUS="skip"',
          'TEARDOWN_STATUS="skip"',
          'UPDATE_STATUS="skip"',
          'BUILD_TIMESTAMP=$(date -u +%Y%m%d-%H%M%S)',
        ],
      },
      pre_build: {
        commands: [
          'echo "=== Stage: Generate ==="',
          'echo "$CONFIG_JSON" > /tmp/ci-config.json && chmod 644 /tmp/ci-config.json',
          'export CI_PROJECT_DIR="/tmp/ci-project"',
          'rm -rf "$CI_PROJECT_DIR"',
          // A failure is recorded in FIRST_FAILURE instead of aborting, so later phases still run.
          'ml-container-creator --config /tmp/ci-config.json --skip-prompts --project-dir "$CI_PROJECT_DIR" && chmod +x "$CI_PROJECT_DIR"/do/* && GENERATE_STATUS="pass" || { GENERATE_STATUS="fail"; FIRST_FAILURE="generate"; }',
        ],
      },
      build: {
        commands: [
          'export CI_PROJECT_DIR="/tmp/ci-project"',
          'echo "=== Stage: Build ==="',
          // Each stage is skipped once FIRST_FAILURE is set by an earlier stage.
          'if [ -z "$FIRST_FAILURE" ]; then cd "$CI_PROJECT_DIR" && if [ "$BUILD_STRATEGY" = "docker-in-docker" ]; then ./do/build && ./do/push; else ./do/submit; fi && BUILD_STATUS_VAR="pass" || { BUILD_STATUS_VAR="fail"; FIRST_FAILURE="build"; }; fi',
          'echo "=== Stage: Deploy_Test ==="',
          'if [ -z "$FIRST_FAILURE" ]; then cd "$CI_PROJECT_DIR" && ./do/deploy && ./do/test && DEPLOY_TEST_STATUS="pass" || { DEPLOY_TEST_STATUS="fail"; FIRST_FAILURE="deploy_test"; }; fi',
        ],
      },
      post_build: {
        commands: [
          'export CI_PROJECT_DIR="/tmp/ci-project"',
          'echo "=== Stage: Teardown ==="',
          // Teardown is attempted regardless of earlier failures.
          'cd "$CI_PROJECT_DIR" && yes yes | ./do/clean all && TEARDOWN_STATUS="pass" || TEARDOWN_STATUS="fail"',
          'echo "=== Stage: Update ==="',
          'TOTAL_DURATION=$(($(date +%s) - BUILD_START_TIME))',
          'if [ -n "$FIRST_FAILURE" ]; then FINAL_TEST_STATUS="fail-${FIRST_FAILURE}"; else FINAL_TEST_STATUS="pass"; fi',
          'LAST_TEST_TIMESTAMP=$(date -u +%Y-%m-%dT%H:%M:%SZ)',
          // Write the final status, timestamp, duration and first failing stage to the CI table.
          'aws dynamodb update-item --table-name "$CI_TABLE_NAME" --key "{\\\"configId\\\":{\\\"S\\\":\\\"$CONFIG_ID\\\"}}" --update-expression "SET testStatus = :ts, lastTestTimestamp = :ltt, lastTestDuration = :ltd, errorMessage = :em" --expression-attribute-values "{\\\":ts\\\":{\\\"S\\\":\\\"$FINAL_TEST_STATUS\\\"},\\\":ltt\\\":{\\\"S\\\":\\\"$LAST_TEST_TIMESTAMP\\\"},\\\":ltd\\\":{\\\"N\\\":\\\"$TOTAL_DURATION\\\"},\\\":em\\\":{\\\"S\\\":\\\"$FIRST_FAILURE\\\"}}" && UPDATE_STATUS="pass" || UPDATE_STATUS="fail"',
          'echo "=== MLCC CI Complete: $FINAL_TEST_STATUS (${TOTAL_DURATION}s) ==="',
        ],
      },
    },
  }),
  // CodeBuild reports TIMED_OUT after this long; the orchestrator's HandleTimeout branch handles it.
  timeout: cdk.Duration.minutes(90),
  logging: {
    cloudWatch: {
      logGroup: this.ciLogGroup,
      prefix: 'build',
      enabled: true,
    },
  },
});
|
|
598
|
+
|
|
599
|
+
// Add CodeBuild permissions to the orchestrator role so Step Functions can start builds
|
|
600
|
+
this.orchestratorRole.addToPolicy(new iam.PolicyStatement({
|
|
601
|
+
effect: iam.Effect.ALLOW,
|
|
602
|
+
actions: [
|
|
603
|
+
'codebuild:StartBuild',
|
|
604
|
+
'codebuild:BatchGetBuilds',
|
|
605
|
+
],
|
|
606
|
+
resources: [this.ciCodeBuildProject.projectArn],
|
|
607
|
+
}));
|
|
608
|
+
}
|
|
609
|
+
}
|