@aws/ml-container-creator 0.9.1 → 0.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE-THIRD-PARTY +9304 -0
- package/bin/cli.js +2 -0
- package/config/bootstrap-e2e-stack.json +341 -0
- package/config/bootstrap-stack.json +40 -3
- package/config/parameter-schema-v2.json +2049 -0
- package/config/tune-catalog.json +1781 -0
- package/infra/ci-harness/buildspec.yml +1 -0
- package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
- package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
- package/infra/ci-harness/lib/ci-harness-stack.ts +837 -7
- package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
- package/package.json +53 -68
- package/servers/base-image-picker/index.js +121 -121
- package/servers/e2e-status/index.js +297 -0
- package/servers/e2e-status/manifest.json +14 -0
- package/servers/e2e-status/package.json +15 -0
- package/servers/endpoint-picker/LICENSE +202 -0
- package/servers/endpoint-picker/index.js +536 -0
- package/servers/endpoint-picker/manifest.json +14 -0
- package/servers/endpoint-picker/package.json +18 -0
- package/servers/hyperpod-cluster-picker/index.js +125 -125
- package/servers/instance-sizer/index.js +138 -138
- package/servers/instance-sizer/lib/instance-ranker.js +76 -76
- package/servers/instance-sizer/lib/model-resolver.js +61 -61
- package/servers/instance-sizer/lib/quota-resolver.js +113 -113
- package/servers/instance-sizer/lib/vram-estimator.js +31 -31
- package/servers/lib/bedrock-client.js +38 -38
- package/servers/lib/catalogs/jumpstart-public.json +101 -16
- package/servers/lib/catalogs/model-servers.json +201 -3
- package/servers/lib/catalogs/models.json +182 -26
- package/servers/lib/custom-validators.js +13 -13
- package/servers/lib/dynamic-resolver.js +4 -4
- package/servers/marketplace-picker/index.js +342 -0
- package/servers/marketplace-picker/manifest.json +14 -0
- package/servers/marketplace-picker/package.json +18 -0
- package/servers/model-picker/index.js +382 -382
- package/servers/region-picker/index.js +56 -56
- package/servers/workload-picker/LICENSE +202 -0
- package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
- package/servers/workload-picker/index.js +171 -0
- package/servers/workload-picker/manifest.json +16 -0
- package/servers/workload-picker/package.json +16 -0
- package/src/app.js +4 -390
- package/src/lib/bootstrap-command-handler.js +710 -1148
- package/src/lib/bootstrap-config.js +36 -0
- package/src/lib/bootstrap-profile-manager.js +641 -0
- package/src/lib/bootstrap-provisioners.js +421 -0
- package/src/lib/ci-register-helpers.js +74 -0
- package/src/lib/config-loader.js +408 -0
- package/src/lib/config-manager.js +66 -1685
- package/src/lib/config-mcp-client.js +118 -0
- package/src/lib/config-validator.js +634 -0
- package/src/lib/cuda-resolver.js +149 -0
- package/src/lib/e2e-catalog-validator.js +251 -3
- package/src/lib/e2e-ci-recorder.js +103 -0
- package/src/lib/generated/cli-options.js +315 -311
- package/src/lib/generated/parameter-matrix.js +671 -0
- package/src/lib/generated/validation-rules.js +71 -71
- package/src/lib/marketplace-flow.js +276 -0
- package/src/lib/mcp-query-runner.js +768 -0
- package/src/lib/parameter-schema-validator.js +62 -18
- package/src/lib/path-prover-brain.js +607 -0
- package/src/lib/prompt-runner.js +41 -1504
- package/src/lib/prompts/feature-prompts.js +172 -0
- package/src/lib/prompts/index.js +48 -0
- package/src/lib/prompts/infrastructure-prompts.js +690 -0
- package/src/lib/prompts/model-prompts.js +552 -0
- package/src/lib/prompts/project-prompts.js +82 -0
- package/src/lib/prompts.js +2 -1446
- package/src/lib/registry-command-handler.js +135 -3
- package/src/lib/secrets-prompt-runner.js +251 -0
- package/src/lib/template-variable-resolver.js +422 -0
- package/src/lib/tune-catalog-validator.js +37 -4
- package/templates/Dockerfile +9 -0
- package/templates/code/adapter_sidecar.py +444 -0
- package/templates/code/serve +6 -0
- package/templates/code/serve.d/vllm.ejs +1 -1
- package/templates/do/.benchmark_writer.py +1476 -0
- package/templates/do/.tune_helper.py +982 -57
- package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
- package/templates/do/adapter +149 -0
- package/templates/do/benchmark +639 -85
- package/templates/do/config +108 -5
- package/templates/do/deploy.d/managed-inference.ejs +192 -11
- package/templates/do/optimize +106 -37
- package/templates/do/register +89 -0
- package/templates/do/test +13 -0
- package/templates/do/tune +378 -59
- package/templates/do/validate +44 -4
- package/config/parameter-schema.json +0 -88
|
@@ -17,16 +17,17 @@
|
|
|
17
17
|
*/
|
|
18
18
|
|
|
19
19
|
import { execSync } from 'node:child_process';
|
|
20
|
-
import { existsSync,
|
|
20
|
+
import { existsSync, writeFileSync, mkdirSync } from 'node:fs';
|
|
21
21
|
import path from 'node:path';
|
|
22
22
|
import { tmpdir } from 'node:os';
|
|
23
23
|
import { fileURLToPath } from 'node:url';
|
|
24
24
|
import BootstrapConfig from './bootstrap-config.js';
|
|
25
25
|
import AwsProfileParser from './aws-profile-parser.js';
|
|
26
|
-
import AssetManager from './asset-manager.js';
|
|
27
26
|
import McpCommandHandler from './mcp-command-handler.js';
|
|
28
27
|
import RegistryCommandHandler from './registry-command-handler.js';
|
|
29
28
|
import { runPrompts } from '../prompt-adapter.js';
|
|
29
|
+
import BootstrapProfileManager from './bootstrap-profile-manager.js';
|
|
30
|
+
import BootstrapProvisioners from './bootstrap-provisioners.js';
|
|
30
31
|
|
|
31
32
|
const __filename = fileURLToPath(import.meta.url);
|
|
32
33
|
const __dirname = path.dirname(__filename);
|
|
@@ -39,16 +40,58 @@ export default class BootstrapCommandHandler {
|
|
|
39
40
|
this.config = new BootstrapConfig();
|
|
40
41
|
this.profileParser = new AwsProfileParser();
|
|
41
42
|
this._promptFn = promptFn || runPrompts;
|
|
43
|
+
this.profileManager = new BootstrapProfileManager(this);
|
|
44
|
+
this.provisioners = new BootstrapProvisioners(this);
|
|
42
45
|
}
|
|
43
46
|
|
|
47
|
+
// ── Provisioner delegations (backward compat for tests) ─────────
|
|
48
|
+
|
|
49
|
+
_buildResourceTags() { return this.provisioners._buildResourceTags(); }
|
|
50
|
+
_setupEcrRepository() { return this.provisioners._setupEcrRepository(); }
|
|
51
|
+
_setupIamRole(options) { return this.provisioners._setupIamRole(options); }
|
|
52
|
+
_setupS3Buckets() { return this.provisioners._setupS3Buckets(); }
|
|
53
|
+
_createS3Bucket(name, tags) { return this.provisioners._createS3Bucket(name, tags); }
|
|
54
|
+
_verifyCliV2() { return this.provisioners._verifyCliV2(); }
|
|
55
|
+
|
|
56
|
+
// ── ProfileManager delegations (backward compat for tests) ──────
|
|
57
|
+
|
|
58
|
+
_handleStatus(options) { return this.profileManager._handleStatus(options); }
|
|
59
|
+
_handleUse(profileName) { return this.profileManager._handleUse(profileName); }
|
|
60
|
+
_handleList() { return this.profileManager._handleList(); }
|
|
61
|
+
_handleRemove(profileName, options) { return this.profileManager._handleRemove(profileName, options); }
|
|
62
|
+
_handleScan() { return this.profileManager._handleScan(); }
|
|
63
|
+
_handlePrune() { return this.profileManager._handlePrune(); }
|
|
64
|
+
_handleSyncSchemas() { return this.profileManager._handleSyncSchemas(); }
|
|
65
|
+
_handleSyncModelFamilies() { return this.profileManager._handleSyncModelFamilies(); }
|
|
66
|
+
|
|
44
67
|
/**
|
|
45
68
|
* Dispatch bootstrap subcommands.
|
|
46
69
|
* @param {string[]} args - Remaining positional args after 'bootstrap'
|
|
47
70
|
* @param {object} options - Parsed CLI options
|
|
48
71
|
*/
|
|
49
72
|
async handle(args, options) {
|
|
73
|
+
// Commander.js with passThroughOptions() captures flags after positional
|
|
74
|
+
// arguments in args rather than options. Extract known flags from args.
|
|
75
|
+
const extractedOptions = { ...options };
|
|
76
|
+
const cleanArgs = [];
|
|
77
|
+
for (const arg of args) {
|
|
78
|
+
if (arg === '--ci') extractedOptions.ci = true;
|
|
79
|
+
else if (arg === '--benchmark-infra') extractedOptions.benchmarkInfra = true;
|
|
80
|
+
else if (arg === '--skip-ci') extractedOptions.skipCi = true;
|
|
81
|
+
else if (arg === '--skip-s3') extractedOptions.skipS3 = true;
|
|
82
|
+
else if (arg === '--skip-post-setup') extractedOptions.skipPostSetup = true;
|
|
83
|
+
else if (arg === '--force') extractedOptions.force = true;
|
|
84
|
+
else if (arg === '--verify') extractedOptions.verify = true;
|
|
85
|
+
else if (arg === '--delete-stack') extractedOptions.deleteStack = true;
|
|
86
|
+
else if (arg === '--non-interactive') extractedOptions.nonInteractive = true;
|
|
87
|
+
else if (arg === '--ignore-staleness') extractedOptions.ignoreStaleness = true;
|
|
88
|
+
else cleanArgs.push(arg);
|
|
89
|
+
}
|
|
90
|
+
args = cleanArgs;
|
|
91
|
+
options = extractedOptions;
|
|
92
|
+
|
|
50
93
|
// Handle legacy --sync-schemas flag for backward compatibility
|
|
51
|
-
if (options['sync-schemas']) {
|
|
94
|
+
if ((options['sync-schemas'] || options.syncSchemas)) {
|
|
52
95
|
await this._handleSyncSchemas();
|
|
53
96
|
if (args.length === 0) return;
|
|
54
97
|
}
|
|
@@ -85,6 +128,15 @@ export default class BootstrapCommandHandler {
|
|
|
85
128
|
case 'sync-schemas':
|
|
86
129
|
await this._handleSyncSchemas();
|
|
87
130
|
break;
|
|
131
|
+
case 'sync-model-families':
|
|
132
|
+
await this._handleSyncModelFamilies();
|
|
133
|
+
break;
|
|
134
|
+
// Migration path: upgrades legacy profiles to current naming conventions.
|
|
135
|
+
// Corrects stackName to mlcc-bootstrap-{profileName}, renames sharedStackFrom
|
|
136
|
+
// to sharedInfraFrom. Idempotent — safe to run multiple times.
|
|
137
|
+
case 'migrate':
|
|
138
|
+
await this._handleMigrate();
|
|
139
|
+
break;
|
|
88
140
|
default:
|
|
89
141
|
console.log(`Unknown bootstrap subcommand: ${subcommand}`);
|
|
90
142
|
this._showHelp();
|
|
@@ -97,7 +149,8 @@ export default class BootstrapCommandHandler {
|
|
|
97
149
|
* @param {object} options - Parsed CLI options
|
|
98
150
|
*/
|
|
99
151
|
async _handleInteractiveSetup(options) {
|
|
100
|
-
|
|
152
|
+
// Commander.js converts --non-interactive to options.nonInteractive (camelCase)
|
|
153
|
+
const nonInteractive = options['non-interactive'] || options.nonInteractive;
|
|
101
154
|
|
|
102
155
|
// Non-interactive mode: validate required flags upfront
|
|
103
156
|
if (nonInteractive) {
|
|
@@ -117,7 +170,7 @@ export default class BootstrapCommandHandler {
|
|
|
117
170
|
console.log('\n🚀 Bootstrap — Shared AWS Infrastructure Setup\n');
|
|
118
171
|
|
|
119
172
|
// Verify AWS CLI v2 is installed
|
|
120
|
-
if (!this._verifyCliV2()) {
|
|
173
|
+
if (!this.provisioners._verifyCliV2()) {
|
|
121
174
|
return;
|
|
122
175
|
}
|
|
123
176
|
|
|
@@ -158,13 +211,13 @@ export default class BootstrapCommandHandler {
|
|
|
158
211
|
|
|
159
212
|
// Step 3: Determine stack parameters
|
|
160
213
|
let useExistingRoleArn = '';
|
|
161
|
-
if (nonInteractive && options['role-arn']) {
|
|
162
|
-
useExistingRoleArn = options['role-arn'];
|
|
163
|
-
console.log(` Using provided IAM role ARN: ${options['role-arn']}`);
|
|
214
|
+
if (nonInteractive && (options['role-arn'] || options.roleArn)) {
|
|
215
|
+
useExistingRoleArn = (options['role-arn'] || options.roleArn);
|
|
216
|
+
console.log(` Using provided IAM role ARN: ${(options['role-arn'] || options.roleArn)}`);
|
|
164
217
|
}
|
|
165
218
|
|
|
166
219
|
let createS3Buckets = false;
|
|
167
|
-
if (nonInteractive && options['skip-s3']) {
|
|
220
|
+
if (nonInteractive && (options['skip-s3'] || options.skipS3)) {
|
|
168
221
|
console.log(' ⏭️ Skipping S3 bucket creation (--skip-s3)');
|
|
169
222
|
} else if (nonInteractive) {
|
|
170
223
|
createS3Buckets = true;
|
|
@@ -209,7 +262,8 @@ export default class BootstrapCommandHandler {
|
|
|
209
262
|
|
|
210
263
|
profileData.roleArn = stackOutputs.RoleArn;
|
|
211
264
|
profileData.ecrRepositoryName = stackOutputs.EcrRepositoryName;
|
|
212
|
-
profileData.stackName =
|
|
265
|
+
profileData.stackName = stackName;
|
|
266
|
+
profileData.sharedInfraFrom = otherStack; // Track that this profile reuses another's stack
|
|
213
267
|
if (stackOutputs.AsyncS3BucketName) profileData.asyncS3Bucket = stackOutputs.AsyncS3BucketName;
|
|
214
268
|
if (stackOutputs.BatchS3BucketName) profileData.batchS3Bucket = stackOutputs.BatchS3BucketName;
|
|
215
269
|
if (stackOutputs.AdapterS3BucketName) profileData.adapterS3Bucket = stackOutputs.AdapterS3BucketName;
|
|
@@ -223,15 +277,45 @@ export default class BootstrapCommandHandler {
|
|
|
223
277
|
}
|
|
224
278
|
|
|
225
279
|
if (!profileData.stackName) {
|
|
280
|
+
// Pre-check: if IAM role already exists globally (from another region's deployment),
|
|
281
|
+
// pass its ARN so CloudFormation skips re-creation (account-level singleton)
|
|
282
|
+
if (!useExistingRoleArn) {
|
|
283
|
+
try {
|
|
284
|
+
const roleResult = this._execAws(
|
|
285
|
+
'iam get-role --role-name mlcc-sagemaker-execution-role',
|
|
286
|
+
awsProfile
|
|
287
|
+
);
|
|
288
|
+
const roleArn = roleResult && roleResult.Role && roleResult.Role.Arn;
|
|
289
|
+
if (roleArn && roleArn.startsWith('arn:aws:iam::')) {
|
|
290
|
+
useExistingRoleArn = roleArn;
|
|
291
|
+
console.log(` ℹ️ Reusing existing IAM role: ${roleArn}`);
|
|
292
|
+
}
|
|
293
|
+
} catch (_) {
|
|
294
|
+
// Role doesn't exist yet — will be created by the stack
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
|
|
226
298
|
try {
|
|
299
|
+
// Check if ECR repo already exists (avoid ResourceExistenceCheck failure)
|
|
300
|
+
let skipEcr = 'false';
|
|
301
|
+
try {
|
|
302
|
+
this._execAws(
|
|
303
|
+
`ecr describe-repositories --repository-names ml-container-creator --region ${region}`,
|
|
304
|
+
awsProfile
|
|
305
|
+
);
|
|
306
|
+
skipEcr = 'true';
|
|
307
|
+
console.log(' ℹ️ ECR repository already exists — skipping creation');
|
|
308
|
+
} catch (_) { /* doesn't exist — will be created */ }
|
|
309
|
+
|
|
227
310
|
const stackOutputs = this._deployStack(stackName, {
|
|
228
311
|
CreateS3Buckets: createS3Buckets ? 'true' : 'false',
|
|
229
|
-
UseExistingRoleArn: useExistingRoleArn
|
|
312
|
+
UseExistingRoleArn: useExistingRoleArn,
|
|
313
|
+
SkipEcrCreation: skipEcr
|
|
230
314
|
}, awsProfile, region);
|
|
231
315
|
|
|
232
316
|
// Read outputs into profile data
|
|
233
317
|
profileData.roleArn = stackOutputs.RoleArn;
|
|
234
|
-
profileData.ecrRepositoryName = stackOutputs.EcrRepositoryName;
|
|
318
|
+
profileData.ecrRepositoryName = stackOutputs.EcrRepositoryName || 'ml-container-creator';
|
|
235
319
|
profileData.stackName = stackName;
|
|
236
320
|
|
|
237
321
|
if (stackOutputs.AsyncS3BucketName) {
|
|
@@ -256,6 +340,23 @@ export default class BootstrapCommandHandler {
|
|
|
256
340
|
}
|
|
257
341
|
} // end if (!profileData.stackName)
|
|
258
342
|
|
|
343
|
+
// Step 4b: MLflow App for model customization experiment tracking
|
|
344
|
+
this._displayProgress('📊', 'MLflow App for experiment tracking...');
|
|
345
|
+
try {
|
|
346
|
+
if (!profileData.mlflowAppArn) {
|
|
347
|
+
const mlflowAppArn = this._ensureMlflowApp(profileData, awsProfile);
|
|
348
|
+
if (mlflowAppArn) {
|
|
349
|
+
profileData.mlflowAppArn = mlflowAppArn;
|
|
350
|
+
console.log(` ✅ MLflow App ready: ${mlflowAppArn}`);
|
|
351
|
+
}
|
|
352
|
+
} else {
|
|
353
|
+
console.log(` ✅ MLflow App already configured: ${profileData.mlflowAppArn}`);
|
|
354
|
+
}
|
|
355
|
+
} catch (error) {
|
|
356
|
+
console.log(` ⚠️ MLflow App setup skipped: ${error.message}`);
|
|
357
|
+
console.log(' Tune jobs will still work but experiment tracking may not be available.');
|
|
358
|
+
}
|
|
359
|
+
|
|
259
360
|
// Step 5: CI Infrastructure setup (separate CDK stack — unchanged)
|
|
260
361
|
this._displayProgress('🧪', 'CI Testing Infrastructure...');
|
|
261
362
|
try {
|
|
@@ -264,7 +365,7 @@ export default class BootstrapCommandHandler {
|
|
|
264
365
|
if (nonInteractive) {
|
|
265
366
|
if (options.ci) {
|
|
266
367
|
provisionCi = true;
|
|
267
|
-
} else if (options['skip-ci']) {
|
|
368
|
+
} else if ((options['skip-ci'] || options.skipCi)) {
|
|
268
369
|
console.log(' ⏭️ Skipping CI infrastructure (--skip-ci)');
|
|
269
370
|
provisionCi = false;
|
|
270
371
|
} else {
|
|
@@ -281,6 +382,21 @@ export default class BootstrapCommandHandler {
|
|
|
281
382
|
}
|
|
282
383
|
|
|
283
384
|
if (provisionCi) {
|
|
385
|
+
// --- CI single-region enforcement ---
|
|
386
|
+
const ciConflict = this._findExistingCiProfile(profileName);
|
|
387
|
+
if (ciConflict) {
|
|
388
|
+
console.log(`❌ CI infrastructure already deployed in region ${ciConflict.config.awsRegion} (profile: ${ciConflict.name}).`);
|
|
389
|
+
console.log(' CI can only be deployed in one region per account.');
|
|
390
|
+
provisionCi = false;
|
|
391
|
+
}
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
if (provisionCi) {
|
|
395
|
+
// Persist CI intent immediately so that `bootstrap update --ci` can
|
|
396
|
+
// retry if the CDK deploy fails. Don't wait for success.
|
|
397
|
+
profileData.ciInfraProvisioned = true;
|
|
398
|
+
profileData.ciTableName = profileData.ciTableName || 'mlcc-ci-table';
|
|
399
|
+
|
|
284
400
|
// Ensure CDK is bootstrapped in this account/region
|
|
285
401
|
const cdkBootstrapped = this._resourceExists(
|
|
286
402
|
`ssm get-parameter --name /cdk-bootstrap/hnb659fds/version --region ${profileData.awsRegion}`,
|
|
@@ -336,14 +452,25 @@ export default class BootstrapCommandHandler {
|
|
|
336
452
|
stdio: ['pipe', 'pipe', 'pipe']
|
|
337
453
|
});
|
|
338
454
|
|
|
455
|
+
// Warn if shell AWS_REGION differs from profile region
|
|
456
|
+
if (process.env.AWS_REGION && process.env.AWS_REGION !== profileData.awsRegion) {
|
|
457
|
+
console.log(` ⚠️ AWS_REGION env var (${process.env.AWS_REGION}) differs from profile region (${profileData.awsRegion}) — using profile region`);
|
|
458
|
+
}
|
|
459
|
+
|
|
460
|
+
// --no-rollback prevents rollback on AlreadyExists errors for IAM roles
|
|
461
|
+
// that may pre-exist from a prior deployment or another region.
|
|
462
|
+
const cdkDeployCmd = options.benchmarkInfra
|
|
463
|
+
? 'npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback --parameters MlccCiHarnessStack:CreateBenchmarkInfra=true'
|
|
464
|
+
: 'npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback';
|
|
339
465
|
execSync(
|
|
340
|
-
|
|
466
|
+
cdkDeployCmd,
|
|
341
467
|
{
|
|
342
468
|
cwd: ciHarnessDir,
|
|
343
469
|
encoding: 'utf8',
|
|
344
470
|
stdio: 'inherit',
|
|
345
471
|
env: {
|
|
346
472
|
...process.env,
|
|
473
|
+
AWS_REGION: profileData.awsRegion,
|
|
347
474
|
CDK_DEFAULT_REGION: profileData.awsRegion,
|
|
348
475
|
CDK_DEFAULT_ACCOUNT: profileData.accountId,
|
|
349
476
|
AWS_PROFILE: profileData.awsProfile
|
|
@@ -354,6 +481,11 @@ export default class BootstrapCommandHandler {
|
|
|
354
481
|
|
|
355
482
|
profileData.ciInfraProvisioned = true;
|
|
356
483
|
profileData.ciTableName = 'mlcc-ci-table';
|
|
484
|
+
if (options.benchmarkInfra) {
|
|
485
|
+
profileData.benchmarkInfraProvisioned = true;
|
|
486
|
+
profileData.ciGlueDatabase = 'mlcc_ci';
|
|
487
|
+
profileData.ciBenchmarkResultsBucket = `mlcc-benchmark-results-${profileData.accountId}-${profileData.awsRegion}`;
|
|
488
|
+
}
|
|
357
489
|
}
|
|
358
490
|
}
|
|
359
491
|
} catch (error) {
|
|
@@ -372,241 +504,325 @@ export default class BootstrapCommandHandler {
|
|
|
372
504
|
}
|
|
373
505
|
|
|
374
506
|
/**
|
|
375
|
-
*
|
|
376
|
-
*
|
|
507
|
+
* Re-deploy bootstrap infrastructure using the active profile.
|
|
508
|
+
* No prompts — reads all config from the existing profile and re-applies
|
|
509
|
+
* the CloudFormation stack and optionally the CI CDK stack.
|
|
510
|
+
*
|
|
511
|
+
* @param {object} [options] - Parsed CLI options (e.g., --ci to force CI update)
|
|
377
512
|
*/
|
|
378
|
-
async
|
|
379
|
-
const config = this.config.read();
|
|
380
|
-
if (!config) {
|
|
381
|
-
console.log('No bootstrap configuration found.');
|
|
382
|
-
console.log('Run `ml-container-creator bootstrap` to set up shared infrastructure.');
|
|
383
|
-
return;
|
|
384
|
-
}
|
|
385
|
-
|
|
513
|
+
async _handleUpdate(options = {}) {
|
|
386
514
|
const profile = this.config.getActiveProfile();
|
|
387
515
|
if (!profile) {
|
|
388
516
|
console.log('No active bootstrap profile found.');
|
|
389
|
-
console.log('Run `ml-container-creator bootstrap` to set up shared infrastructure.');
|
|
517
|
+
console.log('Run `ml-container-creator bootstrap` to set up shared infrastructure first.');
|
|
390
518
|
return;
|
|
391
519
|
}
|
|
392
520
|
|
|
393
|
-
const
|
|
394
|
-
console.log(`\n
|
|
395
|
-
console.log(
|
|
521
|
+
const { name, config: profileConfig } = profile;
|
|
522
|
+
console.log(`\n🔄 Updating bootstrap infrastructure for profile "${name}"`);
|
|
523
|
+
console.log(` Region: ${profileConfig.awsRegion}`);
|
|
524
|
+
console.log(` Account: ${profileConfig.accountId}`);
|
|
396
525
|
|
|
397
|
-
|
|
398
|
-
|
|
526
|
+
// --- SANITY CHECK 1: Account identity ---
|
|
527
|
+
const callerAccount = this._getCallerAccount(profileConfig.awsProfile);
|
|
528
|
+
if (callerAccount !== profileConfig.accountId) {
|
|
529
|
+
console.log(`❌ Account mismatch: profile expects ${profileConfig.accountId} but credentials resolve to ${callerAccount}`);
|
|
530
|
+
return;
|
|
399
531
|
}
|
|
400
532
|
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
// Validate bootstrap stack
|
|
404
|
-
console.log('\n🔍 Resource Validation:');
|
|
405
|
-
|
|
406
|
-
const stackName = profile.config.stackName || `${STACK_NAME_PREFIX}-${profile.name}`;
|
|
533
|
+
// Re-deploy the CloudFormation bootstrap stack
|
|
534
|
+
const stackName = profileConfig.stackName || `${STACK_NAME_PREFIX}-${name}`;
|
|
407
535
|
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
);
|
|
536
|
+
// Sanity check: stack name consistency (warn-and-continue)
|
|
537
|
+
const expectedStackName = `${STACK_NAME_PREFIX}-${name}`;
|
|
538
|
+
if (profileConfig.stackName && profileConfig.stackName !== expectedStackName) {
|
|
539
|
+
console.log(`⚠️ Stack name mismatch: expected "${expectedStackName}" but profile has "${profileConfig.stackName}"`);
|
|
540
|
+
console.log(' Run `ml-container-creator bootstrap migrate` to fix.');
|
|
541
|
+
console.log(' Proceeding with stored stack name...');
|
|
542
|
+
}
|
|
413
543
|
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
}
|
|
544
|
+
// --- SANITY CHECK 3: Stack exists in target region ---
|
|
545
|
+
const stackExists = this._resourceExists(
|
|
546
|
+
`cloudformation describe-stacks --stack-name ${stackName} --region ${profileConfig.awsRegion}`,
|
|
547
|
+
profileConfig.awsProfile
|
|
548
|
+
);
|
|
549
|
+
if (!stackExists) {
|
|
550
|
+
console.log(`❌ Stack "${stackName}" not found in ${profileConfig.awsRegion}.`);
|
|
551
|
+
console.log(' Run `ml-container-creator bootstrap` to create it.');
|
|
552
|
+
return;
|
|
553
|
+
}
|
|
425
554
|
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
console.log(` ✅ S3 bucket (async): ${outputs.AsyncS3BucketName}`);
|
|
434
|
-
}
|
|
435
|
-
if (outputs.BatchS3BucketName) {
|
|
436
|
-
console.log(` ✅ S3 bucket (batch): ${outputs.BatchS3BucketName}`);
|
|
437
|
-
}
|
|
438
|
-
if (outputs.AdapterS3BucketName) {
|
|
439
|
-
console.log(` ✅ S3 bucket (adapters): ${outputs.AdapterS3BucketName}`);
|
|
440
|
-
}
|
|
441
|
-
if (outputs.BenchmarkS3BucketName) {
|
|
442
|
-
console.log(` ✅ S3 bucket (benchmark): ${outputs.BenchmarkS3BucketName}`);
|
|
443
|
-
}
|
|
444
|
-
if (outputs.StackVersion) {
|
|
445
|
-
console.log(` 📋 Stack version: ${outputs.StackVersion}`);
|
|
446
|
-
}
|
|
555
|
+
// --- CI single-region enforcement ---
|
|
556
|
+
if (options.ci) {
|
|
557
|
+
const ciConflict = this._findExistingCiProfile(name);
|
|
558
|
+
if (ciConflict) {
|
|
559
|
+
console.log(`❌ CI infrastructure already deployed in region ${ciConflict.config.awsRegion} (profile: ${ciConflict.name}).`);
|
|
560
|
+
console.log(' CI can only be deployed in one region per account.');
|
|
561
|
+
return;
|
|
447
562
|
}
|
|
448
|
-
}
|
|
449
|
-
// Fall back to individual resource checks for profiles created before CloudFormation migration
|
|
450
|
-
console.log(` ⚠️ Bootstrap stack "${stackName}" not found — checking resources individually`);
|
|
563
|
+
}
|
|
451
564
|
|
|
452
|
-
|
|
453
|
-
const defaultRoleName = 'mlcc-sagemaker-execution-role';
|
|
454
|
-
let roleName = defaultRoleName;
|
|
455
|
-
if (profile.config.roleArn) {
|
|
456
|
-
const arnParts = profile.config.roleArn.split('/');
|
|
457
|
-
roleName = arnParts[arnParts.length - 1];
|
|
458
|
-
}
|
|
565
|
+
this._displayProgress('☁️', 'Updating bootstrap stack...');
|
|
459
566
|
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
567
|
+
// Pre-check: if IAM role already exists globally (from another region's deployment),
|
|
568
|
+
// pass its ARN so CloudFormation skips re-creation (account-level singleton)
|
|
569
|
+
let useExistingRoleArn = profileConfig.roleArn || '';
|
|
570
|
+
if (!useExistingRoleArn) {
|
|
571
|
+
try {
|
|
572
|
+
const roleResult = this._execAws(
|
|
573
|
+
'iam get-role --role-name mlcc-sagemaker-execution-role',
|
|
574
|
+
profileConfig.awsProfile
|
|
463
575
|
);
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
console.log(` ⚠️ IAM role: ${roleName} — missing`);
|
|
576
|
+
const roleArn = roleResult && roleResult.Role && roleResult.Role.Arn;
|
|
577
|
+
if (roleArn && roleArn.startsWith('arn:aws:iam::')) {
|
|
578
|
+
useExistingRoleArn = roleArn;
|
|
468
579
|
}
|
|
469
|
-
} catch {
|
|
470
|
-
|
|
580
|
+
} catch (_) {
|
|
581
|
+
// Role doesn't exist yet — will be created by the stack
|
|
471
582
|
}
|
|
583
|
+
}
|
|
472
584
|
|
|
585
|
+
try {
|
|
586
|
+
// Check if ECR repo already exists (avoid ResourceExistenceCheck failure)
|
|
587
|
+
let skipEcr = 'false';
|
|
473
588
|
try {
|
|
474
|
-
|
|
475
|
-
`ecr describe-repositories --repository-names ml-container-creator --region ${
|
|
476
|
-
|
|
589
|
+
this._execAws(
|
|
590
|
+
`ecr describe-repositories --repository-names ml-container-creator --region ${profileConfig.awsRegion}`,
|
|
591
|
+
profileConfig.awsProfile
|
|
477
592
|
);
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
} else {
|
|
481
|
-
console.log(' ⚠️ ECR repository: ml-container-creator — missing');
|
|
482
|
-
}
|
|
483
|
-
} catch {
|
|
484
|
-
console.log(' ⚠️ ECR repository: could not validate');
|
|
485
|
-
}
|
|
593
|
+
skipEcr = 'true';
|
|
594
|
+
} catch (_) { /* doesn't exist */ }
|
|
486
595
|
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
);
|
|
493
|
-
console.log(asyncExists
|
|
494
|
-
? ` ✅ S3 bucket: ${profile.config.asyncS3Bucket}`
|
|
495
|
-
: ` ⚠️ S3 bucket: ${profile.config.asyncS3Bucket} — missing`);
|
|
496
|
-
} catch {
|
|
497
|
-
console.log(` ⚠️ S3 bucket: ${profile.config.asyncS3Bucket} — could not validate`);
|
|
498
|
-
}
|
|
499
|
-
}
|
|
596
|
+
const stackOutputs = this._deployStack(stackName, {
|
|
597
|
+
CreateS3Buckets: (profileConfig.asyncS3Bucket || profileConfig.batchS3Bucket) ? 'true' : 'false',
|
|
598
|
+
UseExistingRoleArn: useExistingRoleArn,
|
|
599
|
+
SkipEcrCreation: skipEcr
|
|
600
|
+
}, profileConfig.awsProfile, profileConfig.awsRegion);
|
|
500
601
|
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
602
|
+
// Update profile with any new outputs
|
|
603
|
+
if (stackOutputs.RoleArn) profileConfig.roleArn = stackOutputs.RoleArn;
|
|
604
|
+
if (stackOutputs.EcrRepositoryName) profileConfig.ecrRepositoryName = stackOutputs.EcrRepositoryName;
|
|
605
|
+
if (stackOutputs.AsyncS3BucketName) profileConfig.asyncS3Bucket = stackOutputs.AsyncS3BucketName;
|
|
606
|
+
if (stackOutputs.BatchS3BucketName) profileConfig.batchS3Bucket = stackOutputs.BatchS3BucketName;
|
|
607
|
+
if (stackOutputs.BenchmarkS3BucketName) profileConfig.benchmarkS3Bucket = stackOutputs.BenchmarkS3BucketName;
|
|
608
|
+
profileConfig.stackName = stackName;
|
|
609
|
+
|
|
610
|
+
console.log(' ✅ Bootstrap stack updated');
|
|
611
|
+
} catch (error) {
|
|
612
|
+
console.log(` ❌ Stack update failed: ${error.message}`);
|
|
613
|
+
}
|
|
614
|
+
|
|
615
|
+
// Re-deploy CI stack if it was provisioned or --ci flag is set
|
|
616
|
+
const shouldUpdateCi = profileConfig.ciInfraProvisioned || options.ci;
|
|
617
|
+
if (shouldUpdateCi) {
|
|
618
|
+
this._displayProgress('🧪', 'Updating CI harness stack...');
|
|
619
|
+
|
|
620
|
+
try {
|
|
621
|
+
const ciHarnessDir = path.resolve(__dirname, '../../infra/ci-harness');
|
|
622
|
+
|
|
623
|
+
// CI harness source is not bundled in the npm package — only available from git clone
|
|
624
|
+
if (!existsSync(ciHarnessDir)) {
|
|
625
|
+
console.log(' ⏭️ CI harness source not available (npm install does not include infra/)');
|
|
626
|
+
console.log(' To update the CI stack, run from a git clone: git clone https://github.com/awslabs/ml-container-creator && cd ml-container-creator && npx cdk deploy -c region=REGION');
|
|
627
|
+
} else {
|
|
628
|
+
// Ensure dependencies are installed (handles cold starts / fresh clones)
|
|
629
|
+
execSync('npm install --silent', {
|
|
630
|
+
cwd: ciHarnessDir,
|
|
631
|
+
encoding: 'utf8',
|
|
632
|
+
stdio: ['pipe', 'pipe', 'pipe']
|
|
633
|
+
});
|
|
634
|
+
|
|
635
|
+
// --no-rollback prevents rollback on AlreadyExists errors for IAM roles
|
|
636
|
+
// that may pre-exist from a prior deployment or another region.
|
|
637
|
+
const updateCdkCmd = (options.benchmarkInfra || profileConfig.benchmarkInfraProvisioned)
|
|
638
|
+
? 'npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback --parameters MlccCiHarnessStack:CreateBenchmarkInfra=true'
|
|
639
|
+
: 'npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback';
|
|
640
|
+
execSync(
|
|
641
|
+
updateCdkCmd,
|
|
642
|
+
{
|
|
643
|
+
cwd: ciHarnessDir,
|
|
644
|
+
encoding: 'utf8',
|
|
645
|
+
stdio: 'inherit',
|
|
646
|
+
env: {
|
|
647
|
+
...process.env,
|
|
648
|
+
AWS_REGION: profileConfig.awsRegion,
|
|
649
|
+
CDK_DEFAULT_REGION: profileConfig.awsRegion,
|
|
650
|
+
CDK_DEFAULT_ACCOUNT: profileConfig.accountId,
|
|
651
|
+
AWS_PROFILE: profileConfig.awsProfile
|
|
652
|
+
}
|
|
653
|
+
}
|
|
506
654
|
);
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
console.log(` ⚠️ S3 bucket: ${profile.config.batchS3Bucket} — could not validate`);
|
|
655
|
+
profileConfig.ciInfraProvisioned = true;
|
|
656
|
+
profileConfig.ciGlueDatabase = profileConfig.ciGlueDatabase || 'mlcc_ci';
|
|
657
|
+
profileConfig.ciBenchmarkResultsBucket = profileConfig.ciBenchmarkResultsBucket || `mlcc-benchmark-results-${profileConfig.accountId}-${profileConfig.awsRegion}`;
|
|
658
|
+
console.log(' ✅ CI harness stack updated');
|
|
512
659
|
}
|
|
660
|
+
} catch (error) {
|
|
661
|
+
console.log(` ❌ CI stack update failed: ${error.message}`);
|
|
513
662
|
}
|
|
663
|
+
} else {
|
|
664
|
+
console.log(' ⏭️ CI stack skipped (not provisioned — use --ci to force)');
|
|
665
|
+
}
|
|
514
666
|
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
? ` ✅ S3 bucket (benchmark): ${profile.config.benchmarkS3Bucket}`
|
|
523
|
-
: ` ⚠️ S3 bucket (benchmark): ${profile.config.benchmarkS3Bucket} — missing`);
|
|
524
|
-
} catch {
|
|
525
|
-
console.log(` ⚠️ S3 bucket (benchmark): ${profile.config.benchmarkS3Bucket} — could not validate`);
|
|
526
|
-
}
|
|
667
|
+
// Ensure MLflow App exists
|
|
668
|
+
this._displayProgress('📊', 'MLflow App for experiment tracking...');
|
|
669
|
+
try {
|
|
670
|
+
const mlflowAppArn = this._ensureMlflowApp(profileConfig, profileConfig.awsProfile);
|
|
671
|
+
if (mlflowAppArn) {
|
|
672
|
+
profileConfig.mlflowAppArn = mlflowAppArn;
|
|
673
|
+
console.log(` ✅ MLflow App ready: ${mlflowAppArn}`);
|
|
527
674
|
}
|
|
675
|
+
} catch (error) {
|
|
676
|
+
console.log(` ⚠️ MLflow App setup skipped: ${error.message}`);
|
|
528
677
|
}
|
|
529
678
|
|
|
530
|
-
//
|
|
531
|
-
|
|
679
|
+
// Save updated profile
|
|
680
|
+
this.config.setProfile(name, profileConfig);
|
|
681
|
+
console.log(`\n✅ Update complete for profile "${name}"`);
|
|
532
682
|
|
|
533
|
-
|
|
683
|
+
// Re-run post-setup chain after updating AWS resources
|
|
684
|
+
await this._runPostSetupChain(options);
|
|
685
|
+
}
|
|
534
686
|
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
687
|
+
/**
|
|
688
|
+
* Migrate legacy profiles to current naming conventions.
|
|
689
|
+
* Corrects stackName mismatches and renames sharedStackFrom → sharedInfraFrom.
|
|
690
|
+
* Displays a preview of all changes and requires confirmation before writing.
|
|
691
|
+
*/
|
|
692
|
+
async _handleMigrate() {
|
|
693
|
+
const config = this.config.read();
|
|
694
|
+
if (!config || !config.profiles) {
|
|
695
|
+
console.log('No profiles to migrate.');
|
|
538
696
|
return;
|
|
539
697
|
}
|
|
540
698
|
|
|
541
|
-
const
|
|
699
|
+
const changes = [];
|
|
700
|
+
|
|
701
|
+
for (const [name, profileConfig] of Object.entries(config.profiles)) {
|
|
702
|
+
const expected = `${STACK_NAME_PREFIX}-${name}`;
|
|
703
|
+
|
|
704
|
+
// Fix stackName mismatch
|
|
705
|
+
if (profileConfig.stackName && profileConfig.stackName !== expected) {
|
|
706
|
+
changes.push({
|
|
707
|
+
profile: name,
|
|
708
|
+
field: 'stackName',
|
|
709
|
+
from: profileConfig.stackName,
|
|
710
|
+
to: expected
|
|
711
|
+
});
|
|
712
|
+
}
|
|
713
|
+
|
|
714
|
+
// Rename sharedStackFrom → sharedInfraFrom
|
|
715
|
+
if (profileConfig.sharedStackFrom) {
|
|
716
|
+
changes.push({
|
|
717
|
+
profile: name,
|
|
718
|
+
field: 'sharedStackFrom → sharedInfraFrom',
|
|
719
|
+
from: profileConfig.sharedStackFrom,
|
|
720
|
+
to: profileConfig.sharedStackFrom
|
|
721
|
+
});
|
|
722
|
+
}
|
|
723
|
+
}
|
|
542
724
|
|
|
543
|
-
if (
|
|
544
|
-
console.log('
|
|
725
|
+
if (changes.length === 0) {
|
|
726
|
+
console.log('✅ All profiles already use current naming conventions.');
|
|
545
727
|
return;
|
|
546
728
|
}
|
|
547
729
|
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
}
|
|
730
|
+
// Display preview
|
|
731
|
+
console.log('📋 Migration Preview:\n');
|
|
732
|
+
for (const change of changes) {
|
|
733
|
+
console.log(` Profile "${change.profile}":`);
|
|
734
|
+
console.log(` ${change.field}: "${change.from}" → "${change.to}"`);
|
|
554
735
|
}
|
|
555
736
|
|
|
556
|
-
|
|
557
|
-
|
|
737
|
+
// Prompt for confirmation
|
|
738
|
+
const { confirm } = await this._promptFn([{
|
|
739
|
+
type: 'confirm',
|
|
740
|
+
name: 'confirm',
|
|
741
|
+
message: 'Apply these changes?',
|
|
742
|
+
default: true
|
|
743
|
+
}]);
|
|
744
|
+
|
|
745
|
+
if (!confirm) return;
|
|
558
746
|
|
|
559
|
-
//
|
|
560
|
-
|
|
561
|
-
|
|
747
|
+
// Apply changes
|
|
748
|
+
for (const [name, profileConfig] of Object.entries(config.profiles)) {
|
|
749
|
+
const expected = `${STACK_NAME_PREFIX}-${name}`;
|
|
750
|
+
if (profileConfig.stackName !== expected) {
|
|
751
|
+
profileConfig.stackName = expected;
|
|
752
|
+
}
|
|
753
|
+
if (profileConfig.sharedStackFrom) {
|
|
754
|
+
profileConfig.sharedInfraFrom = profileConfig.sharedStackFrom;
|
|
755
|
+
delete profileConfig.sharedStackFrom;
|
|
756
|
+
}
|
|
562
757
|
}
|
|
758
|
+
|
|
759
|
+
this.config.write(config);
|
|
760
|
+
console.log('✅ Migration complete.');
|
|
563
761
|
}
|
|
564
762
|
|
|
565
763
|
/**
|
|
566
|
-
*
|
|
567
|
-
*
|
|
568
|
-
*
|
|
764
|
+
* Run the post-setup chain: mcp init → registry sync-architectures → sync-schemas.
|
|
765
|
+
* Each step is independent — failures are collected and reported at the end.
|
|
766
|
+
*
|
|
767
|
+
* @param {object} options - Parsed CLI options (checks skipPostSetup)
|
|
569
768
|
*/
|
|
570
|
-
async
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
const activeResources = assetManager.listResources({ status: 'active' });
|
|
574
|
-
|
|
575
|
-
if (activeResources.length === 0) {
|
|
576
|
-
console.log(' No active resources to verify.');
|
|
769
|
+
async _runPostSetupChain(options = {}) {
|
|
770
|
+
if ((options['skip-post-setup'] || options.skipPostSetup)) {
|
|
771
|
+
console.log('\n⏭️ Skipping post-setup chain (--skip-post-setup)');
|
|
577
772
|
return;
|
|
578
773
|
}
|
|
579
774
|
|
|
580
|
-
|
|
581
|
-
let drifted = 0;
|
|
582
|
-
let unchecked = 0;
|
|
775
|
+
console.log('\n🔗 Running post-setup configuration...\n');
|
|
583
776
|
|
|
584
|
-
|
|
585
|
-
const checkCommand = this._buildDriftCheckCommand(resource);
|
|
777
|
+
const failures = [];
|
|
586
778
|
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
779
|
+
// 1. MCP init — register bundled MCP servers
|
|
780
|
+
console.log('📡 Registering MCP servers...');
|
|
781
|
+
try {
|
|
782
|
+
const generatorAdapter = {
|
|
783
|
+
destinationPath(...segments) {
|
|
784
|
+
return path.resolve(process.cwd(), ...segments);
|
|
785
|
+
}
|
|
786
|
+
};
|
|
787
|
+
const mcpHandler = new McpCommandHandler(generatorAdapter);
|
|
788
|
+
await mcpHandler.handle(['init'], {});
|
|
789
|
+
} catch (error) {
|
|
790
|
+
failures.push({ step: 'mcp init', error: error.message });
|
|
791
|
+
console.log(` ⚠️ mcp init failed: ${error.message}`);
|
|
792
|
+
}
|
|
591
793
|
|
|
592
|
-
|
|
593
|
-
|
|
794
|
+
// 2. Registry sync-architectures — populate supportedModelTypes
|
|
795
|
+
console.log('\n📋 Syncing model architecture registry...');
|
|
796
|
+
try {
|
|
797
|
+
const registryHandler = new RegistryCommandHandler();
|
|
798
|
+
await registryHandler.handle(['sync-architectures'], {});
|
|
799
|
+
} catch (error) {
|
|
800
|
+
failures.push({ step: 'registry sync-architectures', error: error.message });
|
|
801
|
+
console.log(` ⚠️ registry sync-architectures failed: ${error.message}`);
|
|
802
|
+
}
|
|
594
803
|
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
}
|
|
603
|
-
} catch {
|
|
604
|
-
unchecked++;
|
|
605
|
-
console.log(` ⚠️ ${resource.resourceType}: ${resource.resourceId} — could not verify (credentials or API unavailable)`);
|
|
606
|
-
}
|
|
804
|
+
// 3. Schema sync — download AWS service models
|
|
805
|
+
console.log('\n📐 Syncing service schemas...');
|
|
806
|
+
try {
|
|
807
|
+
await this._handleSyncSchemas();
|
|
808
|
+
} catch (error) {
|
|
809
|
+
failures.push({ step: 'sync-schemas', error: error.message });
|
|
810
|
+
console.log(` ⚠️ sync-schemas failed: ${error.message}`);
|
|
607
811
|
}
|
|
608
812
|
|
|
609
|
-
|
|
813
|
+
// Report results
|
|
814
|
+
if (failures.length === 0) {
|
|
815
|
+
console.log('\n✅ Bootstrap complete — all systems operational');
|
|
816
|
+
} else {
|
|
817
|
+
console.log(`\n⚠️ Bootstrap complete with ${failures.length} warning${failures.length === 1 ? '' : 's'}:`);
|
|
818
|
+
for (const { step, error } of failures) {
|
|
819
|
+
console.log(` • ${step}: ${error}`);
|
|
820
|
+
}
|
|
821
|
+
console.log('\n These steps can be re-run individually:');
|
|
822
|
+
console.log(' ml-container-creator mcp init');
|
|
823
|
+
console.log(' ml-container-creator registry sync-architectures');
|
|
824
|
+
console.log(' ml-container-creator bootstrap sync-schemas');
|
|
825
|
+
}
|
|
610
826
|
}
|
|
611
827
|
|
|
612
828
|
/**
|
|
@@ -631,7 +847,6 @@ export default class BootstrapCommandHandler {
|
|
|
631
847
|
return `sagemaker describe-inference-component --inference-component-name ${name}`;
|
|
632
848
|
}
|
|
633
849
|
case 'ecr-image': {
|
|
634
|
-
// resourceId is a full image URI like 111111111111.dkr.ecr.us-east-1.amazonaws.com/repo:tag
|
|
635
850
|
const parts = resourceId.split('/');
|
|
636
851
|
const repoAndTag = parts[parts.length - 1];
|
|
637
852
|
const [repo, tag] = repoAndTag.split(':');
|
|
@@ -652,573 +867,30 @@ export default class BootstrapCommandHandler {
|
|
|
652
867
|
|
|
653
868
|
/**
|
|
654
869
|
* Extract the resource name from an ARN.
|
|
655
|
-
* ARN format: arn:aws:service:region:account:resource-type/resource-name
|
|
656
870
|
* @param {string} arn - AWS ARN string
|
|
657
871
|
* @returns {string} The resource name portion
|
|
658
872
|
*/
|
|
659
873
|
_extractNameFromArn(arn) {
|
|
660
|
-
// Handle ARN formats like:
|
|
661
|
-
// arn:aws:sagemaker:us-east-1:111111111111:endpoint/my-endpoint
|
|
662
|
-
// arn:aws:iam::111111111111:role/my-role
|
|
663
|
-
// arn:aws:codebuild:us-east-1:111111111111:project/my-project
|
|
664
874
|
const parts = arn.split('/');
|
|
665
875
|
return parts[parts.length - 1];
|
|
666
876
|
}
|
|
667
877
|
|
|
668
878
|
/**
|
|
669
|
-
*
|
|
670
|
-
* @param {string}
|
|
879
|
+
* Infer the resource type from an ARN.
|
|
880
|
+
* @param {string} arn - AWS ARN
|
|
881
|
+
* @returns {string|null} Resource type or null if not recognized
|
|
671
882
|
*/
|
|
672
|
-
|
|
673
|
-
if (
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
if (
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
}
|
|
684
|
-
|
|
685
|
-
const profile = this.config.getProfile(profileName);
|
|
686
|
-
if (!profile) {
|
|
687
|
-
const available = this.config.listProfiles();
|
|
688
|
-
console.log(`Profile "${profileName}" not found.`);
|
|
689
|
-
if (available.length > 0) {
|
|
690
|
-
console.log(`Available profiles: ${available.join(', ')}`);
|
|
691
|
-
} else {
|
|
692
|
-
console.log('No profiles configured. Run `ml-container-creator bootstrap` to create one.');
|
|
693
|
-
}
|
|
694
|
-
return;
|
|
695
|
-
}
|
|
696
|
-
|
|
697
|
-
this.config.setActiveProfile(profileName);
|
|
698
|
-
console.log(`Switched active profile to "${profileName}".`);
|
|
699
|
-
}
|
|
700
|
-
|
|
701
|
-
/**
|
|
702
|
-
* List all bootstrap profiles.
|
|
703
|
-
*/
|
|
704
|
-
async _handleList() {
|
|
705
|
-
const profiles = this.config.listProfiles();
|
|
706
|
-
|
|
707
|
-
if (profiles.length === 0) {
|
|
708
|
-
console.log('No bootstrap profiles configured.');
|
|
709
|
-
console.log('Run `ml-container-creator bootstrap` to set up shared infrastructure.');
|
|
710
|
-
return;
|
|
711
|
-
}
|
|
712
|
-
|
|
713
|
-
const config = this.config.read();
|
|
714
|
-
const activeProfileName = config ? config.activeProfile : null;
|
|
715
|
-
|
|
716
|
-
console.log('\nBootstrap Profiles:');
|
|
717
|
-
for (const name of profiles) {
|
|
718
|
-
if (name === activeProfileName) {
|
|
719
|
-
console.log(` * ${name} (active)`);
|
|
720
|
-
} else {
|
|
721
|
-
console.log(` ${name}`);
|
|
722
|
-
}
|
|
723
|
-
}
|
|
724
|
-
}
|
|
725
|
-
|
|
726
|
-
/**
|
|
727
|
-
* Remove a bootstrap profile.
|
|
728
|
-
* @param {string} profileName - Profile name to remove
|
|
729
|
-
* @param {object} options - Parsed CLI options (e.g., --force)
|
|
730
|
-
*/
|
|
731
|
-
async _handleRemove(profileName, options) {
|
|
732
|
-
if (!profileName) {
|
|
733
|
-
console.log('Usage: ml-container-creator bootstrap remove <profile> [--force]');
|
|
734
|
-
return;
|
|
735
|
-
}
|
|
736
|
-
|
|
737
|
-
const profile = this.config.getProfile(profileName);
|
|
738
|
-
if (!profile) {
|
|
739
|
-
console.log(`Profile "${profileName}" not found.`);
|
|
740
|
-
return;
|
|
741
|
-
}
|
|
742
|
-
|
|
743
|
-
// Check for manifest file with active resources
|
|
744
|
-
const assetManager = new AssetManager(profileName);
|
|
745
|
-
const hasManifest = existsSync(assetManager.manifestPath);
|
|
746
|
-
|
|
747
|
-
if (hasManifest) {
|
|
748
|
-
const counts = assetManager.getStatusCounts();
|
|
749
|
-
if (counts.active > 0 && !options.force) {
|
|
750
|
-
console.log(`⚠️ Profile "${profileName}" has ${counts.active} active resource${counts.active === 1 ? '' : 's'} in the deployment manifest.`);
|
|
751
|
-
}
|
|
752
|
-
}
|
|
753
|
-
|
|
754
|
-
// Check for CloudFormation stack
|
|
755
|
-
const stackName = profile.stackName || `${STACK_NAME_PREFIX}-${profileName}`;
|
|
756
|
-
let hasStack = false;
|
|
757
|
-
try {
|
|
758
|
-
hasStack = this._resourceExists(
|
|
759
|
-
`cloudformation describe-stacks --stack-name ${stackName} --region ${profile.awsRegion}`,
|
|
760
|
-
profile.awsProfile
|
|
761
|
-
);
|
|
762
|
-
} catch {
|
|
763
|
-
// ignore
|
|
764
|
-
}
|
|
765
|
-
|
|
766
|
-
if (hasStack && !options.force) {
|
|
767
|
-
console.log(`⚠️ Profile "${profileName}" has a CloudFormation stack: ${stackName}`);
|
|
768
|
-
console.log(' Use --delete-stack to also delete the AWS resources, or --force to remove the profile only.');
|
|
769
|
-
}
|
|
770
|
-
|
|
771
|
-
if (!options.force) {
|
|
772
|
-
const { confirm } = await this._promptFn([{
|
|
773
|
-
type: 'confirm',
|
|
774
|
-
name: 'confirm',
|
|
775
|
-
message: `Remove bootstrap profile "${profileName}"?`,
|
|
776
|
-
default: false
|
|
777
|
-
}]);
|
|
778
|
-
|
|
779
|
-
if (!confirm) {
|
|
780
|
-
console.log('Removal cancelled.');
|
|
781
|
-
return;
|
|
782
|
-
}
|
|
783
|
-
}
|
|
784
|
-
|
|
785
|
-
// Delete CloudFormation stack if requested
|
|
786
|
-
if (hasStack && options['delete-stack']) {
|
|
787
|
-
try {
|
|
788
|
-
console.log(`🗑️ Deleting CloudFormation stack: ${stackName}`);
|
|
789
|
-
execSync(
|
|
790
|
-
`aws cloudformation delete-stack --stack-name ${stackName} --region ${profile.awsRegion} --profile ${profile.awsProfile}`,
|
|
791
|
-
{ encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }
|
|
792
|
-
);
|
|
793
|
-
console.log('⏳ Waiting for stack deletion...');
|
|
794
|
-
execSync(
|
|
795
|
-
`aws cloudformation wait stack-delete-complete --stack-name ${stackName} --region ${profile.awsRegion} --profile ${profile.awsProfile}`,
|
|
796
|
-
{ encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }
|
|
797
|
-
);
|
|
798
|
-
console.log(`✅ Stack "${stackName}" deleted.`);
|
|
799
|
-
} catch (err) {
|
|
800
|
-
console.log(`⚠️ Could not delete stack "${stackName}": ${err.message}`);
|
|
801
|
-
console.log(' You may need to delete it manually from the CloudFormation console.');
|
|
802
|
-
}
|
|
803
|
-
} else if (hasStack) {
|
|
804
|
-
console.log(`Note: CloudFormation stack "${stackName}" was left in place.`);
|
|
805
|
-
console.log(' To delete AWS resources, re-run with --delete-stack');
|
|
806
|
-
}
|
|
807
|
-
|
|
808
|
-
// Delete manifest file if it exists
|
|
809
|
-
if (hasManifest) {
|
|
810
|
-
try {
|
|
811
|
-
unlinkSync(assetManager.manifestPath);
|
|
812
|
-
console.log(`Manifest file for "${profileName}" deleted.`);
|
|
813
|
-
} catch {
|
|
814
|
-
console.log(`⚠️ Could not delete manifest file for "${profileName}".`);
|
|
815
|
-
}
|
|
816
|
-
}
|
|
817
|
-
|
|
818
|
-
this.config.removeProfile(profileName);
|
|
819
|
-
console.log(`Profile "${profileName}" removed.`);
|
|
820
|
-
}
|
|
821
|
-
|
|
822
|
-
/**
|
|
823
|
-
* Scan AWS for pre-existing MLCC-managed resources and add them to the manifest.
|
|
824
|
-
*/
|
|
825
|
-
async _handleScan() {
|
|
826
|
-
const profile = this.config.getActiveProfile();
|
|
827
|
-
if (!profile) {
|
|
828
|
-
console.log('No active bootstrap profile found.');
|
|
829
|
-
console.log('Run `ml-container-creator bootstrap` to set up shared infrastructure.');
|
|
830
|
-
return;
|
|
831
|
-
}
|
|
832
|
-
|
|
833
|
-
console.log(`\n🔍 Scanning for pre-existing resources in ${profile.config.awsRegion}...`);
|
|
834
|
-
|
|
835
|
-
const assetManager = new AssetManager(profile.name);
|
|
836
|
-
const now = new Date().toISOString();
|
|
837
|
-
let discovered = 0;
|
|
838
|
-
let added = 0;
|
|
839
|
-
let skipped = 0;
|
|
840
|
-
|
|
841
|
-
// 1. Query Resource Groups Tagging API for mlcc:managed-by tagged resources
|
|
842
|
-
try {
|
|
843
|
-
console.log('\n Checking tagged resources...');
|
|
844
|
-
const tagResult = this._execAws(
|
|
845
|
-
`resourcegroupstaggingapi get-resources --tag-filters Key=mlcc:managed-by,Values=ml-container-creator --region ${profile.config.awsRegion}`,
|
|
846
|
-
profile.config.awsProfile
|
|
847
|
-
);
|
|
848
|
-
|
|
849
|
-
const taggedResources = tagResult.ResourceTagMappingList || [];
|
|
850
|
-
for (const tagged of taggedResources) {
|
|
851
|
-
discovered++;
|
|
852
|
-
const arn = tagged.ResourceARN;
|
|
853
|
-
const existing = assetManager.getResource(arn);
|
|
854
|
-
if (existing) {
|
|
855
|
-
skipped++;
|
|
856
|
-
continue;
|
|
857
|
-
}
|
|
858
|
-
|
|
859
|
-
const resourceType = this._inferResourceTypeFromArn(arn);
|
|
860
|
-
if (!resourceType) {
|
|
861
|
-
skipped++;
|
|
862
|
-
continue;
|
|
863
|
-
}
|
|
864
|
-
|
|
865
|
-
const project = this._inferProjectFromTags(tagged.Tags) || 'unknown';
|
|
866
|
-
|
|
867
|
-
try {
|
|
868
|
-
assetManager.addResource({
|
|
869
|
-
resourceId: arn,
|
|
870
|
-
resourceType,
|
|
871
|
-
createdAt: now,
|
|
872
|
-
lastUpdatedAt: now,
|
|
873
|
-
project,
|
|
874
|
-
status: 'active',
|
|
875
|
-
metadata: { discoveredBy: 'scan' }
|
|
876
|
-
});
|
|
877
|
-
added++;
|
|
878
|
-
} catch {
|
|
879
|
-
skipped++;
|
|
880
|
-
}
|
|
881
|
-
}
|
|
882
|
-
} catch {
|
|
883
|
-
console.log(' ⚠️ Could not query tagged resources (credentials or API unavailable)');
|
|
884
|
-
}
|
|
885
|
-
|
|
886
|
-
// 2. Query ECR for images in ml-container-creator repository
|
|
887
|
-
try {
|
|
888
|
-
console.log(' Checking ECR images...');
|
|
889
|
-
const ecrResult = this._execAws(
|
|
890
|
-
`ecr describe-images --repository-name ml-container-creator --region ${profile.config.awsRegion}`,
|
|
891
|
-
profile.config.awsProfile
|
|
892
|
-
);
|
|
893
|
-
|
|
894
|
-
const images = ecrResult.imageDetails || [];
|
|
895
|
-
for (const image of images) {
|
|
896
|
-
const tags = image.imageTags || [];
|
|
897
|
-
for (const tag of tags) {
|
|
898
|
-
discovered++;
|
|
899
|
-
const imageUri = `${profile.config.accountId}.dkr.ecr.${profile.config.awsRegion}.amazonaws.com/ml-container-creator:${tag}`;
|
|
900
|
-
const existing = assetManager.getResource(imageUri);
|
|
901
|
-
if (existing) {
|
|
902
|
-
skipped++;
|
|
903
|
-
continue;
|
|
904
|
-
}
|
|
905
|
-
|
|
906
|
-
try {
|
|
907
|
-
assetManager.addResource({
|
|
908
|
-
resourceId: imageUri,
|
|
909
|
-
resourceType: 'ecr-image',
|
|
910
|
-
createdAt: now,
|
|
911
|
-
lastUpdatedAt: now,
|
|
912
|
-
project: this._inferProjectFromImageTag(tag),
|
|
913
|
-
status: 'active',
|
|
914
|
-
metadata: {
|
|
915
|
-
repositoryName: 'ml-container-creator',
|
|
916
|
-
imageTag: tag,
|
|
917
|
-
region: profile.config.awsRegion,
|
|
918
|
-
discoveredBy: 'scan'
|
|
919
|
-
}
|
|
920
|
-
});
|
|
921
|
-
added++;
|
|
922
|
-
} catch {
|
|
923
|
-
skipped++;
|
|
924
|
-
}
|
|
925
|
-
}
|
|
926
|
-
}
|
|
927
|
-
} catch {
|
|
928
|
-
console.log(' ⚠️ Could not query ECR images (credentials or API unavailable)');
|
|
929
|
-
}
|
|
930
|
-
|
|
931
|
-
// 3. Query CodeBuild for *-build-* projects
|
|
932
|
-
try {
|
|
933
|
-
console.log(' Checking CodeBuild projects...');
|
|
934
|
-
const cbResult = this._execAws(
|
|
935
|
-
`codebuild list-projects --region ${profile.config.awsRegion}`,
|
|
936
|
-
profile.config.awsProfile
|
|
937
|
-
);
|
|
938
|
-
|
|
939
|
-
const projects = (cbResult.projects || []).filter(name => name.includes('-build-'));
|
|
940
|
-
for (const projectName of projects) {
|
|
941
|
-
discovered++;
|
|
942
|
-
const arn = `arn:aws:codebuild:${profile.config.awsRegion}:${profile.config.accountId}:project/${projectName}`;
|
|
943
|
-
const existing = assetManager.getResource(arn);
|
|
944
|
-
if (existing) {
|
|
945
|
-
skipped++;
|
|
946
|
-
continue;
|
|
947
|
-
}
|
|
948
|
-
|
|
949
|
-
try {
|
|
950
|
-
assetManager.addResource({
|
|
951
|
-
resourceId: arn,
|
|
952
|
-
resourceType: 'codebuild-project',
|
|
953
|
-
createdAt: now,
|
|
954
|
-
lastUpdatedAt: now,
|
|
955
|
-
project: this._inferProjectFromCodeBuildName(projectName),
|
|
956
|
-
status: 'active',
|
|
957
|
-
metadata: {
|
|
958
|
-
projectName,
|
|
959
|
-
region: profile.config.awsRegion,
|
|
960
|
-
discoveredBy: 'scan'
|
|
961
|
-
}
|
|
962
|
-
});
|
|
963
|
-
added++;
|
|
964
|
-
} catch {
|
|
965
|
-
skipped++;
|
|
966
|
-
}
|
|
967
|
-
}
|
|
968
|
-
} catch {
|
|
969
|
-
console.log(' ⚠️ Could not query CodeBuild projects (credentials or API unavailable)');
|
|
970
|
-
}
|
|
971
|
-
|
|
972
|
-
// Display summary
|
|
973
|
-
console.log(`\n Scan complete: ${discovered} discovered, ${added} added, ${skipped} skipped (duplicates or unsupported)`);
|
|
974
|
-
|
|
975
|
-
if (discovered === 0) {
|
|
976
|
-
console.log(' No MLCC-managed resources were discovered.');
|
|
977
|
-
}
|
|
978
|
-
}
|
|
979
|
-
|
|
980
|
-
/**
|
|
981
|
-
* Prune stale records from the manifest — removes entries with status
|
|
982
|
-
* 'deleted' or 'unknown' that are no longer useful.
|
|
983
|
-
*/
|
|
984
|
-
async _handlePrune() {
|
|
985
|
-
const profile = this.config.getActiveProfile();
|
|
986
|
-
if (!profile) {
|
|
987
|
-
console.log('No active bootstrap profile found.');
|
|
988
|
-
return;
|
|
989
|
-
}
|
|
990
|
-
|
|
991
|
-
const assetManager = new AssetManager(profile.name);
|
|
992
|
-
|
|
993
|
-
if (!existsSync(assetManager.manifestPath)) {
|
|
994
|
-
console.log('No deployment tracking data to prune.');
|
|
995
|
-
return;
|
|
996
|
-
}
|
|
997
|
-
|
|
998
|
-
const before = assetManager.listResources();
|
|
999
|
-
const toRemove = before.filter(r => r.status === 'deleted' || r.status === 'unknown');
|
|
1000
|
-
|
|
1001
|
-
if (toRemove.length === 0) {
|
|
1002
|
-
console.log('Nothing to prune — no deleted or unknown records found.');
|
|
1003
|
-
return;
|
|
1004
|
-
}
|
|
1005
|
-
|
|
1006
|
-
console.log(`\n🧹 Pruning ${toRemove.length} stale record${toRemove.length === 1 ? '' : 's'}:\n`);
|
|
1007
|
-
|
|
1008
|
-
for (const resource of toRemove) {
|
|
1009
|
-
assetManager.removeResource(resource.resourceId);
|
|
1010
|
-
console.log(` 🗑️ [${resource.status}] ${resource.resourceType}: ${resource.resourceId}`);
|
|
1011
|
-
}
|
|
1012
|
-
|
|
1013
|
-
const after = assetManager.listResources();
|
|
1014
|
-
console.log(`\n Done. ${toRemove.length} removed, ${after.length} remaining.`);
|
|
1015
|
-
}
|
|
1016
|
-
|
|
1017
|
-
/**
|
|
1018
|
-
* Handle sync-schemas subcommand: download service models and verify AWS CLI.
|
|
1019
|
-
*/
|
|
1020
|
-
async _handleSyncSchemas() {
|
|
1021
|
-
console.log('\n📦 Schema Sync — Downloading AWS service models...\n');
|
|
1022
|
-
|
|
1023
|
-
// Verify AWS CLI is installed
|
|
1024
|
-
try {
|
|
1025
|
-
const version = execSync('aws --version', { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }).trim();
|
|
1026
|
-
console.log(` AWS CLI: ${version}`);
|
|
1027
|
-
} catch {
|
|
1028
|
-
console.log(' ⚠️ AWS CLI not found.');
|
|
1029
|
-
console.log(' Install: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html');
|
|
1030
|
-
console.log(' Continuing without AWS CLI verification...\n');
|
|
1031
|
-
}
|
|
1032
|
-
|
|
1033
|
-
// Dynamic import to avoid circular dependencies
|
|
1034
|
-
const { syncSchemas } = await import('./schema-sync.js');
|
|
1035
|
-
const result = await syncSchemas();
|
|
1036
|
-
|
|
1037
|
-
if (result.success) {
|
|
1038
|
-
console.log('\n ✅ Schema sync complete.');
|
|
1039
|
-
} else {
|
|
1040
|
-
console.log('\n ⚠️ Schema sync completed with errors (some services may be unavailable).');
|
|
1041
|
-
}
|
|
1042
|
-
|
|
1043
|
-
console.log(` Manifest written: lastSynced = ${result.manifest.lastSynced}\n`);
|
|
1044
|
-
}
|
|
1045
|
-
|
|
1046
|
-
/**
|
|
1047
|
-
* Re-deploy bootstrap infrastructure using the active profile.
|
|
1048
|
-
* No prompts — reads all config from the existing profile and re-applies
|
|
1049
|
-
* the CloudFormation stack and optionally the CI CDK stack.
|
|
1050
|
-
*
|
|
1051
|
-
* @param {object} [options] - Parsed CLI options (e.g., --ci to force CI update)
|
|
1052
|
-
*/
|
|
1053
|
-
async _handleUpdate(options = {}) {
|
|
1054
|
-
const profile = this.config.getActiveProfile();
|
|
1055
|
-
if (!profile) {
|
|
1056
|
-
console.log('No active bootstrap profile found.');
|
|
1057
|
-
console.log('Run `ml-container-creator bootstrap` to set up shared infrastructure first.');
|
|
1058
|
-
return;
|
|
1059
|
-
}
|
|
1060
|
-
|
|
1061
|
-
const { name, config: profileConfig } = profile;
|
|
1062
|
-
console.log(`\n🔄 Updating bootstrap infrastructure for profile "${name}"`);
|
|
1063
|
-
console.log(` Region: ${profileConfig.awsRegion}`);
|
|
1064
|
-
console.log(` Account: ${profileConfig.accountId}`);
|
|
1065
|
-
|
|
1066
|
-
// Re-deploy the CloudFormation bootstrap stack
|
|
1067
|
-
const stackName = profileConfig.stackName || `${STACK_NAME_PREFIX}-${name}`;
|
|
1068
|
-
this._displayProgress('☁️', 'Updating bootstrap stack...');
|
|
1069
|
-
|
|
1070
|
-
try {
|
|
1071
|
-
const stackOutputs = this._deployStack(stackName, {
|
|
1072
|
-
CreateS3Buckets: (profileConfig.asyncS3Bucket || profileConfig.batchS3Bucket) ? 'true' : 'false',
|
|
1073
|
-
UseExistingRoleArn: ''
|
|
1074
|
-
}, profileConfig.awsProfile, profileConfig.awsRegion);
|
|
1075
|
-
|
|
1076
|
-
// Update profile with any new outputs
|
|
1077
|
-
if (stackOutputs.RoleArn) profileConfig.roleArn = stackOutputs.RoleArn;
|
|
1078
|
-
if (stackOutputs.EcrRepositoryName) profileConfig.ecrRepositoryName = stackOutputs.EcrRepositoryName;
|
|
1079
|
-
if (stackOutputs.AsyncS3BucketName) profileConfig.asyncS3Bucket = stackOutputs.AsyncS3BucketName;
|
|
1080
|
-
if (stackOutputs.BatchS3BucketName) profileConfig.batchS3Bucket = stackOutputs.BatchS3BucketName;
|
|
1081
|
-
if (stackOutputs.BenchmarkS3BucketName) profileConfig.benchmarkS3Bucket = stackOutputs.BenchmarkS3BucketName;
|
|
1082
|
-
profileConfig.stackName = stackName;
|
|
1083
|
-
|
|
1084
|
-
console.log(' ✅ Bootstrap stack updated');
|
|
1085
|
-
} catch (error) {
|
|
1086
|
-
console.log(` ❌ Stack update failed: ${error.message}`);
|
|
1087
|
-
}
|
|
1088
|
-
|
|
1089
|
-
// Re-deploy CI stack if it was provisioned or --ci flag is set
|
|
1090
|
-
const shouldUpdateCi = profileConfig.ciInfraProvisioned || options.ci;
|
|
1091
|
-
if (shouldUpdateCi) {
|
|
1092
|
-
this._displayProgress('🧪', 'Updating CI harness stack...');
|
|
1093
|
-
|
|
1094
|
-
try {
|
|
1095
|
-
const ciHarnessDir = path.resolve(__dirname, '../../infra/ci-harness');
|
|
1096
|
-
|
|
1097
|
-
// CI harness source is not bundled in the npm package — only available from git clone
|
|
1098
|
-
if (!existsSync(ciHarnessDir)) {
|
|
1099
|
-
console.log(' ⏭️ CI harness source not available (npm install does not include infra/)');
|
|
1100
|
-
console.log(' To update the CI stack, run from a git clone: git clone https://github.com/awslabs/ml-container-creator && cd ml-container-creator && npx cdk deploy -c region=REGION');
|
|
1101
|
-
} else {
|
|
1102
|
-
// Ensure dependencies are installed (handles cold starts / fresh clones)
|
|
1103
|
-
execSync('npm install --silent', {
|
|
1104
|
-
cwd: ciHarnessDir,
|
|
1105
|
-
encoding: 'utf8',
|
|
1106
|
-
stdio: ['pipe', 'pipe', 'pipe']
|
|
1107
|
-
});
|
|
1108
|
-
|
|
1109
|
-
execSync(
|
|
1110
|
-
'npx cdk deploy MlccCiHarnessStack --require-approval never',
|
|
1111
|
-
{
|
|
1112
|
-
cwd: ciHarnessDir,
|
|
1113
|
-
encoding: 'utf8',
|
|
1114
|
-
stdio: 'inherit',
|
|
1115
|
-
env: {
|
|
1116
|
-
...process.env,
|
|
1117
|
-
CDK_DEFAULT_REGION: profileConfig.awsRegion,
|
|
1118
|
-
CDK_DEFAULT_ACCOUNT: profileConfig.accountId,
|
|
1119
|
-
AWS_PROFILE: profileConfig.awsProfile
|
|
1120
|
-
}
|
|
1121
|
-
}
|
|
1122
|
-
);
|
|
1123
|
-
profileConfig.ciInfraProvisioned = true;
|
|
1124
|
-
console.log(' ✅ CI harness stack updated');
|
|
1125
|
-
}
|
|
1126
|
-
} catch (error) {
|
|
1127
|
-
console.log(` ❌ CI stack update failed: ${error.message}`);
|
|
1128
|
-
}
|
|
1129
|
-
} else {
|
|
1130
|
-
console.log(' ⏭️ CI stack skipped (not provisioned — use --ci to force)');
|
|
1131
|
-
}
|
|
1132
|
-
|
|
1133
|
-
// Save updated profile
|
|
1134
|
-
this.config.setProfile(name, profileConfig);
|
|
1135
|
-
console.log(`\n✅ Update complete for profile "${name}"`);
|
|
1136
|
-
|
|
1137
|
-
// Re-run post-setup chain after updating AWS resources
|
|
1138
|
-
await this._runPostSetupChain(options);
|
|
1139
|
-
}
|
|
1140
|
-
|
|
1141
|
-
/**
|
|
1142
|
-
* Run the post-setup chain: mcp init → registry sync-architectures → sync-schemas.
|
|
1143
|
-
* Each step is independent — failures are collected and reported at the end.
|
|
1144
|
-
*
|
|
1145
|
-
* @param {object} options - Parsed CLI options (checks skipPostSetup)
|
|
1146
|
-
*/
|
|
1147
|
-
async _runPostSetupChain(options = {}) {
|
|
1148
|
-
if (options['skip-post-setup']) {
|
|
1149
|
-
console.log('\n⏭️ Skipping post-setup chain (--skip-post-setup)');
|
|
1150
|
-
return;
|
|
1151
|
-
}
|
|
1152
|
-
|
|
1153
|
-
console.log('\n🔗 Running post-setup configuration...\n');
|
|
1154
|
-
|
|
1155
|
-
const failures = [];
|
|
1156
|
-
|
|
1157
|
-
// 1. MCP init — register bundled MCP servers
|
|
1158
|
-
console.log('📡 Registering MCP servers...');
|
|
1159
|
-
try {
|
|
1160
|
-
const generatorAdapter = {
|
|
1161
|
-
destinationPath(...segments) {
|
|
1162
|
-
return path.resolve(process.cwd(), ...segments);
|
|
1163
|
-
}
|
|
1164
|
-
};
|
|
1165
|
-
const mcpHandler = new McpCommandHandler(generatorAdapter);
|
|
1166
|
-
await mcpHandler.handle(['init'], {});
|
|
1167
|
-
} catch (error) {
|
|
1168
|
-
failures.push({ step: 'mcp init', error: error.message });
|
|
1169
|
-
console.log(` ⚠️ mcp init failed: ${error.message}`);
|
|
1170
|
-
}
|
|
1171
|
-
|
|
1172
|
-
// 2. Registry sync-architectures — populate supportedModelTypes
|
|
1173
|
-
console.log('\n📋 Syncing model architecture registry...');
|
|
1174
|
-
try {
|
|
1175
|
-
const registryHandler = new RegistryCommandHandler();
|
|
1176
|
-
await registryHandler.handle(['sync-architectures'], {});
|
|
1177
|
-
} catch (error) {
|
|
1178
|
-
failures.push({ step: 'registry sync-architectures', error: error.message });
|
|
1179
|
-
console.log(` ⚠️ registry sync-architectures failed: ${error.message}`);
|
|
1180
|
-
}
|
|
1181
|
-
|
|
1182
|
-
// 3. Schema sync — download AWS service models
|
|
1183
|
-
console.log('\n📐 Syncing service schemas...');
|
|
1184
|
-
try {
|
|
1185
|
-
await this._handleSyncSchemas();
|
|
1186
|
-
} catch (error) {
|
|
1187
|
-
failures.push({ step: 'sync-schemas', error: error.message });
|
|
1188
|
-
console.log(` ⚠️ sync-schemas failed: ${error.message}`);
|
|
1189
|
-
}
|
|
1190
|
-
|
|
1191
|
-
// Report results
|
|
1192
|
-
if (failures.length === 0) {
|
|
1193
|
-
console.log('\n✅ Bootstrap complete — all systems operational');
|
|
1194
|
-
} else {
|
|
1195
|
-
console.log(`\n⚠️ Bootstrap complete with ${failures.length} warning${failures.length === 1 ? '' : 's'}:`);
|
|
1196
|
-
for (const { step, error } of failures) {
|
|
1197
|
-
console.log(` • ${step}: ${error}`);
|
|
1198
|
-
}
|
|
1199
|
-
console.log('\n These steps can be re-run individually:');
|
|
1200
|
-
console.log(' ml-container-creator mcp init');
|
|
1201
|
-
console.log(' ml-container-creator registry sync-architectures');
|
|
1202
|
-
console.log(' ml-container-creator bootstrap sync-schemas');
|
|
1203
|
-
}
|
|
1204
|
-
}
|
|
1205
|
-
|
|
1206
|
-
/**
|
|
1207
|
-
* Infer the resource type from an ARN.
|
|
1208
|
-
* @param {string} arn - AWS ARN
|
|
1209
|
-
* @returns {string|null} Resource type or null if not recognized
|
|
1210
|
-
*/
|
|
1211
|
-
_inferResourceTypeFromArn(arn) {
|
|
1212
|
-
if (arn.includes(':endpoint/')) return 'sagemaker-endpoint';
|
|
1213
|
-
if (arn.includes(':endpoint-config/')) return 'sagemaker-endpoint-config';
|
|
1214
|
-
if (arn.includes(':model/')) return 'sagemaker-model';
|
|
1215
|
-
if (arn.includes(':inference-component/')) return 'sagemaker-inference-component';
|
|
1216
|
-
if (arn.includes(':transform-job/')) return 'sagemaker-transform-job';
|
|
1217
|
-
if (arn.includes(':project/')) return 'codebuild-project';
|
|
1218
|
-
if (arn.includes(':role/')) return 'iam-role';
|
|
1219
|
-
if (arn.includes(':topic')) return 'sns-topic';
|
|
1220
|
-
return null;
|
|
1221
|
-
}
|
|
883
|
+
_inferResourceTypeFromArn(arn) {
|
|
884
|
+
if (arn.includes(':endpoint/')) return 'sagemaker-endpoint';
|
|
885
|
+
if (arn.includes(':endpoint-config/')) return 'sagemaker-endpoint-config';
|
|
886
|
+
if (arn.includes(':model/')) return 'sagemaker-model';
|
|
887
|
+
if (arn.includes(':inference-component/')) return 'sagemaker-inference-component';
|
|
888
|
+
if (arn.includes(':transform-job/')) return 'sagemaker-transform-job';
|
|
889
|
+
if (arn.includes(':project/')) return 'codebuild-project';
|
|
890
|
+
if (arn.includes(':role/')) return 'iam-role';
|
|
891
|
+
if (arn.includes(':topic')) return 'sns-topic';
|
|
892
|
+
return null;
|
|
893
|
+
}
|
|
1222
894
|
|
|
1223
895
|
/**
|
|
1224
896
|
* Infer the project name from resource tags.
|
|
@@ -1307,396 +979,9 @@ export default class BootstrapCommandHandler {
|
|
|
1307
979
|
return { accountId, region };
|
|
1308
980
|
}
|
|
1309
981
|
|
|
1310
|
-
/**
|
|
1311
|
-
* Create or reuse the SageMaker execution IAM role.
|
|
1312
|
-
* @param {object} options - Parsed CLI options
|
|
1313
|
-
* @returns {Promise<string>} Role ARN
|
|
1314
|
-
*/
|
|
1315
|
-
async _setupIamRole(_options) {
|
|
1316
|
-
const roleName = 'mlcc-sagemaker-execution-role';
|
|
1317
|
-
|
|
1318
|
-
// Define trust policy for SageMaker
|
|
1319
|
-
const trustPolicy = {
|
|
1320
|
-
Version: '2012-10-17',
|
|
1321
|
-
Statement: [
|
|
1322
|
-
{
|
|
1323
|
-
Effect: 'Allow',
|
|
1324
|
-
Principal: {
|
|
1325
|
-
Service: 'sagemaker.amazonaws.com'
|
|
1326
|
-
},
|
|
1327
|
-
Action: 'sts:AssumeRole'
|
|
1328
|
-
}
|
|
1329
|
-
]
|
|
1330
|
-
};
|
|
1331
|
-
|
|
1332
|
-
// Define execution policy with least-privilege permissions
|
|
1333
|
-
const executionPolicy = {
|
|
1334
|
-
Version: '2012-10-17',
|
|
1335
|
-
Statement: [
|
|
1336
|
-
{
|
|
1337
|
-
Sid: 'SageMakerEndpoints',
|
|
1338
|
-
Effect: 'Allow',
|
|
1339
|
-
Action: [
|
|
1340
|
-
'sagemaker:CreateEndpoint',
|
|
1341
|
-
'sagemaker:CreateEndpointConfig',
|
|
1342
|
-
'sagemaker:CreateModel',
|
|
1343
|
-
'sagemaker:CreateInferenceComponent',
|
|
1344
|
-
'sagemaker:UpdateEndpoint',
|
|
1345
|
-
'sagemaker:UpdateEndpointWeightsAndCapacities',
|
|
1346
|
-
'sagemaker:UpdateInferenceComponent',
|
|
1347
|
-
'sagemaker:DeleteEndpoint',
|
|
1348
|
-
'sagemaker:DeleteEndpointConfig',
|
|
1349
|
-
'sagemaker:DeleteModel',
|
|
1350
|
-
'sagemaker:DeleteInferenceComponent',
|
|
1351
|
-
'sagemaker:DescribeEndpoint',
|
|
1352
|
-
'sagemaker:DescribeEndpointConfig',
|
|
1353
|
-
'sagemaker:DescribeModel',
|
|
1354
|
-
'sagemaker:DescribeInferenceComponent',
|
|
1355
|
-
'sagemaker:ListInferenceComponents',
|
|
1356
|
-
'sagemaker:InvokeEndpoint',
|
|
1357
|
-
'sagemaker:InvokeEndpointAsync'
|
|
1358
|
-
],
|
|
1359
|
-
Resource: '*'
|
|
1360
|
-
},
|
|
1361
|
-
{
|
|
1362
|
-
Sid: 'SageMakerBenchmarking',
|
|
1363
|
-
Effect: 'Allow',
|
|
1364
|
-
Action: [
|
|
1365
|
-
'sagemaker:CreateAIBenchmarkJob',
|
|
1366
|
-
'sagemaker:DescribeAIBenchmarkJob',
|
|
1367
|
-
'sagemaker:ListAIBenchmarkJobs',
|
|
1368
|
-
'sagemaker:StopAIBenchmarkJob',
|
|
1369
|
-
'sagemaker:DeleteAIBenchmarkJob',
|
|
1370
|
-
'sagemaker:CreateAIWorkloadConfig',
|
|
1371
|
-
'sagemaker:DescribeAIWorkloadConfig',
|
|
1372
|
-
'sagemaker:ListAIWorkloadConfigs',
|
|
1373
|
-
'sagemaker:DeleteAIWorkloadConfig'
|
|
1374
|
-
],
|
|
1375
|
-
Resource: '*'
|
|
1376
|
-
},
|
|
1377
|
-
{
|
|
1378
|
-
Sid: 'ECRPull',
|
|
1379
|
-
Effect: 'Allow',
|
|
1380
|
-
Action: [
|
|
1381
|
-
'ecr:GetAuthorizationToken',
|
|
1382
|
-
'ecr:BatchCheckLayerAvailability',
|
|
1383
|
-
'ecr:GetDownloadUrlForLayer',
|
|
1384
|
-
'ecr:BatchGetImage'
|
|
1385
|
-
],
|
|
1386
|
-
Resource: 'arn:aws:ecr:*:*:repository/ml-container-creator'
|
|
1387
|
-
},
|
|
1388
|
-
{
|
|
1389
|
-
Sid: 'ECRAuth',
|
|
1390
|
-
Effect: 'Allow',
|
|
1391
|
-
Action: 'ecr:GetAuthorizationToken',
|
|
1392
|
-
Resource: '*'
|
|
1393
|
-
},
|
|
1394
|
-
{
|
|
1395
|
-
Sid: 'CloudWatchLogs',
|
|
1396
|
-
Effect: 'Allow',
|
|
1397
|
-
Action: [
|
|
1398
|
-
'logs:CreateLogGroup',
|
|
1399
|
-
'logs:CreateLogStream',
|
|
1400
|
-
'logs:PutLogEvents'
|
|
1401
|
-
],
|
|
1402
|
-
Resource: 'arn:aws:logs:*:*:*'
|
|
1403
|
-
},
|
|
1404
|
-
{
|
|
1405
|
-
Sid: 'S3ModelRead',
|
|
1406
|
-
Effect: 'Allow',
|
|
1407
|
-
Action: [
|
|
1408
|
-
's3:GetObject',
|
|
1409
|
-
's3:PutObject',
|
|
1410
|
-
's3:AbortMultipartUpload',
|
|
1411
|
-
's3:ListBucket'
|
|
1412
|
-
],
|
|
1413
|
-
Resource: [
|
|
1414
|
-
'arn:aws:s3:::ml-container-creator-*',
|
|
1415
|
-
'arn:aws:s3:::ml-container-creator-*/*'
|
|
1416
|
-
]
|
|
1417
|
-
},
|
|
1418
|
-
{
|
|
1419
|
-
Sid: 'SNSPublish',
|
|
1420
|
-
Effect: 'Allow',
|
|
1421
|
-
Action: 'sns:Publish',
|
|
1422
|
-
Resource: 'arn:aws:sns:*:*:ml-container-creator-*'
|
|
1423
|
-
},
|
|
1424
|
-
{
|
|
1425
|
-
Sid: 'SecretsManagerBenchmark',
|
|
1426
|
-
Effect: 'Allow',
|
|
1427
|
-
Action: [
|
|
1428
|
-
'secretsmanager:CreateSecret',
|
|
1429
|
-
'secretsmanager:PutSecretValue',
|
|
1430
|
-
'secretsmanager:GetSecretValue',
|
|
1431
|
-
'secretsmanager:DescribeSecret'
|
|
1432
|
-
],
|
|
1433
|
-
Resource: 'arn:aws:secretsmanager:*:*:secret:ml-container-creator/*'
|
|
1434
|
-
},
|
|
1435
|
-
{
|
|
1436
|
-
Sid: 'QuotaAndAvailability',
|
|
1437
|
-
Effect: 'Allow',
|
|
1438
|
-
Action: [
|
|
1439
|
-
'service-quotas:GetServiceQuota',
|
|
1440
|
-
'service-quotas:ListServiceQuotas',
|
|
1441
|
-
'sagemaker:ListTrainingPlans',
|
|
1442
|
-
'sagemaker:DescribeTrainingPlan',
|
|
1443
|
-
'sagemaker:ListEndpoints'
|
|
1444
|
-
],
|
|
1445
|
-
Resource: '*'
|
|
1446
|
-
}
|
|
1447
|
-
]
|
|
1448
|
-
};
|
|
1449
|
-
|
|
1450
|
-
// Check if role already exists
|
|
1451
|
-
const roleExists = this._resourceExists(
|
|
1452
|
-
`iam get-role --role-name ${roleName}`,
|
|
1453
|
-
this._currentProfile
|
|
1454
|
-
);
|
|
1455
|
-
|
|
1456
|
-
if (roleExists) {
|
|
1457
|
-
const existingRole = this._execAws(
|
|
1458
|
-
`iam get-role --role-name ${roleName}`,
|
|
1459
|
-
this._currentProfile
|
|
1460
|
-
);
|
|
1461
|
-
const roleArn = existingRole.Role.Arn;
|
|
1462
|
-
console.log(` ✅ IAM role "${roleName}" already exists — reused`);
|
|
1463
|
-
|
|
1464
|
-
// Always update the inline policy and tags to ensure they're current
|
|
1465
|
-
try {
|
|
1466
|
-
const execPolicyFile = this._writeJsonTempFile(executionPolicy, 'exec-policy');
|
|
1467
|
-
this._execAws(
|
|
1468
|
-
`iam put-role-policy --role-name ${roleName} --policy-name mlcc-execution-policy --policy-document ${execPolicyFile}`,
|
|
1469
|
-
this._currentProfile
|
|
1470
|
-
);
|
|
1471
|
-
console.log(' ✅ IAM policy "mlcc-execution-policy" — updated');
|
|
1472
|
-
} catch (err) {
|
|
1473
|
-
console.log(` ⚠️ Could not update inline policy: ${err.message}`);
|
|
1474
|
-
}
|
|
1475
|
-
|
|
1476
|
-
try {
|
|
1477
|
-
const tags = this._buildResourceTags();
|
|
1478
|
-
this._execAws(
|
|
1479
|
-
`iam tag-role --role-name ${roleName} --tags ${this._formatTagsForCli(tags)}`,
|
|
1480
|
-
this._currentProfile
|
|
1481
|
-
);
|
|
1482
|
-
console.log(' ✅ IAM role tags — updated');
|
|
1483
|
-
} catch (err) {
|
|
1484
|
-
console.log(` ⚠️ Could not update role tags: ${err.message}`);
|
|
1485
|
-
}
|
|
1486
|
-
|
|
1487
|
-
return roleArn;
|
|
1488
|
-
}
|
|
1489
|
-
|
|
1490
|
-
// Display policies to user before creation
|
|
1491
|
-
console.log('\n Trust Policy:');
|
|
1492
|
-
console.log(JSON.stringify(trustPolicy, null, 2));
|
|
1493
|
-
console.log('\n Execution Policy:');
|
|
1494
|
-
console.log(JSON.stringify(executionPolicy, null, 2));
|
|
1495
|
-
console.log('');
|
|
1496
|
-
|
|
1497
|
-
try {
|
|
1498
|
-
// Create the IAM role — write policy to temp file to avoid shell escaping issues
|
|
1499
|
-
const trustPolicyFile = this._writeJsonTempFile(trustPolicy, 'trust-policy');
|
|
1500
|
-
const createRoleResult = this._execAws(
|
|
1501
|
-
`iam create-role --role-name ${roleName} --assume-role-policy-document ${trustPolicyFile}`,
|
|
1502
|
-
this._currentProfile
|
|
1503
|
-
);
|
|
1504
|
-
const roleArn = createRoleResult.Role.Arn;
|
|
1505
|
-
|
|
1506
|
-
// Attach inline execution policy
|
|
1507
|
-
const execPolicyFile = this._writeJsonTempFile(executionPolicy, 'exec-policy');
|
|
1508
|
-
this._execAws(
|
|
1509
|
-
`iam put-role-policy --role-name ${roleName} --policy-name mlcc-execution-policy --policy-document ${execPolicyFile}`,
|
|
1510
|
-
this._currentProfile
|
|
1511
|
-
);
|
|
1512
|
-
|
|
1513
|
-
// Apply resource tags
|
|
1514
|
-
const tags = this._buildResourceTags();
|
|
1515
|
-
this._execAws(
|
|
1516
|
-
`iam tag-role --role-name ${roleName} --tags ${this._formatTagsForCli(tags)}`,
|
|
1517
|
-
this._currentProfile
|
|
1518
|
-
);
|
|
1519
|
-
|
|
1520
|
-
console.log(` ✅ IAM role "${roleName}" — created`);
|
|
1521
|
-
return roleArn;
|
|
1522
|
-
} catch (error) {
|
|
1523
|
-
const errorMessage = error.message || '';
|
|
1524
|
-
if (errorMessage.includes('AccessDenied') || errorMessage.includes('UnauthorizedAccess')) {
|
|
1525
|
-
console.log(' ⚠️ Permission denied for iam:CreateRole. Please provide an existing role ARN.');
|
|
1526
|
-
const { roleArn } = await this._promptFn([{
|
|
1527
|
-
type: 'input',
|
|
1528
|
-
name: 'roleArn',
|
|
1529
|
-
message: 'Enter an existing IAM role ARN for SageMaker execution:'
|
|
1530
|
-
}]);
|
|
1531
|
-
return roleArn;
|
|
1532
|
-
}
|
|
1533
|
-
throw error;
|
|
1534
|
-
}
|
|
1535
|
-
}
|
|
1536
|
-
|
|
1537
|
-
/**
|
|
1538
|
-
* Create or reuse the ECR repository.
|
|
1539
|
-
* @returns {Promise<string>} ECR repository name
|
|
1540
|
-
*/
|
|
1541
|
-
async _setupEcrRepository() {
|
|
1542
|
-
const repoName = 'ml-container-creator';
|
|
1543
|
-
|
|
1544
|
-
// Check if repository already exists
|
|
1545
|
-
const repoExists = this._resourceExists(
|
|
1546
|
-
`ecr describe-repositories --repository-names ${repoName} --region ${this._currentRegion}`,
|
|
1547
|
-
this._currentProfile
|
|
1548
|
-
);
|
|
1549
|
-
|
|
1550
|
-
if (repoExists) {
|
|
1551
|
-
console.log(` ✅ ECR repository "${repoName}" already exists — reused`);
|
|
1552
|
-
return repoName;
|
|
1553
|
-
}
|
|
1554
|
-
|
|
1555
|
-
// Build resource tags
|
|
1556
|
-
const tags = this._buildResourceTags();
|
|
1557
|
-
|
|
1558
|
-
// Create the ECR repository with image scanning and AES256 encryption
|
|
1559
|
-
this._execAws(
|
|
1560
|
-
`ecr create-repository --repository-name ${repoName} --image-scanning-configuration scanOnPush=true --encryption-configuration encryptionType=AES256 --region ${this._currentRegion} --tags ${this._formatTagsForCli(tags)}`,
|
|
1561
|
-
this._currentProfile
|
|
1562
|
-
);
|
|
1563
|
-
|
|
1564
|
-
// Apply lifecycle policy to expire untagged images after 30 days
|
|
1565
|
-
const lifecyclePolicy = {
|
|
1566
|
-
rules: [
|
|
1567
|
-
{
|
|
1568
|
-
rulePriority: 1,
|
|
1569
|
-
description: 'Expire untagged images after 30 days',
|
|
1570
|
-
selection: {
|
|
1571
|
-
tagStatus: 'untagged',
|
|
1572
|
-
countType: 'sinceImagePushed',
|
|
1573
|
-
countUnit: 'days',
|
|
1574
|
-
countNumber: 30
|
|
1575
|
-
},
|
|
1576
|
-
action: {
|
|
1577
|
-
type: 'expire'
|
|
1578
|
-
}
|
|
1579
|
-
}
|
|
1580
|
-
]
|
|
1581
|
-
};
|
|
1582
|
-
|
|
1583
|
-
const lifecyclePolicyFile = this._writeJsonTempFile(lifecyclePolicy, 'ecr-lifecycle');
|
|
1584
|
-
this._execAws(
|
|
1585
|
-
`ecr put-lifecycle-policy --repository-name ${repoName} --lifecycle-policy-text ${lifecyclePolicyFile} --region ${this._currentRegion}`,
|
|
1586
|
-
this._currentProfile
|
|
1587
|
-
);
|
|
1588
|
-
|
|
1589
|
-
console.log(` ✅ ECR repository "${repoName}" — created`);
|
|
1590
|
-
return repoName;
|
|
1591
|
-
}
|
|
1592
|
-
|
|
1593
|
-
/**
|
|
1594
|
-
* Optionally create S3 buckets for async/batch deployments.
|
|
1595
|
-
* Always creates the benchmark S3 bucket (unconditional).
|
|
1596
|
-
* @returns {Promise<object|null>} Bucket names or null if skipped
|
|
1597
|
-
*/
|
|
1598
|
-
async _setupS3Buckets() {
|
|
1599
|
-
// Always create benchmark bucket (unconditional — avoids re-bootstrap when benchmarking is enabled later)
|
|
1600
|
-
const benchmarkBucketName = `ml-container-creator-benchmark-${this._currentRegion}-${this._currentAccountId}`;
|
|
1601
|
-
const tags = this._buildResourceTags();
|
|
1602
|
-
const benchmarkS3Bucket = await this._createS3Bucket(benchmarkBucketName, tags);
|
|
1603
|
-
|
|
1604
|
-
const { useS3 } = await this._promptFn([{
|
|
1605
|
-
type: 'confirm',
|
|
1606
|
-
name: 'useS3',
|
|
1607
|
-
message: 'Will you use async inference or batch transform?',
|
|
1608
|
-
default: false
|
|
1609
|
-
}]);
|
|
1610
|
-
|
|
1611
|
-
if (!useS3) {
|
|
1612
|
-
return { benchmarkS3Bucket };
|
|
1613
|
-
}
|
|
1614
|
-
|
|
1615
|
-
const asyncBucketName = `ml-container-creator-async-${this._currentRegion}-${this._currentAccountId}`;
|
|
1616
|
-
const batchBucketName = `ml-container-creator-batch-${this._currentRegion}-${this._currentAccountId}`;
|
|
1617
|
-
|
|
1618
|
-
const asyncS3Bucket = await this._createS3Bucket(asyncBucketName, tags);
|
|
1619
|
-
const batchS3Bucket = await this._createS3Bucket(batchBucketName, tags);
|
|
1620
|
-
|
|
1621
|
-
return { asyncS3Bucket, batchS3Bucket, benchmarkS3Bucket };
|
|
1622
|
-
}
|
|
1623
|
-
|
|
1624
|
-
/**
|
|
1625
|
-
* Create or reuse a single S3 bucket with versioning, encryption, and tags.
|
|
1626
|
-
* @param {string} bucketName - S3 bucket name
|
|
1627
|
-
* @param {Array<{Key: string, Value: string}>} tags - Resource tags
|
|
1628
|
-
* @returns {Promise<string>} Bucket name
|
|
1629
|
-
*/
|
|
1630
|
-
async _createS3Bucket(bucketName, tags) {
|
|
1631
|
-
// Check if bucket already exists
|
|
1632
|
-
const bucketExists = this._resourceExists(
|
|
1633
|
-
`s3api head-bucket --bucket ${bucketName}`,
|
|
1634
|
-
this._currentProfile
|
|
1635
|
-
);
|
|
1636
|
-
|
|
1637
|
-
if (bucketExists) {
|
|
1638
|
-
console.log(` ✅ S3 bucket "${bucketName}" already exists — reused`);
|
|
1639
|
-
return bucketName;
|
|
1640
|
-
}
|
|
1641
|
-
|
|
1642
|
-
// Build create-bucket command with region-appropriate configuration
|
|
1643
|
-
let createCommand = `s3api create-bucket --bucket ${bucketName} --region ${this._currentRegion}`;
|
|
1644
|
-
if (this._currentRegion !== 'us-east-1') {
|
|
1645
|
-
createCommand += ` --create-bucket-configuration LocationConstraint=${this._currentRegion}`;
|
|
1646
|
-
}
|
|
1647
|
-
|
|
1648
|
-
this._execAws(createCommand, this._currentProfile);
|
|
1649
|
-
|
|
1650
|
-
// Enable versioning
|
|
1651
|
-
this._execAws(
|
|
1652
|
-
`s3api put-bucket-versioning --bucket ${bucketName} --versioning-configuration Status=Enabled`,
|
|
1653
|
-
this._currentProfile
|
|
1654
|
-
);
|
|
1655
|
-
|
|
1656
|
-
// Enable AES256 server-side encryption
|
|
1657
|
-
const encryptionConfig = { Rules: [{ ApplyServerSideEncryptionByDefault: { SSEAlgorithm: 'AES256' } }] };
|
|
1658
|
-
const encryptionFile = this._writeJsonTempFile(encryptionConfig, 's3-encryption');
|
|
1659
|
-
this._execAws(
|
|
1660
|
-
`s3api put-bucket-encryption --bucket ${bucketName} --server-side-encryption-configuration ${encryptionFile}`,
|
|
1661
|
-
this._currentProfile
|
|
1662
|
-
);
|
|
1663
|
-
|
|
1664
|
-
// Apply resource tags
|
|
1665
|
-
const tagging = { TagSet: tags };
|
|
1666
|
-
const taggingFile = this._writeJsonTempFile(tagging, 's3-tagging');
|
|
1667
|
-
this._execAws(
|
|
1668
|
-
`s3api put-bucket-tagging --bucket ${bucketName} --tagging ${taggingFile}`,
|
|
1669
|
-
this._currentProfile
|
|
1670
|
-
);
|
|
1671
|
-
|
|
1672
|
-
console.log(` ✅ S3 bucket "${bucketName}" — created`);
|
|
1673
|
-
return bucketName;
|
|
1674
|
-
}
|
|
1675
982
|
|
|
1676
983
|
// ── AWS CLI helpers ─────────────────────────────────────────────
|
|
1677
984
|
|
|
1678
|
-
/**
|
|
1679
|
-
* Verify AWS CLI v2 is installed. Returns true if v2 is detected, false otherwise.
|
|
1680
|
-
* Extracted as a method so tests can override it.
|
|
1681
|
-
* @returns {boolean}
|
|
1682
|
-
*/
|
|
1683
|
-
_verifyCliV2() {
|
|
1684
|
-
try {
|
|
1685
|
-
const versionOutput = execSync('aws --version', { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }).trim();
|
|
1686
|
-
if (!versionOutput.includes('aws-cli/2')) {
|
|
1687
|
-
console.log(` ❌ AWS CLI v2 is required. Detected: ${versionOutput.split(' ')[0]}`);
|
|
1688
|
-
console.log(' Install: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html');
|
|
1689
|
-
console.log(' Some features (benchmarking, newer SageMaker APIs) require CLI v2.\n');
|
|
1690
|
-
return false;
|
|
1691
|
-
}
|
|
1692
|
-
return true;
|
|
1693
|
-
} catch {
|
|
1694
|
-
console.log(' ❌ AWS CLI not found.');
|
|
1695
|
-
console.log(' Install: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html\n');
|
|
1696
|
-
return false;
|
|
1697
|
-
}
|
|
1698
|
-
}
|
|
1699
|
-
|
|
1700
985
|
/**
|
|
1701
986
|
* Execute an AWS CLI command and return parsed JSON output.
|
|
1702
987
|
* @param {string} command - AWS CLI command (without 'aws' prefix)
|
|
@@ -1716,6 +1001,12 @@ export default class BootstrapCommandHandler {
|
|
|
1716
1001
|
/**
|
|
1717
1002
|
* Deploy the bootstrap CloudFormation stack and return its outputs.
|
|
1718
1003
|
*
|
|
1004
|
+
* Before deploying, checks for pre-existing S3 buckets that would cause
|
|
1005
|
+
* ResourceExistenceCheck failures. If the stack is in REVIEW_IN_PROGRESS
|
|
1006
|
+
* state (empty shell from a failed prior attempt), deletes it first.
|
|
1007
|
+
* If buckets exist but aren't managed by the stack, uses a CloudFormation
|
|
1008
|
+
* import changeset to adopt them before proceeding with the normal deploy.
|
|
1009
|
+
*
|
|
1719
1010
|
* Uses `aws cloudformation deploy` which is idempotent — it creates the
|
|
1720
1011
|
* stack on first run and updates it on subsequent runs. If the template
|
|
1721
1012
|
* hasn't changed, it exits with "No changes to deploy" which we handle
|
|
@@ -1728,6 +1019,9 @@ export default class BootstrapCommandHandler {
|
|
|
1728
1019
|
* @returns {object} Map of output key → output value
|
|
1729
1020
|
*/
|
|
1730
1021
|
_deployStack(stackName, parameters, profile, region) {
|
|
1022
|
+
// Handle ghost stacks and pre-existing resources
|
|
1023
|
+
this._resolveStackConflicts(stackName, parameters, profile, region);
|
|
1024
|
+
|
|
1731
1025
|
// Build parameter overrides string
|
|
1732
1026
|
const paramOverrides = Object.entries(parameters)
|
|
1733
1027
|
.map(([key, value]) => `${key}=${value}`)
|
|
@@ -1751,6 +1045,32 @@ export default class BootstrapCommandHandler {
|
|
|
1751
1045
|
const stderr = error.stderr || error.message || '';
|
|
1752
1046
|
if (stderr.includes('No changes to deploy')) {
|
|
1753
1047
|
console.log(' ℹ️ Stack is up to date — no changes needed');
|
|
1048
|
+
} else if (stderr.includes('ResourceExistenceCheck')) {
|
|
1049
|
+
// Resources already exist outside the stack — attempt import and retry
|
|
1050
|
+
console.log(' ⚠️ Pre-existing resources detected — attempting import...');
|
|
1051
|
+
this._resolveStackConflicts(stackName, parameters, profile, region);
|
|
1052
|
+
// Rebuild deploy command with updated parameters (e.g., CreateS3Buckets may now be 'false')
|
|
1053
|
+
const retryParamOverrides = Object.entries(parameters)
|
|
1054
|
+
.map(([key, value]) => `${key}=${value}`)
|
|
1055
|
+
.join(' ');
|
|
1056
|
+
const retryDeployCommand = [
|
|
1057
|
+
'aws cloudformation deploy',
|
|
1058
|
+
`--template-file ${STACK_TEMPLATE_PATH}`,
|
|
1059
|
+
`--stack-name ${stackName}`,
|
|
1060
|
+
'--capabilities CAPABILITY_NAMED_IAM',
|
|
1061
|
+
`--parameter-overrides ${retryParamOverrides}`,
|
|
1062
|
+
`--profile ${profile}`,
|
|
1063
|
+
`--region ${region}`
|
|
1064
|
+
].join(' ');
|
|
1065
|
+
// Retry the deploy after import
|
|
1066
|
+
try {
|
|
1067
|
+
execSync(retryDeployCommand, { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] });
|
|
1068
|
+
} catch (retryError) {
|
|
1069
|
+
const retryStderr = retryError.stderr || retryError.message || '';
|
|
1070
|
+
if (!retryStderr.includes('No changes to deploy')) {
|
|
1071
|
+
throw retryError;
|
|
1072
|
+
}
|
|
1073
|
+
}
|
|
1754
1074
|
} else {
|
|
1755
1075
|
throw error;
|
|
1756
1076
|
}
|
|
@@ -1772,9 +1092,144 @@ export default class BootstrapCommandHandler {
|
|
|
1772
1092
|
outputs[output.OutputKey] = output.OutputValue;
|
|
1773
1093
|
}
|
|
1774
1094
|
|
|
1095
|
+
// If S3 buckets already existed (skipped creation), inject their names
|
|
1096
|
+
// into outputs so the profile config gets populated correctly.
|
|
1097
|
+
if (this._preExistingBuckets && this._preExistingBuckets.length > 0) {
|
|
1098
|
+
const bucketOutputMap = {
|
|
1099
|
+
'AsyncS3Bucket': 'AsyncS3BucketName',
|
|
1100
|
+
'BatchS3Bucket': 'BatchS3BucketName',
|
|
1101
|
+
'AdapterS3Bucket': 'AdapterS3BucketName',
|
|
1102
|
+
'BenchmarkS3Bucket': 'BenchmarkS3BucketName',
|
|
1103
|
+
'TuneS3Bucket': 'TuneS3BucketName'
|
|
1104
|
+
};
|
|
1105
|
+
for (const bucket of this._preExistingBuckets) {
|
|
1106
|
+
const outputKey = bucketOutputMap[bucket.logicalId];
|
|
1107
|
+
if (outputKey && !outputs[outputKey]) {
|
|
1108
|
+
outputs[outputKey] = bucket.name;
|
|
1109
|
+
}
|
|
1110
|
+
}
|
|
1111
|
+
this._preExistingBuckets = null;
|
|
1112
|
+
}
|
|
1113
|
+
|
|
1775
1114
|
return outputs;
|
|
1776
1115
|
}
|
|
1777
1116
|
|
|
1117
|
+
/**
|
|
1118
|
+
* Resolve stack conflicts before deploying.
|
|
1119
|
+
*
|
|
1120
|
+
* Handles two scenarios that cause ResourceExistenceCheck failures:
|
|
1121
|
+
* 1. Ghost stacks (REVIEW_IN_PROGRESS) — delete them first
|
|
1122
|
+
* 2. Pre-existing S3 buckets not managed by the stack — import them
|
|
1123
|
+
*
|
|
1124
|
+
* @param {string} stackName - CloudFormation stack name
|
|
1125
|
+
* @param {object} parameters - Stack parameter key-value pairs
|
|
1126
|
+
* @param {string} profile - AWS CLI profile name
|
|
1127
|
+
* @param {string} region - AWS region
|
|
1128
|
+
*/
|
|
1129
|
+
_resolveStackConflicts(stackName, parameters, profile, region) {
|
|
1130
|
+
// Check if stack exists and its status
|
|
1131
|
+
let stackStatus = null;
|
|
1132
|
+
let managedResources = [];
|
|
1133
|
+
|
|
1134
|
+
try {
|
|
1135
|
+
const describeResult = this._execAws(
|
|
1136
|
+
`cloudformation describe-stacks --stack-name ${stackName} --region ${region}`,
|
|
1137
|
+
profile
|
|
1138
|
+
);
|
|
1139
|
+
const stack = describeResult.Stacks && describeResult.Stacks[0];
|
|
1140
|
+
if (stack) {
|
|
1141
|
+
stackStatus = stack.StackStatus;
|
|
1142
|
+
}
|
|
1143
|
+
} catch (_) {
|
|
1144
|
+
// Stack doesn't exist — no conflicts possible
|
|
1145
|
+
return;
|
|
1146
|
+
}
|
|
1147
|
+
|
|
1148
|
+
// Handle ghost stacks (created but never successfully deployed)
|
|
1149
|
+
if (stackStatus === 'REVIEW_IN_PROGRESS') {
|
|
1150
|
+
console.log(' ⚠️ Found ghost stack (REVIEW_IN_PROGRESS) — deleting before redeploy...');
|
|
1151
|
+
try {
|
|
1152
|
+
execSync(
|
|
1153
|
+
`aws cloudformation delete-stack --stack-name ${stackName} --profile ${profile} --region ${region}`,
|
|
1154
|
+
{ encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }
|
|
1155
|
+
);
|
|
1156
|
+
execSync(
|
|
1157
|
+
`aws cloudformation wait stack-delete-complete --stack-name ${stackName} --profile ${profile} --region ${region}`,
|
|
1158
|
+
{ encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'], timeout: 60000 }
|
|
1159
|
+
);
|
|
1160
|
+
console.log(' ✅ Ghost stack deleted');
|
|
1161
|
+
} catch (err) {
|
|
1162
|
+
console.log(` ⚠️ Could not delete ghost stack: ${err.message || err}`);
|
|
1163
|
+
}
|
|
1164
|
+
// Don't return — fall through to check for pre-existing S3 buckets
|
|
1165
|
+
// that need to be imported on the fresh deploy. The ghost stack had
|
|
1166
|
+
// DeletionPolicy:Retain buckets that survive stack deletion.
|
|
1167
|
+
stackStatus = null;
|
|
1168
|
+
managedResources = [];
|
|
1169
|
+
}
|
|
1170
|
+
|
|
1171
|
+
// For active stacks (or post-ghost-deletion), check if S3 buckets exist but aren't managed
|
|
1172
|
+
if (parameters.CreateS3Buckets !== 'true') {
|
|
1173
|
+
return; // Not creating buckets — no conflict
|
|
1174
|
+
}
|
|
1175
|
+
|
|
1176
|
+
// Get list of resources currently managed by the stack (empty if stack was just deleted)
|
|
1177
|
+
if (stackStatus) {
|
|
1178
|
+
try {
|
|
1179
|
+
const resources = this._execAws(
|
|
1180
|
+
`cloudformation list-stack-resources --stack-name ${stackName} --region ${region}`,
|
|
1181
|
+
profile
|
|
1182
|
+
);
|
|
1183
|
+
managedResources = (resources.StackResourceSummaries || [])
|
|
1184
|
+
.map(r => r.LogicalResourceId);
|
|
1185
|
+
} catch (_) {
|
|
1186
|
+
// Stack doesn't exist or can't be queried — proceed with empty managedResources
|
|
1187
|
+
}
|
|
1188
|
+
}
|
|
1189
|
+
|
|
1190
|
+
// Check each S3 bucket that the template would create
|
|
1191
|
+
const accountId = this._currentAccountId;
|
|
1192
|
+
const bucketConfigs = [
|
|
1193
|
+
{ logicalId: 'AsyncS3Bucket', name: `mlcc-async-${accountId}-${region}` },
|
|
1194
|
+
{ logicalId: 'BatchS3Bucket', name: `mlcc-batch-${accountId}-${region}` },
|
|
1195
|
+
{ logicalId: 'AdapterS3Bucket', name: `mlcc-adapters-${accountId}-${region}` },
|
|
1196
|
+
{ logicalId: 'BenchmarkS3Bucket', name: `mlcc-benchmark-${accountId}-${region}` },
|
|
1197
|
+
{ logicalId: 'TuneS3Bucket', name: `mlcc-tune-${accountId}-${region}` }
|
|
1198
|
+
];
|
|
1199
|
+
|
|
1200
|
+
const bucketsToImport = [];
|
|
1201
|
+
|
|
1202
|
+
for (const bucket of bucketConfigs) {
|
|
1203
|
+
if (managedResources.includes(bucket.logicalId)) {
|
|
1204
|
+
continue; // Already managed by the stack — no conflict
|
|
1205
|
+
}
|
|
1206
|
+
// Check if bucket exists in AWS
|
|
1207
|
+
try {
|
|
1208
|
+
execSync(
|
|
1209
|
+
`aws s3api head-bucket --bucket ${bucket.name} --profile ${profile} --region ${region}`,
|
|
1210
|
+
{ encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }
|
|
1211
|
+
);
|
|
1212
|
+
// Bucket exists but not in stack — needs import
|
|
1213
|
+
bucketsToImport.push(bucket);
|
|
1214
|
+
} catch (_) {
|
|
1215
|
+
// Bucket doesn't exist — will be created normally
|
|
1216
|
+
}
|
|
1217
|
+
}
|
|
1218
|
+
|
|
1219
|
+
if (bucketsToImport.length > 0) {
|
|
1220
|
+
console.log(` ℹ️ ${bucketsToImport.length} pre-existing S3 bucket(s) detected — skipping S3 creation (buckets already exist)`);
|
|
1221
|
+
|
|
1222
|
+
// Pre-existing S3 buckets survive stack deletion (DeletionPolicy: Retain).
|
|
1223
|
+
// Rather than fighting CloudFormation's IMPORT limitations, just skip S3
|
|
1224
|
+
// creation and wire the existing bucket names into the profile config directly.
|
|
1225
|
+
// The naming convention is deterministic, so we know exactly what they are.
|
|
1226
|
+
this._preExistingBuckets = bucketsToImport;
|
|
1227
|
+
|
|
1228
|
+
// Modify the parameters to skip S3 bucket creation in the deploy
|
|
1229
|
+
parameters.CreateS3Buckets = 'false';
|
|
1230
|
+
}
|
|
1231
|
+
}
|
|
1232
|
+
|
|
1778
1233
|
/**
|
|
1779
1234
|
* Write a JSON object to a temp file and return the `file://` path.
|
|
1780
1235
|
* Used for passing complex JSON to AWS CLI commands without shell escaping issues.
|
|
@@ -1808,20 +1263,123 @@ export default class BootstrapCommandHandler {
|
|
|
1808
1263
|
}
|
|
1809
1264
|
}
|
|
1810
1265
|
|
|
1811
|
-
|
|
1266
|
+
/**
|
|
1267
|
+
* Get the AWS account ID from the caller's credentials.
|
|
1268
|
+
* Uses `sts get-caller-identity` to resolve the actual account.
|
|
1269
|
+
*
|
|
1270
|
+
* @param {string} awsProfile - AWS CLI profile name
|
|
1271
|
+
* @returns {string} The 12-digit AWS account ID
|
|
1272
|
+
*/
|
|
1273
|
+
_getCallerAccount(awsProfile) {
|
|
1274
|
+
const identity = this._execAws('sts get-caller-identity', awsProfile);
|
|
1275
|
+
return identity.Account;
|
|
1276
|
+
}
|
|
1277
|
+
|
|
1278
|
+
/**
|
|
1279
|
+
* Scan all profiles to find one with ciInfraProvisioned=true,
|
|
1280
|
+
* excluding the given profile name.
|
|
1281
|
+
*
|
|
1282
|
+
* @param {string} excludeProfile - Profile name to exclude from the scan
|
|
1283
|
+
* @returns {{ name: string, config: Object }|null} The CI profile, or null if none found
|
|
1284
|
+
*/
|
|
1285
|
+
_findExistingCiProfile(excludeProfile) {
|
|
1286
|
+
const config = this.config.read();
|
|
1287
|
+
if (!config || !config.profiles) return null;
|
|
1288
|
+
|
|
1289
|
+
for (const [name, profileConfig] of Object.entries(config.profiles)) {
|
|
1290
|
+
if (name === excludeProfile) continue;
|
|
1291
|
+
if (profileConfig.ciInfraProvisioned) {
|
|
1292
|
+
return { name, config: profileConfig };
|
|
1293
|
+
}
|
|
1294
|
+
}
|
|
1295
|
+
return null;
|
|
1296
|
+
}
|
|
1812
1297
|
|
|
1813
1298
|
/**
|
|
1814
|
-
*
|
|
1815
|
-
*
|
|
1299
|
+
* Ensure an MLCC-owned MLflow App exists for experiment tracking.
|
|
1300
|
+
* Creates one if it doesn't exist, using the tune S3 bucket as artifact store.
|
|
1301
|
+
*
|
|
1302
|
+
* @param {object} profileData - Bootstrap profile data (needs roleArn, awsRegion, accountId)
|
|
1303
|
+
* @param {string} awsProfile - AWS CLI profile name
|
|
1304
|
+
* @returns {string} ARN of the existing or newly created MLflow App; the creation error is rethrown if the app cannot be created
|
|
1816
1305
|
*/
|
|
1817
|
-
|
|
1818
|
-
const
|
|
1819
|
-
const
|
|
1820
|
-
|
|
1821
|
-
|
|
1822
|
-
|
|
1823
|
-
|
|
1824
|
-
|
|
1306
|
+
_ensureMlflowApp(profileData, awsProfile) {
|
|
1307
|
+
const region = profileData.awsRegion;
|
|
1308
|
+
const accountId = profileData.accountId;
|
|
1309
|
+
const roleArn = profileData.roleArn;
|
|
1310
|
+
const appName = 'mlcc-tune-tracking';
|
|
1311
|
+
const artifactBucket = `mlcc-tune-${accountId}-${region}`;
|
|
1312
|
+
|
|
1313
|
+
// Check if MLCC app already exists
|
|
1314
|
+
try {
|
|
1315
|
+
const apps = this._execAws(
|
|
1316
|
+
`sagemaker list-mlflow-apps --region ${region}`,
|
|
1317
|
+
awsProfile
|
|
1318
|
+
);
|
|
1319
|
+
const summaries = apps.Summaries || [];
|
|
1320
|
+
const existing = summaries.find(a => a.Name === appName);
|
|
1321
|
+
if (existing) {
|
|
1322
|
+
return existing.Arn;
|
|
1323
|
+
}
|
|
1324
|
+
} catch {
|
|
1325
|
+
// list-mlflow-apps may not be available in all CLI versions — proceed to create
|
|
1326
|
+
}
|
|
1327
|
+
|
|
1328
|
+
// Create the MLflow App
|
|
1329
|
+
console.log(` Creating MLflow App "${appName}" with artifact store s3://${artifactBucket}...`);
|
|
1330
|
+
|
|
1331
|
+
// Ensure the artifact bucket exists (it's the tune bucket from the stack)
|
|
1332
|
+
try {
|
|
1333
|
+
this._execAws(
|
|
1334
|
+
`s3api head-bucket --bucket ${artifactBucket} --region ${region}`,
|
|
1335
|
+
awsProfile
|
|
1336
|
+
);
|
|
1337
|
+
} catch {
|
|
1338
|
+
// Bucket doesn't exist — create it
|
|
1339
|
+
console.log(` Creating artifact bucket: ${artifactBucket}`);
|
|
1340
|
+
try {
|
|
1341
|
+
this._execAws(
|
|
1342
|
+
`s3api create-bucket --bucket ${artifactBucket} --region ${region} --create-bucket-configuration LocationConstraint=${region}`,
|
|
1343
|
+
awsProfile
|
|
1344
|
+
);
|
|
1345
|
+
} catch (bucketErr) {
|
|
1346
|
+
// May already exist or region doesn't need LocationConstraint (us-east-1)
|
|
1347
|
+
if (!bucketErr.message?.includes('BucketAlreadyOwnedByYou')) {
|
|
1348
|
+
try {
|
|
1349
|
+
this._execAws(
|
|
1350
|
+
`s3api create-bucket --bucket ${artifactBucket} --region ${region}`,
|
|
1351
|
+
awsProfile
|
|
1352
|
+
);
|
|
1353
|
+
} catch {
|
|
1354
|
+
// Bucket likely exists, continue
|
|
1355
|
+
}
|
|
1356
|
+
}
|
|
1357
|
+
}
|
|
1358
|
+
}
|
|
1359
|
+
|
|
1360
|
+
// Create the app
|
|
1361
|
+
try {
|
|
1362
|
+
const result = this._execAws(
|
|
1363
|
+
`sagemaker create-mlflow-app --name ${appName} --artifact-store-uri s3://${artifactBucket} --role-arn ${roleArn} --model-registration-mode AutoModelRegistrationEnabled --region ${region}`,
|
|
1364
|
+
awsProfile
|
|
1365
|
+
);
|
|
1366
|
+
return result.Arn;
|
|
1367
|
+
} catch (err) {
|
|
1368
|
+
// If app already exists (race condition), try to describe it
|
|
1369
|
+
if (err.message?.includes('ResourceLimitExceeded') || err.message?.includes('already exists')) {
|
|
1370
|
+
try {
|
|
1371
|
+
const apps = this._execAws(
|
|
1372
|
+
`sagemaker list-mlflow-apps --region ${region}`,
|
|
1373
|
+
awsProfile
|
|
1374
|
+
);
|
|
1375
|
+
const found = (apps.Summaries || []).find(a => a.Name === appName);
|
|
1376
|
+
if (found) return found.Arn;
|
|
1377
|
+
} catch {
|
|
1378
|
+
// Fall through
|
|
1379
|
+
}
|
|
1380
|
+
}
|
|
1381
|
+
throw err;
|
|
1382
|
+
}
|
|
1825
1383
|
}
|
|
1826
1384
|
|
|
1827
1385
|
/**
|
|
@@ -1861,6 +1419,8 @@ SUBCOMMANDS:
|
|
|
1861
1419
|
scan Discover pre-existing MLCC-managed resources in AWS
|
|
1862
1420
|
prune Remove deleted and unknown records from the deployment manifest
|
|
1863
1421
|
update Re-deploy bootstrap stacks using active profile (no prompts)
|
|
1422
|
+
migrate Upgrade legacy profiles to current naming conventions
|
|
1423
|
+
sync-model-families Discover tune-eligible models from JumpStart Hub and update catalog
|
|
1864
1424
|
|
|
1865
1425
|
SETUP OPTIONS:
|
|
1866
1426
|
--non-interactive Run without interactive prompts
|
|
@@ -1889,6 +1449,8 @@ EXAMPLES:
|
|
|
1889
1449
|
ml-container-creator bootstrap remove dev
|
|
1890
1450
|
ml-container-creator bootstrap remove dev --force --delete-stack
|
|
1891
1451
|
ml-container-creator bootstrap scan
|
|
1452
|
+
ml-container-creator bootstrap sync-model-families
|
|
1453
|
+
ml-container-creator bootstrap migrate
|
|
1892
1454
|
ml-container-creator bootstrap --non-interactive --profile my-aws-profile --region us-west-2
|
|
1893
1455
|
ml-container-creator bootstrap --non-interactive --profile my-aws-profile --role-arn arn:aws:iam::123456789012:role/MyRole --skip-s3
|
|
1894
1456
|
ml-container-creator bootstrap --non-interactive --profile my-aws-profile --region us-west-2 --ci
|