@aws/ml-container-creator 0.10.3 → 0.13.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/parameter-schema-v2.json +28 -1
- package/infra/ci-harness/lib/ci-harness-stack.ts +50 -36
- package/package.json +14 -5
- package/servers/instance-sizer/index.js +30 -17
- package/servers/instance-sizer/lib/instance-ranker.js +44 -0
- package/servers/lib/catalogs/instances.json +27 -0
- package/src/app.js +22 -1
- package/src/lib/bootstrap-command-handler.js +32 -3
- package/src/lib/config-validator.js +1 -1
- package/src/lib/generated/cli-options.js +7 -2
- package/src/lib/generated/parameter-matrix.js +16 -5
- package/src/lib/generated/validation-rules.js +7 -3
- package/src/lib/path-prover-brain.js +58 -1
- package/src/lib/prompts/infrastructure-prompts.js +2 -2
- package/src/lib/prompts/model-prompts.js +6 -0
- package/src/lib/prove-pipeline-executor.js +294 -0
- package/src/lib/secrets-prompt-runner.js +4 -0
- package/src/lib/template-manager.js +1 -1
- package/src/lib/template-variable-resolver.js +62 -0
- package/templates/do/README.md +37 -0
- package/templates/do/adapter +8 -0
- package/templates/do/build +8 -0
- package/templates/do/clean.d/async-inference.ejs +8 -0
- package/templates/do/clean.d/batch-transform.ejs +8 -0
- package/templates/do/clean.d/hyperpod-eks.ejs +8 -0
- package/templates/do/clean.d/managed-inference.ejs +8 -0
- package/templates/do/config +12 -45
- package/templates/do/deploy.d/async-inference.ejs +33 -3
- package/templates/do/deploy.d/batch-transform.ejs +32 -3
- package/templates/do/deploy.d/hyperpod-eks.ejs +7 -0
- package/templates/do/deploy.d/managed-inference.ejs +27 -3
- package/templates/do/lib/endpoint-config.sh +1 -1
- package/templates/do/lib/profile.sh +44 -0
- package/templates/do/lib/staged-assets.sh +217 -0
- package/templates/do/push +8 -0
- package/templates/do/register +8 -0
- package/templates/do/stage +569 -0
- package/templates/do/submit +10 -0
- package/templates/do/test +1 -0
- package/templates/do/tune +7 -0
|
@@ -174,7 +174,7 @@
|
|
|
174
174
|
"configKey": "instanceType",
|
|
175
175
|
"default": null,
|
|
176
176
|
"validation": {
|
|
177
|
-
"pattern": "^ml\\.[a-z0-9]+\\.[a-z0-9]+$"
|
|
177
|
+
"pattern": "^ml\\.[a-z0-9-]+\\.[a-z0-9]+$"
|
|
178
178
|
},
|
|
179
179
|
"phase": "infrastructure",
|
|
180
180
|
"group": "infrastructure",
|
|
@@ -1238,6 +1238,33 @@
|
|
|
1238
1238
|
"deprecated": false,
|
|
1239
1239
|
"since": "0.4.0"
|
|
1240
1240
|
},
|
|
1241
|
+
"capacityReservationArn": {
|
|
1242
|
+
"type": "string",
|
|
1243
|
+
"description": "Capacity reservation ARN (FTP or ODCR) for reserved instance deployment",
|
|
1244
|
+
"cliFlag": "--capacity-reservation-arn",
|
|
1245
|
+
"cliArgName": "arn",
|
|
1246
|
+
"envVar": "ML_CAPACITY_RESERVATION_ARN",
|
|
1247
|
+
"templateVar": "capacityReservationArn",
|
|
1248
|
+
"configKey": "capacityReservationArn",
|
|
1249
|
+
"default": null,
|
|
1250
|
+
"validation": {
|
|
1251
|
+
"pattern": "^arn:aws:sagemaker:"
|
|
1252
|
+
},
|
|
1253
|
+
"phase": "infrastructure",
|
|
1254
|
+
"group": "endpoint",
|
|
1255
|
+
"appliesTo": {
|
|
1256
|
+
"deploymentTargets": [
|
|
1257
|
+
"managed-inference"
|
|
1258
|
+
],
|
|
1259
|
+
"architectures": [
|
|
1260
|
+
"*"
|
|
1261
|
+
]
|
|
1262
|
+
},
|
|
1263
|
+
"widget": null,
|
|
1264
|
+
"prompt": null,
|
|
1265
|
+
"deprecated": false,
|
|
1266
|
+
"since": "0.11.0"
|
|
1267
|
+
},
|
|
1241
1268
|
"icCpuCount": {
|
|
1242
1269
|
"type": "number",
|
|
1243
1270
|
"description": "vCPUs allocated to the inference component",
|
|
@@ -983,43 +983,57 @@ export class MlccCiHarnessStack extends cdk.Stack {
|
|
|
983
983
|
},
|
|
984
984
|
storageDescriptor: {
|
|
985
985
|
columns: [
|
|
986
|
-
//
|
|
987
|
-
{ name: '
|
|
988
|
-
{ name: 'model_name', type: 'string', comment: 'HuggingFace model ID
|
|
989
|
-
{ name: 'model_family', type: 'string', comment: 'Derived: qwen3, llama3, deepseek-r1
|
|
990
|
-
{ name: 'instance_type', type: 'string', comment: 'SageMaker instance
|
|
991
|
-
{ name: '
|
|
992
|
-
{ name: '
|
|
993
|
-
{ name: '
|
|
994
|
-
{ name: '
|
|
995
|
-
|
|
996
|
-
{ name: '
|
|
997
|
-
{ name: '
|
|
998
|
-
{ name: '
|
|
999
|
-
{ name: '
|
|
1000
|
-
{ name: '
|
|
1001
|
-
{ name: '
|
|
1002
|
-
//
|
|
1003
|
-
{ name: '
|
|
1004
|
-
{ name: '
|
|
1005
|
-
{ name: '
|
|
1006
|
-
{ name: '
|
|
1007
|
-
//
|
|
1008
|
-
{ name: '
|
|
1009
|
-
{ name: '
|
|
1010
|
-
{ name: '
|
|
1011
|
-
{ name: '
|
|
1012
|
-
{ name: '
|
|
1013
|
-
{ name: '
|
|
1014
|
-
{ name: '
|
|
1015
|
-
{ name: '
|
|
1016
|
-
{ name: '
|
|
986
|
+
// Identity & config (matches Parquet writer get_parquet_schema() exactly)
|
|
987
|
+
{ name: 'project_name', type: 'string', comment: 'MCC project name' },
|
|
988
|
+
{ name: 'model_name', type: 'string', comment: 'HuggingFace model ID' },
|
|
989
|
+
{ name: 'model_family', type: 'string', comment: 'Derived: qwen3, llama3, deepseek-r1' },
|
|
990
|
+
{ name: 'instance_type', type: 'string', comment: 'SageMaker instance type' },
|
|
991
|
+
{ name: 'deployment_config', type: 'string', comment: 'Architecture-backend' },
|
|
992
|
+
{ name: 'deployment_target', type: 'string', comment: 'Deployment target' },
|
|
993
|
+
{ name: 'quantization', type: 'string', comment: 'none, fp8, awq, gptq' },
|
|
994
|
+
{ name: 'tensor_parallel_degree', type: 'int', comment: 'TP degree' },
|
|
995
|
+
{ name: 'serving_config', type: 'string', comment: 'Full serving config JSON blob' },
|
|
996
|
+
{ name: 'workload', type: 'string', comment: 'Named workload profile' },
|
|
997
|
+
{ name: 'concurrency', type: 'int', comment: 'Concurrent requests' },
|
|
998
|
+
{ name: 'input_tokens_mean', type: 'int', comment: 'Mean input tokens' },
|
|
999
|
+
{ name: 'output_tokens_mean', type: 'int', comment: 'Mean output tokens' },
|
|
1000
|
+
{ name: 'streaming', type: 'boolean', comment: 'Streaming enabled' },
|
|
1001
|
+
{ name: 'duration_seconds', type: 'int', comment: 'Duration in seconds' },
|
|
1002
|
+
// Throughput metrics
|
|
1003
|
+
{ name: 'request_throughput_rps', type: 'double', comment: 'Requests/sec' },
|
|
1004
|
+
{ name: 'total_token_throughput_tps', type: 'double', comment: 'Total tokens/sec (in+out)' },
|
|
1005
|
+
{ name: 'output_token_throughput_tps', type: 'double', comment: 'Output tokens/sec' },
|
|
1006
|
+
{ name: 'request_count', type: 'double', comment: 'Total requests in run' },
|
|
1007
|
+
// Latency metrics (avg/p50/p90/p99)
|
|
1008
|
+
{ name: 'ttft_avg_ms', type: 'double', comment: 'TTFT average (ms)' },
|
|
1009
|
+
{ name: 'ttft_p50_ms', type: 'double', comment: 'TTFT p50 (ms)' },
|
|
1010
|
+
{ name: 'ttft_p90_ms', type: 'double', comment: 'TTFT p90 (ms)' },
|
|
1011
|
+
{ name: 'ttft_p99_ms', type: 'double', comment: 'TTFT p99 (ms)' },
|
|
1012
|
+
{ name: 'itl_avg_ms', type: 'double', comment: 'ITL average (ms)' },
|
|
1013
|
+
{ name: 'itl_p50_ms', type: 'double', comment: 'ITL p50 (ms)' },
|
|
1014
|
+
{ name: 'itl_p90_ms', type: 'double', comment: 'ITL p90 (ms)' },
|
|
1015
|
+
{ name: 'itl_p99_ms', type: 'double', comment: 'ITL p99 (ms)' },
|
|
1016
|
+
{ name: 'e2e_latency_avg_ms', type: 'double', comment: 'E2E latency average (ms)' },
|
|
1017
|
+
{ name: 'e2e_latency_p50_ms', type: 'double', comment: 'E2E latency p50 (ms)' },
|
|
1018
|
+
{ name: 'e2e_latency_p90_ms', type: 'double', comment: 'E2E latency p90 (ms)' },
|
|
1019
|
+
{ name: 'e2e_latency_p99_ms', type: 'double', comment: 'E2E latency p99 (ms)' },
|
|
1020
|
+
{ name: 'prefill_tps_avg', type: 'double', comment: 'Prefill throughput avg (tokens/sec)' },
|
|
1021
|
+
{ name: 'prefill_tps_p50', type: 'double', comment: 'Prefill throughput p50' },
|
|
1022
|
+
{ name: 'output_token_tps_avg', type: 'double', comment: 'Per-user output TPS avg' },
|
|
1023
|
+
{ name: 'output_token_tps_p50', type: 'double', comment: 'Per-user output TPS p50' },
|
|
1024
|
+
{ name: 'output_token_tps_p90', type: 'double', comment: 'Per-user output TPS p90' },
|
|
1025
|
+
{ name: 'ttst_p50_ms', type: 'double', comment: 'Time to second token p50 (ms)' },
|
|
1026
|
+
{ name: 'ttst_p90_ms', type: 'double', comment: 'Time to second token p90 (ms)' },
|
|
1027
|
+
{ name: 'output_sequence_length_avg', type: 'double', comment: 'Avg output sequence length' },
|
|
1028
|
+
{ name: 'input_sequence_length_avg', type: 'double', comment: 'Avg input sequence length' },
|
|
1029
|
+
{ name: 'error_rate', type: 'double', comment: 'Error rate (0.0-1.0)' },
|
|
1030
|
+
{ name: 'benchmark_duration_sec', type: 'double', comment: 'Wall-clock duration (sec)' },
|
|
1017
1031
|
// Provenance
|
|
1018
|
-
{ name: 'run_type', type: 'string', comment: '
|
|
1019
|
-
{ name: '
|
|
1020
|
-
{ name: '
|
|
1021
|
-
{ name: '
|
|
1022
|
-
{ name: '
|
|
1032
|
+
{ name: 'run_type', type: 'string', comment: 'ci, path_prove, manual' },
|
|
1033
|
+
{ name: 'benchmark_job_name', type: 'string', comment: 'SageMaker benchmark job name' },
|
|
1034
|
+
{ name: 'mcc_version', type: 'string', comment: 'MCC version' },
|
|
1035
|
+
{ name: 'run_timestamp', type: 'string', comment: 'ISO 8601 UTC timestamp' },
|
|
1036
|
+
{ name: 'region', type: 'string', comment: 'AWS region' },
|
|
1023
1037
|
],
|
|
1024
1038
|
location: `s3://mlcc-benchmark-results-${this.account}-${this.region}/results/`,
|
|
1025
1039
|
inputFormat: 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat',
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aws/ml-container-creator",
|
|
3
|
-
"version": "0.10.3",
|
|
3
|
+
"version": "0.13.3",
|
|
4
4
|
"description": "Build and deploy custom ML containers on AWS SageMaker with minimal configuration.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -85,8 +85,8 @@
|
|
|
85
85
|
"npm": ">=11.6.2"
|
|
86
86
|
},
|
|
87
87
|
"scripts": {
|
|
88
|
-
"test": "mocha 'test/**/*.test.js' --recursive --timeout 30000",
|
|
89
|
-
"test:property": "mocha 'test/property/**/*.test.js' --recursive --timeout 60000",
|
|
88
|
+
"test": "mocha 'test/**/*.test.js' --ignore 'test/property/**' --recursive --timeout 30000 --parallel",
|
|
89
|
+
"test:property": "mocha 'test/property/**/*.test.js' --recursive --timeout 60000 --parallel",
|
|
90
90
|
"test:all": "npm run test && npm run test:property",
|
|
91
91
|
"test:fast": "mocha 'test/**/*.test.js' --recursive --timeout 15000 --parallel",
|
|
92
92
|
"test:unit": "mocha 'test/unit/**/*.test.js' --recursive --timeout 15000",
|
|
@@ -98,10 +98,11 @@
|
|
|
98
98
|
"test:perf": "node scripts/analyze-test-performance.js",
|
|
99
99
|
"lint": "eslint src/ servers/ bin/ --ext .js,.cjs,.mjs",
|
|
100
100
|
"lint:fix": "eslint src/ servers/ bin/ --ext .js,.cjs,.mjs --fix",
|
|
101
|
-
"codegen": "node scripts/codegen-cli.js && node scripts/codegen-validator.js && node scripts/codegen-widget.js && node scripts/codegen-parameter-matrix.js",
|
|
101
|
+
"codegen": "node scripts/codegen-cli.js && node scripts/codegen-validator.js && node scripts/codegen-widget.js && node scripts/codegen-parameter-matrix.js && eslint src/lib/generated/ --fix",
|
|
102
102
|
"validate:doc-commands": "node scripts/validate-docs-commands.js",
|
|
103
103
|
"sbom": "sbom --format spdx --output sbom.json",
|
|
104
|
-
"prepublishOnly": "npm run lint && npm run test:all"
|
|
104
|
+
"prepublishOnly": "npm run lint && npm run test:all",
|
|
105
|
+
"prepare": "husky || true"
|
|
105
106
|
},
|
|
106
107
|
"dependencies": {
|
|
107
108
|
"@inquirer/prompts": "^8.4.2",
|
|
@@ -119,11 +120,19 @@
|
|
|
119
120
|
"@aws-sdk/client-service-quotas": "^3.700.0",
|
|
120
121
|
"@microsoft/eslint-formatter-sarif": "^3.1.0",
|
|
121
122
|
"eslint": "^8.57.0",
|
|
123
|
+
"eslint-plugin-property-test-rules": "file:eslint-rules",
|
|
122
124
|
"fast-check": "^4.5.2",
|
|
125
|
+
"husky": "^9.1.7",
|
|
123
126
|
"license-report": "^6.8.0",
|
|
127
|
+
"lint-staged": "^17.0.7",
|
|
124
128
|
"mocha": "^10.2.0",
|
|
125
129
|
"npm-force-resolutions": "^0.0.10",
|
|
126
130
|
"nyc": "^15.1.0",
|
|
127
131
|
"sbom": "^0.0.0"
|
|
132
|
+
},
|
|
133
|
+
"lint-staged": {
|
|
134
|
+
"*.js": [
|
|
135
|
+
"eslint --fix --quiet --max-warnings 0"
|
|
136
|
+
]
|
|
128
137
|
}
|
|
129
138
|
}
|
|
@@ -327,31 +327,44 @@ async function handleGetInstanceRecommendation(params) {
|
|
|
327
327
|
// If model metadata cannot be resolved, return all GPU instances unfiltered
|
|
328
328
|
if (!modelMetadata) {
|
|
329
329
|
log(`Model metadata not found for "${modelName}", returning unfiltered GPU instances`);
|
|
330
|
-
|
|
330
|
+
let unfilteredRecs = Object.keys(effectiveCatalog)
|
|
331
331
|
.filter(key => effectiveCatalog[key].category === 'gpu')
|
|
332
|
-
.slice(0, limit)
|
|
332
|
+
.slice(0, limit)
|
|
333
|
+
.map(instanceType => ({
|
|
334
|
+
instanceType,
|
|
335
|
+
gpuCount: effectiveCatalog[instanceType]?.gpus || 0,
|
|
336
|
+
totalVramGb: null,
|
|
337
|
+
utilizationPercent: null,
|
|
338
|
+
tensorParallelism: null,
|
|
339
|
+
costTier: null
|
|
340
|
+
}));
|
|
341
|
+
|
|
342
|
+
// Still apply availability ranking so quota/FTP info is displayed
|
|
343
|
+
if (DISCOVER_MODE && unfilteredRecs.length > 0) {
|
|
344
|
+
try {
|
|
345
|
+
const region = process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION || BEDROCK_REGION;
|
|
346
|
+
const quotaResolver = new QuotaResolver(region);
|
|
347
|
+
const instanceTypes = unfilteredRecs.map(r => r.instanceType);
|
|
348
|
+
const [quotas, reservations, ftps] = await Promise.allSettled([
|
|
349
|
+
quotaResolver.getQuotaHeadroom(instanceTypes),
|
|
350
|
+
quotaResolver.getCapacityReservations(),
|
|
351
|
+
quotaResolver.getTrainingPlans()
|
|
352
|
+
]);
|
|
353
|
+
unfilteredRecs = applyAvailabilityRanking(unfilteredRecs, quotas.status === 'fulfilled' ? quotas.value : null, reservations.status === 'fulfilled' ? reservations.value : null, ftps.status === 'fulfilled' ? ftps.value : null);
|
|
354
|
+
} catch (err) {
|
|
355
|
+
log(`Quota resolution skipped (unfiltered path): ${err.message}`);
|
|
356
|
+
}
|
|
357
|
+
}
|
|
333
358
|
|
|
334
359
|
return {
|
|
335
360
|
content: [{
|
|
336
361
|
type: 'text',
|
|
337
362
|
text: JSON.stringify({
|
|
338
|
-
values: { instanceType:
|
|
339
|
-
choices: { instanceType:
|
|
363
|
+
values: { instanceType: unfilteredRecs[0]?.instanceType || null },
|
|
364
|
+
choices: { instanceType: unfilteredRecs.map(r => r.instanceType) },
|
|
340
365
|
metadata: {
|
|
341
366
|
modelName,
|
|
342
|
-
|
|
343
|
-
dtype: null,
|
|
344
|
-
quantization: quantization || null,
|
|
345
|
-
estimatedVramGb: null,
|
|
346
|
-
vramBreakdown: null,
|
|
347
|
-
recommendations: allGpuInstances.map(instanceType => ({
|
|
348
|
-
instanceType,
|
|
349
|
-
gpuCount: effectiveCatalog[instanceType]?.gpus || 0,
|
|
350
|
-
totalVramGb: null,
|
|
351
|
-
utilizationPercent: null,
|
|
352
|
-
tensorParallelism: null,
|
|
353
|
-
costTier: null
|
|
354
|
-
})),
|
|
367
|
+
recommendations: unfilteredRecs,
|
|
355
368
|
source: 'unfiltered',
|
|
356
369
|
cudaVersionFilter: cudaVersion || null,
|
|
357
370
|
warning: `Could not resolve model metadata for "${modelName}". Returning all GPU instances without filtering.`
|
|
@@ -343,6 +343,50 @@ const applyAvailabilityRanking = (recommendations, quotas, reservations, ftps) =
|
|
|
343
343
|
}
|
|
344
344
|
}
|
|
345
345
|
|
|
346
|
+
// Inject FTP/reserved instances that aren't already in the recommendation list.
|
|
347
|
+
// These instances may not be in the static catalog (e.g., ml.p6-b200.48xlarge)
|
|
348
|
+
// but are available via capacity reservation — always surface them.
|
|
349
|
+
const existingTypes = new Set(recommendations.map(r => r.instanceType));
|
|
350
|
+
|
|
351
|
+
if (reservations) {
|
|
352
|
+
for (const [instanceType, info] of reservations) {
|
|
353
|
+
if (!existingTypes.has(instanceType)) {
|
|
354
|
+
recommendations.push({
|
|
355
|
+
instanceType,
|
|
356
|
+
capacityType: 'reserved',
|
|
357
|
+
reservationInfo: info,
|
|
358
|
+
reservationType: 'training-plan',
|
|
359
|
+
quotaStatus: 'available',
|
|
360
|
+
gpuCount: null,
|
|
361
|
+
totalVramGb: null,
|
|
362
|
+
utilizationPercent: null,
|
|
363
|
+
tensorParallelism: null,
|
|
364
|
+
costTier: null,
|
|
365
|
+
injectedFromReservation: true
|
|
366
|
+
});
|
|
367
|
+
}
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
if (ftps) {
|
|
372
|
+
for (const [instanceType, info] of ftps) {
|
|
373
|
+
if (!existingTypes.has(instanceType)) {
|
|
374
|
+
recommendations.push({
|
|
375
|
+
instanceType,
|
|
376
|
+
capacityType: 'ftp',
|
|
377
|
+
ftpInfo: info,
|
|
378
|
+
quotaStatus: 'available',
|
|
379
|
+
gpuCount: null,
|
|
380
|
+
totalVramGb: null,
|
|
381
|
+
utilizationPercent: null,
|
|
382
|
+
tensorParallelism: null,
|
|
383
|
+
costTier: null,
|
|
384
|
+
injectedFromFtp: true
|
|
385
|
+
});
|
|
386
|
+
}
|
|
387
|
+
}
|
|
388
|
+
}
|
|
389
|
+
|
|
346
390
|
// Filter out zero-quota instances (but never filter reserved/FTP — you have the capacity)
|
|
347
391
|
const filtered = recommendations.filter(r =>
|
|
348
392
|
r.quotaStatus !== 'zero-quota' || r.capacityType === 'reserved' || r.capacityType === 'ftp'
|
|
@@ -228,6 +228,33 @@
|
|
|
228
228
|
"gpuMemoryGb": 24,
|
|
229
229
|
"gpuType": "NVIDIA A10G",
|
|
230
230
|
"costTier": "medium"
|
|
231
|
+
},
|
|
232
|
+
"ml.p6-b200.48xlarge": {
|
|
233
|
+
"category": "gpu",
|
|
234
|
+
"gpus": 8,
|
|
235
|
+
"vcpus": 192,
|
|
236
|
+
"memGb": 1536,
|
|
237
|
+
"accelerator": "8x B200 1440GB",
|
|
238
|
+
"cudaVersions": [
|
|
239
|
+
"12.4",
|
|
240
|
+
"12.6"
|
|
241
|
+
],
|
|
242
|
+
"tags": [
|
|
243
|
+
"gpu",
|
|
244
|
+
"multi-gpu",
|
|
245
|
+
"b200",
|
|
246
|
+
"cuda-12",
|
|
247
|
+
"high-performance"
|
|
248
|
+
],
|
|
249
|
+
"family": "p6",
|
|
250
|
+
"acceleratorType": "cuda",
|
|
251
|
+
"hardware": "NVIDIA B200",
|
|
252
|
+
"gpuArchitecture": "Blackwell",
|
|
253
|
+
"defaultCudaVersion": "12.6",
|
|
254
|
+
"notes": "8x NVIDIA B200 GPUs (1440GB total). Next-gen Blackwell architecture",
|
|
255
|
+
"gpuMemoryGb": 180,
|
|
256
|
+
"gpuType": "NVIDIA B200",
|
|
257
|
+
"costTier": "high"
|
|
231
258
|
}
|
|
232
259
|
},
|
|
233
260
|
"recommendations": {
|
package/src/app.js
CHANGED
|
@@ -377,6 +377,12 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
|
|
|
377
377
|
ignorePatterns.push('**/do/lib/feedback.sh');
|
|
378
378
|
}
|
|
379
379
|
|
|
380
|
+
// Exclude do/stage when model is already S3-sourced (nothing to stage)
|
|
381
|
+
const modelName = answers.modelName || answers.customModelName || '';
|
|
382
|
+
if (answers.modelSource === 's3' || modelName.startsWith('s3://')) {
|
|
383
|
+
ignorePatterns.push('**/do/stage');
|
|
384
|
+
}
|
|
385
|
+
|
|
380
386
|
// Exclude do/test when hosted-model-endpoint is not selected
|
|
381
387
|
const testTypes = answers.testTypes || [];
|
|
382
388
|
if (!testTypes.includes('hosted-model-endpoint')) {
|
|
@@ -556,6 +562,20 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
|
|
|
556
562
|
fs.writeFileSync(gitignorePath, gitignoreContent);
|
|
557
563
|
}
|
|
558
564
|
}
|
|
565
|
+
|
|
566
|
+
// Add .mlcc/ to .gitignore (staged-assets tracking — account-specific URIs)
|
|
567
|
+
{
|
|
568
|
+
const gitignorePath = path.join(destDir, '.gitignore');
|
|
569
|
+
const mlccIgnore = '# Staged assets tracking (account-specific, generated by do/stage)\n.mlcc/\n';
|
|
570
|
+
if (fs.existsSync(gitignorePath)) {
|
|
571
|
+
const existing = fs.readFileSync(gitignorePath, 'utf8');
|
|
572
|
+
if (!existing.includes('.mlcc/')) {
|
|
573
|
+
fs.appendFileSync(gitignorePath, `\n${mlccIgnore}`);
|
|
574
|
+
}
|
|
575
|
+
} else {
|
|
576
|
+
fs.writeFileSync(gitignorePath, mlccIgnore);
|
|
577
|
+
}
|
|
578
|
+
}
|
|
559
579
|
}
|
|
560
580
|
|
|
561
581
|
/**
|
|
@@ -818,7 +838,8 @@ function _setExecutablePermissions(destDir, answers = {}) {
|
|
|
818
838
|
'do/add-ic',
|
|
819
839
|
'do/adapter',
|
|
820
840
|
'do/tune',
|
|
821
|
-
'do/train'
|
|
841
|
+
'do/train',
|
|
842
|
+
'do/stage'
|
|
822
843
|
];
|
|
823
844
|
|
|
824
845
|
const shellScripts = architecture === 'marketplace' ? marketplaceScripts : defaultScripts;
|
|
@@ -459,8 +459,22 @@ export default class BootstrapCommandHandler {
|
|
|
459
459
|
|
|
460
460
|
// --no-rollback prevents rollback on AlreadyExists errors for IAM roles
|
|
461
461
|
// that may pre-exist from a prior deployment or another region.
|
|
462
|
+
// Check if benchmark bucket already exists (from a prior torn-down stack with RETAIN policy)
|
|
463
|
+
let importBucketCtx = '';
|
|
464
|
+
if (options.benchmarkInfra) {
|
|
465
|
+
try {
|
|
466
|
+
execSync(
|
|
467
|
+
`aws s3api head-bucket --bucket mlcc-benchmark-results-${profileData.accountId}-${profileData.awsRegion}${profileData.awsProfile ? ` --profile ${profileData.awsProfile}` : ''} --region ${profileData.awsRegion}`,
|
|
468
|
+
{ encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }
|
|
469
|
+
);
|
|
470
|
+
importBucketCtx = ' -c importExistingBenchmarkBucket=true';
|
|
471
|
+
console.log(' ℹ️ Benchmark results bucket already exists — importing into stack');
|
|
472
|
+
} catch {
|
|
473
|
+
// Bucket doesn't exist — will be created fresh
|
|
474
|
+
}
|
|
475
|
+
}
|
|
462
476
|
const cdkDeployCmd = options.benchmarkInfra
|
|
463
|
-
?
|
|
477
|
+
? `npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback --parameters MlccCiHarnessStack:CreateBenchmarkInfra=true${importBucketCtx}`
|
|
464
478
|
: 'npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback';
|
|
465
479
|
execSync(
|
|
466
480
|
cdkDeployCmd,
|
|
@@ -634,8 +648,22 @@ export default class BootstrapCommandHandler {
|
|
|
634
648
|
|
|
635
649
|
// --no-rollback prevents rollback on AlreadyExists errors for IAM roles
|
|
636
650
|
// that may pre-exist from a prior deployment or another region.
|
|
651
|
+
// Check if benchmark bucket already exists (from a prior torn-down stack with RETAIN policy)
|
|
652
|
+
let updateImportBucketCtx = '';
|
|
653
|
+
if (options.benchmarkInfra || profileConfig.benchmarkInfraProvisioned) {
|
|
654
|
+
try {
|
|
655
|
+
execSync(
|
|
656
|
+
`aws s3api head-bucket --bucket mlcc-benchmark-results-${profileConfig.accountId}-${profileConfig.awsRegion}${profileConfig.awsProfile ? ` --profile ${profileConfig.awsProfile}` : ''} --region ${profileConfig.awsRegion}`,
|
|
657
|
+
{ encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }
|
|
658
|
+
);
|
|
659
|
+
updateImportBucketCtx = ' -c importExistingBenchmarkBucket=true';
|
|
660
|
+
console.log(' ℹ️ Benchmark results bucket already exists — importing into stack');
|
|
661
|
+
} catch {
|
|
662
|
+
// Bucket doesn't exist — will be created fresh
|
|
663
|
+
}
|
|
664
|
+
}
|
|
637
665
|
const updateCdkCmd = (options.benchmarkInfra || profileConfig.benchmarkInfraProvisioned)
|
|
638
|
-
?
|
|
666
|
+
? `npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback --parameters MlccCiHarnessStack:CreateBenchmarkInfra=true${updateImportBucketCtx}`
|
|
639
667
|
: 'npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback';
|
|
640
668
|
execSync(
|
|
641
669
|
updateCdkCmd,
|
|
@@ -989,7 +1017,8 @@ export default class BootstrapCommandHandler {
|
|
|
989
1017
|
* @returns {object} Parsed JSON output
|
|
990
1018
|
*/
|
|
991
1019
|
_execAws(command, profile) {
|
|
992
|
-
const
|
|
1020
|
+
const profileFlag = profile ? `--profile ${profile}` : '';
|
|
1021
|
+
const fullCommand = `aws ${command} ${profileFlag} --output json`.replace(/\s+/g, ' ').trim();
|
|
993
1022
|
const output = execSync(fullCommand, { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] });
|
|
994
1023
|
const trimmed = output.trim();
|
|
995
1024
|
if (!trimmed) {
|
|
@@ -361,7 +361,7 @@ export default class ConfigValidator {
|
|
|
361
361
|
|
|
362
362
|
case 'instanceType':
|
|
363
363
|
if (value) {
|
|
364
|
-
const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
|
|
364
|
+
const instancePattern = /^ml\.[a-z0-9-]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
|
|
365
365
|
if (!instancePattern.test(value)) {
|
|
366
366
|
throw new ValidationError(
|
|
367
367
|
`Invalid instance type format: ${value}. Expected format: ml.{family}.{size} (e.g., ml.m5.large, ml.g4dn.xlarge)`,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
// AUTO-GENERATED by scripts/codegen-cli.js — DO NOT EDIT
|
|
2
2
|
// Source: config/parameter-schema-v2.json
|
|
3
|
-
// Generated: 2026-06-
|
|
3
|
+
// Generated: 2026-06-12T22:03:00.429Z
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
6
|
* CLI option definitions derived from parameter-schema-v2.json.
|
|
@@ -84,7 +84,7 @@ export const cliOptions = [
|
|
|
84
84
|
},
|
|
85
85
|
{
|
|
86
86
|
'flag': '--include-benchmark',
|
|
87
|
-
'description': 'Include SageMaker AI Benchmarking',
|
|
87
|
+
'description': 'Include SageMaker AI Benchmarking scripts (do/benchmark, do/optimize). Workload configuration is specified at runtime via --workload flag.',
|
|
88
88
|
'defaultValue': false
|
|
89
89
|
},
|
|
90
90
|
{
|
|
@@ -244,6 +244,10 @@ export const cliOptions = [
|
|
|
244
244
|
'flag': '--endpoint-volume-size <gb>',
|
|
245
245
|
'description': 'ML storage volume size in GB'
|
|
246
246
|
},
|
|
247
|
+
{
|
|
248
|
+
'flag': '--capacity-reservation-arn <arn>',
|
|
249
|
+
'description': 'Capacity reservation ARN (FTP or ODCR) for reserved instance deployment'
|
|
250
|
+
},
|
|
247
251
|
{
|
|
248
252
|
'flag': '--ic-cpu-count <n>',
|
|
249
253
|
'description': 'vCPUs allocated to the inference component'
|
|
@@ -439,6 +443,7 @@ export const helpGroups = {
|
|
|
439
443
|
'--endpoint-data-capture-percent': 'endpoint',
|
|
440
444
|
'--endpoint-variant-name': 'endpoint',
|
|
441
445
|
'--endpoint-volume-size': 'endpoint',
|
|
446
|
+
'--capacity-reservation-arn': 'endpoint',
|
|
442
447
|
'--ic-cpu-count': 'ic',
|
|
443
448
|
'--ic-model-weight': 'ic',
|
|
444
449
|
'--async-s3-output-path': 'async',
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
// AUTO-GENERATED by scripts/codegen-parameter-matrix.js — DO NOT EDIT
|
|
2
2
|
// Source: config/parameter-schema-v2.json
|
|
3
|
-
// Generated: 2026-06-
|
|
3
|
+
// Generated: 2026-06-12T22:03:00.552Z
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
6
|
* Parameter matrix defining how each parameter is loaded from various sources.
|
|
@@ -148,7 +148,7 @@ export const parameterMatrix = {
|
|
|
148
148
|
'configFile': true,
|
|
149
149
|
'packageJson': false,
|
|
150
150
|
'mcp': false,
|
|
151
|
-
'promptable': true,
|
|
151
|
+
'promptable': false,
|
|
152
152
|
'required': false,
|
|
153
153
|
'default': 10,
|
|
154
154
|
'valueSpace': 'unbounded'
|
|
@@ -159,7 +159,7 @@ export const parameterMatrix = {
|
|
|
159
159
|
'configFile': true,
|
|
160
160
|
'packageJson': false,
|
|
161
161
|
'mcp': false,
|
|
162
|
-
'promptable': true,
|
|
162
|
+
'promptable': false,
|
|
163
163
|
'required': false,
|
|
164
164
|
'default': 550,
|
|
165
165
|
'valueSpace': 'unbounded'
|
|
@@ -170,7 +170,7 @@ export const parameterMatrix = {
|
|
|
170
170
|
'configFile': true,
|
|
171
171
|
'packageJson': false,
|
|
172
172
|
'mcp': false,
|
|
173
|
-
'promptable': true,
|
|
173
|
+
'promptable': false,
|
|
174
174
|
'required': false,
|
|
175
175
|
'default': 150,
|
|
176
176
|
'valueSpace': 'unbounded'
|
|
@@ -181,7 +181,7 @@ export const parameterMatrix = {
|
|
|
181
181
|
'configFile': true,
|
|
182
182
|
'packageJson': false,
|
|
183
183
|
'mcp': false,
|
|
184
|
-
'promptable': true,
|
|
184
|
+
'promptable': false,
|
|
185
185
|
'required': false,
|
|
186
186
|
'default': true,
|
|
187
187
|
'valueSpace': 'bounded'
|
|
@@ -358,6 +358,17 @@ export const parameterMatrix = {
|
|
|
358
358
|
'valueSpace': 'unbounded',
|
|
359
359
|
'schemaValidated': true
|
|
360
360
|
},
|
|
361
|
+
'capacityReservationArn': {
|
|
362
|
+
'cliOption': 'capacity-reservation-arn',
|
|
363
|
+
'envVar': 'ML_CAPACITY_RESERVATION_ARN',
|
|
364
|
+
'configFile': true,
|
|
365
|
+
'packageJson': false,
|
|
366
|
+
'mcp': false,
|
|
367
|
+
'promptable': false,
|
|
368
|
+
'required': false,
|
|
369
|
+
'default': null,
|
|
370
|
+
'valueSpace': 'unbounded'
|
|
371
|
+
},
|
|
361
372
|
'icCpuCount': {
|
|
362
373
|
'cliOption': 'ic-cpu-count',
|
|
363
374
|
'envVar': 'ML_IC_CPU_COUNT',
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
// AUTO-GENERATED by scripts/codegen-validator.js — DO NOT EDIT
|
|
2
2
|
// Source: config/parameter-schema-v2.json
|
|
3
|
-
// Generated: 2026-06-
|
|
3
|
+
// Generated: 2026-06-12T22:03:00.468Z
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
6
|
* Validation rules derived from parameter-schema-v2.json.
|
|
@@ -26,7 +26,7 @@ export const validationRules = {
|
|
|
26
26
|
return null;
|
|
27
27
|
},
|
|
28
28
|
'instanceType': (value) => {
|
|
29
|
-
if (!new RegExp('^ml\\.[a-z0-9]+\\.[a-z0-9]+$').test(value)) return 'instanceType does not match required pattern';
|
|
29
|
+
if (!new RegExp('^ml\\.[a-z0-9-]+\\.[a-z0-9]+$').test(value)) return 'instanceType does not match required pattern';
|
|
30
30
|
return null;
|
|
31
31
|
},
|
|
32
32
|
'icGpuCount': (value) => {
|
|
@@ -132,6 +132,10 @@ export const validationRules = {
|
|
|
132
132
|
if (value > 16384) return `endpointVolumeSize must be <= 16384, got ${value}`;
|
|
133
133
|
return null;
|
|
134
134
|
},
|
|
135
|
+
'capacityReservationArn': (value) => {
|
|
136
|
+
if (!new RegExp('^arn:aws:sagemaker:').test(value)) return 'capacityReservationArn does not match required pattern';
|
|
137
|
+
return null;
|
|
138
|
+
},
|
|
135
139
|
'icCpuCount': (value) => {
|
|
136
140
|
if (value < 0.25) return `icCpuCount must be >= 0.25, got ${value}`;
|
|
137
141
|
if (value > 768) return `icCpuCount must be <= 768, got ${value}`;
|
|
@@ -199,4 +203,4 @@ export const validationRules = {
|
|
|
199
203
|
}
|
|
200
204
|
};
|
|
201
205
|
|
|
202
|
-
// 43 parameters have validation rules
|
|
206
|
+
// 44 parameters have validation rules
|