@aws/ml-container-creator 0.13.3 → 0.13.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,8 +18,6 @@ import {
18
18
  modelLoadStrategyPrompts,
19
19
  modelProfilePrompts,
20
20
  modulePrompts,
21
- loraPrompts,
22
- benchmarkPrompts,
23
21
  infraRegionAndTargetPrompts,
24
22
  infraExistingEndpointPrompts,
25
23
  infraInstancePrompts,
@@ -521,38 +519,23 @@ export default class PromptRunner {
521
519
  const ngcApiKeyAnswers = { ngcApiKey: secretAnswers.ngcApiKey, ngcTokenArn: secretAnswers.ngcTokenArn };
522
520
 
523
521
  // Module selection
524
- const moduleAnswers = await this._runPhase(modulePrompts, { ...frameworkAnswers, ...engineAnswers }, explicitConfig, existingConfig);
525
-
526
- // Ensure transformers, diffusors, and ineligible Triton backends don't get sample model
527
- if (frameworkAnswers.architecture === 'transformers' ||
528
- frameworkAnswers.architecture === 'diffusors' ||
529
- (frameworkAnswers.architecture === 'triton' &&
530
- !this._tritonBackends[frameworkAnswers.backend]?.supportsSampleModel)) {
522
+ // Only ask about sample model for non-transformers/diffusors (Triton etc.)
523
+ const moduleAnswers = {};
524
+ if (frameworkAnswers.architecture !== 'transformers' &&
525
+ frameworkAnswers.architecture !== 'diffusors') {
526
+ const sampleModelAnswers = await this._runPhase(
527
+ modulePrompts.filter(p => p.name === 'includeSampleModel'),
528
+ { ...frameworkAnswers, ...engineAnswers }, explicitConfig, existingConfig
529
+ );
530
+ Object.assign(moduleAnswers, sampleModelAnswers);
531
+ } else {
531
532
  moduleAnswers.includeSampleModel = false;
532
533
  }
533
534
 
534
- // Benchmark prompts derive includeBenchmark from testTypes selection or CLI flag
535
- // Requirements: 1.1, 1.2
536
- let benchmarkAnswers = {};
537
- if (frameworkAnswers.architecture === 'transformers' || frameworkAnswers.architecture === 'diffusors') {
538
- const testTypes = moduleAnswers.testTypes || [];
539
- const includeBenchmark = testTypes.includes('sagemaker-ai-automated-benchmarking') ||
540
- explicitConfig.includeBenchmark === true ||
541
- explicitConfig.includeBenchmark === 'true';
542
- benchmarkAnswers.includeBenchmark = includeBenchmark;
543
- if (includeBenchmark) {
544
- const subAnswers = await this._runPhase(benchmarkPrompts, { ...frameworkAnswers, ...moduleAnswers, includeBenchmark }, explicitConfig, existingConfig);
545
- benchmarkAnswers = { ...benchmarkAnswers, ...subAnswers };
546
- }
547
- }
548
-
549
- // LoRA adapter prompts — only for transformers with vllm/sglang/djl-lmi
550
- // Requirements: 1.1, 1.2, 1.4
551
- let loraAnswers = {};
552
- const loraSubAnswers = await this._runPhase(loraPrompts, { ...frameworkAnswers, ...engineAnswers }, explicitConfig, existingConfig);
553
- if (loraSubAnswers.enableLora !== undefined) {
554
- loraAnswers = loraSubAnswers;
555
- }
535
+ // Test types, benchmark, and LoRA are always-on (BL-122)
536
+ moduleAnswers.testTypes = ['hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'];
537
+ const benchmarkAnswers = { includeBenchmark: true };
538
+ const loraAnswers = { enableLora: true };
556
539
 
557
540
  // Validate instance type against framework requirements (now that framework version is known)
558
541
  const finalInstanceType = infraAnswers.customInstanceType || infraAnswers.instanceType;
@@ -10,14 +10,47 @@ This project uses three sets of IAM permissions:
10
10
 
11
11
  ## SageMaker Execution Role
12
12
 
13
- The bootstrap command creates an IAM role (`mlcc-sagemaker-execution-role`) with permissions for:
13
+ The bootstrap command creates an IAM role (`mlcc-sagemaker-execution-role`) with these permission groups:
14
14
 
15
- - **SageMaker**: Create, update, delete, and invoke endpoints, endpoint configs, models, and inference components
16
- - **ECR**: Pull images from the `ml-container-creator` repository
17
- - **CloudWatch Logs**: Write container logs
18
- - **S3**: Read model artifacts from `ml-container-creator-*` buckets
15
+ ### Endpoint Management
16
+ Create, update, delete, describe, and invoke endpoints, endpoint configs, models, and inference components.
19
17
 
20
- The role is defined in the CloudFormation stack template (`config/bootstrap-stack.json`) and updated automatically when you re-run bootstrap after upgrading.
18
+ ### AI Benchmarking
19
+ Create, describe, list, stop, and delete AI benchmark jobs, AI recommendation jobs, and AI workload configs.
20
+
21
+ ### Training & Model Customization
22
+ Create/describe/stop training jobs, model packages, and model package groups. Access SageMaker Hub contents. Manage training plans.
23
+
24
+ ### MLflow Integration
25
+ List/describe MLflow tracking servers and apps. Create presigned URLs. Call MLflow app APIs.
26
+
27
+ ### ECR
28
+ Pull container images (GetAuthorizationToken, BatchGetImage, GetDownloadUrlForLayer, BatchCheckLayerAvailability).
29
+
30
+ ### S3
31
+ Read and write model artifacts, adapters, and benchmark results:
32
+ - `s3:GetObject`, `s3:PutObject`, `s3:AbortMultipartUpload`, `s3:ListBucket`
33
+ - Scoped to `mlcc-*` and `ml-container-creator-*` buckets
34
+
35
+ ### CloudWatch Logs
36
+ Create log groups/streams and put log events.
37
+
38
+ ### Secrets Manager
39
+ Read and write secrets prefixed with `mlcc/` or `ml-container-creator/` (used for HF tokens, API keys).
40
+
41
+ ### SNS
42
+ Publish notifications to `mlcc-*` and `ml-container-creator-*` topics (benchmark completion alerts).
43
+
44
+ ### Service Quotas & Capacity
45
+ Query service quotas and training plan availability for instance selection.
46
+
47
+ ### Lambda
48
+ Invoke functions (reward model evaluation during training/tuning).
49
+
50
+ ### PassRole
51
+ Allows the role to pass itself (`iam:PassRole`) to the SageMaker service; scoped to `mlcc-sagemaker-execution-role`.
52
+
53
+ The role is defined in `config/bootstrap-stack.json` and updated automatically when you re-run bootstrap after upgrading.
21
54
 
22
55
  If you use a custom role (`--role-arn`), ensure it has at minimum:
23
56
 
@@ -25,12 +58,15 @@ If you use a custom role (`--role-arn`), ensure it has at minimum:
25
58
  |-----------|---------|
26
59
  | `sagemaker:CreateEndpoint`, `CreateEndpointConfig`, `CreateModel`, `CreateInferenceComponent` | Deploy |
27
60
  | `sagemaker:DeleteEndpoint`, `DeleteEndpointConfig`, `DeleteModel`, `DeleteInferenceComponent` | Clean up |
28
- | `sagemaker:DescribeEndpoint`, `DescribeEndpointConfig`, `DescribeModel`, `DescribeInferenceComponent` | Status checks |
61
+ | `sagemaker:DescribeEndpoint`, `DescribeEndpointConfig`, `DescribeModel`, `DescribeInferenceComponent`, `ListInferenceComponents` | Status |
29
62
  | `sagemaker:InvokeEndpoint`, `InvokeEndpointAsync` | Inference |
30
63
  | `sagemaker:UpdateEndpoint`, `UpdateEndpointWeightsAndCapacities`, `UpdateInferenceComponent` | Updates |
31
- | `ecr:GetAuthorizationToken`, `BatchGetImage`, `GetDownloadUrlForLayer`, `BatchCheckLayerAvailability` | Pull container image |
32
- | `logs:CreateLogGroup`, `CreateLogStream`, `PutLogEvents` | Container logging |
33
- | `s3:GetObject`, `s3:ListBucket` on `ml-container-creator-*` | Model artifact access |
64
+ | `sagemaker:CreateAIBenchmarkJob`, `DescribeAIBenchmarkJob`, `ListAIBenchmarkJobs` | Benchmark |
65
+ | `sagemaker:CreateTrainingJob`, `DescribeTrainingJob`, `StopTrainingJob` | Training/tuning |
66
+ | `ecr:GetAuthorizationToken`, `BatchGetImage`, `GetDownloadUrlForLayer`, `BatchCheckLayerAvailability` | Pull image |
67
+ | `logs:CreateLogGroup`, `CreateLogStream`, `PutLogEvents` | Logging |
68
+ | `s3:GetObject`, `s3:PutObject`, `s3:ListBucket` on project buckets (`mlcc-*`, `ml-container-creator-*`) | Artifacts |
69
+ | `iam:PassRole` (to sagemaker.amazonaws.com) | Role delegation |
34
70
 
35
71
  Trust policy must allow `sagemaker.amazonaws.com` to assume the role.
36
72
 
@@ -48,12 +84,27 @@ Your AWS user or CI system needs these permissions to run the do-scripts:
48
84
 
49
85
  | Script | Permissions Needed |
50
86
  |--------|-------------------|
87
+ | `./do/build` | Local only — no AWS permissions |
88
+ | `./do/run` | Local only — no AWS permissions |
51
89
  | `./do/push` | `ecr:GetAuthorizationToken`, `ecr:PutImage`, `ecr:InitiateLayerUpload`, `ecr:UploadLayerPart`, `ecr:CompleteLayerUpload`, `ecr:BatchCheckLayerAvailability` |
52
90
  | `./do/submit` | `codebuild:CreateProject`, `codebuild:StartBuild`, `codebuild:BatchGetBuilds`, `iam:CreateRole`, `iam:PutRolePolicy`, `iam:PassRole`, `s3:PutObject`, `s3:CreateBucket` |
53
- | `./do/deploy` | `sagemaker:CreateEndpointConfig`, `sagemaker:CreateEndpoint`, `sagemaker:CreateInferenceComponent`, `sagemaker:DescribeEndpoint`, `iam:PassRole` |
54
- | `./do/clean` | `sagemaker:DeleteEndpoint`, `sagemaker:DeleteEndpointConfig`, `sagemaker:DeleteInferenceComponent`, `codebuild:DeleteProject`, `iam:DeleteRole`, `iam:DeleteRolePolicy` |
91
+ | `./do/stage` | `s3:PutObject`, `s3:GetObject`, `s3:ListBucket` on `mlcc-*` buckets |
92
+ | `./do/deploy` | `sagemaker:CreateEndpointConfig`, `sagemaker:CreateEndpoint`, `sagemaker:CreateModel`, `sagemaker:CreateInferenceComponent`, `sagemaker:DescribeEndpoint`, `iam:PassRole` |
93
+ | `./do/add-ic` | `sagemaker:CreateInferenceComponent`, `sagemaker:DescribeEndpoint`, `sagemaker:ListInferenceComponents`, `iam:PassRole` |
55
94
  | `./do/test` | `sagemaker-runtime:InvokeEndpoint` |
56
- | `bootstrap` | `cloudformation:*`, `iam:CreateRole`, `iam:PutRolePolicy`, `iam:TagRole`, `ecr:CreateRepository`, `s3:CreateBucket` (and `sts:GetCallerIdentity`) |
95
+ | `./do/benchmark` | `sagemaker:CreateAIBenchmarkJob`, `sagemaker:DescribeAIBenchmarkJob`, `sagemaker:ListAIBenchmarkJobs`, `sagemaker:CreateAIWorkloadConfig`, `iam:PassRole`, `s3:GetObject` |
96
+ | `./do/train` | `sagemaker:CreateTrainingJob`, `sagemaker:DescribeTrainingJob`, `iam:PassRole`, `s3:GetObject`, `s3:PutObject` |
97
+ | `./do/tune` | `sagemaker:CreateTrainingJob`, `sagemaker:DescribeTrainingJob`, `iam:PassRole`, `s3:GetObject`, `s3:PutObject` |
98
+ | `./do/adapter` | `sagemaker:CreateInferenceComponent`, `sagemaker:UpdateInferenceComponent`, `sagemaker:DescribeInferenceComponent`, `s3:GetObject` |
99
+ | `./do/optimize` | `sagemaker:CreateModel`, `sagemaker:DescribeModel`, `s3:GetObject`, `s3:PutObject` |
100
+ | `./do/register` | `sagemaker:CreateModelPackage`, `sagemaker:CreateModelPackageGroup`, `sagemaker:DescribeModelPackage` |
101
+ | `./do/logs` | `logs:GetLogEvents`, `logs:FilterLogEvents`, `logs:DescribeLogStreams` |
102
+ | `./do/status` | `sagemaker:DescribeEndpoint`, `sagemaker:DescribeInferenceComponent`, `sagemaker:ListInferenceComponents` |
103
+ | `./do/clean` | `sagemaker:DeleteEndpoint`, `sagemaker:DeleteEndpointConfig`, `sagemaker:DeleteModel`, `sagemaker:DeleteInferenceComponent`, `codebuild:DeleteProject`, `iam:DeleteRole`, `iam:DeleteRolePolicy` |
104
+ | `./do/export` | Local only — reads config files |
105
+ | `./do/validate` | Local only — validates project structure |
106
+ | `./do/manifest` | Local only — generates deployment manifest |
107
+ | `bootstrap` | `cloudformation:*`, `iam:CreateRole`, `iam:PutRolePolicy`, `iam:TagRole`, `ecr:CreateRepository`, `s3:CreateBucket`, `sts:GetCallerIdentity` |
57
108
 
58
109
  <% if (framework === 'transformers' && hfToken) { %>
59
110
  ## HuggingFace Token Security
@@ -1510,8 +1510,6 @@ def cmd_discover(args):
1510
1510
 
1511
1511
  Returns: {"models": [str], "count": int}
1512
1512
  """
1513
- import boto3
1514
-
1515
1513
  region = args.region or os.environ.get('AWS_REGION', 'us-east-1')
1516
1514
 
1517
1515
  family = args.family or ""
@@ -1528,6 +1526,11 @@ def cmd_discover(args):
1528
1526
  if not prefix:
1529
1527
  _error_exit("No family or filter provided for discovery")
1530
1528
 
1529
+ try:
1530
+ import boto3
1531
+ except ImportError:
1532
+ _error_exit("Hub discovery failed: boto3 is not installed. Install with: pip install boto3")
1533
+
1531
1534
  try:
1532
1535
  client = boto3.client("sagemaker", region_name=region)
1533
1536
  models = []