@aws/ml-container-creator 0.10.0 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/LICENSE-THIRD-PARTY +9304 -0
  2. package/bin/cli.js +2 -0
  3. package/config/bootstrap-e2e-stack.json +341 -0
  4. package/config/bootstrap-stack.json +40 -3
  5. package/config/parameter-schema-v2.json +33 -22
  6. package/config/tune-catalog.json +1781 -0
  7. package/infra/ci-harness/buildspec.yml +1 -0
  8. package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
  9. package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
  10. package/infra/ci-harness/lib/ci-harness-stack.ts +851 -7
  11. package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
  12. package/package.json +53 -67
  13. package/servers/base-image-picker/index.js +121 -121
  14. package/servers/e2e-status/index.js +297 -0
  15. package/servers/e2e-status/manifest.json +14 -0
  16. package/servers/e2e-status/package.json +15 -0
  17. package/servers/endpoint-picker/LICENSE +202 -0
  18. package/servers/endpoint-picker/index.js +536 -0
  19. package/servers/endpoint-picker/manifest.json +14 -0
  20. package/servers/endpoint-picker/package.json +18 -0
  21. package/servers/hyperpod-cluster-picker/index.js +125 -125
  22. package/servers/instance-sizer/index.js +166 -153
  23. package/servers/instance-sizer/lib/instance-ranker.js +120 -76
  24. package/servers/instance-sizer/lib/model-resolver.js +61 -61
  25. package/servers/instance-sizer/lib/quota-resolver.js +113 -113
  26. package/servers/instance-sizer/lib/vram-estimator.js +31 -31
  27. package/servers/lib/bedrock-client.js +38 -38
  28. package/servers/lib/catalogs/instances.json +27 -0
  29. package/servers/lib/catalogs/model-servers.json +201 -3
  30. package/servers/lib/custom-validators.js +13 -13
  31. package/servers/lib/dynamic-resolver.js +4 -4
  32. package/servers/marketplace-picker/index.js +342 -0
  33. package/servers/marketplace-picker/manifest.json +14 -0
  34. package/servers/marketplace-picker/package.json +18 -0
  35. package/servers/model-picker/index.js +382 -382
  36. package/servers/region-picker/index.js +56 -56
  37. package/servers/workload-picker/LICENSE +202 -0
  38. package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
  39. package/servers/workload-picker/index.js +171 -0
  40. package/servers/workload-picker/manifest.json +16 -0
  41. package/servers/workload-picker/package.json +16 -0
  42. package/src/app.js +12 -3
  43. package/src/lib/bootstrap-command-handler.js +609 -15
  44. package/src/lib/bootstrap-config.js +36 -0
  45. package/src/lib/bootstrap-profile-manager.js +48 -41
  46. package/src/lib/ci-register-helpers.js +74 -0
  47. package/src/lib/config-loader.js +3 -0
  48. package/src/lib/config-manager.js +7 -0
  49. package/src/lib/config-validator.js +1 -1
  50. package/src/lib/cuda-resolver.js +17 -8
  51. package/src/lib/generated/cli-options.js +319 -314
  52. package/src/lib/generated/parameter-matrix.js +672 -661
  53. package/src/lib/generated/validation-rules.js +76 -72
  54. package/src/lib/path-prover-brain.js +664 -0
  55. package/src/lib/prompts/infrastructure-prompts.js +2 -2
  56. package/src/lib/prompts/model-prompts.js +6 -0
  57. package/src/lib/prompts/project-prompts.js +12 -0
  58. package/src/lib/secrets-prompt-runner.js +4 -0
  59. package/src/lib/template-manager.js +1 -1
  60. package/src/lib/template-variable-resolver.js +87 -1
  61. package/src/lib/tune-catalog-validator.js +37 -4
  62. package/templates/Dockerfile +9 -0
  63. package/templates/code/adapter_sidecar.py +444 -0
  64. package/templates/code/serve +6 -0
  65. package/templates/code/serve.d/vllm.ejs +1 -1
  66. package/templates/do/.benchmark_writer.py +1476 -0
  67. package/templates/do/.tune_helper.py +982 -57
  68. package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
  69. package/templates/do/adapter +154 -0
  70. package/templates/do/benchmark +639 -85
  71. package/templates/do/build +5 -0
  72. package/templates/do/clean.d/async-inference.ejs +5 -0
  73. package/templates/do/clean.d/batch-transform.ejs +5 -0
  74. package/templates/do/clean.d/hyperpod-eks.ejs +5 -0
  75. package/templates/do/clean.d/managed-inference.ejs +5 -0
  76. package/templates/do/config +115 -45
  77. package/templates/do/deploy.d/async-inference.ejs +30 -3
  78. package/templates/do/deploy.d/batch-transform.ejs +29 -3
  79. package/templates/do/deploy.d/hyperpod-eks.ejs +4 -0
  80. package/templates/do/deploy.d/managed-inference.ejs +216 -14
  81. package/templates/do/lib/endpoint-config.sh +1 -1
  82. package/templates/do/lib/profile.sh +44 -0
  83. package/templates/do/optimize +106 -37
  84. package/templates/do/push +5 -0
  85. package/templates/do/register +94 -0
  86. package/templates/do/stage +567 -0
  87. package/templates/do/submit +7 -0
  88. package/templates/do/test +14 -0
  89. package/templates/do/tune +382 -59
  90. package/templates/do/validate +44 -4
@@ -9,6 +9,11 @@ set -o pipefail
9
9
  # Source configuration
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  source "${SCRIPT_DIR}/config"
12
+ source "${SCRIPT_DIR}/lib/profile.sh"
13
+
14
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
15
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
16
+ export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
12
17
 
13
18
  echo "🚀 Building Docker image for ${PROJECT_NAME}"
14
19
  echo " Deployment config: ${DEPLOYMENT_CONFIG}"
@@ -9,6 +9,11 @@ set -o pipefail
9
9
  # Source configuration
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  source "${SCRIPT_DIR}/config"
12
+ source "${SCRIPT_DIR}/lib/profile.sh"
13
+
14
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
15
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
16
+ export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
12
17
 
13
18
  # Parse arguments
14
19
  CLEANUP_TARGET=""
@@ -9,6 +9,11 @@ set -o pipefail
9
9
  # Source configuration
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  source "${SCRIPT_DIR}/config"
12
+ source "${SCRIPT_DIR}/lib/profile.sh"
13
+
14
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
15
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
16
+ export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
12
17
 
13
18
  # Parse arguments
14
19
  CLEANUP_TARGET=""
@@ -9,6 +9,11 @@ set -o pipefail
9
9
  # Source configuration
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  source "${SCRIPT_DIR}/config"
12
+ source "${SCRIPT_DIR}/lib/profile.sh"
13
+
14
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
15
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
16
+ export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
12
17
 
13
18
  # Parse arguments
14
19
  CLEANUP_TARGET=""
@@ -9,6 +9,11 @@ set -o pipefail
9
9
  # Source configuration
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  source "${SCRIPT_DIR}/config"
12
+ source "${SCRIPT_DIR}/lib/profile.sh"
13
+
14
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
15
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
16
+ export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
12
17
 
13
18
  # Parse arguments
14
19
  CLEANUP_TARGET=""
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
  # do-framework configuration
3
3
  # This file is sourced by all do scripts
4
+ # Generated: <%= new Date().toISOString() %>
4
5
 
5
6
  # Project identification
6
7
  export PROJECT_NAME="<%= projectName %>"
@@ -10,21 +11,25 @@ export DEPLOYMENT_CONFIG="<%= deploymentConfig %>"
10
11
  export FRAMEWORK="<%= framework %>"
11
12
  export MODEL_SERVER="<%= modelServer %>"
12
13
 
14
+ # AWS configuration
15
+ export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
16
+
17
+ # ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
18
+ # ECR_REPOSITORY_NAME, ROLE_ARN, ADAPTER_S3_BUCKET — see do/lib/profile.sh
19
+
13
20
  <% if (typeof enableLora !== 'undefined' && enableLora) { %>
14
21
  # LoRA adapter serving
15
22
  export ENABLE_LORA=true
16
- export ADAPTER_S3_BUCKET="mlcc-adapters-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
23
+ <% } else if (framework === 'transformers' || framework === 'diffusors') { %>
24
+ # LoRA adapter serving (uncomment to enable)
25
+ # export ENABLE_LORA=true
17
26
  <% } %>
18
27
 
19
- # AWS configuration
20
- export AWS_REGION="<%= awsRegion %>"
21
- export ECR_REPOSITORY_NAME="ml-container-creator"
22
-
23
28
  # Build configuration — WHERE the Docker image gets built
24
29
  export BUILD_TARGET="<%= buildTarget %>"
25
30
  <% if (buildTarget === 'codebuild') { %>
26
31
  export CODEBUILD_COMPUTE_TYPE="<%= codebuildComputeType %>"
27
- export CODEBUILD_PROJECT_NAME="${PROJECT_NAME}-build-$(date +%Y%m%d)"
32
+ # CODEBUILD_PROJECT_NAME — derived in do/submit at runtime
28
33
  <% } %>
29
34
 
30
35
  # Deployment configuration — WHERE the model runs
@@ -42,14 +47,27 @@ export INSTANCE_TYPE="<%= instanceType %>"
42
47
  # Instance pools: heterogeneous instance types with priority-based fallback
43
48
  # Priority = selection order (1 = preferred, higher = fallback)
44
49
  export INSTANCE_POOLS='<%= JSON.stringify(instancePools) %>'
50
+ <% } else { %>
51
+ # Instance pools: heterogeneous instance types with priority-based fallback (uncomment to enable)
52
+ # Format: [{"InstanceType":"ml.g6e.48xlarge","Priority":1},{"InstanceType":"ml.g5.48xlarge","Priority":2}]
53
+ # export INSTANCE_POOLS='[]'
45
54
  <% } %>
46
55
  <% if (inferenceAmiVersion) { %>
47
56
  export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
57
+ <% } else { %>
58
+ # Inference AMI version — auto-resolved from CUDA version (uncomment to override)
59
+ # Valid: al2-ami-sagemaker-inference-gpu-2, al2-ami-sagemaker-inference-gpu-2-1,
60
+ # al2-ami-sagemaker-inference-gpu-3-1, al2023-ami-sagemaker-inference-gpu-4-1
61
+ # export INFERENCE_AMI_VERSION=""
48
62
  <% } %>
49
63
  <% if (typeof capacityReservationArn !== 'undefined' && capacityReservationArn) { %>
50
64
  # Note: Capacity reservations and instance pools (INSTANCE_POOLS) are mutually exclusive.
51
65
  # If both are set, the capacity reservation takes precedence and INSTANCE_POOLS is ignored.
52
66
  export CAPACITY_RESERVATION_ARN="<%= capacityReservationArn %>"
67
+ <% } else { %>
68
+ # Capacity reservation (uncomment to use reserved capacity)
69
+ # Note: Mutually exclusive with INSTANCE_POOLS — reservation takes precedence.
70
+ # export CAPACITY_RESERVATION_ARN=""
53
71
  <% } %>
54
72
  <% } %>
55
73
  <% } %>
@@ -59,32 +77,21 @@ export CAPACITY_RESERVATION_ARN="<%= capacityReservationArn %>"
59
77
  export INSTANCE_TYPE="<%= instanceType %>"
60
78
  <% if (inferenceAmiVersion) { %>
61
79
  export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
62
- <% } %>
63
-
64
- # Async-specific configuration
65
- # Resolve AWS account ID at runtime for default resource names
66
- ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "UNKNOWN")
67
-
68
- <% if (asyncS3OutputPath) { %>
69
- export ASYNC_S3_OUTPUT_PATH="<%= asyncS3OutputPath %>"
70
- <% } else { %>
71
- export ASYNC_S3_OUTPUT_PATH="s3://mlcc-async-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
72
- <% } %>
73
-
74
- <% if (asyncSnsSuccessTopic) { %>
75
- export ASYNC_SNS_SUCCESS_TOPIC="<%= asyncSnsSuccessTopic %>"
76
80
  <% } else { %>
77
- export ASYNC_SNS_SUCCESS_TOPIC="arn:aws:sns:${AWS_REGION}:${ACCOUNT_ID}:ml-container-creator-${PROJECT_NAME}-async-success"
81
+ # Inference AMI version — auto-resolved from CUDA version (uncomment to override)
82
+ # export INFERENCE_AMI_VERSION=""
78
83
  <% } %>
79
84
 
80
- <% if (asyncSnsErrorTopic) { %>
81
- export ASYNC_SNS_ERROR_TOPIC="<%= asyncSnsErrorTopic %>"
82
- <% } else { %>
83
- export ASYNC_SNS_ERROR_TOPIC="arn:aws:sns:${AWS_REGION}:${ACCOUNT_ID}:ml-container-creator-${PROJECT_NAME}-async-error"
84
- <% } %>
85
+ # Async-specific configuration
86
+ # ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
87
+ # ASYNC_S3_OUTPUT_PATH, ASYNC_SNS_SUCCESS_TOPIC, ASYNC_SNS_ERROR_TOPIC — see do/lib/profile.sh
88
+ # ACCOUNT_ID — derived inline in consuming scripts (do/deploy.d/async-inference)
85
89
 
86
90
  <% if (asyncMaxConcurrentInvocations) { %>
87
91
  export ASYNC_MAX_CONCURRENT_INVOCATIONS="<%= asyncMaxConcurrentInvocations %>"
92
+ <% } else { %>
93
+ # Max concurrent invocations per instance (uncomment to set)
94
+ # export ASYNC_MAX_CONCURRENT_INVOCATIONS=""
88
95
  <% } %>
89
96
  <% } %>
90
97
 
@@ -95,6 +102,9 @@ export HYPERPOD_NAMESPACE="<%= hyperPodNamespace %>"
95
102
  export HYPERPOD_REPLICAS="<%= hyperPodReplicas %>"
96
103
  <% if (fsxVolumeHandle) { %>
97
104
  export FSX_VOLUME_HANDLE="<%= fsxVolumeHandle %>"
105
+ <% } else { %>
106
+ # FSx for Lustre volume for shared model storage (uncomment to enable)
107
+ # export FSX_VOLUME_HANDLE=""
98
108
  <% } %>
99
109
  <% } %>
100
110
 
@@ -102,28 +112,25 @@ export FSX_VOLUME_HANDLE="<%= fsxVolumeHandle %>"
102
112
  # SageMaker Batch Transform configuration
103
113
  export INSTANCE_TYPE="<%= instanceType %>"
104
114
 
105
- # Resolve AWS account ID at runtime for default resource names
106
- ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "UNKNOWN")
115
+ # ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
116
+ # BATCH_INPUT_PATH, BATCH_OUTPUT_PATH — see do/lib/profile.sh
117
+ # ACCOUNT_ID — derived inline in consuming scripts (do/deploy.d/batch-transform)
107
118
 
108
- <% if (batchInputPath) { %>
109
- export BATCH_INPUT_PATH="<%= batchInputPath %>"
110
- <% } else { %>
111
- export BATCH_INPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/input/"
112
- <% } %>
113
- <% if (batchOutputPath) { %>
114
- export BATCH_OUTPUT_PATH="<%= batchOutputPath %>"
115
- <% } else { %>
116
- export BATCH_OUTPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
117
- <% } %>
118
119
  export BATCH_INSTANCE_COUNT="<%= batchInstanceCount %>"
119
120
  export BATCH_SPLIT_TYPE="<%= batchSplitType %>"
120
121
  export BATCH_STRATEGY="<%= batchStrategy %>"
121
122
  export BATCH_JOIN_SOURCE="<%= batchJoinSource || 'None' %>"
122
123
  <% if (batchMaxConcurrentTransforms) { %>
123
124
  export BATCH_MAX_CONCURRENT_TRANSFORMS="<%= batchMaxConcurrentTransforms %>"
125
+ <% } else { %>
126
+ # Max concurrent transforms per instance (uncomment to set)
127
+ # export BATCH_MAX_CONCURRENT_TRANSFORMS=""
124
128
  <% } %>
125
129
  <% if (batchMaxPayloadInMB) { %>
126
130
  export BATCH_MAX_PAYLOAD_IN_MB="<%= batchMaxPayloadInMB %>"
131
+ <% } else { %>
132
+ # Max payload size in MB (uncomment to set, default: 6)
133
+ # export BATCH_MAX_PAYLOAD_IN_MB=""
127
134
  <% } %>
128
135
  <% } %>
129
136
 
@@ -140,6 +147,22 @@ export ENDPOINT_VARIANT_NAME="<%= endpointVariantName %>"
140
147
  export ENDPOINT_VOLUME_SIZE="<%= endpointVolumeSize %>"
141
148
  <% } %>
142
149
 
150
+ <% if (deploymentTarget === 'realtime-inference' || deploymentTarget === 'async-inference') { %>
151
+ # ─── Endpoint overrides (uncomment to customize) ───────────────────────────────
152
+ <% if (typeof endpointInitialInstanceCount === 'undefined' || endpointInitialInstanceCount == null) { %>
153
+ # export ENDPOINT_INITIAL_INSTANCE_COUNT="1" # Number of instances for the endpoint
154
+ <% } %>
155
+ <% if (typeof endpointDataCapturePercent === 'undefined' || endpointDataCapturePercent == null) { %>
156
+ # export ENDPOINT_DATA_CAPTURE_PERCENT="" # Percentage of requests to capture (0-100)
157
+ <% } %>
158
+ <% if (typeof endpointVariantName === 'undefined' || endpointVariantName == null) { %>
159
+ # export ENDPOINT_VARIANT_NAME="" # Custom variant name (default: AllTraffic)
160
+ <% } %>
161
+ <% if (typeof endpointVolumeSize === 'undefined' || endpointVolumeSize == null) { %>
162
+ # export ENDPOINT_VOLUME_SIZE="" # EBS volume size in GB for model download
163
+ <% } %>
164
+ <% } %>
165
+
143
166
  <% if (typeof icCpuCount !== 'undefined' && icCpuCount != null) { %>
144
167
  export IC_CPU_COUNT="<%= icCpuCount %>"
145
168
  <% } %>
@@ -158,6 +181,22 @@ export IC_COPY_COUNT="<%= icCopyCount %>"
158
181
  export IC_MODEL_WEIGHT="<%= icModelWeight %>"
159
182
  <% } %>
160
183
 
184
+ <% if (deploymentTarget === 'realtime-inference' || deploymentTarget === 'async-inference') { %>
185
+ # ─── Inference Component overrides (uncomment to customize) ────────────────────
186
+ <% if (typeof icCpuCount === 'undefined' || icCpuCount == null) { %>
187
+ # export IC_CPU_COUNT="" # CPU cores reserved for this IC
188
+ <% } %>
189
+ <% if (typeof icMemorySize === 'undefined' || icMemorySize == null) { %>
190
+ # export IC_MEMORY_SIZE="" # Memory in MB reserved for this IC
191
+ <% } %>
192
+ <% if (typeof icCopyCount === 'undefined' || icCopyCount == null) { %>
193
+ # export IC_COPY_COUNT="" # Number of model copies (multi-IC scaling)
194
+ <% } %>
195
+ <% if (typeof icModelWeight === 'undefined' || icModelWeight == null) { %>
196
+ # export IC_MODEL_WEIGHT="" # Traffic weight for this IC (0-100)
197
+ <% } %>
198
+ <% } %>
199
+
161
200
  <% if (typeof modelEnvVars !== 'undefined' && modelEnvVars && Object.keys(modelEnvVars).length > 0) { %>
162
201
  # Model environment variables
163
202
  <% Object.entries(modelEnvVars).forEach(([key, value]) => { %>
@@ -191,8 +230,24 @@ export NGC_API_KEY="<%= ngcApiKey %>"
191
230
 
192
231
  <% if (deploymentTarget !== 'batch-transform') { %>
193
232
  # Managed Model Customization (do/tune)
233
+ # ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
234
+ # TUNE_S3_BUCKET — see do/lib/profile.sh
194
235
  export TUNE_SUPPORTED=<%= (typeof tuneSupported !== 'undefined' && tuneSupported) ? 'true' : 'false' %>
195
- export TUNE_S3_BUCKET="mlcc-tune-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
236
+ <% if (typeof tuneSupported !== 'undefined' && tuneSupported) { %>
237
+ <% if (typeof tuneModelId !== 'undefined' && tuneModelId) { %>
238
+ # SageMaker AI Managed Fine-Tuning — JumpStart Hub model ID
239
+ # Flow: JumpStart model (tune) → LoRA adapter (S3) → do/adapter add → vLLM
240
+ export TUNE_MODEL_ID="<%= tuneModelId %>"
241
+ <% } else { %>
242
+ # SageMaker AI Managed Fine-Tuning — JumpStart Hub model ID
243
+ # To find your model's Hub ID:
244
+ # aws sagemaker list-hub-contents --hub-name SageMakerPublicHub \
245
+ # --hub-content-type Model --query "HubContentSummaries[].HubContentName"
246
+ # export TUNE_MODEL_ID=""
247
+ <% } %>
248
+ <% } %>
249
+ # MLflow App ARN for experiment tracking (set by bootstrap, or override manually)
250
+ # export MLFLOW_APP_ARN=""
196
251
  <% } %>
197
252
  <% } %>
198
253
 
@@ -210,10 +265,10 @@ export HF_TOKEN="<%= hfToken %>"
210
265
 
211
266
  <% if (modelFormat) { %>
212
267
  export MODEL_FORMAT="<%= modelFormat %>"
213
- <% } %>
214
-
215
- <% if (roleArn) { %>
216
- export ROLE_ARN="<%= roleArn %>"
268
+ <% } else { %>
269
+ # Model format (uncomment if using quantized models)
270
+ # Valid: pkl, json, keras, safetensors, gguf, awq, gptq
271
+ # export MODEL_FORMAT=""
217
272
  <% } %>
218
273
 
219
274
  <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
@@ -234,6 +289,23 @@ export BENCHMARK_S3_OUTPUT_PATH="s3://mlcc-benchmark-$(aws sts get-caller-identi
234
289
  <% } %>
235
290
  export BENCHMARK_JOB_NAME=""
236
291
  export BENCHMARK_WORKLOAD_CONFIG_NAME=""
292
+
293
+ # CI Benchmark Athena persistence (set automatically from bootstrap --benchmark-infra)
294
+ <% if (typeof ciBenchmarkResultsBucket !== 'undefined' && ciBenchmarkResultsBucket) { %>
295
+ export CI_BENCHMARK_RESULTS_BUCKET="<%= ciBenchmarkResultsBucket %>"
296
+ <% } else { %>
297
+ # export CI_BENCHMARK_RESULTS_BUCKET="" # S3 bucket for Athena Parquet results (set by bootstrap --benchmark-infra)
298
+ <% } %>
299
+ <% } else if (framework === 'transformers' && deploymentTarget !== 'batch-transform') { %>
300
+ # ─── SageMaker AI Benchmarking (uncomment to enable) ──────────────────────────
301
+ # export BENCHMARK_CONCURRENCY="10" # Concurrent requests
302
+ # export BENCHMARK_INPUT_TOKENS_MEAN="550" # Mean input tokens per request
303
+ # export BENCHMARK_OUTPUT_TOKENS_MEAN="150" # Mean output tokens per request
304
+ # export BENCHMARK_STREAMING="true" # Enable streaming
305
+ # export BENCHMARK_REQUEST_COUNT="" # Total requests (empty = auto)
306
+ # export BENCHMARK_S3_OUTPUT_PATH="" # S3 path for results (empty = auto)
307
+ # export BENCHMARK_JOB_NAME="" # Resume/check existing job
308
+ # export BENCHMARK_WORKLOAD_CONFIG_NAME="" # Reuse existing workload config
237
309
  <% } %>
238
310
 
239
311
  <% if (orderedEnvVars && orderedEnvVars.length > 0) { %>
@@ -246,11 +318,9 @@ export <%= key %>=${<%= key %>:-<%= value %>}
246
318
  export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage || '' %>}
247
319
 
248
320
  # Allow environment variable overrides
249
- export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
250
321
  <% if ((deploymentTarget === 'realtime-inference' && !(typeof existingEndpointName !== 'undefined' && existingEndpointName)) || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
251
322
  export INSTANCE_TYPE=${INSTANCE_TYPE:-<%= instanceType %>}
252
323
  <% } %>
253
- export ECR_REPOSITORY_NAME=${ECR_REPOSITORY_NAME:-ml-container-creator}
254
324
 
255
325
  # Print configuration summary
256
326
  echo "⚙️ Configuration loaded"
@@ -38,6 +38,18 @@ done
38
38
  # Source configuration
39
39
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
40
40
  source "${SCRIPT_DIR}/config"
41
+ source "${SCRIPT_DIR}/lib/profile.sh"
42
+
43
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
44
+ ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
45
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
46
+ export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
47
+
48
+ # Async-specific derived variables
49
+ _ASYNC_BUCKET="${_PROFILE[asyncS3Bucket]:-mlcc-async-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
50
+ ASYNC_S3_OUTPUT_PATH="${ASYNC_S3_OUTPUT_PATH:-s3://${_ASYNC_BUCKET}/${PROJECT_NAME}/output/}"
51
+ ASYNC_SNS_SUCCESS_TOPIC="${ASYNC_SNS_SUCCESS_TOPIC:-arn:aws:sns:${_PROFILE[awsRegion]:-us-east-1}:${_PROFILE[accountId]:-unknown}:ml-container-creator-${PROJECT_NAME}-async-success}"
52
+ ASYNC_SNS_ERROR_TOPIC="${ASYNC_SNS_ERROR_TOPIC:-arn:aws:sns:${_PROFILE[awsRegion]:-us-east-1}:${_PROFILE[accountId]:-unknown}:ml-container-creator-${PROJECT_NAME}-async-error}"
41
53
 
42
54
  echo "🚀 Deploying to AWS"
43
55
  echo " Project: ${PROJECT_NAME}"
@@ -137,16 +149,31 @@ source "${SCRIPT_DIR}/lib/wait.sh"
137
149
  # Resolve container secrets (HF_TOKEN, NGC_API_KEY)
138
150
  resolve_secrets
139
151
 
152
+ <% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
153
+ # ============================================================
154
+ # Inject server environment variables into container Environment
155
+ # ============================================================
156
+ <% Object.keys(serverEnvVars).forEach(function(key) { %>
157
+ if [ -n "${<%= key %>:-}" ]; then
158
+ if [ -n "${CONTAINER_ENV_JSON}" ]; then
159
+ CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"<%= key %>\":\"${<%= key %>}\""
160
+ else
161
+ CONTAINER_ENV_JSON="\"<%= key %>\":\"${<%= key %>}\""
162
+ fi
163
+ fi
164
+ <% }); %>
165
+ <% } %>
166
+
140
167
  # Validate execution role ARN
141
168
  if [ -z "${ROLE_ARN:-}" ]; then
142
- echo "❌ Execution role ARN not provided"
169
+ echo "❌ ROLE_ARN is not set."
170
+ echo " Run 'ml-container-creator bootstrap' to configure your profile,"
171
+ echo " or set ROLE_ARN as an environment variable."
143
172
  echo ""
144
173
  echo "Usage:"
145
174
  echo " export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
146
175
  echo " ./do/deploy"
147
176
  echo ""
148
- echo "Or set ROLE_ARN in do/config"
149
- echo ""
150
177
  echo "The execution role must have permissions for:"
151
178
  echo " • SageMaker model and endpoint management"
152
179
  echo " • ECR image access"
@@ -38,6 +38,17 @@ done
38
38
  # Source configuration
39
39
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
40
40
  source "${SCRIPT_DIR}/config"
41
+ source "${SCRIPT_DIR}/lib/profile.sh"
42
+
43
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
44
+ ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
45
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
46
+ export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
47
+
48
+ # Batch-specific derived variables
49
+ _BATCH_BUCKET="${_PROFILE[batchS3Bucket]:-mlcc-batch-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
50
+ BATCH_INPUT_PATH="${BATCH_INPUT_PATH:-s3://${_BATCH_BUCKET}/${PROJECT_NAME}/input/}"
51
+ BATCH_OUTPUT_PATH="${BATCH_OUTPUT_PATH:-s3://${_BATCH_BUCKET}/${PROJECT_NAME}/output/}"
41
52
 
42
53
  echo "🚀 Deploying to AWS"
43
54
  echo " Project: ${PROJECT_NAME}"
@@ -135,16 +146,31 @@ source "${SCRIPT_DIR}/lib/wait.sh"
135
146
  # Resolve container secrets (HF_TOKEN, NGC_API_KEY)
136
147
  resolve_secrets
137
148
 
149
+ <% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
150
+ # ============================================================
151
+ # Inject server environment variables into container Environment
152
+ # ============================================================
153
+ <% Object.keys(serverEnvVars).forEach(function(key) { %>
154
+ if [ -n "${<%= key %>:-}" ]; then
155
+ if [ -n "${CONTAINER_ENV_JSON}" ]; then
156
+ CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"<%= key %>\":\"${<%= key %>}\""
157
+ else
158
+ CONTAINER_ENV_JSON="\"<%= key %>\":\"${<%= key %>}\""
159
+ fi
160
+ fi
161
+ <% }); %>
162
+ <% } %>
163
+
138
164
  # Validate execution role ARN
139
165
  if [ -z "${ROLE_ARN:-}" ]; then
140
- echo "❌ Execution role ARN not provided"
166
+ echo "❌ ROLE_ARN is not set."
167
+ echo " Run 'ml-container-creator bootstrap' to configure your profile,"
168
+ echo " or set ROLE_ARN as an environment variable."
141
169
  echo ""
142
170
  echo "Usage:"
143
171
  echo " export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
144
172
  echo " ./do/deploy"
145
173
  echo ""
146
- echo "Or set ROLE_ARN in do/config"
147
- echo ""
148
174
  echo "The execution role must have permissions for:"
149
175
  echo " • SageMaker model and transform job management"
150
176
  echo " • ECR image access"
@@ -38,6 +38,10 @@ done
38
38
  # Source configuration
39
39
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
40
40
  source "${SCRIPT_DIR}/config"
41
+ source "${SCRIPT_DIR}/lib/profile.sh"
42
+
43
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
44
+ export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
41
45
 
42
46
  echo "🚀 Deploying to AWS"
43
47
  echo " Project: ${PROJECT_NAME}"