@aws/ml-container-creator 0.10.0 → 0.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE-THIRD-PARTY +9304 -0
- package/bin/cli.js +2 -0
- package/config/bootstrap-e2e-stack.json +341 -0
- package/config/bootstrap-stack.json +40 -3
- package/config/parameter-schema-v2.json +33 -22
- package/config/tune-catalog.json +1781 -0
- package/infra/ci-harness/buildspec.yml +1 -0
- package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
- package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
- package/infra/ci-harness/lib/ci-harness-stack.ts +851 -7
- package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
- package/package.json +53 -67
- package/servers/base-image-picker/index.js +121 -121
- package/servers/e2e-status/index.js +297 -0
- package/servers/e2e-status/manifest.json +14 -0
- package/servers/e2e-status/package.json +15 -0
- package/servers/endpoint-picker/LICENSE +202 -0
- package/servers/endpoint-picker/index.js +536 -0
- package/servers/endpoint-picker/manifest.json +14 -0
- package/servers/endpoint-picker/package.json +18 -0
- package/servers/hyperpod-cluster-picker/index.js +125 -125
- package/servers/instance-sizer/index.js +166 -153
- package/servers/instance-sizer/lib/instance-ranker.js +120 -76
- package/servers/instance-sizer/lib/model-resolver.js +61 -61
- package/servers/instance-sizer/lib/quota-resolver.js +113 -113
- package/servers/instance-sizer/lib/vram-estimator.js +31 -31
- package/servers/lib/bedrock-client.js +38 -38
- package/servers/lib/catalogs/instances.json +27 -0
- package/servers/lib/catalogs/model-servers.json +201 -3
- package/servers/lib/custom-validators.js +13 -13
- package/servers/lib/dynamic-resolver.js +4 -4
- package/servers/marketplace-picker/index.js +342 -0
- package/servers/marketplace-picker/manifest.json +14 -0
- package/servers/marketplace-picker/package.json +18 -0
- package/servers/model-picker/index.js +382 -382
- package/servers/region-picker/index.js +56 -56
- package/servers/workload-picker/LICENSE +202 -0
- package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
- package/servers/workload-picker/index.js +171 -0
- package/servers/workload-picker/manifest.json +16 -0
- package/servers/workload-picker/package.json +16 -0
- package/src/app.js +12 -3
- package/src/lib/bootstrap-command-handler.js +609 -15
- package/src/lib/bootstrap-config.js +36 -0
- package/src/lib/bootstrap-profile-manager.js +48 -41
- package/src/lib/ci-register-helpers.js +74 -0
- package/src/lib/config-loader.js +3 -0
- package/src/lib/config-manager.js +7 -0
- package/src/lib/config-validator.js +1 -1
- package/src/lib/cuda-resolver.js +17 -8
- package/src/lib/generated/cli-options.js +319 -314
- package/src/lib/generated/parameter-matrix.js +672 -661
- package/src/lib/generated/validation-rules.js +76 -72
- package/src/lib/path-prover-brain.js +664 -0
- package/src/lib/prompts/infrastructure-prompts.js +2 -2
- package/src/lib/prompts/model-prompts.js +6 -0
- package/src/lib/prompts/project-prompts.js +12 -0
- package/src/lib/secrets-prompt-runner.js +4 -0
- package/src/lib/template-manager.js +1 -1
- package/src/lib/template-variable-resolver.js +87 -1
- package/src/lib/tune-catalog-validator.js +37 -4
- package/templates/Dockerfile +9 -0
- package/templates/code/adapter_sidecar.py +444 -0
- package/templates/code/serve +6 -0
- package/templates/code/serve.d/vllm.ejs +1 -1
- package/templates/do/.benchmark_writer.py +1476 -0
- package/templates/do/.tune_helper.py +982 -57
- package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
- package/templates/do/adapter +154 -0
- package/templates/do/benchmark +639 -85
- package/templates/do/build +5 -0
- package/templates/do/clean.d/async-inference.ejs +5 -0
- package/templates/do/clean.d/batch-transform.ejs +5 -0
- package/templates/do/clean.d/hyperpod-eks.ejs +5 -0
- package/templates/do/clean.d/managed-inference.ejs +5 -0
- package/templates/do/config +115 -45
- package/templates/do/deploy.d/async-inference.ejs +30 -3
- package/templates/do/deploy.d/batch-transform.ejs +29 -3
- package/templates/do/deploy.d/hyperpod-eks.ejs +4 -0
- package/templates/do/deploy.d/managed-inference.ejs +216 -14
- package/templates/do/lib/endpoint-config.sh +1 -1
- package/templates/do/lib/profile.sh +44 -0
- package/templates/do/optimize +106 -37
- package/templates/do/push +5 -0
- package/templates/do/register +94 -0
- package/templates/do/stage +567 -0
- package/templates/do/submit +7 -0
- package/templates/do/test +14 -0
- package/templates/do/tune +382 -59
- package/templates/do/validate +44 -4
package/templates/do/build
CHANGED
|
@@ -9,6 +9,11 @@ set -o pipefail
|
|
|
9
9
|
# Source configuration
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
source "${SCRIPT_DIR}/config"
|
|
12
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
|
+
|
|
14
|
+
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
15
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
16
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
12
17
|
|
|
13
18
|
echo "🚀 Building Docker image for ${PROJECT_NAME}"
|
|
14
19
|
echo " Deployment config: ${DEPLOYMENT_CONFIG}"
|
|
@@ -9,6 +9,11 @@ set -o pipefail
|
|
|
9
9
|
# Source configuration
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
source "${SCRIPT_DIR}/config"
|
|
12
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
|
+
|
|
14
|
+
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
15
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
16
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
12
17
|
|
|
13
18
|
# Parse arguments
|
|
14
19
|
CLEANUP_TARGET=""
|
|
@@ -9,6 +9,11 @@ set -o pipefail
|
|
|
9
9
|
# Source configuration
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
source "${SCRIPT_DIR}/config"
|
|
12
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
|
+
|
|
14
|
+
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
15
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
16
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
12
17
|
|
|
13
18
|
# Parse arguments
|
|
14
19
|
CLEANUP_TARGET=""
|
|
@@ -9,6 +9,11 @@ set -o pipefail
|
|
|
9
9
|
# Source configuration
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
source "${SCRIPT_DIR}/config"
|
|
12
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
|
+
|
|
14
|
+
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
15
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
16
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
12
17
|
|
|
13
18
|
# Parse arguments
|
|
14
19
|
CLEANUP_TARGET=""
|
|
@@ -9,6 +9,11 @@ set -o pipefail
|
|
|
9
9
|
# Source configuration
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
source "${SCRIPT_DIR}/config"
|
|
12
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
|
+
|
|
14
|
+
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
15
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
16
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
12
17
|
|
|
13
18
|
# Parse arguments
|
|
14
19
|
CLEANUP_TARGET=""
|
package/templates/do/config
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#!/bin/bash
|
|
2
2
|
# do-framework configuration
|
|
3
3
|
# This file is sourced by all do scripts
|
|
4
|
+
# Generated: <%= new Date().toISOString() %>
|
|
4
5
|
|
|
5
6
|
# Project identification
|
|
6
7
|
export PROJECT_NAME="<%= projectName %>"
|
|
@@ -10,21 +11,25 @@ export DEPLOYMENT_CONFIG="<%= deploymentConfig %>"
|
|
|
10
11
|
export FRAMEWORK="<%= framework %>"
|
|
11
12
|
export MODEL_SERVER="<%= modelServer %>"
|
|
12
13
|
|
|
14
|
+
# AWS configuration
|
|
15
|
+
export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
|
|
16
|
+
|
|
17
|
+
# ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
|
|
18
|
+
# ECR_REPOSITORY_NAME, ROLE_ARN, ADAPTER_S3_BUCKET — see do/lib/profile.sh
|
|
19
|
+
|
|
13
20
|
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
14
21
|
# LoRA adapter serving
|
|
15
22
|
export ENABLE_LORA=true
|
|
16
|
-
|
|
23
|
+
<% } else if (framework === 'transformers' || framework === 'diffusors') { %>
|
|
24
|
+
# LoRA adapter serving (uncomment to enable)
|
|
25
|
+
# export ENABLE_LORA=true
|
|
17
26
|
<% } %>
|
|
18
27
|
|
|
19
|
-
# AWS configuration
|
|
20
|
-
export AWS_REGION="<%= awsRegion %>"
|
|
21
|
-
export ECR_REPOSITORY_NAME="ml-container-creator"
|
|
22
|
-
|
|
23
28
|
# Build configuration — WHERE the Docker image gets built
|
|
24
29
|
export BUILD_TARGET="<%= buildTarget %>"
|
|
25
30
|
<% if (buildTarget === 'codebuild') { %>
|
|
26
31
|
export CODEBUILD_COMPUTE_TYPE="<%= codebuildComputeType %>"
|
|
27
|
-
|
|
32
|
+
# CODEBUILD_PROJECT_NAME — derived in do/submit at runtime
|
|
28
33
|
<% } %>
|
|
29
34
|
|
|
30
35
|
# Deployment configuration — WHERE the model runs
|
|
@@ -42,14 +47,27 @@ export INSTANCE_TYPE="<%= instanceType %>"
|
|
|
42
47
|
# Instance pools: heterogeneous instance types with priority-based fallback
|
|
43
48
|
# Priority = selection order (1 = preferred, higher = fallback)
|
|
44
49
|
export INSTANCE_POOLS='<%= JSON.stringify(instancePools) %>'
|
|
50
|
+
<% } else { %>
|
|
51
|
+
# Instance pools: heterogeneous instance types with priority-based fallback (uncomment to enable)
|
|
52
|
+
# Format: [{"InstanceType":"ml.g6e.48xlarge","Priority":1},{"InstanceType":"ml.g5.48xlarge","Priority":2}]
|
|
53
|
+
# export INSTANCE_POOLS='[]'
|
|
45
54
|
<% } %>
|
|
46
55
|
<% if (inferenceAmiVersion) { %>
|
|
47
56
|
export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
|
|
57
|
+
<% } else { %>
|
|
58
|
+
# Inference AMI version — auto-resolved from CUDA version (uncomment to override)
|
|
59
|
+
# Valid: al2-ami-sagemaker-inference-gpu-2, al2-ami-sagemaker-inference-gpu-2-1,
|
|
60
|
+
# al2-ami-sagemaker-inference-gpu-3-1, al2023-ami-sagemaker-inference-gpu-4-1
|
|
61
|
+
# export INFERENCE_AMI_VERSION=""
|
|
48
62
|
<% } %>
|
|
49
63
|
<% if (typeof capacityReservationArn !== 'undefined' && capacityReservationArn) { %>
|
|
50
64
|
# Note: Capacity reservations and instance pools (INSTANCE_POOLS) are mutually exclusive.
|
|
51
65
|
# If both are set, the capacity reservation takes precedence and INSTANCE_POOLS is ignored.
|
|
52
66
|
export CAPACITY_RESERVATION_ARN="<%= capacityReservationArn %>"
|
|
67
|
+
<% } else { %>
|
|
68
|
+
# Capacity reservation (uncomment to use reserved capacity)
|
|
69
|
+
# Note: Mutually exclusive with INSTANCE_POOLS — reservation takes precedence.
|
|
70
|
+
# export CAPACITY_RESERVATION_ARN=""
|
|
53
71
|
<% } %>
|
|
54
72
|
<% } %>
|
|
55
73
|
<% } %>
|
|
@@ -59,32 +77,21 @@ export CAPACITY_RESERVATION_ARN="<%= capacityReservationArn %>"
|
|
|
59
77
|
export INSTANCE_TYPE="<%= instanceType %>"
|
|
60
78
|
<% if (inferenceAmiVersion) { %>
|
|
61
79
|
export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
|
|
62
|
-
<% } %>
|
|
63
|
-
|
|
64
|
-
# Async-specific configuration
|
|
65
|
-
# Resolve AWS account ID at runtime for default resource names
|
|
66
|
-
ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "UNKNOWN")
|
|
67
|
-
|
|
68
|
-
<% if (asyncS3OutputPath) { %>
|
|
69
|
-
export ASYNC_S3_OUTPUT_PATH="<%= asyncS3OutputPath %>"
|
|
70
|
-
<% } else { %>
|
|
71
|
-
export ASYNC_S3_OUTPUT_PATH="s3://mlcc-async-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
|
|
72
|
-
<% } %>
|
|
73
|
-
|
|
74
|
-
<% if (asyncSnsSuccessTopic) { %>
|
|
75
|
-
export ASYNC_SNS_SUCCESS_TOPIC="<%= asyncSnsSuccessTopic %>"
|
|
76
80
|
<% } else { %>
|
|
77
|
-
|
|
81
|
+
# Inference AMI version — auto-resolved from CUDA version (uncomment to override)
|
|
82
|
+
# export INFERENCE_AMI_VERSION=""
|
|
78
83
|
<% } %>
|
|
79
84
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
<% } %>
|
|
85
|
+
# Async-specific configuration
|
|
86
|
+
# ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
|
|
87
|
+
# ASYNC_S3_OUTPUT_PATH, ASYNC_SNS_SUCCESS_TOPIC, ASYNC_SNS_ERROR_TOPIC — see do/lib/profile.sh
|
|
88
|
+
# ACCOUNT_ID — derived inline in consuming scripts (do/deploy.d/async-inference)
|
|
85
89
|
|
|
86
90
|
<% if (asyncMaxConcurrentInvocations) { %>
|
|
87
91
|
export ASYNC_MAX_CONCURRENT_INVOCATIONS="<%= asyncMaxConcurrentInvocations %>"
|
|
92
|
+
<% } else { %>
|
|
93
|
+
# Max concurrent invocations per instance (uncomment to set)
|
|
94
|
+
# export ASYNC_MAX_CONCURRENT_INVOCATIONS=""
|
|
88
95
|
<% } %>
|
|
89
96
|
<% } %>
|
|
90
97
|
|
|
@@ -95,6 +102,9 @@ export HYPERPOD_NAMESPACE="<%= hyperPodNamespace %>"
|
|
|
95
102
|
export HYPERPOD_REPLICAS="<%= hyperPodReplicas %>"
|
|
96
103
|
<% if (fsxVolumeHandle) { %>
|
|
97
104
|
export FSX_VOLUME_HANDLE="<%= fsxVolumeHandle %>"
|
|
105
|
+
<% } else { %>
|
|
106
|
+
# FSx for Lustre volume for shared model storage (uncomment to enable)
|
|
107
|
+
# export FSX_VOLUME_HANDLE=""
|
|
98
108
|
<% } %>
|
|
99
109
|
<% } %>
|
|
100
110
|
|
|
@@ -102,28 +112,25 @@ export FSX_VOLUME_HANDLE="<%= fsxVolumeHandle %>"
|
|
|
102
112
|
# SageMaker Batch Transform configuration
|
|
103
113
|
export INSTANCE_TYPE="<%= instanceType %>"
|
|
104
114
|
|
|
105
|
-
#
|
|
106
|
-
|
|
115
|
+
# ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
|
|
116
|
+
# BATCH_INPUT_PATH, BATCH_OUTPUT_PATH — see do/lib/profile.sh
|
|
117
|
+
# ACCOUNT_ID — derived inline in consuming scripts (do/deploy.d/batch-transform)
|
|
107
118
|
|
|
108
|
-
<% if (batchInputPath) { %>
|
|
109
|
-
export BATCH_INPUT_PATH="<%= batchInputPath %>"
|
|
110
|
-
<% } else { %>
|
|
111
|
-
export BATCH_INPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/input/"
|
|
112
|
-
<% } %>
|
|
113
|
-
<% if (batchOutputPath) { %>
|
|
114
|
-
export BATCH_OUTPUT_PATH="<%= batchOutputPath %>"
|
|
115
|
-
<% } else { %>
|
|
116
|
-
export BATCH_OUTPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
|
|
117
|
-
<% } %>
|
|
118
119
|
export BATCH_INSTANCE_COUNT="<%= batchInstanceCount %>"
|
|
119
120
|
export BATCH_SPLIT_TYPE="<%= batchSplitType %>"
|
|
120
121
|
export BATCH_STRATEGY="<%= batchStrategy %>"
|
|
121
122
|
export BATCH_JOIN_SOURCE="<%= batchJoinSource || 'None' %>"
|
|
122
123
|
<% if (batchMaxConcurrentTransforms) { %>
|
|
123
124
|
export BATCH_MAX_CONCURRENT_TRANSFORMS="<%= batchMaxConcurrentTransforms %>"
|
|
125
|
+
<% } else { %>
|
|
126
|
+
# Max concurrent transforms per instance (uncomment to set)
|
|
127
|
+
# export BATCH_MAX_CONCURRENT_TRANSFORMS=""
|
|
124
128
|
<% } %>
|
|
125
129
|
<% if (batchMaxPayloadInMB) { %>
|
|
126
130
|
export BATCH_MAX_PAYLOAD_IN_MB="<%= batchMaxPayloadInMB %>"
|
|
131
|
+
<% } else { %>
|
|
132
|
+
# Max payload size in MB (uncomment to set, default: 6)
|
|
133
|
+
# export BATCH_MAX_PAYLOAD_IN_MB=""
|
|
127
134
|
<% } %>
|
|
128
135
|
<% } %>
|
|
129
136
|
|
|
@@ -140,6 +147,22 @@ export ENDPOINT_VARIANT_NAME="<%= endpointVariantName %>"
|
|
|
140
147
|
export ENDPOINT_VOLUME_SIZE="<%= endpointVolumeSize %>"
|
|
141
148
|
<% } %>
|
|
142
149
|
|
|
150
|
+
<% if (deploymentTarget === 'realtime-inference' || deploymentTarget === 'async-inference') { %>
|
|
151
|
+
# ─── Endpoint overrides (uncomment to customize) ───────────────────────────────
|
|
152
|
+
<% if (typeof endpointInitialInstanceCount === 'undefined' || endpointInitialInstanceCount == null) { %>
|
|
153
|
+
# export ENDPOINT_INITIAL_INSTANCE_COUNT="1" # Number of instances for the endpoint
|
|
154
|
+
<% } %>
|
|
155
|
+
<% if (typeof endpointDataCapturePercent === 'undefined' || endpointDataCapturePercent == null) { %>
|
|
156
|
+
# export ENDPOINT_DATA_CAPTURE_PERCENT="" # Percentage of requests to capture (0-100)
|
|
157
|
+
<% } %>
|
|
158
|
+
<% if (typeof endpointVariantName === 'undefined' || endpointVariantName == null) { %>
|
|
159
|
+
# export ENDPOINT_VARIANT_NAME="" # Custom variant name (default: AllTraffic)
|
|
160
|
+
<% } %>
|
|
161
|
+
<% if (typeof endpointVolumeSize === 'undefined' || endpointVolumeSize == null) { %>
|
|
162
|
+
# export ENDPOINT_VOLUME_SIZE="" # EBS volume size in GB for model download
|
|
163
|
+
<% } %>
|
|
164
|
+
<% } %>
|
|
165
|
+
|
|
143
166
|
<% if (typeof icCpuCount !== 'undefined' && icCpuCount != null) { %>
|
|
144
167
|
export IC_CPU_COUNT="<%= icCpuCount %>"
|
|
145
168
|
<% } %>
|
|
@@ -158,6 +181,22 @@ export IC_COPY_COUNT="<%= icCopyCount %>"
|
|
|
158
181
|
export IC_MODEL_WEIGHT="<%= icModelWeight %>"
|
|
159
182
|
<% } %>
|
|
160
183
|
|
|
184
|
+
<% if (deploymentTarget === 'realtime-inference' || deploymentTarget === 'async-inference') { %>
|
|
185
|
+
# ─── Inference Component overrides (uncomment to customize) ────────────────────
|
|
186
|
+
<% if (typeof icCpuCount === 'undefined' || icCpuCount == null) { %>
|
|
187
|
+
# export IC_CPU_COUNT="" # CPU cores reserved for this IC
|
|
188
|
+
<% } %>
|
|
189
|
+
<% if (typeof icMemorySize === 'undefined' || icMemorySize == null) { %>
|
|
190
|
+
# export IC_MEMORY_SIZE="" # Memory in MB reserved for this IC
|
|
191
|
+
<% } %>
|
|
192
|
+
<% if (typeof icCopyCount === 'undefined' || icCopyCount == null) { %>
|
|
193
|
+
# export IC_COPY_COUNT="" # Number of model copies (multi-IC scaling)
|
|
194
|
+
<% } %>
|
|
195
|
+
<% if (typeof icModelWeight === 'undefined' || icModelWeight == null) { %>
|
|
196
|
+
# export IC_MODEL_WEIGHT="" # Traffic weight for this IC (0-100)
|
|
197
|
+
<% } %>
|
|
198
|
+
<% } %>
|
|
199
|
+
|
|
161
200
|
<% if (typeof modelEnvVars !== 'undefined' && modelEnvVars && Object.keys(modelEnvVars).length > 0) { %>
|
|
162
201
|
# Model environment variables
|
|
163
202
|
<% Object.entries(modelEnvVars).forEach(([key, value]) => { %>
|
|
@@ -191,8 +230,24 @@ export NGC_API_KEY="<%= ngcApiKey %>"
|
|
|
191
230
|
|
|
192
231
|
<% if (deploymentTarget !== 'batch-transform') { %>
|
|
193
232
|
# Managed Model Customization (do/tune)
|
|
233
|
+
# ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
|
|
234
|
+
# TUNE_S3_BUCKET — see do/lib/profile.sh
|
|
194
235
|
export TUNE_SUPPORTED=<%= (typeof tuneSupported !== 'undefined' && tuneSupported) ? 'true' : 'false' %>
|
|
195
|
-
|
|
236
|
+
<% if (typeof tuneSupported !== 'undefined' && tuneSupported) { %>
|
|
237
|
+
<% if (typeof tuneModelId !== 'undefined' && tuneModelId) { %>
|
|
238
|
+
# SageMaker AI Managed Fine-Tuning — JumpStart Hub model ID
|
|
239
|
+
# Flow: JumpStart model (tune) → LoRA adapter (S3) → do/adapter add → vLLM
|
|
240
|
+
export TUNE_MODEL_ID="<%= tuneModelId %>"
|
|
241
|
+
<% } else { %>
|
|
242
|
+
# SageMaker AI Managed Fine-Tuning — JumpStart Hub model ID
|
|
243
|
+
# To find your model's Hub ID:
|
|
244
|
+
# aws sagemaker list-hub-contents --hub-name SageMakerPublicHub \
|
|
245
|
+
# --hub-content-type Model --query "HubContentSummaries[].HubContentName"
|
|
246
|
+
# export TUNE_MODEL_ID=""
|
|
247
|
+
<% } %>
|
|
248
|
+
<% } %>
|
|
249
|
+
# MLflow App ARN for experiment tracking (set by bootstrap, or override manually)
|
|
250
|
+
# export MLFLOW_APP_ARN=""
|
|
196
251
|
<% } %>
|
|
197
252
|
<% } %>
|
|
198
253
|
|
|
@@ -210,10 +265,10 @@ export HF_TOKEN="<%= hfToken %>"
|
|
|
210
265
|
|
|
211
266
|
<% if (modelFormat) { %>
|
|
212
267
|
export MODEL_FORMAT="<%= modelFormat %>"
|
|
213
|
-
<% } %>
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
export
|
|
268
|
+
<% } else { %>
|
|
269
|
+
# Model format (uncomment if using quantized models)
|
|
270
|
+
# Valid: pkl, json, keras, safetensors, gguf, awq, gptq
|
|
271
|
+
# export MODEL_FORMAT=""
|
|
217
272
|
<% } %>
|
|
218
273
|
|
|
219
274
|
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
@@ -234,6 +289,23 @@ export BENCHMARK_S3_OUTPUT_PATH="s3://mlcc-benchmark-$(aws sts get-caller-identi
|
|
|
234
289
|
<% } %>
|
|
235
290
|
export BENCHMARK_JOB_NAME=""
|
|
236
291
|
export BENCHMARK_WORKLOAD_CONFIG_NAME=""
|
|
292
|
+
|
|
293
|
+
# CI Benchmark Athena persistence (set automatically from bootstrap --benchmark-infra)
|
|
294
|
+
<% if (typeof ciBenchmarkResultsBucket !== 'undefined' && ciBenchmarkResultsBucket) { %>
|
|
295
|
+
export CI_BENCHMARK_RESULTS_BUCKET="<%= ciBenchmarkResultsBucket %>"
|
|
296
|
+
<% } else { %>
|
|
297
|
+
# export CI_BENCHMARK_RESULTS_BUCKET="" # S3 bucket for Athena Parquet results (set by bootstrap --benchmark-infra)
|
|
298
|
+
<% } %>
|
|
299
|
+
<% } else if (framework === 'transformers' && deploymentTarget !== 'batch-transform') { %>
|
|
300
|
+
# ─── SageMaker AI Benchmarking (uncomment to enable) ──────────────────────────
|
|
301
|
+
# export BENCHMARK_CONCURRENCY="10" # Concurrent requests
|
|
302
|
+
# export BENCHMARK_INPUT_TOKENS_MEAN="550" # Mean input tokens per request
|
|
303
|
+
# export BENCHMARK_OUTPUT_TOKENS_MEAN="150" # Mean output tokens per request
|
|
304
|
+
# export BENCHMARK_STREAMING="true" # Enable streaming
|
|
305
|
+
# export BENCHMARK_REQUEST_COUNT="" # Total requests (empty = auto)
|
|
306
|
+
# export BENCHMARK_S3_OUTPUT_PATH="" # S3 path for results (empty = auto)
|
|
307
|
+
# export BENCHMARK_JOB_NAME="" # Resume/check existing job
|
|
308
|
+
# export BENCHMARK_WORKLOAD_CONFIG_NAME="" # Reuse existing workload config
|
|
237
309
|
<% } %>
|
|
238
310
|
|
|
239
311
|
<% if (orderedEnvVars && orderedEnvVars.length > 0) { %>
|
|
@@ -246,11 +318,9 @@ export <%= key %>=${<%= key %>:-<%= value %>}
|
|
|
246
318
|
export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage || '' %>}
|
|
247
319
|
|
|
248
320
|
# Allow environment variable overrides
|
|
249
|
-
export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
|
|
250
321
|
<% if ((deploymentTarget === 'realtime-inference' && !(typeof existingEndpointName !== 'undefined' && existingEndpointName)) || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
|
|
251
322
|
export INSTANCE_TYPE=${INSTANCE_TYPE:-<%= instanceType %>}
|
|
252
323
|
<% } %>
|
|
253
|
-
export ECR_REPOSITORY_NAME=${ECR_REPOSITORY_NAME:-ml-container-creator}
|
|
254
324
|
|
|
255
325
|
# Print configuration summary
|
|
256
326
|
echo "⚙️ Configuration loaded"
|
|
@@ -38,6 +38,18 @@ done
|
|
|
38
38
|
# Source configuration
|
|
39
39
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
40
40
|
source "${SCRIPT_DIR}/config"
|
|
41
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
42
|
+
|
|
43
|
+
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
44
|
+
ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
|
|
45
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
46
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
47
|
+
|
|
48
|
+
# Async-specific derived variables
|
|
49
|
+
_ASYNC_BUCKET="${_PROFILE[asyncS3Bucket]:-mlcc-async-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
50
|
+
ASYNC_S3_OUTPUT_PATH="${ASYNC_S3_OUTPUT_PATH:-s3://${_ASYNC_BUCKET}/${PROJECT_NAME}/output/}"
|
|
51
|
+
ASYNC_SNS_SUCCESS_TOPIC="${ASYNC_SNS_SUCCESS_TOPIC:-arn:aws:sns:${_PROFILE[awsRegion]:-us-east-1}:${_PROFILE[accountId]:-unknown}:ml-container-creator-${PROJECT_NAME}-async-success}"
|
|
52
|
+
ASYNC_SNS_ERROR_TOPIC="${ASYNC_SNS_ERROR_TOPIC:-arn:aws:sns:${_PROFILE[awsRegion]:-us-east-1}:${_PROFILE[accountId]:-unknown}:ml-container-creator-${PROJECT_NAME}-async-error}"
|
|
41
53
|
|
|
42
54
|
echo "🚀 Deploying to AWS"
|
|
43
55
|
echo " Project: ${PROJECT_NAME}"
|
|
@@ -137,16 +149,31 @@ source "${SCRIPT_DIR}/lib/wait.sh"
|
|
|
137
149
|
# Resolve container secrets (HF_TOKEN, NGC_API_KEY)
|
|
138
150
|
resolve_secrets
|
|
139
151
|
|
|
152
|
+
<% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
|
|
153
|
+
# ============================================================
|
|
154
|
+
# Inject server environment variables into container Environment
|
|
155
|
+
# ============================================================
|
|
156
|
+
<% Object.keys(serverEnvVars).forEach(function(key) { %>
|
|
157
|
+
if [ -n "${<%= key %>:-}" ]; then
|
|
158
|
+
if [ -n "${CONTAINER_ENV_JSON}" ]; then
|
|
159
|
+
CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"<%= key %>\":\"${<%= key %>}\""
|
|
160
|
+
else
|
|
161
|
+
CONTAINER_ENV_JSON="\"<%= key %>\":\"${<%= key %>}\""
|
|
162
|
+
fi
|
|
163
|
+
fi
|
|
164
|
+
<% }); %>
|
|
165
|
+
<% } %>
|
|
166
|
+
|
|
140
167
|
# Validate execution role ARN
|
|
141
168
|
if [ -z "${ROLE_ARN:-}" ]; then
|
|
142
|
-
echo "❌
|
|
169
|
+
echo "❌ ROLE_ARN is not set."
|
|
170
|
+
echo " Run 'ml-container-creator bootstrap' to configure your profile,"
|
|
171
|
+
echo " or set ROLE_ARN as an environment variable."
|
|
143
172
|
echo ""
|
|
144
173
|
echo "Usage:"
|
|
145
174
|
echo " export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
|
|
146
175
|
echo " ./do/deploy"
|
|
147
176
|
echo ""
|
|
148
|
-
echo "Or set ROLE_ARN in do/config"
|
|
149
|
-
echo ""
|
|
150
177
|
echo "The execution role must have permissions for:"
|
|
151
178
|
echo " • SageMaker model and endpoint management"
|
|
152
179
|
echo " • ECR image access"
|
|
@@ -38,6 +38,17 @@ done
|
|
|
38
38
|
# Source configuration
|
|
39
39
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
40
40
|
source "${SCRIPT_DIR}/config"
|
|
41
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
42
|
+
|
|
43
|
+
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
44
|
+
ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
|
|
45
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
46
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
47
|
+
|
|
48
|
+
# Batch-specific derived variables
|
|
49
|
+
_BATCH_BUCKET="${_PROFILE[batchS3Bucket]:-mlcc-batch-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
50
|
+
BATCH_INPUT_PATH="${BATCH_INPUT_PATH:-s3://${_BATCH_BUCKET}/${PROJECT_NAME}/input/}"
|
|
51
|
+
BATCH_OUTPUT_PATH="${BATCH_OUTPUT_PATH:-s3://${_BATCH_BUCKET}/${PROJECT_NAME}/output/}"
|
|
41
52
|
|
|
42
53
|
echo "🚀 Deploying to AWS"
|
|
43
54
|
echo " Project: ${PROJECT_NAME}"
|
|
@@ -135,16 +146,31 @@ source "${SCRIPT_DIR}/lib/wait.sh"
|
|
|
135
146
|
# Resolve container secrets (HF_TOKEN, NGC_API_KEY)
|
|
136
147
|
resolve_secrets
|
|
137
148
|
|
|
149
|
+
<% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
|
|
150
|
+
# ============================================================
|
|
151
|
+
# Inject server environment variables into container Environment
|
|
152
|
+
# ============================================================
|
|
153
|
+
<% Object.keys(serverEnvVars).forEach(function(key) { %>
|
|
154
|
+
if [ -n "${<%= key %>:-}" ]; then
|
|
155
|
+
if [ -n "${CONTAINER_ENV_JSON}" ]; then
|
|
156
|
+
CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"<%= key %>\":\"${<%= key %>}\""
|
|
157
|
+
else
|
|
158
|
+
CONTAINER_ENV_JSON="\"<%= key %>\":\"${<%= key %>}\""
|
|
159
|
+
fi
|
|
160
|
+
fi
|
|
161
|
+
<% }); %>
|
|
162
|
+
<% } %>
|
|
163
|
+
|
|
138
164
|
# Validate execution role ARN
|
|
139
165
|
if [ -z "${ROLE_ARN:-}" ]; then
|
|
140
|
-
echo "❌
|
|
166
|
+
echo "❌ ROLE_ARN is not set."
|
|
167
|
+
echo " Run 'ml-container-creator bootstrap' to configure your profile,"
|
|
168
|
+
echo " or set ROLE_ARN as an environment variable."
|
|
141
169
|
echo ""
|
|
142
170
|
echo "Usage:"
|
|
143
171
|
echo " export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
|
|
144
172
|
echo " ./do/deploy"
|
|
145
173
|
echo ""
|
|
146
|
-
echo "Or set ROLE_ARN in do/config"
|
|
147
|
-
echo ""
|
|
148
174
|
echo "The execution role must have permissions for:"
|
|
149
175
|
echo " • SageMaker model and transform job management"
|
|
150
176
|
echo " • ECR image access"
|
|
@@ -38,6 +38,10 @@ done
|
|
|
38
38
|
# Source configuration
|
|
39
39
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
40
40
|
source "${SCRIPT_DIR}/config"
|
|
41
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
42
|
+
|
|
43
|
+
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
44
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
41
45
|
|
|
42
46
|
echo "🚀 Deploying to AWS"
|
|
43
47
|
echo " Project: ${PROJECT_NAME}"
|