@aws/ml-container-creator 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +5 -2
- package/config/bootstrap-stack.json +86 -7
- package/config/defaults.json +1 -1
- package/infra/ci-harness/buildspec.yml +60 -0
- package/package.json +3 -1
- package/servers/README.md +41 -1
- package/servers/instance-sizer/index.js +42 -2
- package/servers/instance-sizer/lib/instance-ranker.js +114 -10
- package/servers/instance-sizer/lib/quota-resolver.js +368 -0
- package/servers/instance-sizer/package.json +2 -0
- package/servers/lib/catalogs/instances.json +527 -12
- package/servers/lib/catalogs/model-servers.json +15 -15
- package/servers/lib/catalogs/model-sizes.json +27 -0
- package/servers/lib/catalogs/models.json +71 -0
- package/servers/lib/schemas/image-catalog.schema.json +9 -1
- package/src/app.js +109 -3
- package/src/lib/bootstrap-command-handler.js +96 -3
- package/src/lib/cli-handler.js +2 -2
- package/src/lib/config-manager.js +117 -1
- package/src/lib/deployment-entry-schema.js +16 -0
- package/src/lib/prompt-runner.js +270 -12
- package/src/lib/prompts.js +288 -6
- package/src/lib/registry-command-handler.js +12 -0
- package/src/lib/schema-sync.js +31 -0
- package/src/lib/template-manager.js +49 -1
- package/src/lib/validate-runner.js +125 -2
- package/templates/Dockerfile +22 -2
- package/templates/code/cuda_compat.sh +22 -0
- package/templates/code/serve +3 -0
- package/templates/code/serving.properties +14 -0
- package/templates/code/start_server.sh +3 -0
- package/templates/diffusors/Dockerfile +2 -1
- package/templates/diffusors/serve +3 -0
- package/templates/do/README.md +33 -0
- package/templates/do/adapter +1214 -0
- package/templates/do/adapters/.gitkeep +2 -0
- package/templates/do/add-ic +130 -0
- package/templates/do/benchmark +718 -0
- package/templates/do/clean +593 -17
- package/templates/do/config +49 -4
- package/templates/do/deploy +513 -362
- package/templates/do/ic/default.conf +32 -0
- package/templates/do/lib/endpoint-config.sh +216 -0
- package/templates/do/lib/inference-component.sh +167 -0
- package/templates/do/lib/secrets.sh +44 -0
- package/templates/do/lib/wait.sh +131 -0
- package/templates/do/logs +107 -27
- package/templates/do/optimize +528 -0
- package/templates/do/register +119 -2
- package/templates/do/status +337 -0
- package/templates/do/test +80 -28
- package/templates/triton/Dockerfile +5 -0
package/templates/do/config
CHANGED
|
@@ -10,6 +10,11 @@ export DEPLOYMENT_CONFIG="<%= deploymentConfig %>"
|
|
|
10
10
|
export FRAMEWORK="<%= framework %>"
|
|
11
11
|
export MODEL_SERVER="<%= modelServer %>"
|
|
12
12
|
|
|
13
|
+
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
14
|
+
# LoRA adapter serving
|
|
15
|
+
export ENABLE_LORA=true
|
|
16
|
+
<% } %>
|
|
17
|
+
|
|
13
18
|
# AWS configuration
|
|
14
19
|
export AWS_REGION="<%= awsRegion %>"
|
|
15
20
|
export ECR_REPOSITORY_NAME="ml-container-creator"
|
|
@@ -26,10 +31,26 @@ export DEPLOYMENT_TARGET="<%= deploymentTarget %>"
|
|
|
26
31
|
|
|
27
32
|
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
28
33
|
# SageMaker Real-Time Inference configuration
|
|
34
|
+
<% if (typeof existingEndpointName !== 'undefined' && existingEndpointName) { %>
|
|
35
|
+
# External endpoint — attaching IC to an existing running endpoint
|
|
36
|
+
export ENDPOINT_NAME="<%= existingEndpointName %>"
|
|
37
|
+
export ENDPOINT_EXTERNAL=true
|
|
38
|
+
<% } else { %>
|
|
29
39
|
export INSTANCE_TYPE="<%= instanceType %>"
|
|
40
|
+
<% if (typeof instancePools !== 'undefined' && instancePools && instancePools.length > 1) { %>
|
|
41
|
+
# Instance pools: heterogeneous instance types with priority-based fallback
|
|
42
|
+
# Priority = selection order (1 = preferred, higher = fallback)
|
|
43
|
+
export INSTANCE_POOLS='<%= JSON.stringify(instancePools) %>'
|
|
44
|
+
<% } %>
|
|
30
45
|
<% if (inferenceAmiVersion) { %>
|
|
31
46
|
export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
|
|
32
47
|
<% } %>
|
|
48
|
+
<% if (typeof capacityReservationArn !== 'undefined' && capacityReservationArn) { %>
|
|
49
|
+
# Note: Capacity reservations and instance pools (INSTANCE_POOLS) are mutually exclusive.
|
|
50
|
+
# If both are set, the capacity reservation takes precedence and INSTANCE_POOLS is ignored.
|
|
51
|
+
export CAPACITY_RESERVATION_ARN="<%= capacityReservationArn %>"
|
|
52
|
+
<% } %>
|
|
53
|
+
<% } %>
|
|
33
54
|
<% } %>
|
|
34
55
|
|
|
35
56
|
<% if (deploymentTarget === 'async-inference') { %>
|
|
@@ -126,6 +147,8 @@ export IC_MEMORY_SIZE="<%= icMemorySize %>"
|
|
|
126
147
|
<% } %>
|
|
127
148
|
<% if (typeof icGpuCount !== 'undefined' && icGpuCount != null) { %>
|
|
128
149
|
export IC_GPU_COUNT="<%= icGpuCount %>"
|
|
150
|
+
<% } else { %>
|
|
151
|
+
export IC_GPU_COUNT="${IC_GPU_COUNT:-1}"
|
|
129
152
|
<% } %>
|
|
130
153
|
<% if (typeof icCopyCount !== 'undefined' && icCopyCount != null) { %>
|
|
131
154
|
export IC_COPY_COUNT="<%= icCopyCount %>"
|
|
@@ -186,6 +209,26 @@ export MODEL_FORMAT="<%= modelFormat %>"
|
|
|
186
209
|
export ROLE_ARN="<%= roleArn %>"
|
|
187
210
|
<% } %>
|
|
188
211
|
|
|
212
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
213
|
+
# SageMaker AI Benchmarking configuration
|
|
214
|
+
export BENCHMARK_CONCURRENCY="<%= benchmarkConcurrency %>"
|
|
215
|
+
export BENCHMARK_INPUT_TOKENS_MEAN="<%= benchmarkInputTokensMean %>"
|
|
216
|
+
export BENCHMARK_OUTPUT_TOKENS_MEAN="<%= benchmarkOutputTokensMean %>"
|
|
217
|
+
export BENCHMARK_STREAMING="<%= benchmarkStreaming %>"
|
|
218
|
+
<% if (benchmarkRequestCount) { %>
|
|
219
|
+
export BENCHMARK_REQUEST_COUNT="<%= benchmarkRequestCount %>"
|
|
220
|
+
<% } else { %>
|
|
221
|
+
export BENCHMARK_REQUEST_COUNT=""
|
|
222
|
+
<% } %>
|
|
223
|
+
<% if (benchmarkS3OutputPath) { %>
|
|
224
|
+
export BENCHMARK_S3_OUTPUT_PATH="<%= benchmarkS3OutputPath %>"
|
|
225
|
+
<% } else { %>
|
|
226
|
+
export BENCHMARK_S3_OUTPUT_PATH="s3://ml-container-creator-benchmark-${AWS_REGION}-$(aws sts get-caller-identity --query Account --output text)/${PROJECT_NAME}/"
|
|
227
|
+
<% } %>
|
|
228
|
+
export BENCHMARK_JOB_NAME=""
|
|
229
|
+
export BENCHMARK_WORKLOAD_CONFIG_NAME=""
|
|
230
|
+
<% } %>
|
|
231
|
+
|
|
189
232
|
<% if (orderedEnvVars && orderedEnvVars.length > 0) { %>
|
|
190
233
|
# Runtime environment variables (from catalog)
|
|
191
234
|
<% orderedEnvVars.forEach(({ key, value }) => { %>
|
|
@@ -193,13 +236,11 @@ export <%= key %>=${<%= key %>:-<%= value %>}
|
|
|
193
236
|
<% }); %>
|
|
194
237
|
<% } %>
|
|
195
238
|
|
|
196
|
-
|
|
197
|
-
export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage %>}
|
|
198
|
-
<% } %>
|
|
239
|
+
export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage || '' %>}
|
|
199
240
|
|
|
200
241
|
# Allow environment variable overrides
|
|
201
242
|
export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
|
|
202
|
-
<% if (deploymentTarget === 'realtime-inference' || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
|
|
243
|
+
<% if ((deploymentTarget === 'realtime-inference' && !(typeof existingEndpointName !== 'undefined' && existingEndpointName)) || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
|
|
203
244
|
export INSTANCE_TYPE=${INSTANCE_TYPE:-<%= instanceType %>}
|
|
204
245
|
<% } %>
|
|
205
246
|
export ECR_REPOSITORY_NAME=${ECR_REPOSITORY_NAME:-ml-container-creator}
|
|
@@ -254,7 +295,11 @@ echo " Model env vars: <%= Object.keys(modelEnvVars).length %>"
|
|
|
254
295
|
echo " Server env vars: <%= Object.keys(serverEnvVars).length %>"
|
|
255
296
|
<% } %>
|
|
256
297
|
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
298
|
+
<% if (typeof existingEndpointName !== 'undefined' && existingEndpointName) { %>
|
|
299
|
+
echo " Endpoint: ${ENDPOINT_NAME} (external)"
|
|
300
|
+
<% } else { %>
|
|
257
301
|
echo " Instance: ${INSTANCE_TYPE}"
|
|
302
|
+
<% } %>
|
|
258
303
|
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
259
304
|
echo " Instance: ${INSTANCE_TYPE}"
|
|
260
305
|
echo " S3 output: ${ASYNC_S3_OUTPUT_PATH}"
|