@aws/ml-container-creator 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +5 -2
- package/config/bootstrap-stack.json +40 -9
- package/infra/ci-harness/buildspec.yml +60 -0
- package/infra/ci-harness/package-lock.json +5 -1
- package/package.json +1 -1
- package/servers/README.md +41 -1
- package/servers/instance-sizer/index.js +10 -4
- package/servers/instance-sizer/lib/model-resolver.js +1 -1
- package/servers/lib/catalogs/model-sizes.json +135 -90
- package/servers/lib/catalogs/models.json +483 -411
- package/src/app.js +33 -2
- package/src/lib/bootstrap-command-handler.js +6 -0
- package/src/lib/cli-handler.js +1 -1
- package/src/lib/config-manager.js +41 -2
- package/src/lib/deployment-entry-schema.js +16 -0
- package/src/lib/mcp-client.js +3 -3
- package/src/lib/prompt-runner.js +179 -8
- package/src/lib/prompts.js +253 -7
- package/src/lib/registry-command-handler.js +12 -0
- package/templates/Dockerfile +12 -0
- package/templates/code/serving.properties +14 -0
- package/templates/do/adapter +1230 -0
- package/templates/do/adapters/.gitkeep +2 -0
- package/templates/do/add-ic +130 -0
- package/templates/do/benchmark +81 -9
- package/templates/do/clean +507 -17
- package/templates/do/config +28 -5
- package/templates/do/deploy +513 -367
- package/templates/do/ic/default.conf +32 -0
- package/templates/do/lib/endpoint-config.sh +216 -0
- package/templates/do/lib/inference-component.sh +167 -0
- package/templates/do/lib/secrets.sh +44 -0
- package/templates/do/lib/wait.sh +131 -0
- package/templates/do/logs +107 -27
- package/templates/do/optimize +528 -0
- package/templates/do/register +111 -1
- package/templates/do/status +337 -0
- package/templates/do/test +80 -28
package/templates/do/config
CHANGED
|
@@ -10,6 +10,12 @@ export DEPLOYMENT_CONFIG="<%= deploymentConfig %>"
|
|
|
10
10
|
export FRAMEWORK="<%= framework %>"
|
|
11
11
|
export MODEL_SERVER="<%= modelServer %>"
|
|
12
12
|
|
|
13
|
+
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
14
|
+
# LoRA adapter serving
|
|
15
|
+
export ENABLE_LORA=true
|
|
16
|
+
export ADAPTER_S3_BUCKET="mlcc-adapters-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
|
|
17
|
+
<% } %>
|
|
18
|
+
|
|
13
19
|
# AWS configuration
|
|
14
20
|
export AWS_REGION="<%= awsRegion %>"
|
|
15
21
|
export ECR_REPOSITORY_NAME="ml-container-creator"
|
|
@@ -26,14 +32,27 @@ export DEPLOYMENT_TARGET="<%= deploymentTarget %>"
|
|
|
26
32
|
|
|
27
33
|
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
28
34
|
# SageMaker Real-Time Inference configuration
|
|
35
|
+
<% if (typeof existingEndpointName !== 'undefined' && existingEndpointName) { %>
|
|
36
|
+
# External endpoint — attaching an inference component (IC) to an existing running endpoint
|
|
37
|
+
export ENDPOINT_NAME="<%= existingEndpointName %>"
|
|
38
|
+
export ENDPOINT_EXTERNAL=true
|
|
39
|
+
<% } else { %>
|
|
29
40
|
export INSTANCE_TYPE="<%= instanceType %>"
|
|
41
|
+
<% if (typeof instancePools !== 'undefined' && instancePools && instancePools.length > 1) { %>
|
|
42
|
+
# Instance pools: heterogeneous instance types with priority-based fallback
|
|
43
|
+
# Priority = selection order (1 = preferred, higher = fallback)
|
|
44
|
+
export INSTANCE_POOLS='<%= JSON.stringify(instancePools) %>'
|
|
45
|
+
<% } %>
|
|
30
46
|
<% if (inferenceAmiVersion) { %>
|
|
31
47
|
export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
|
|
32
48
|
<% } %>
|
|
33
49
|
<% if (typeof capacityReservationArn !== 'undefined' && capacityReservationArn) { %>
|
|
50
|
+
# Note: Capacity reservations and instance pools (INSTANCE_POOLS) are mutually exclusive.
|
|
51
|
+
# If both are set, the capacity reservation takes precedence and INSTANCE_POOLS is ignored.
|
|
34
52
|
export CAPACITY_RESERVATION_ARN="<%= capacityReservationArn %>"
|
|
35
53
|
<% } %>
|
|
36
54
|
<% } %>
|
|
55
|
+
<% } %>
|
|
37
56
|
|
|
38
57
|
<% if (deploymentTarget === 'async-inference') { %>
|
|
39
58
|
# SageMaker Async Inference configuration
|
|
@@ -49,7 +68,7 @@ ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/nu
|
|
|
49
68
|
<% if (asyncS3OutputPath) { %>
|
|
50
69
|
export ASYNC_S3_OUTPUT_PATH="<%= asyncS3OutputPath %>"
|
|
51
70
|
<% } else { %>
|
|
52
|
-
export ASYNC_S3_OUTPUT_PATH="s3://
|
|
71
|
+
export ASYNC_S3_OUTPUT_PATH="s3://mlcc-async-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
|
|
53
72
|
<% } %>
|
|
54
73
|
|
|
55
74
|
<% if (asyncSnsSuccessTopic) { %>
|
|
@@ -89,12 +108,12 @@ ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/nu
|
|
|
89
108
|
<% if (batchInputPath) { %>
|
|
90
109
|
export BATCH_INPUT_PATH="<%= batchInputPath %>"
|
|
91
110
|
<% } else { %>
|
|
92
|
-
export BATCH_INPUT_PATH="s3://
|
|
111
|
+
export BATCH_INPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/input/"
|
|
93
112
|
<% } %>
|
|
94
113
|
<% if (batchOutputPath) { %>
|
|
95
114
|
export BATCH_OUTPUT_PATH="<%= batchOutputPath %>"
|
|
96
115
|
<% } else { %>
|
|
97
|
-
export BATCH_OUTPUT_PATH="s3://
|
|
116
|
+
export BATCH_OUTPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
|
|
98
117
|
<% } %>
|
|
99
118
|
export BATCH_INSTANCE_COUNT="<%= batchInstanceCount %>"
|
|
100
119
|
export BATCH_SPLIT_TYPE="<%= batchSplitType %>"
|
|
@@ -205,7 +224,7 @@ export BENCHMARK_REQUEST_COUNT=""
|
|
|
205
224
|
<% if (benchmarkS3OutputPath) { %>
|
|
206
225
|
export BENCHMARK_S3_OUTPUT_PATH="<%= benchmarkS3OutputPath %>"
|
|
207
226
|
<% } else { %>
|
|
208
|
-
export BENCHMARK_S3_OUTPUT_PATH="s3://
|
|
227
|
+
export BENCHMARK_S3_OUTPUT_PATH="s3://mlcc-benchmark-$(aws sts get-caller-identity --query Account --output text)-${AWS_REGION}/${PROJECT_NAME}/"
|
|
209
228
|
<% } %>
|
|
210
229
|
export BENCHMARK_JOB_NAME=""
|
|
211
230
|
export BENCHMARK_WORKLOAD_CONFIG_NAME=""
|
|
@@ -222,7 +241,7 @@ export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage || '' %>}
|
|
|
222
241
|
|
|
223
242
|
# Allow environment variable overrides
|
|
224
243
|
export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
|
|
225
|
-
<% if (deploymentTarget === 'realtime-inference' || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
|
|
244
|
+
<% if ((deploymentTarget === 'realtime-inference' && !(typeof existingEndpointName !== 'undefined' && existingEndpointName)) || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
|
|
226
245
|
export INSTANCE_TYPE=${INSTANCE_TYPE:-<%= instanceType %>}
|
|
227
246
|
<% } %>
|
|
228
247
|
export ECR_REPOSITORY_NAME=${ECR_REPOSITORY_NAME:-ml-container-creator}
|
|
@@ -277,7 +296,11 @@ echo " Model env vars: <%= Object.keys(modelEnvVars).length %>"
|
|
|
277
296
|
echo " Server env vars: <%= Object.keys(serverEnvVars).length %>"
|
|
278
297
|
<% } %>
|
|
279
298
|
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
299
|
+
<% if (typeof existingEndpointName !== 'undefined' && existingEndpointName) { %>
|
|
300
|
+
echo " Endpoint: ${ENDPOINT_NAME} (external)"
|
|
301
|
+
<% } else { %>
|
|
280
302
|
echo " Instance: ${INSTANCE_TYPE}"
|
|
303
|
+
<% } %>
|
|
281
304
|
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
282
305
|
echo " Instance: ${INSTANCE_TYPE}"
|
|
283
306
|
echo " S3 output: ${ASYNC_S3_OUTPUT_PATH}"
|