@aws/ml-container-creator 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +5 -2
- package/infra/ci-harness/buildspec.yml +60 -0
- package/package.json +1 -1
- package/servers/README.md +41 -1
- package/servers/instance-sizer/index.js +6 -0
- package/src/app.js +33 -2
- package/src/lib/config-manager.js +40 -1
- package/src/lib/deployment-entry-schema.js +16 -0
- package/src/lib/prompt-runner.js +174 -3
- package/src/lib/prompts.js +222 -2
- package/src/lib/registry-command-handler.js +12 -0
- package/templates/Dockerfile +12 -0
- package/templates/code/serving.properties +14 -0
- package/templates/do/adapter +1214 -0
- package/templates/do/adapters/.gitkeep +2 -0
- package/templates/do/add-ic +130 -0
- package/templates/do/benchmark +81 -9
- package/templates/do/clean +507 -17
- package/templates/do/config +23 -1
- package/templates/do/deploy +513 -367
- package/templates/do/ic/default.conf +32 -0
- package/templates/do/lib/endpoint-config.sh +216 -0
- package/templates/do/lib/inference-component.sh +167 -0
- package/templates/do/lib/secrets.sh +44 -0
- package/templates/do/lib/wait.sh +131 -0
- package/templates/do/logs +107 -27
- package/templates/do/optimize +528 -0
- package/templates/do/register +111 -1
- package/templates/do/status +337 -0
- package/templates/do/test +80 -28
package/templates/do/config
CHANGED
|
@@ -10,6 +10,11 @@ export DEPLOYMENT_CONFIG="<%= deploymentConfig %>"
|
|
|
10
10
|
export FRAMEWORK="<%= framework %>"
|
|
11
11
|
export MODEL_SERVER="<%= modelServer %>"
|
|
12
12
|
|
|
13
|
+
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
14
|
+
# LoRA adapter serving
|
|
15
|
+
export ENABLE_LORA=true
|
|
16
|
+
<% } %>
|
|
17
|
+
|
|
13
18
|
# AWS configuration
|
|
14
19
|
export AWS_REGION="<%= awsRegion %>"
|
|
15
20
|
export ECR_REPOSITORY_NAME="ml-container-creator"
|
|
@@ -26,14 +31,27 @@ export DEPLOYMENT_TARGET="<%= deploymentTarget %>"
|
|
|
26
31
|
|
|
27
32
|
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
28
33
|
# SageMaker Real-Time Inference configuration
|
|
34
|
+
<% if (typeof existingEndpointName !== 'undefined' && existingEndpointName) { %>
|
|
35
|
+
# External endpoint — attaching IC to an existing running endpoint
|
|
36
|
+
export ENDPOINT_NAME="<%= existingEndpointName %>"
|
|
37
|
+
export ENDPOINT_EXTERNAL=true
|
|
38
|
+
<% } else { %>
|
|
29
39
|
export INSTANCE_TYPE="<%= instanceType %>"
|
|
40
|
+
<% if (typeof instancePools !== 'undefined' && instancePools && instancePools.length > 1) { %>
|
|
41
|
+
# Instance pools: heterogeneous instance types with priority-based fallback
|
|
42
|
+
# Priority = selection order (1 = preferred, higher = fallback)
|
|
43
|
+
export INSTANCE_POOLS='<%= JSON.stringify(instancePools) %>'
|
|
44
|
+
<% } %>
|
|
30
45
|
<% if (inferenceAmiVersion) { %>
|
|
31
46
|
export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
|
|
32
47
|
<% } %>
|
|
33
48
|
<% if (typeof capacityReservationArn !== 'undefined' && capacityReservationArn) { %>
|
|
49
|
+
# Note: Capacity reservations and instance pools (INSTANCE_POOLS) are mutually exclusive.
|
|
50
|
+
# If both are set, the capacity reservation takes precedence and INSTANCE_POOLS is ignored.
|
|
34
51
|
export CAPACITY_RESERVATION_ARN="<%= capacityReservationArn %>"
|
|
35
52
|
<% } %>
|
|
36
53
|
<% } %>
|
|
54
|
+
<% } %>
|
|
37
55
|
|
|
38
56
|
<% if (deploymentTarget === 'async-inference') { %>
|
|
39
57
|
# SageMaker Async Inference configuration
|
|
@@ -222,7 +240,7 @@ export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage || '' %>}
|
|
|
222
240
|
|
|
223
241
|
# Allow environment variable overrides
|
|
224
242
|
export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
|
|
225
|
-
<% if (deploymentTarget === 'realtime-inference' || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
|
|
243
|
+
<% if ((deploymentTarget === 'realtime-inference' && !(typeof existingEndpointName !== 'undefined' && existingEndpointName)) || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
|
|
226
244
|
export INSTANCE_TYPE=${INSTANCE_TYPE:-<%= instanceType %>}
|
|
227
245
|
<% } %>
|
|
228
246
|
export ECR_REPOSITORY_NAME=${ECR_REPOSITORY_NAME:-ml-container-creator}
|
|
@@ -277,7 +295,11 @@ echo " Model env vars: <%= Object.keys(modelEnvVars).length %>"
|
|
|
277
295
|
echo " Server env vars: <%= Object.keys(serverEnvVars).length %>"
|
|
278
296
|
<% } %>
|
|
279
297
|
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
298
|
+
<% if (typeof existingEndpointName !== 'undefined' && existingEndpointName) { %>
|
|
299
|
+
echo " Endpoint: ${ENDPOINT_NAME} (external)"
|
|
300
|
+
<% } else { %>
|
|
280
301
|
echo " Instance: ${INSTANCE_TYPE}"
|
|
302
|
+
<% } %>
|
|
281
303
|
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
282
304
|
echo " Instance: ${INSTANCE_TYPE}"
|
|
283
305
|
echo " S3 output: ${ASYNC_S3_OUTPUT_PATH}"
|