@aws/ml-container-creator 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,11 @@ export DEPLOYMENT_CONFIG="<%= deploymentConfig %>"
10
10
  export FRAMEWORK="<%= framework %>"
11
11
  export MODEL_SERVER="<%= modelServer %>"
12
12
 
13
+ <% if (typeof enableLora !== 'undefined' && enableLora) { %>
14
+ # LoRA adapter serving
15
+ export ENABLE_LORA=true
16
+ <% } %>
17
+
13
18
  # AWS configuration
14
19
  export AWS_REGION="<%= awsRegion %>"
15
20
  export ECR_REPOSITORY_NAME="ml-container-creator"
@@ -26,14 +31,27 @@ export DEPLOYMENT_TARGET="<%= deploymentTarget %>"
26
31
 
27
32
  <% if (deploymentTarget === 'realtime-inference') { %>
28
33
  # SageMaker Real-Time Inference configuration
34
+ <% if (typeof existingEndpointName !== 'undefined' && existingEndpointName) { %>
35
+ # External endpoint — attaching IC to an existing running endpoint
36
+ export ENDPOINT_NAME="<%= existingEndpointName %>"
37
+ export ENDPOINT_EXTERNAL=true
38
+ <% } else { %>
29
39
  export INSTANCE_TYPE="<%= instanceType %>"
40
+ <% if (typeof instancePools !== 'undefined' && instancePools && instancePools.length > 1) { %>
41
+ # Instance pools: heterogeneous instance types with priority-based fallback
42
+ # Priority = selection order (1 = preferred, higher = fallback)
43
+ export INSTANCE_POOLS='<%- JSON.stringify(instancePools) %>'
44
+ <% } %>
30
45
  <% if (inferenceAmiVersion) { %>
31
46
  export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
32
47
  <% } %>
33
48
  <% if (typeof capacityReservationArn !== 'undefined' && capacityReservationArn) { %>
49
+ # Note: Capacity reservations and instance pools (INSTANCE_POOLS) are mutually exclusive.
50
+ # If both are set, the capacity reservation takes precedence and INSTANCE_POOLS is ignored.
34
51
  export CAPACITY_RESERVATION_ARN="<%= capacityReservationArn %>"
35
52
  <% } %>
36
53
  <% } %>
54
+ <% } %>
37
55
 
38
56
  <% if (deploymentTarget === 'async-inference') { %>
39
57
  # SageMaker Async Inference configuration
@@ -222,7 +240,7 @@ export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage || '' %>}
222
240
 
223
241
  # Allow environment variable overrides
224
242
  export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
225
- <% if (deploymentTarget === 'realtime-inference' || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
243
+ <% if ((deploymentTarget === 'realtime-inference' && !(typeof existingEndpointName !== 'undefined' && existingEndpointName)) || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
226
244
  export INSTANCE_TYPE=${INSTANCE_TYPE:-<%= instanceType %>}
227
245
  <% } %>
228
246
  export ECR_REPOSITORY_NAME=${ECR_REPOSITORY_NAME:-ml-container-creator}
@@ -277,7 +295,11 @@ echo " Model env vars: <%= Object.keys(modelEnvVars).length %>"
277
295
  echo " Server env vars: <%= Object.keys(serverEnvVars).length %>"
278
296
  <% } %>
279
297
  <% if (deploymentTarget === 'realtime-inference') { %>
298
+ <% if (typeof existingEndpointName !== 'undefined' && existingEndpointName) { %>
299
+ echo " Endpoint: ${ENDPOINT_NAME} (external)"
300
+ <% } else { %>
280
301
  echo " Instance: ${INSTANCE_TYPE}"
302
+ <% } %>
281
303
  <% } else if (deploymentTarget === 'async-inference') { %>
282
304
  echo " Instance: ${INSTANCE_TYPE}"
283
305
  echo " S3 output: ${ASYNC_S3_OUTPUT_PATH}"