@aws/ml-container-creator 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/bin/cli.js +5 -2
  2. package/config/bootstrap-stack.json +40 -9
  3. package/infra/ci-harness/buildspec.yml +60 -0
  4. package/infra/ci-harness/package-lock.json +5 -1
  5. package/package.json +1 -1
  6. package/servers/README.md +41 -1
  7. package/servers/instance-sizer/index.js +10 -4
  8. package/servers/instance-sizer/lib/model-resolver.js +1 -1
  9. package/servers/lib/catalogs/model-sizes.json +135 -90
  10. package/servers/lib/catalogs/models.json +483 -411
  11. package/src/app.js +33 -2
  12. package/src/lib/bootstrap-command-handler.js +6 -0
  13. package/src/lib/cli-handler.js +1 -1
  14. package/src/lib/config-manager.js +41 -2
  15. package/src/lib/deployment-entry-schema.js +16 -0
  16. package/src/lib/mcp-client.js +3 -3
  17. package/src/lib/prompt-runner.js +179 -8
  18. package/src/lib/prompts.js +253 -7
  19. package/src/lib/registry-command-handler.js +12 -0
  20. package/templates/Dockerfile +12 -0
  21. package/templates/code/serving.properties +14 -0
  22. package/templates/do/adapter +1230 -0
  23. package/templates/do/adapters/.gitkeep +2 -0
  24. package/templates/do/add-ic +130 -0
  25. package/templates/do/benchmark +81 -9
  26. package/templates/do/clean +507 -17
  27. package/templates/do/config +28 -5
  28. package/templates/do/deploy +513 -367
  29. package/templates/do/ic/default.conf +32 -0
  30. package/templates/do/lib/endpoint-config.sh +216 -0
  31. package/templates/do/lib/inference-component.sh +167 -0
  32. package/templates/do/lib/secrets.sh +44 -0
  33. package/templates/do/lib/wait.sh +131 -0
  34. package/templates/do/logs +107 -27
  35. package/templates/do/optimize +528 -0
  36. package/templates/do/register +111 -1
  37. package/templates/do/status +337 -0
  38. package/templates/do/test +80 -28
@@ -10,6 +10,12 @@ export DEPLOYMENT_CONFIG="<%= deploymentConfig %>"
10
10
  export FRAMEWORK="<%= framework %>"
11
11
  export MODEL_SERVER="<%= modelServer %>"
12
12
 
13
+ <% if (typeof enableLora !== 'undefined' && enableLora) { %>
14
+ # LoRA adapter serving
15
+ export ENABLE_LORA=true
16
+ export ADAPTER_S3_BUCKET="mlcc-adapters-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
17
+ <% } %>
18
+
13
19
  # AWS configuration
14
20
  export AWS_REGION="<%= awsRegion %>"
15
21
  export ECR_REPOSITORY_NAME="ml-container-creator"
@@ -26,14 +32,27 @@ export DEPLOYMENT_TARGET="<%= deploymentTarget %>"
26
32
 
27
33
  <% if (deploymentTarget === 'realtime-inference') { %>
28
34
  # SageMaker Real-Time Inference configuration
35
+ <% if (typeof existingEndpointName !== 'undefined' && existingEndpointName) { %>
36
+ # External endpoint — attaching IC to an existing running endpoint
37
+ export ENDPOINT_NAME="<%= existingEndpointName %>"
38
+ export ENDPOINT_EXTERNAL=true
39
+ <% } else { %>
29
40
  export INSTANCE_TYPE="<%= instanceType %>"
41
+ <% if (typeof instancePools !== 'undefined' && instancePools && instancePools.length > 1) { %>
42
+ # Instance pools: heterogeneous instance types with priority-based fallback
43
+ # Priority = selection order (1 = preferred, higher = fallback)
44
+ export INSTANCE_POOLS='<%= JSON.stringify(instancePools) %>'
45
+ <% } %>
30
46
  <% if (inferenceAmiVersion) { %>
31
47
  export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
32
48
  <% } %>
33
49
  <% if (typeof capacityReservationArn !== 'undefined' && capacityReservationArn) { %>
50
+ # Note: Capacity reservations and instance pools (INSTANCE_POOLS) are mutually exclusive.
51
+ # If both are set, the capacity reservation takes precedence and INSTANCE_POOLS is ignored.
34
52
  export CAPACITY_RESERVATION_ARN="<%= capacityReservationArn %>"
35
53
  <% } %>
36
54
  <% } %>
55
+ <% } %>
37
56
 
38
57
  <% if (deploymentTarget === 'async-inference') { %>
39
58
  # SageMaker Async Inference configuration
@@ -49,7 +68,7 @@ ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/nu
49
68
  <% if (asyncS3OutputPath) { %>
50
69
  export ASYNC_S3_OUTPUT_PATH="<%= asyncS3OutputPath %>"
51
70
  <% } else { %>
52
- export ASYNC_S3_OUTPUT_PATH="s3://ml-container-creator-async-${AWS_REGION}-${ACCOUNT_ID}/${PROJECT_NAME}/output/"
71
+ export ASYNC_S3_OUTPUT_PATH="s3://mlcc-async-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
53
72
  <% } %>
54
73
 
55
74
  <% if (asyncSnsSuccessTopic) { %>
@@ -89,12 +108,12 @@ ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/nu
89
108
  <% if (batchInputPath) { %>
90
109
  export BATCH_INPUT_PATH="<%= batchInputPath %>"
91
110
  <% } else { %>
92
- export BATCH_INPUT_PATH="s3://ml-container-creator-batch-${AWS_REGION}-${ACCOUNT_ID}/${PROJECT_NAME}/input/"
111
+ export BATCH_INPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/input/"
93
112
  <% } %>
94
113
  <% if (batchOutputPath) { %>
95
114
  export BATCH_OUTPUT_PATH="<%= batchOutputPath %>"
96
115
  <% } else { %>
97
- export BATCH_OUTPUT_PATH="s3://ml-container-creator-batch-${AWS_REGION}-${ACCOUNT_ID}/${PROJECT_NAME}/output/"
116
+ export BATCH_OUTPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
98
117
  <% } %>
99
118
  export BATCH_INSTANCE_COUNT="<%= batchInstanceCount %>"
100
119
  export BATCH_SPLIT_TYPE="<%= batchSplitType %>"
@@ -205,7 +224,7 @@ export BENCHMARK_REQUEST_COUNT=""
205
224
  <% if (benchmarkS3OutputPath) { %>
206
225
  export BENCHMARK_S3_OUTPUT_PATH="<%= benchmarkS3OutputPath %>"
207
226
  <% } else { %>
208
- export BENCHMARK_S3_OUTPUT_PATH="s3://ml-container-creator-benchmark-${AWS_REGION}-$(aws sts get-caller-identity --query Account --output text)/${PROJECT_NAME}/"
227
+ export BENCHMARK_S3_OUTPUT_PATH="s3://mlcc-benchmark-$(aws sts get-caller-identity --query Account --output text)-${AWS_REGION}/${PROJECT_NAME}/"
209
228
  <% } %>
210
229
  export BENCHMARK_JOB_NAME=""
211
230
  export BENCHMARK_WORKLOAD_CONFIG_NAME=""
@@ -222,7 +241,7 @@ export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage || '' %>}
222
241
 
223
242
  # Allow environment variable overrides
224
243
  export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
225
- <% if (deploymentTarget === 'realtime-inference' || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
244
+ <% if ((deploymentTarget === 'realtime-inference' && !(typeof existingEndpointName !== 'undefined' && existingEndpointName)) || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
226
245
  export INSTANCE_TYPE=${INSTANCE_TYPE:-<%= instanceType %>}
227
246
  <% } %>
228
247
  export ECR_REPOSITORY_NAME=${ECR_REPOSITORY_NAME:-ml-container-creator}
@@ -277,7 +296,11 @@ echo " Model env vars: <%= Object.keys(modelEnvVars).length %>"
277
296
  echo " Server env vars: <%= Object.keys(serverEnvVars).length %>"
278
297
  <% } %>
279
298
  <% if (deploymentTarget === 'realtime-inference') { %>
299
+ <% if (typeof existingEndpointName !== 'undefined' && existingEndpointName) { %>
300
+ echo " Endpoint: ${ENDPOINT_NAME} (external)"
301
+ <% } else { %>
280
302
  echo " Instance: ${INSTANCE_TYPE}"
303
+ <% } %>
281
304
  <% } else if (deploymentTarget === 'async-inference') { %>
282
305
  echo " Instance: ${INSTANCE_TYPE}"
283
306
  echo " S3 output: ${ASYNC_S3_OUTPUT_PATH}"