@aws/ml-container-creator 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/bin/cli.js +5 -2
  2. package/config/bootstrap-stack.json +86 -7
  3. package/config/defaults.json +1 -1
  4. package/infra/ci-harness/buildspec.yml +60 -0
  5. package/package.json +3 -1
  6. package/servers/README.md +41 -1
  7. package/servers/instance-sizer/index.js +42 -2
  8. package/servers/instance-sizer/lib/instance-ranker.js +114 -10
  9. package/servers/instance-sizer/lib/quota-resolver.js +368 -0
  10. package/servers/instance-sizer/package.json +2 -0
  11. package/servers/lib/catalogs/instances.json +527 -12
  12. package/servers/lib/catalogs/model-servers.json +15 -15
  13. package/servers/lib/catalogs/model-sizes.json +27 -0
  14. package/servers/lib/catalogs/models.json +71 -0
  15. package/servers/lib/schemas/image-catalog.schema.json +9 -1
  16. package/src/app.js +109 -3
  17. package/src/lib/bootstrap-command-handler.js +96 -3
  18. package/src/lib/cli-handler.js +2 -2
  19. package/src/lib/config-manager.js +117 -1
  20. package/src/lib/deployment-entry-schema.js +16 -0
  21. package/src/lib/prompt-runner.js +270 -12
  22. package/src/lib/prompts.js +288 -6
  23. package/src/lib/registry-command-handler.js +12 -0
  24. package/src/lib/schema-sync.js +31 -0
  25. package/src/lib/template-manager.js +49 -1
  26. package/src/lib/validate-runner.js +125 -2
  27. package/templates/Dockerfile +22 -2
  28. package/templates/code/cuda_compat.sh +22 -0
  29. package/templates/code/serve +3 -0
  30. package/templates/code/serving.properties +14 -0
  31. package/templates/code/start_server.sh +3 -0
  32. package/templates/diffusors/Dockerfile +2 -1
  33. package/templates/diffusors/serve +3 -0
  34. package/templates/do/README.md +33 -0
  35. package/templates/do/adapter +1214 -0
  36. package/templates/do/adapters/.gitkeep +2 -0
  37. package/templates/do/add-ic +130 -0
  38. package/templates/do/benchmark +718 -0
  39. package/templates/do/clean +593 -17
  40. package/templates/do/config +49 -4
  41. package/templates/do/deploy +513 -362
  42. package/templates/do/ic/default.conf +32 -0
  43. package/templates/do/lib/endpoint-config.sh +216 -0
  44. package/templates/do/lib/inference-component.sh +167 -0
  45. package/templates/do/lib/secrets.sh +44 -0
  46. package/templates/do/lib/wait.sh +131 -0
  47. package/templates/do/logs +107 -27
  48. package/templates/do/optimize +528 -0
  49. package/templates/do/register +119 -2
  50. package/templates/do/status +337 -0
  51. package/templates/do/test +80 -28
  52. package/templates/triton/Dockerfile +5 -0
@@ -10,6 +10,11 @@ export DEPLOYMENT_CONFIG="<%= deploymentConfig %>"
10
10
  export FRAMEWORK="<%= framework %>"
11
11
  export MODEL_SERVER="<%= modelServer %>"
12
12
 
13
+ <% if (typeof enableLora !== 'undefined' && enableLora) { %>
14
+ # LoRA adapter serving
15
+ export ENABLE_LORA=true
16
+ <% } %>
17
+
13
18
  # AWS configuration
14
19
  export AWS_REGION="<%= awsRegion %>"
15
20
  export ECR_REPOSITORY_NAME="ml-container-creator"
@@ -26,10 +31,26 @@ export DEPLOYMENT_TARGET="<%= deploymentTarget %>"
26
31
 
27
32
  <% if (deploymentTarget === 'realtime-inference') { %>
28
33
  # SageMaker Real-Time Inference configuration
34
+ <% if (typeof existingEndpointName !== 'undefined' && existingEndpointName) { %>
35
+ # External endpoint — attaching IC to an existing running endpoint
36
+ export ENDPOINT_NAME="<%= existingEndpointName %>"
37
+ export ENDPOINT_EXTERNAL=true
38
+ <% } else { %>
29
39
  export INSTANCE_TYPE="<%= instanceType %>"
40
+ <% if (typeof instancePools !== 'undefined' && instancePools && instancePools.length > 1) { %>
41
+ # Instance pools: heterogeneous instance types with priority-based fallback
42
+ # Priority = selection order (1 = preferred, higher = fallback)
43
+ export INSTANCE_POOLS='<%= JSON.stringify(instancePools) %>'
44
+ <% } %>
30
45
  <% if (inferenceAmiVersion) { %>
31
46
  export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
32
47
  <% } %>
48
+ <% if (typeof capacityReservationArn !== 'undefined' && capacityReservationArn) { %>
49
+ # Note: Capacity reservations and instance pools (INSTANCE_POOLS) are mutually exclusive.
50
+ # If both are set, the capacity reservation takes precedence and INSTANCE_POOLS is ignored.
51
+ export CAPACITY_RESERVATION_ARN="<%= capacityReservationArn %>"
52
+ <% } %>
53
+ <% } %>
33
54
  <% } %>
34
55
 
35
56
  <% if (deploymentTarget === 'async-inference') { %>
@@ -126,6 +147,8 @@ export IC_MEMORY_SIZE="<%= icMemorySize %>"
126
147
  <% } %>
127
148
  <% if (typeof icGpuCount !== 'undefined' && icGpuCount != null) { %>
128
149
  export IC_GPU_COUNT="<%= icGpuCount %>"
150
+ <% } else { %>
151
+ export IC_GPU_COUNT="${IC_GPU_COUNT:-1}"
129
152
  <% } %>
130
153
  <% if (typeof icCopyCount !== 'undefined' && icCopyCount != null) { %>
131
154
  export IC_COPY_COUNT="<%= icCopyCount %>"
@@ -186,6 +209,26 @@ export MODEL_FORMAT="<%= modelFormat %>"
186
209
  export ROLE_ARN="<%= roleArn %>"
187
210
  <% } %>
188
211
 
212
+ <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
213
+ # SageMaker AI Benchmarking configuration
214
+ export BENCHMARK_CONCURRENCY="<%= benchmarkConcurrency %>"
215
+ export BENCHMARK_INPUT_TOKENS_MEAN="<%= benchmarkInputTokensMean %>"
216
+ export BENCHMARK_OUTPUT_TOKENS_MEAN="<%= benchmarkOutputTokensMean %>"
217
+ export BENCHMARK_STREAMING="<%= benchmarkStreaming %>"
218
+ <% if (benchmarkRequestCount) { %>
219
+ export BENCHMARK_REQUEST_COUNT="<%= benchmarkRequestCount %>"
220
+ <% } else { %>
221
+ export BENCHMARK_REQUEST_COUNT=""
222
+ <% } %>
223
+ <% if (benchmarkS3OutputPath) { %>
224
+ export BENCHMARK_S3_OUTPUT_PATH="<%= benchmarkS3OutputPath %>"
225
+ <% } else { %>
226
+ export BENCHMARK_S3_OUTPUT_PATH="s3://ml-container-creator-benchmark-${AWS_REGION}-$(aws sts get-caller-identity --query Account --output text)/${PROJECT_NAME}/"
227
+ <% } %>
228
+ export BENCHMARK_JOB_NAME=""
229
+ export BENCHMARK_WORKLOAD_CONFIG_NAME=""
230
+ <% } %>
231
+
189
232
  <% if (orderedEnvVars && orderedEnvVars.length > 0) { %>
190
233
  # Runtime environment variables (from catalog)
191
234
  <% orderedEnvVars.forEach(({ key, value }) => { %>
@@ -193,13 +236,11 @@ export <%= key %>=${<%= key %>:-<%= value %>}
193
236
  <% }); %>
194
237
  <% } %>
195
238
 
196
- <% if (baseImage) { %>
197
- export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage %>}
198
- <% } %>
239
+ export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage || '' %>}
199
240
 
200
241
  # Allow environment variable overrides
201
242
  export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
202
- <% if (deploymentTarget === 'realtime-inference' || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
243
+ <% if ((deploymentTarget === 'realtime-inference' && !(typeof existingEndpointName !== 'undefined' && existingEndpointName)) || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
203
244
  export INSTANCE_TYPE=${INSTANCE_TYPE:-<%= instanceType %>}
204
245
  <% } %>
205
246
  export ECR_REPOSITORY_NAME=${ECR_REPOSITORY_NAME:-ml-container-creator}
@@ -254,7 +295,11 @@ echo " Model env vars: <%= Object.keys(modelEnvVars).length %>"
254
295
  echo " Server env vars: <%= Object.keys(serverEnvVars).length %>"
255
296
  <% } %>
256
297
  <% if (deploymentTarget === 'realtime-inference') { %>
298
+ <% if (typeof existingEndpointName !== 'undefined' && existingEndpointName) { %>
299
+ echo " Endpoint: ${ENDPOINT_NAME} (external)"
300
+ <% } else { %>
257
301
  echo " Instance: ${INSTANCE_TYPE}"
302
+ <% } %>
258
303
  <% } else if (deploymentTarget === 'async-inference') { %>
259
304
  echo " Instance: ${INSTANCE_TYPE}"
260
305
  echo " S3 output: ${ASYNC_S3_OUTPUT_PATH}"