@aws/ml-container-creator 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -740,6 +740,49 @@ case "${CLEANUP_TARGET}" in
740
740
  codebuild)
741
741
  clean_codebuild
742
742
  ;;
743
+ <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
744
+ benchmark)
745
+ echo "🧹 Cleaning benchmark resources..."
746
+ WORKLOAD_CONFIG_NAME="${PROJECT_NAME}-benchmark-config"
747
+
748
+ # Delete workload config if exists
749
+ if aws sagemaker describe-ai-workload-config \
750
+ --ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
751
+ --region "$AWS_REGION" 2>/dev/null; then
752
+ aws sagemaker delete-ai-workload-config \
753
+ --ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
754
+ --region "$AWS_REGION"
755
+ echo " ✓ Deleted workload config: $WORKLOAD_CONFIG_NAME"
756
+ fi
757
+
758
+ # Delete terminal benchmark jobs matching project prefix
759
+ aws sagemaker list-ai-benchmark-jobs \
760
+ --name-contains "${PROJECT_NAME}-benchmark-" \
761
+ --region "$AWS_REGION" \
762
+ --query 'AIBenchmarkJobs[?AIBenchmarkJobStatus!=`InProgress`].AIBenchmarkJobName' \
763
+ --output text | tr '\t' '\n' | while read -r job; do
764
+ [ -z "$job" ] && continue
765
+ aws sagemaker delete-ai-benchmark-job \
766
+ --ai-benchmark-job-name "$job" \
767
+ --region "$AWS_REGION"
768
+ echo " ✓ Deleted benchmark job: $job"
769
+ done
770
+
771
+ # Delete local benchmark results
772
+ if [ -d "${SCRIPT_DIR}/../benchmarks" ]; then
773
+ read -p "Delete local benchmark results? (Y/n) " CONFIRM_DELETE
774
+ CONFIRM_DELETE="${CONFIRM_DELETE:-Y}"
775
+ if [[ "${CONFIRM_DELETE}" =~ ^[Yy]$ ]]; then
776
+ rm -rf "${SCRIPT_DIR}/../benchmarks"
777
+ echo " ✓ Deleted local benchmarks/ directory"
778
+ else
779
+ echo " โญ Skipped local benchmarks/ deletion"
780
+ fi
781
+ fi
782
+
783
+ echo "✅ Benchmark cleanup complete"
784
+ ;;
785
+ <% } %>
743
786
  all)
744
787
  echo "🧹 Performing complete cleanup"
745
788
  echo ""
@@ -790,6 +833,49 @@ case "${CLEANUP_TARGET}" in
790
833
  CLEANED_ITEMS+=("CodeBuild resources")
791
834
  fi
792
835
 
836
+ <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
837
+ echo ""
838
+
839
+ # Clean benchmark resources
840
+ WORKLOAD_CONFIG_NAME="${PROJECT_NAME}-benchmark-config"
841
+
842
+ # Delete workload config if exists
843
+ if aws sagemaker describe-ai-workload-config \
844
+ --ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
845
+ --region "$AWS_REGION" 2>/dev/null; then
846
+ aws sagemaker delete-ai-workload-config \
847
+ --ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
848
+ --region "$AWS_REGION"
849
+ echo " ✓ Deleted workload config: $WORKLOAD_CONFIG_NAME"
850
+ fi
851
+
852
+ # Delete terminal benchmark jobs matching project prefix
853
+ aws sagemaker list-ai-benchmark-jobs \
854
+ --name-contains "${PROJECT_NAME}-benchmark-" \
855
+ --region "$AWS_REGION" \
856
+ --query 'AIBenchmarkJobs[?AIBenchmarkJobStatus!=`InProgress`].AIBenchmarkJobName' \
857
+ --output text | tr '\t' '\n' | while read -r job; do
858
+ [ -z "$job" ] && continue
859
+ aws sagemaker delete-ai-benchmark-job \
860
+ --ai-benchmark-job-name "$job" \
861
+ --region "$AWS_REGION"
862
+ echo " ✓ Deleted benchmark job: $job"
863
+ done
864
+
865
+ # Delete local benchmark results
866
+ if [ -d "${SCRIPT_DIR}/../benchmarks" ]; then
867
+ read -p "Delete local benchmark results? (Y/n) " CONFIRM_DELETE
868
+ CONFIRM_DELETE="${CONFIRM_DELETE:-Y}"
869
+ if [[ "${CONFIRM_DELETE}" =~ ^[Yy]$ ]]; then
870
+ rm -rf "${SCRIPT_DIR}/../benchmarks"
871
+ echo " ✓ Deleted local benchmarks/ directory"
872
+ else
873
+ echo " โญ Skipped local benchmarks/ deletion"
874
+ fi
875
+ fi
876
+
877
+ CLEANED_ITEMS+=("Benchmark resources")
878
+ <% } %>
793
879
  # Display summary
794
880
  echo ""
795
881
  echo "✅ Cleanup complete!"
@@ -30,6 +30,9 @@ export INSTANCE_TYPE="<%= instanceType %>"
30
30
  <% if (inferenceAmiVersion) { %>
31
31
  export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
32
32
  <% } %>
33
+ <% if (typeof capacityReservationArn !== 'undefined' && capacityReservationArn) { %>
34
+ export CAPACITY_RESERVATION_ARN="<%= capacityReservationArn %>"
35
+ <% } %>
33
36
  <% } %>
34
37
 
35
38
  <% if (deploymentTarget === 'async-inference') { %>
@@ -126,6 +129,8 @@ export IC_MEMORY_SIZE="<%= icMemorySize %>"
126
129
  <% } %>
127
130
  <% if (typeof icGpuCount !== 'undefined' && icGpuCount != null) { %>
128
131
  export IC_GPU_COUNT="<%= icGpuCount %>"
132
+ <% } else { %>
133
+ export IC_GPU_COUNT="${IC_GPU_COUNT:-1}"
129
134
  <% } %>
130
135
  <% if (typeof icCopyCount !== 'undefined' && icCopyCount != null) { %>
131
136
  export IC_COPY_COUNT="<%= icCopyCount %>"
@@ -186,6 +191,26 @@ export MODEL_FORMAT="<%= modelFormat %>"
186
191
  export ROLE_ARN="<%= roleArn %>"
187
192
  <% } %>
188
193
 
194
+ <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
195
+ # SageMaker AI Benchmarking configuration
196
+ export BENCHMARK_CONCURRENCY="<%= benchmarkConcurrency %>"
197
+ export BENCHMARK_INPUT_TOKENS_MEAN="<%= benchmarkInputTokensMean %>"
198
+ export BENCHMARK_OUTPUT_TOKENS_MEAN="<%= benchmarkOutputTokensMean %>"
199
+ export BENCHMARK_STREAMING="<%= benchmarkStreaming %>"
200
+ <% if (benchmarkRequestCount) { %>
201
+ export BENCHMARK_REQUEST_COUNT="<%= benchmarkRequestCount %>"
202
+ <% } else { %>
203
+ export BENCHMARK_REQUEST_COUNT=""
204
+ <% } %>
205
+ <% if (benchmarkS3OutputPath) { %>
206
+ export BENCHMARK_S3_OUTPUT_PATH="<%= benchmarkS3OutputPath %>"
207
+ <% } else { %>
208
+ export BENCHMARK_S3_OUTPUT_PATH="s3://ml-container-creator-benchmark-${AWS_REGION}-$(aws sts get-caller-identity --query Account --output text)/${PROJECT_NAME}/"
209
+ <% } %>
210
+ export BENCHMARK_JOB_NAME=""
211
+ export BENCHMARK_WORKLOAD_CONFIG_NAME=""
212
+ <% } %>
213
+
189
214
  <% if (orderedEnvVars && orderedEnvVars.length > 0) { %>
190
215
  # Runtime environment variables (from catalog)
191
216
  <% orderedEnvVars.forEach(({ key, value }) => { %>
@@ -193,9 +218,7 @@ export <%= key %>=${<%= key %>:-<%= value %>}
193
218
  <% }); %>
194
219
  <% } %>
195
220
 
196
- <% if (baseImage) { %>
197
- export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage %>}
198
- <% } %>
221
+ export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage || '' %>}
199
222
 
200
223
  # Allow environment variable overrides
201
224
  export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
@@ -336,6 +336,11 @@ if [ -z "${SKIP_TO}" ]; then
336
336
  echo " AMI version: ${INFERENCE_AMI_VERSION}"
337
337
  fi
338
338
 
339
+ if [ -n "${CAPACITY_RESERVATION_ARN:-}" ]; then
340
+ VARIANT_JSON="${VARIANT_JSON},\"CapacityReservationConfig\":{\"CapacityReservationPreference\":\"capacity-reservations-only\",\"MlReservationArn\":\"${CAPACITY_RESERVATION_ARN}\"}"
341
+ echo " โš ๏ธ Capacity reservation (experimental): ${CAPACITY_RESERVATION_ARN}"
342
+ fi
343
+
339
344
  VARIANT_JSON="${VARIANT_JSON}}]"
340
345
 
341
346
  echo "โš™๏ธ Creating endpoint configuration: ${ENDPOINT_CONFIG_NAME}"
@@ -453,7 +458,7 @@ if [ -z "${SKIP_TO}" ] || [ "${SKIP_TO}" = "create_ic" ] || [ "${SKIP_TO}" = "wa
453
458
  \"ContainerStartupHealthCheckTimeoutInSeconds\": 900
454
459
  },
455
460
  \"ComputeResourceRequirements\": {
456
- \"NumberOfAcceleratorDevicesRequired\": 1,
461
+ \"NumberOfAcceleratorDevicesRequired\": ${IC_GPU_COUNT},
457
462
  \"MinMemoryRequiredInMb\": 1024
458
463
  }
459
464
  }" \
@@ -393,6 +393,7 @@ CJEOF
393
393
 
394
394
  # Try put-item with condition (new record)
395
395
  if aws dynamodb put-item \
396
+ --region "${AWS_REGION}" \
396
397
  --table-name "${CI_TABLE_NAME}" \
397
398
  --item "{
398
399
  \"configId\": {\"S\": \"${config_id}\"},
@@ -412,6 +413,7 @@ CJEOF
412
413
  else
413
414
  # Record already exists — update it (reset testStatus, update configJson, preserve createdAt)
414
415
  if aws dynamodb update-item \
416
+ --region "${AWS_REGION}" \
415
417
  --table-name "${CI_TABLE_NAME}" \
416
418
  --key "{\"configId\": {\"S\": \"${config_id}\"}}" \
417
419
  --update-expression "SET configJson = :cj, testStatus = :ts, deploymentConfig = :dc, baseImage = :bi, baseImageVersion = :bv, buildStrategy = :bs, projectName = :pn, schemaVersion = :sv" \
@@ -496,6 +498,11 @@ DJEOF
496
498
  echo "${DEPLOYMENT_JSON}" | python3 -c "import sys,json; print(json.dumps(json.load(sys.stdin), indent=2))" 2>/dev/null || echo "${DEPLOYMENT_JSON}"
497
499
 
498
500
  if [ "${CI_MODE}" = true ]; then
501
+ # Strip capacity reservation ARN for CI โ€” force on-demand deployment
502
+ # CI projects must never use reserved capacity (reservations are account-specific
503
+ # and time-bound; CI replay should always target on-demand instances)
504
+ unset CAPACITY_RESERVATION_ARN 2>/dev/null || true
505
+
499
506
  echo ""
500
507
  echo "โš ๏ธ CI Integration is experimental and currently only tested for"
501
508
  echo " SageMaker Real-Time Inference endpoints."
@@ -507,7 +514,7 @@ DJEOF
507
514
  echo "🔑 configId: ${CONFIG_ID}"
508
515
 
509
516
  # Check if CI_Table exists before writing
510
- if ! aws dynamodb describe-table --table-name "${CI_TABLE_NAME}" &>/dev/null; then
517
+ if ! aws dynamodb describe-table --table-name "${CI_TABLE_NAME}" --region "${AWS_REGION}" &>/dev/null; then
511
518
  echo ""
512
519
  echo "โš ๏ธ CI infrastructure not provisioned. Run 'ml-container-creator bootstrap' with CI enabled."
513
520
  echo " Skipping CI table write."
@@ -122,6 +122,11 @@ EXPOSE 8080
122
122
  # --http-port=8080: SageMaker requires port 8080
123
123
  # --model-repository: Path to model repository
124
124
  # --strict-model-config=false: Allow Triton to auto-complete config for some backends
125
+
126
+ # CUDA compatibility: ensure compat libs are on LD_LIBRARY_PATH for newer SageMaker AMIs
127
+ # (NVIDIA Container Toolkit 1.17.4+ no longer auto-mounts these)
128
+ ENV LD_LIBRARY_PATH="/usr/local/cuda/compat:${LD_LIBRARY_PATH:-}"
129
+
125
130
  ENTRYPOINT ["tritonserver", \
126
131
  "--http-port=8080", \
127
132
  "--model-repository=/opt/ml/model/model_repository", \