@aws/ml-container-creator 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/bootstrap-stack.json +86 -7
- package/config/defaults.json +1 -1
- package/package.json +3 -1
- package/servers/instance-sizer/index.js +36 -2
- package/servers/instance-sizer/lib/instance-ranker.js +114 -10
- package/servers/instance-sizer/lib/quota-resolver.js +368 -0
- package/servers/instance-sizer/package.json +2 -0
- package/servers/lib/catalogs/instances.json +527 -12
- package/servers/lib/catalogs/model-servers.json +15 -15
- package/servers/lib/catalogs/model-sizes.json +27 -0
- package/servers/lib/catalogs/models.json +71 -0
- package/servers/lib/schemas/image-catalog.schema.json +9 -1
- package/src/app.js +77 -2
- package/src/lib/bootstrap-command-handler.js +96 -3
- package/src/lib/cli-handler.js +2 -2
- package/src/lib/config-manager.js +78 -1
- package/src/lib/prompt-runner.js +96 -9
- package/src/lib/prompts.js +66 -4
- package/src/lib/schema-sync.js +31 -0
- package/src/lib/template-manager.js +49 -1
- package/src/lib/validate-runner.js +125 -2
- package/templates/Dockerfile +10 -2
- package/templates/code/cuda_compat.sh +22 -0
- package/templates/code/serve +3 -0
- package/templates/code/start_server.sh +3 -0
- package/templates/diffusors/Dockerfile +2 -1
- package/templates/diffusors/serve +3 -0
- package/templates/do/README.md +33 -0
- package/templates/do/benchmark +646 -0
- package/templates/do/clean +86 -0
- package/templates/do/config +26 -3
- package/templates/do/deploy +6 -1
- package/templates/do/register +8 -1
- package/templates/triton/Dockerfile +5 -0
package/templates/do/clean
CHANGED
|
@@ -740,6 +740,49 @@ case "${CLEANUP_TARGET}" in
|
|
|
740
740
|
codebuild)
|
|
741
741
|
clean_codebuild
|
|
742
742
|
;;
|
|
743
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
744
|
+
benchmark)
|
|
745
|
+
echo "🧹 Cleaning benchmark resources..."
|
|
746
|
+
WORKLOAD_CONFIG_NAME="${PROJECT_NAME}-benchmark-config"
|
|
747
|
+
|
|
748
|
+
# Delete workload config if exists
|
|
749
|
+
if aws sagemaker describe-ai-workload-config \
|
|
750
|
+
--ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
|
|
751
|
+
--region "$AWS_REGION" 2>/dev/null; then
|
|
752
|
+
aws sagemaker delete-ai-workload-config \
|
|
753
|
+
--ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
|
|
754
|
+
--region "$AWS_REGION"
|
|
755
|
+
echo " ✓ Deleted workload config: $WORKLOAD_CONFIG_NAME"
|
|
756
|
+
fi
|
|
757
|
+
|
|
758
|
+
# Delete terminal benchmark jobs matching project prefix
|
|
759
|
+
aws sagemaker list-ai-benchmark-jobs \
|
|
760
|
+
--name-contains "${PROJECT_NAME}-benchmark-" \
|
|
761
|
+
--region "$AWS_REGION" \
|
|
762
|
+
--query 'AIBenchmarkJobs[?AIBenchmarkJobStatus!=`InProgress`].AIBenchmarkJobName' \
|
|
763
|
+
--output text | tr '\t' '\n' | while read -r job; do
|
|
764
|
+
[ -z "$job" ] && continue
|
|
765
|
+
aws sagemaker delete-ai-benchmark-job \
|
|
766
|
+
--ai-benchmark-job-name "$job" \
|
|
767
|
+
--region "$AWS_REGION"
|
|
768
|
+
echo " ✓ Deleted benchmark job: $job"
|
|
769
|
+
done
|
|
770
|
+
|
|
771
|
+
# Delete local benchmark results
|
|
772
|
+
if [ -d "${SCRIPT_DIR}/../benchmarks" ]; then
|
|
773
|
+
read -p "Delete local benchmark results? (Y/n) " CONFIRM_DELETE
|
|
774
|
+
CONFIRM_DELETE="${CONFIRM_DELETE:-Y}"
|
|
775
|
+
if [[ "${CONFIRM_DELETE}" =~ ^[Yy]$ ]]; then
|
|
776
|
+
rm -rf "${SCRIPT_DIR}/../benchmarks"
|
|
777
|
+
echo " ✓ Deleted local benchmarks/ directory"
|
|
778
|
+
else
|
|
779
|
+
echo " ⏭ Skipped local benchmarks/ deletion"
|
|
780
|
+
fi
|
|
781
|
+
fi
|
|
782
|
+
|
|
783
|
+
echo "✅ Benchmark cleanup complete"
|
|
784
|
+
;;
|
|
785
|
+
<% } %>
|
|
743
786
|
all)
|
|
744
787
|
echo "🧹 Performing complete cleanup"
|
|
745
788
|
echo ""
|
|
@@ -790,6 +833,49 @@ case "${CLEANUP_TARGET}" in
|
|
|
790
833
|
CLEANED_ITEMS+=("CodeBuild resources")
|
|
791
834
|
fi
|
|
792
835
|
|
|
836
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
837
|
+
echo ""
|
|
838
|
+
|
|
839
|
+
# Clean benchmark resources
|
|
840
|
+
WORKLOAD_CONFIG_NAME="${PROJECT_NAME}-benchmark-config"
|
|
841
|
+
|
|
842
|
+
# Delete workload config if exists
|
|
843
|
+
if aws sagemaker describe-ai-workload-config \
|
|
844
|
+
--ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
|
|
845
|
+
--region "$AWS_REGION" 2>/dev/null; then
|
|
846
|
+
aws sagemaker delete-ai-workload-config \
|
|
847
|
+
--ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
|
|
848
|
+
--region "$AWS_REGION"
|
|
849
|
+
echo " ✓ Deleted workload config: $WORKLOAD_CONFIG_NAME"
|
|
850
|
+
fi
|
|
851
|
+
|
|
852
|
+
# Delete terminal benchmark jobs matching project prefix
|
|
853
|
+
aws sagemaker list-ai-benchmark-jobs \
|
|
854
|
+
--name-contains "${PROJECT_NAME}-benchmark-" \
|
|
855
|
+
--region "$AWS_REGION" \
|
|
856
|
+
--query 'AIBenchmarkJobs[?AIBenchmarkJobStatus!=`InProgress`].AIBenchmarkJobName' \
|
|
857
|
+
--output text | tr '\t' '\n' | while read -r job; do
|
|
858
|
+
[ -z "$job" ] && continue
|
|
859
|
+
aws sagemaker delete-ai-benchmark-job \
|
|
860
|
+
--ai-benchmark-job-name "$job" \
|
|
861
|
+
--region "$AWS_REGION"
|
|
862
|
+
echo " ✓ Deleted benchmark job: $job"
|
|
863
|
+
done
|
|
864
|
+
|
|
865
|
+
# Delete local benchmark results
|
|
866
|
+
if [ -d "${SCRIPT_DIR}/../benchmarks" ]; then
|
|
867
|
+
read -p "Delete local benchmark results? (Y/n) " CONFIRM_DELETE
|
|
868
|
+
CONFIRM_DELETE="${CONFIRM_DELETE:-Y}"
|
|
869
|
+
if [[ "${CONFIRM_DELETE}" =~ ^[Yy]$ ]]; then
|
|
870
|
+
rm -rf "${SCRIPT_DIR}/../benchmarks"
|
|
871
|
+
echo " ✓ Deleted local benchmarks/ directory"
|
|
872
|
+
else
|
|
873
|
+
echo " ⏭ Skipped local benchmarks/ deletion"
|
|
874
|
+
fi
|
|
875
|
+
fi
|
|
876
|
+
|
|
877
|
+
CLEANED_ITEMS+=("Benchmark resources")
|
|
878
|
+
<% } %>
|
|
793
879
|
# Display summary
|
|
794
880
|
echo ""
|
|
795
881
|
echo "✅ Cleanup complete!"
|
package/templates/do/config
CHANGED
|
@@ -30,6 +30,9 @@ export INSTANCE_TYPE="<%= instanceType %>"
|
|
|
30
30
|
<% if (inferenceAmiVersion) { %>
|
|
31
31
|
export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
|
|
32
32
|
<% } %>
|
|
33
|
+
<% if (typeof capacityReservationArn !== 'undefined' && capacityReservationArn) { %>
|
|
34
|
+
export CAPACITY_RESERVATION_ARN="<%= capacityReservationArn %>"
|
|
35
|
+
<% } %>
|
|
33
36
|
<% } %>
|
|
34
37
|
|
|
35
38
|
<% if (deploymentTarget === 'async-inference') { %>
|
|
@@ -126,6 +129,8 @@ export IC_MEMORY_SIZE="<%= icMemorySize %>"
|
|
|
126
129
|
<% } %>
|
|
127
130
|
<% if (typeof icGpuCount !== 'undefined' && icGpuCount != null) { %>
|
|
128
131
|
export IC_GPU_COUNT="<%= icGpuCount %>"
|
|
132
|
+
<% } else { %>
|
|
133
|
+
export IC_GPU_COUNT="${IC_GPU_COUNT:-1}"
|
|
129
134
|
<% } %>
|
|
130
135
|
<% if (typeof icCopyCount !== 'undefined' && icCopyCount != null) { %>
|
|
131
136
|
export IC_COPY_COUNT="<%= icCopyCount %>"
|
|
@@ -186,6 +191,26 @@ export MODEL_FORMAT="<%= modelFormat %>"
|
|
|
186
191
|
export ROLE_ARN="<%= roleArn %>"
|
|
187
192
|
<% } %>
|
|
188
193
|
|
|
194
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
195
|
+
# SageMaker AI Benchmarking configuration
|
|
196
|
+
export BENCHMARK_CONCURRENCY="<%= benchmarkConcurrency %>"
|
|
197
|
+
export BENCHMARK_INPUT_TOKENS_MEAN="<%= benchmarkInputTokensMean %>"
|
|
198
|
+
export BENCHMARK_OUTPUT_TOKENS_MEAN="<%= benchmarkOutputTokensMean %>"
|
|
199
|
+
export BENCHMARK_STREAMING="<%= benchmarkStreaming %>"
|
|
200
|
+
<% if (benchmarkRequestCount) { %>
|
|
201
|
+
export BENCHMARK_REQUEST_COUNT="<%= benchmarkRequestCount %>"
|
|
202
|
+
<% } else { %>
|
|
203
|
+
export BENCHMARK_REQUEST_COUNT=""
|
|
204
|
+
<% } %>
|
|
205
|
+
<% if (benchmarkS3OutputPath) { %>
|
|
206
|
+
export BENCHMARK_S3_OUTPUT_PATH="<%= benchmarkS3OutputPath %>"
|
|
207
|
+
<% } else { %>
|
|
208
|
+
export BENCHMARK_S3_OUTPUT_PATH="s3://ml-container-creator-benchmark-${AWS_REGION}-$(aws sts get-caller-identity --query Account --output text)/${PROJECT_NAME}/"
|
|
209
|
+
<% } %>
|
|
210
|
+
export BENCHMARK_JOB_NAME=""
|
|
211
|
+
export BENCHMARK_WORKLOAD_CONFIG_NAME=""
|
|
212
|
+
<% } %>
|
|
213
|
+
|
|
189
214
|
<% if (orderedEnvVars && orderedEnvVars.length > 0) { %>
|
|
190
215
|
# Runtime environment variables (from catalog)
|
|
191
216
|
<% orderedEnvVars.forEach(({ key, value }) => { %>
|
|
@@ -193,9 +218,7 @@ export <%= key %>=${<%= key %>:-<%= value %>}
|
|
|
193
218
|
<% }); %>
|
|
194
219
|
<% } %>
|
|
195
220
|
|
|
196
|
-
|
|
197
|
-
export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage %>}
|
|
198
|
-
<% } %>
|
|
221
|
+
export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage || '' %>}
|
|
199
222
|
|
|
200
223
|
# Allow environment variable overrides
|
|
201
224
|
export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
|
package/templates/do/deploy
CHANGED
|
@@ -336,6 +336,11 @@ if [ -z "${SKIP_TO}" ]; then
|
|
|
336
336
|
echo " AMI version: ${INFERENCE_AMI_VERSION}"
|
|
337
337
|
fi
|
|
338
338
|
|
|
339
|
+
if [ -n "${CAPACITY_RESERVATION_ARN:-}" ]; then
|
|
340
|
+
VARIANT_JSON="${VARIANT_JSON},\"CapacityReservationConfig\":{\"CapacityReservationPreference\":\"capacity-reservations-only\",\"MlReservationArn\":\"${CAPACITY_RESERVATION_ARN}\"}"
|
|
341
|
+
echo " ⚠️ Capacity reservation (experimental): ${CAPACITY_RESERVATION_ARN}"
|
|
342
|
+
fi
|
|
343
|
+
|
|
339
344
|
VARIANT_JSON="${VARIANT_JSON}}]"
|
|
340
345
|
|
|
341
346
|
echo "⚙️ Creating endpoint configuration: ${ENDPOINT_CONFIG_NAME}"
|
|
@@ -453,7 +458,7 @@ if [ -z "${SKIP_TO}" ] || [ "${SKIP_TO}" = "create_ic" ] || [ "${SKIP_TO}" = "wa
|
|
|
453
458
|
\"ContainerStartupHealthCheckTimeoutInSeconds\": 900
|
|
454
459
|
},
|
|
455
460
|
\"ComputeResourceRequirements\": {
|
|
456
|
-
\"NumberOfAcceleratorDevicesRequired\":
|
|
461
|
+
\"NumberOfAcceleratorDevicesRequired\": ${IC_GPU_COUNT},
|
|
457
462
|
\"MinMemoryRequiredInMb\": 1024
|
|
458
463
|
}
|
|
459
464
|
}" \
|
package/templates/do/register
CHANGED
|
@@ -393,6 +393,7 @@ CJEOF
|
|
|
393
393
|
|
|
394
394
|
# Try put-item with condition (new record)
|
|
395
395
|
if aws dynamodb put-item \
|
|
396
|
+
--region "${AWS_REGION}" \
|
|
396
397
|
--table-name "${CI_TABLE_NAME}" \
|
|
397
398
|
--item "{
|
|
398
399
|
\"configId\": {\"S\": \"${config_id}\"},
|
|
@@ -412,6 +413,7 @@ CJEOF
|
|
|
412
413
|
else
|
|
413
414
|
# Record already exists — update it (reset testStatus, update configJson, preserve createdAt)
|
|
414
415
|
if aws dynamodb update-item \
|
|
416
|
+
--region "${AWS_REGION}" \
|
|
415
417
|
--table-name "${CI_TABLE_NAME}" \
|
|
416
418
|
--key "{\"configId\": {\"S\": \"${config_id}\"}}" \
|
|
417
419
|
--update-expression "SET configJson = :cj, testStatus = :ts, deploymentConfig = :dc, baseImage = :bi, baseImageVersion = :bv, buildStrategy = :bs, projectName = :pn, schemaVersion = :sv" \
|
|
@@ -496,6 +498,11 @@ DJEOF
|
|
|
496
498
|
echo "${DEPLOYMENT_JSON}" | python3 -c "import sys,json; print(json.dumps(json.load(sys.stdin), indent=2))" 2>/dev/null || echo "${DEPLOYMENT_JSON}"
|
|
497
499
|
|
|
498
500
|
if [ "${CI_MODE}" = true ]; then
|
|
501
|
+
# Strip capacity reservation ARN for CI — force on-demand deployment
|
|
502
|
+
# CI projects must never use reserved capacity (reservations are account-specific
|
|
503
|
+
# and time-bound; CI replay should always target on-demand instances)
|
|
504
|
+
unset CAPACITY_RESERVATION_ARN 2>/dev/null || true
|
|
505
|
+
|
|
499
506
|
echo ""
|
|
500
507
|
echo "⚠️ CI Integration is experimental and currently only tested for"
|
|
501
508
|
echo " SageMaker Real-Time Inference endpoints."
|
|
@@ -507,7 +514,7 @@ DJEOF
|
|
|
507
514
|
echo "๐ configId: ${CONFIG_ID}"
|
|
508
515
|
|
|
509
516
|
# Check if CI_Table exists before writing
|
|
510
|
-
if ! aws dynamodb describe-table --table-name "${CI_TABLE_NAME}" &>/dev/null; then
|
|
517
|
+
if ! aws dynamodb describe-table --table-name "${CI_TABLE_NAME}" --region "${AWS_REGION}" &>/dev/null; then
|
|
511
518
|
echo ""
|
|
512
519
|
echo "⚠️ CI infrastructure not provisioned. Run 'ml-container-creator bootstrap' with CI enabled."
|
|
513
520
|
echo " Skipping CI table write."
|
|
@@ -122,6 +122,11 @@ EXPOSE 8080
|
|
|
122
122
|
# --http-port=8080: SageMaker requires port 8080
|
|
123
123
|
# --model-repository: Path to model repository
|
|
124
124
|
# --strict-model-config=false: Allow Triton to auto-complete config for some backends
|
|
125
|
+
|
|
126
|
+
# CUDA compatibility: ensure compat libs are on LD_LIBRARY_PATH for newer SageMaker AMIs
|
|
127
|
+
# (NVIDIA Container Toolkit 1.17.4+ no longer auto-mounts these)
|
|
128
|
+
ENV LD_LIBRARY_PATH="/usr/local/cuda/compat:${LD_LIBRARY_PATH:-}"
|
|
129
|
+
|
|
125
130
|
ENTRYPOINT ["tritonserver", \
|
|
126
131
|
"--http-port=8080", \
|
|
127
132
|
"--model-repository=/opt/ml/model/model_repository", \
|