@aws/ml-container-creator 0.2.6 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +38 -2
- package/config/bootstrap-stack.json +94 -1
- package/config/defaults.json +1 -1
- package/infra/ci-harness/package-lock.json +22 -9
- package/package.json +3 -1
- package/servers/instance-sizer/index.js +45 -8
- package/servers/instance-sizer/lib/instance-ranker.js +140 -11
- package/servers/instance-sizer/lib/model-resolver.js +10 -6
- package/servers/instance-sizer/lib/quota-resolver.js +368 -0
- package/servers/instance-sizer/package.json +2 -0
- package/servers/lib/catalogs/instances.json +527 -12
- package/servers/lib/catalogs/model-servers.json +298 -20
- package/servers/lib/catalogs/model-sizes.json +27 -0
- package/servers/lib/catalogs/models.json +101 -0
- package/servers/lib/schemas/image-catalog.schema.json +15 -1
- package/servers/model-picker/index.js +2 -1
- package/src/app.js +96 -2
- package/src/lib/architecture-sync.js +171 -0
- package/src/lib/arn-detection.js +22 -0
- package/src/lib/bootstrap-command-handler.js +178 -3
- package/src/lib/cli-handler.js +2 -2
- package/src/lib/config-manager.js +121 -1
- package/src/lib/cross-cutting-checker.js +119 -0
- package/src/lib/deployment-entry-schema.js +1 -2
- package/src/lib/prompt-runner.js +514 -20
- package/src/lib/prompts.js +67 -5
- package/src/lib/registry-command-handler.js +236 -0
- package/src/lib/schema-sync.js +31 -0
- package/src/lib/secret-classification.js +56 -0
- package/src/lib/secrets-command-handler.js +550 -0
- package/src/lib/template-manager.js +49 -1
- package/src/lib/validate-runner.js +174 -2
- package/src/lib/validation-report.js +8 -1
- package/src/prompt-adapter.js +3 -2
- package/templates/Dockerfile +10 -2
- package/templates/code/cuda_compat.sh +22 -0
- package/templates/code/serve +3 -0
- package/templates/code/start_server.sh +3 -0
- package/templates/diffusors/Dockerfile +2 -1
- package/templates/diffusors/serve +3 -0
- package/templates/do/README.md +33 -0
- package/templates/do/benchmark +646 -0
- package/templates/do/build +22 -0
- package/templates/do/clean +86 -0
- package/templates/do/config +41 -6
- package/templates/do/deploy +66 -6
- package/templates/do/logs +18 -3
- package/templates/do/register +8 -1
- package/templates/do/run +10 -0
- package/templates/triton/Dockerfile +5 -0
package/templates/do/clean
CHANGED
|
@@ -740,6 +740,49 @@ case "${CLEANUP_TARGET}" in
|
|
|
740
740
|
codebuild)
|
|
741
741
|
clean_codebuild
|
|
742
742
|
;;
|
|
743
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
744
|
+
benchmark)
|
|
745
|
+
echo "🧹 Cleaning benchmark resources..."
|
|
746
|
+
WORKLOAD_CONFIG_NAME="${PROJECT_NAME}-benchmark-config"
|
|
747
|
+
|
|
748
|
+
# Delete workload config if exists
|
|
749
|
+
if aws sagemaker describe-ai-workload-config \
|
|
750
|
+
--ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
|
|
751
|
+
--region "$AWS_REGION" 2>/dev/null; then
|
|
752
|
+
aws sagemaker delete-ai-workload-config \
|
|
753
|
+
--ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
|
|
754
|
+
--region "$AWS_REGION"
|
|
755
|
+
echo " โ Deleted workload config: $WORKLOAD_CONFIG_NAME"
|
|
756
|
+
fi
|
|
757
|
+
|
|
758
|
+
# Delete terminal benchmark jobs matching project prefix
|
|
759
|
+
aws sagemaker list-ai-benchmark-jobs \
|
|
760
|
+
--name-contains "${PROJECT_NAME}-benchmark-" \
|
|
761
|
+
--region "$AWS_REGION" \
|
|
762
|
+
--query 'AIBenchmarkJobs[?AIBenchmarkJobStatus!=`InProgress`].AIBenchmarkJobName' \
|
|
763
|
+
--output text | tr '\t' '\n' | while read -r job; do
|
|
764
|
+
[ -z "$job" ] && continue
|
|
765
|
+
aws sagemaker delete-ai-benchmark-job \
|
|
766
|
+
--ai-benchmark-job-name "$job" \
|
|
767
|
+
--region "$AWS_REGION"
|
|
768
|
+
echo " โ Deleted benchmark job: $job"
|
|
769
|
+
done
|
|
770
|
+
|
|
771
|
+
# Delete local benchmark results
|
|
772
|
+
if [ -d "${SCRIPT_DIR}/../benchmarks" ]; then
|
|
773
|
+
read -p "Delete local benchmark results? (Y/n) " CONFIRM_DELETE
|
|
774
|
+
CONFIRM_DELETE="${CONFIRM_DELETE:-Y}"
|
|
775
|
+
if [[ "${CONFIRM_DELETE}" =~ ^[Yy]$ ]]; then
|
|
776
|
+
rm -rf "${SCRIPT_DIR}/../benchmarks"
|
|
777
|
+
echo " โ Deleted local benchmarks/ directory"
|
|
778
|
+
else
|
|
779
|
+
echo " โญ Skipped local benchmarks/ deletion"
|
|
780
|
+
fi
|
|
781
|
+
fi
|
|
782
|
+
|
|
783
|
+
echo "✅ Benchmark cleanup complete"
|
|
784
|
+
;;
|
|
785
|
+
<% } %>
|
|
743
786
|
all)
|
|
744
787
|
echo "🧹 Performing complete cleanup"
|
|
745
788
|
echo ""
|
|
@@ -790,6 +833,49 @@ case "${CLEANUP_TARGET}" in
|
|
|
790
833
|
CLEANED_ITEMS+=("CodeBuild resources")
|
|
791
834
|
fi
|
|
792
835
|
|
|
836
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
837
|
+
echo ""
|
|
838
|
+
|
|
839
|
+
# Clean benchmark resources
|
|
840
|
+
WORKLOAD_CONFIG_NAME="${PROJECT_NAME}-benchmark-config"
|
|
841
|
+
|
|
842
|
+
# Delete workload config if exists
|
|
843
|
+
if aws sagemaker describe-ai-workload-config \
|
|
844
|
+
--ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
|
|
845
|
+
--region "$AWS_REGION" 2>/dev/null; then
|
|
846
|
+
aws sagemaker delete-ai-workload-config \
|
|
847
|
+
--ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
|
|
848
|
+
--region "$AWS_REGION"
|
|
849
|
+
echo " โ Deleted workload config: $WORKLOAD_CONFIG_NAME"
|
|
850
|
+
fi
|
|
851
|
+
|
|
852
|
+
# Delete terminal benchmark jobs matching project prefix
|
|
853
|
+
aws sagemaker list-ai-benchmark-jobs \
|
|
854
|
+
--name-contains "${PROJECT_NAME}-benchmark-" \
|
|
855
|
+
--region "$AWS_REGION" \
|
|
856
|
+
--query 'AIBenchmarkJobs[?AIBenchmarkJobStatus!=`InProgress`].AIBenchmarkJobName' \
|
|
857
|
+
--output text | tr '\t' '\n' | while read -r job; do
|
|
858
|
+
[ -z "$job" ] && continue
|
|
859
|
+
aws sagemaker delete-ai-benchmark-job \
|
|
860
|
+
--ai-benchmark-job-name "$job" \
|
|
861
|
+
--region "$AWS_REGION"
|
|
862
|
+
echo " โ Deleted benchmark job: $job"
|
|
863
|
+
done
|
|
864
|
+
|
|
865
|
+
# Delete local benchmark results
|
|
866
|
+
if [ -d "${SCRIPT_DIR}/../benchmarks" ]; then
|
|
867
|
+
read -p "Delete local benchmark results? (Y/n) " CONFIRM_DELETE
|
|
868
|
+
CONFIRM_DELETE="${CONFIRM_DELETE:-Y}"
|
|
869
|
+
if [[ "${CONFIRM_DELETE}" =~ ^[Yy]$ ]]; then
|
|
870
|
+
rm -rf "${SCRIPT_DIR}/../benchmarks"
|
|
871
|
+
echo " โ Deleted local benchmarks/ directory"
|
|
872
|
+
else
|
|
873
|
+
echo " โญ Skipped local benchmarks/ deletion"
|
|
874
|
+
fi
|
|
875
|
+
fi
|
|
876
|
+
|
|
877
|
+
CLEANED_ITEMS+=("Benchmark resources")
|
|
878
|
+
<% } %>
|
|
793
879
|
# Display summary
|
|
794
880
|
echo ""
|
|
795
881
|
echo "✅ Cleanup complete!"
|
package/templates/do/config
CHANGED
|
@@ -30,6 +30,9 @@ export INSTANCE_TYPE="<%= instanceType %>"
|
|
|
30
30
|
<% if (inferenceAmiVersion) { %>
|
|
31
31
|
export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
|
|
32
32
|
<% } %>
|
|
33
|
+
<% if (typeof capacityReservationArn !== 'undefined' && capacityReservationArn) { %>
|
|
34
|
+
export CAPACITY_RESERVATION_ARN="<%= capacityReservationArn %>"
|
|
35
|
+
<% } %>
|
|
33
36
|
<% } %>
|
|
34
37
|
|
|
35
38
|
<% if (deploymentTarget === 'async-inference') { %>
|
|
@@ -126,6 +129,8 @@ export IC_MEMORY_SIZE="<%= icMemorySize %>"
|
|
|
126
129
|
<% } %>
|
|
127
130
|
<% if (typeof icGpuCount !== 'undefined' && icGpuCount != null) { %>
|
|
128
131
|
export IC_GPU_COUNT="<%= icGpuCount %>"
|
|
132
|
+
<% } else { %>
|
|
133
|
+
export IC_GPU_COUNT="${IC_GPU_COUNT:-1}"
|
|
129
134
|
<% } %>
|
|
130
135
|
<% if (typeof icCopyCount !== 'undefined' && icCopyCount != null) { %>
|
|
131
136
|
export IC_COPY_COUNT="<%= icCopyCount %>"
|
|
@@ -151,17 +156,29 @@ export <%= key %>=${<%= key %>:-<%= value %>}
|
|
|
151
156
|
# Framework-specific configuration
|
|
152
157
|
<% if (framework === 'transformers') { %>
|
|
153
158
|
export MODEL_NAME="<%= modelName %>"
|
|
154
|
-
|
|
159
|
+
# Secrets Manager integration: when an ARN is configured, do-scripts resolve the
|
|
160
|
+
# secret at the appropriate stage (build-time or runtime). When a plaintext value
|
|
161
|
+
# is configured, it is exported directly. The _ARN suffix signals resolution is needed.
|
|
162
|
+
<% if (typeof hfTokenArn !== 'undefined' && hfTokenArn) { %>
|
|
163
|
+
export HF_TOKEN_ARN="<%= hfTokenArn %>"
|
|
164
|
+
<% } else if (hfToken) { %>
|
|
155
165
|
export HF_TOKEN="<%= hfToken %>"
|
|
156
166
|
<% } %>
|
|
157
|
-
<% if (
|
|
167
|
+
<% if (typeof ngcTokenArn !== 'undefined' && ngcTokenArn) { %>
|
|
168
|
+
export NGC_API_KEY_ARN="<%= ngcTokenArn %>"
|
|
169
|
+
<% } else if (ngcApiKey) { %>
|
|
158
170
|
export NGC_API_KEY="<%= ngcApiKey %>"
|
|
159
171
|
<% } %>
|
|
160
172
|
<% } %>
|
|
161
173
|
|
|
162
174
|
<% if (framework === 'diffusors') { %>
|
|
163
175
|
export MODEL_NAME="<%= modelName %>"
|
|
164
|
-
|
|
176
|
+
# Secrets Manager integration: when an ARN is configured, do-scripts resolve the
|
|
177
|
+
# secret at the appropriate stage (build-time or runtime). When a plaintext value
|
|
178
|
+
# is configured, it is exported directly. The _ARN suffix signals resolution is needed.
|
|
179
|
+
<% if (typeof hfTokenArn !== 'undefined' && hfTokenArn) { %>
|
|
180
|
+
export HF_TOKEN_ARN="<%= hfTokenArn %>"
|
|
181
|
+
<% } else if (hfToken) { %>
|
|
165
182
|
export HF_TOKEN="<%= hfToken %>"
|
|
166
183
|
<% } %>
|
|
167
184
|
<% } %>
|
|
@@ -174,6 +191,26 @@ export MODEL_FORMAT="<%= modelFormat %>"
|
|
|
174
191
|
export ROLE_ARN="<%= roleArn %>"
|
|
175
192
|
<% } %>
|
|
176
193
|
|
|
194
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
195
|
+
# SageMaker AI Benchmarking configuration
|
|
196
|
+
export BENCHMARK_CONCURRENCY="<%= benchmarkConcurrency %>"
|
|
197
|
+
export BENCHMARK_INPUT_TOKENS_MEAN="<%= benchmarkInputTokensMean %>"
|
|
198
|
+
export BENCHMARK_OUTPUT_TOKENS_MEAN="<%= benchmarkOutputTokensMean %>"
|
|
199
|
+
export BENCHMARK_STREAMING="<%= benchmarkStreaming %>"
|
|
200
|
+
<% if (benchmarkRequestCount) { %>
|
|
201
|
+
export BENCHMARK_REQUEST_COUNT="<%= benchmarkRequestCount %>"
|
|
202
|
+
<% } else { %>
|
|
203
|
+
export BENCHMARK_REQUEST_COUNT=""
|
|
204
|
+
<% } %>
|
|
205
|
+
<% if (benchmarkS3OutputPath) { %>
|
|
206
|
+
export BENCHMARK_S3_OUTPUT_PATH="<%= benchmarkS3OutputPath %>"
|
|
207
|
+
<% } else { %>
|
|
208
|
+
export BENCHMARK_S3_OUTPUT_PATH="s3://ml-container-creator-benchmark-${AWS_REGION}-$(aws sts get-caller-identity --query Account --output text)/${PROJECT_NAME}/"
|
|
209
|
+
<% } %>
|
|
210
|
+
export BENCHMARK_JOB_NAME=""
|
|
211
|
+
export BENCHMARK_WORKLOAD_CONFIG_NAME=""
|
|
212
|
+
<% } %>
|
|
213
|
+
|
|
177
214
|
<% if (orderedEnvVars && orderedEnvVars.length > 0) { %>
|
|
178
215
|
# Runtime environment variables (from catalog)
|
|
179
216
|
<% orderedEnvVars.forEach(({ key, value }) => { %>
|
|
@@ -181,9 +218,7 @@ export <%= key %>=${<%= key %>:-<%= value %>}
|
|
|
181
218
|
<% }); %>
|
|
182
219
|
<% } %>
|
|
183
220
|
|
|
184
|
-
|
|
185
|
-
export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage %>}
|
|
186
|
-
<% } %>
|
|
221
|
+
export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage || '' %>}
|
|
187
222
|
|
|
188
223
|
# Allow environment variable overrides
|
|
189
224
|
export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
|
package/templates/do/deploy
CHANGED
|
@@ -95,6 +95,41 @@ fi
|
|
|
95
95
|
echo "✅ ECR image found: ${ECR_REPOSITORY}:${PROJECT_NAME}-latest"
|
|
96
96
|
IMAGE_TAG="${PROJECT_NAME}-latest"
|
|
97
97
|
|
|
98
|
+
# ============================================================
|
|
99
|
+
# Shared: Resolve secrets for container environment
|
|
100
|
+
# ============================================================
|
|
101
|
+
CONTAINER_ENV_JSON=""
|
|
102
|
+
|
|
103
|
+
if [ -n "${HF_TOKEN_ARN:-}" ]; then
|
|
104
|
+
echo "๐ Resolving HuggingFace token from Secrets Manager..."
|
|
105
|
+
RESOLVED_HF_TOKEN=$(aws secretsmanager get-secret-value --secret-id "${HF_TOKEN_ARN}" --query SecretString --output text --region "${AWS_REGION}") || {
|
|
106
|
+
echo "โ Failed to resolve HuggingFace token from Secrets Manager"
|
|
107
|
+
exit 3
|
|
108
|
+
}
|
|
109
|
+
CONTAINER_ENV_JSON="\"HF_TOKEN\":\"${RESOLVED_HF_TOKEN}\""
|
|
110
|
+
elif [ -n "${HF_TOKEN:-}" ]; then
|
|
111
|
+
CONTAINER_ENV_JSON="\"HF_TOKEN\":\"${HF_TOKEN}\""
|
|
112
|
+
fi
|
|
113
|
+
|
|
114
|
+
if [ -n "${NGC_API_KEY_ARN:-}" ]; then
|
|
115
|
+
echo "๐ Resolving NGC API key from Secrets Manager..."
|
|
116
|
+
RESOLVED_NGC_KEY=$(aws secretsmanager get-secret-value --secret-id "${NGC_API_KEY_ARN}" --query SecretString --output text --region "${AWS_REGION}") || {
|
|
117
|
+
echo "โ Failed to resolve NGC API key from Secrets Manager"
|
|
118
|
+
exit 3
|
|
119
|
+
}
|
|
120
|
+
if [ -n "${CONTAINER_ENV_JSON}" ]; then
|
|
121
|
+
CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"NGC_API_KEY\":\"${RESOLVED_NGC_KEY}\""
|
|
122
|
+
else
|
|
123
|
+
CONTAINER_ENV_JSON="\"NGC_API_KEY\":\"${RESOLVED_NGC_KEY}\""
|
|
124
|
+
fi
|
|
125
|
+
elif [ -n "${NGC_API_KEY:-}" ]; then
|
|
126
|
+
if [ -n "${CONTAINER_ENV_JSON}" ]; then
|
|
127
|
+
CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"NGC_API_KEY\":\"${NGC_API_KEY}\""
|
|
128
|
+
else
|
|
129
|
+
CONTAINER_ENV_JSON="\"NGC_API_KEY\":\"${NGC_API_KEY}\""
|
|
130
|
+
fi
|
|
131
|
+
fi
|
|
132
|
+
|
|
98
133
|
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
99
134
|
# ============================================================
|
|
100
135
|
# SageMaker Real-Time Inference Deployment (Inference Components)
|
|
@@ -301,6 +336,11 @@ if [ -z "${SKIP_TO}" ]; then
|
|
|
301
336
|
echo " AMI version: ${INFERENCE_AMI_VERSION}"
|
|
302
337
|
fi
|
|
303
338
|
|
|
339
|
+
if [ -n "${CAPACITY_RESERVATION_ARN:-}" ]; then
|
|
340
|
+
VARIANT_JSON="${VARIANT_JSON},\"CapacityReservationConfig\":{\"CapacityReservationPreference\":\"capacity-reservations-only\",\"MlReservationArn\":\"${CAPACITY_RESERVATION_ARN}\"}"
|
|
341
|
+
echo " ⚠️ Capacity reservation (experimental): ${CAPACITY_RESERVATION_ARN}"
|
|
342
|
+
fi
|
|
343
|
+
|
|
304
344
|
VARIANT_JSON="${VARIANT_JSON}}]"
|
|
305
345
|
|
|
306
346
|
echo "โ๏ธ Creating endpoint configuration: ${ENDPOINT_CONFIG_NAME}"
|
|
@@ -400,20 +440,25 @@ if [ -z "${SKIP_TO}" ] || [ "${SKIP_TO}" = "create_ic" ] || [ "${SKIP_TO}" = "wa
|
|
|
400
440
|
_update_config_var "INFERENCE_COMPONENT_NAME" "${IC_NAME}"
|
|
401
441
|
fi
|
|
402
442
|
|
|
443
|
+
# Build container spec JSON
|
|
444
|
+
CONTAINER_SPEC="{\"Image\":\"${ECR_REPOSITORY}:${IMAGE_TAG}\""
|
|
445
|
+
if [ -n "${CONTAINER_ENV_JSON}" ]; then
|
|
446
|
+
CONTAINER_SPEC="${CONTAINER_SPEC},\"Environment\":{${CONTAINER_ENV_JSON}}"
|
|
447
|
+
fi
|
|
448
|
+
CONTAINER_SPEC="${CONTAINER_SPEC}}"
|
|
449
|
+
|
|
403
450
|
echo "๐ฆ Creating inference component: ${IC_NAME}"
|
|
404
451
|
if ! aws sagemaker create-inference-component \
|
|
405
452
|
--inference-component-name "${IC_NAME}" \
|
|
406
453
|
--endpoint-name "${ENDPOINT_NAME}" \
|
|
407
454
|
--variant-name "AllTraffic" \
|
|
408
455
|
--specification "{
|
|
409
|
-
\"Container\": {
|
|
410
|
-
\"Image\": \"${ECR_REPOSITORY}:${IMAGE_TAG}\"
|
|
411
|
-
},
|
|
456
|
+
\"Container\": ${CONTAINER_SPEC},
|
|
412
457
|
\"StartupParameters\": {
|
|
413
458
|
\"ContainerStartupHealthCheckTimeoutInSeconds\": 900
|
|
414
459
|
},
|
|
415
460
|
\"ComputeResourceRequirements\": {
|
|
416
|
-
\"NumberOfAcceleratorDevicesRequired\":
|
|
461
|
+
\"NumberOfAcceleratorDevicesRequired\": ${IC_GPU_COUNT},
|
|
417
462
|
\"MinMemoryRequiredInMb\": 1024
|
|
418
463
|
}
|
|
419
464
|
}" \
|
|
@@ -767,10 +812,17 @@ if [ -z "${SKIP_TO}" ]; then
|
|
|
767
812
|
_update_config_var "SAGEMAKER_MODEL_NAME" "${MODEL_NAME_SM}"
|
|
768
813
|
|
|
769
814
|
# Step 1: Create SageMaker model
|
|
815
|
+
# Build primary container spec
|
|
816
|
+
PRIMARY_CONTAINER="{\"Image\":\"${ECR_REPOSITORY}:${IMAGE_TAG}\""
|
|
817
|
+
if [ -n "${CONTAINER_ENV_JSON}" ]; then
|
|
818
|
+
PRIMARY_CONTAINER="${PRIMARY_CONTAINER},\"Environment\":{${CONTAINER_ENV_JSON}}"
|
|
819
|
+
fi
|
|
820
|
+
PRIMARY_CONTAINER="${PRIMARY_CONTAINER}}"
|
|
821
|
+
|
|
770
822
|
echo "๐ฆ Creating SageMaker model: ${MODEL_NAME_SM}"
|
|
771
823
|
if ! aws sagemaker create-model \
|
|
772
824
|
--model-name "${MODEL_NAME_SM}" \
|
|
773
|
-
--primary-container "
|
|
825
|
+
--primary-container "${PRIMARY_CONTAINER}" \
|
|
774
826
|
--execution-role-arn "${ROLE_ARN}" \
|
|
775
827
|
--region "${AWS_REGION}"; then
|
|
776
828
|
|
|
@@ -1361,9 +1413,17 @@ _update_config_var "SAGEMAKER_MODEL_NAME" "${MODEL_NAME_SM}"
|
|
|
1361
1413
|
|
|
1362
1414
|
# Step 1: Create SageMaker model
|
|
1363
1415
|
echo "๐ฆ Creating SageMaker model: ${MODEL_NAME_SM}"
|
|
1416
|
+
|
|
1417
|
+
# Build primary container spec
|
|
1418
|
+
BATCH_PRIMARY_CONTAINER="{\"Image\":\"${ECR_REPOSITORY}:${IMAGE_TAG}\""
|
|
1419
|
+
if [ -n "${CONTAINER_ENV_JSON}" ]; then
|
|
1420
|
+
BATCH_PRIMARY_CONTAINER="${BATCH_PRIMARY_CONTAINER},\"Environment\":{${CONTAINER_ENV_JSON}}"
|
|
1421
|
+
fi
|
|
1422
|
+
BATCH_PRIMARY_CONTAINER="${BATCH_PRIMARY_CONTAINER}}"
|
|
1423
|
+
|
|
1364
1424
|
if ! aws sagemaker create-model \
|
|
1365
1425
|
--model-name "${MODEL_NAME_SM}" \
|
|
1366
|
-
--primary-container "
|
|
1426
|
+
--primary-container "${BATCH_PRIMARY_CONTAINER}" \
|
|
1367
1427
|
--execution-role-arn "${ROLE_ARN}" \
|
|
1368
1428
|
--region "${AWS_REGION}"; then
|
|
1369
1429
|
|
package/templates/do/logs
CHANGED
|
@@ -51,11 +51,15 @@ echo "โโโโโโโโโโโโโโโโโโโโโโโโ
|
|
|
51
51
|
echo ""
|
|
52
52
|
|
|
53
53
|
# Wait for log group to exist before tailing
|
|
54
|
-
MAX_WAIT=
|
|
54
|
+
MAX_WAIT=900
|
|
55
55
|
INTERVAL=10
|
|
56
56
|
ELAPSED=0
|
|
57
57
|
|
|
58
|
+
# Try IC-specific log group first, fall back to endpoint log group
|
|
59
|
+
FALLBACK_LOG_GROUP="/aws/sagemaker/Endpoints/${ENDPOINT}"
|
|
60
|
+
|
|
58
61
|
while true; do
|
|
62
|
+
# Check IC-specific log group
|
|
59
63
|
if aws logs describe-log-groups \
|
|
60
64
|
--log-group-name-prefix "${LOG_GROUP}" \
|
|
61
65
|
--region "${AWS_REGION}" \
|
|
@@ -64,6 +68,17 @@ while true; do
|
|
|
64
68
|
break
|
|
65
69
|
fi
|
|
66
70
|
|
|
71
|
+
# Check endpoint-level log group as fallback
|
|
72
|
+
if aws logs describe-log-groups \
|
|
73
|
+
--log-group-name-prefix "${FALLBACK_LOG_GROUP}" \
|
|
74
|
+
--region "${AWS_REGION}" \
|
|
75
|
+
--query "logGroups[?logGroupName=='${FALLBACK_LOG_GROUP}'].logGroupName" \
|
|
76
|
+
--output text 2>/dev/null | grep -q "${FALLBACK_LOG_GROUP}"; then
|
|
77
|
+
LOG_GROUP="${FALLBACK_LOG_GROUP}"
|
|
78
|
+
echo " ℹ️ Using endpoint log group: ${LOG_GROUP}"
|
|
79
|
+
break
|
|
80
|
+
fi
|
|
81
|
+
|
|
67
82
|
if [ "${ELAPSED}" -ge "${MAX_WAIT}" ]; then
|
|
68
83
|
echo "โ Timed out after ${MAX_WAIT}s waiting for log group: ${LOG_GROUP}"
|
|
69
84
|
echo ""
|
|
@@ -123,7 +138,7 @@ echo "โโโโโโโโโโโโโโโโโโโโโโโโ
|
|
|
123
138
|
echo ""
|
|
124
139
|
|
|
125
140
|
# Wait for log group to exist before tailing
|
|
126
|
-
MAX_WAIT=
|
|
141
|
+
MAX_WAIT=900
|
|
127
142
|
INTERVAL=10
|
|
128
143
|
ELAPSED=0
|
|
129
144
|
|
|
@@ -195,7 +210,7 @@ echo "โโโโโโโโโโโโโโโโโโโโโโโโ
|
|
|
195
210
|
echo ""
|
|
196
211
|
|
|
197
212
|
# Wait for log group to exist before tailing
|
|
198
|
-
MAX_WAIT=
|
|
213
|
+
MAX_WAIT=900
|
|
199
214
|
INTERVAL=10
|
|
200
215
|
ELAPSED=0
|
|
201
216
|
|
package/templates/do/register
CHANGED
|
@@ -393,6 +393,7 @@ CJEOF
|
|
|
393
393
|
|
|
394
394
|
# Try put-item with condition (new record)
|
|
395
395
|
if aws dynamodb put-item \
|
|
396
|
+
--region "${AWS_REGION}" \
|
|
396
397
|
--table-name "${CI_TABLE_NAME}" \
|
|
397
398
|
--item "{
|
|
398
399
|
\"configId\": {\"S\": \"${config_id}\"},
|
|
@@ -412,6 +413,7 @@ CJEOF
|
|
|
412
413
|
else
|
|
413
414
|
# Record already exists — update it (reset testStatus, update configJson, preserve createdAt)
|
|
414
415
|
if aws dynamodb update-item \
|
|
416
|
+
--region "${AWS_REGION}" \
|
|
415
417
|
--table-name "${CI_TABLE_NAME}" \
|
|
416
418
|
--key "{\"configId\": {\"S\": \"${config_id}\"}}" \
|
|
417
419
|
--update-expression "SET configJson = :cj, testStatus = :ts, deploymentConfig = :dc, baseImage = :bi, baseImageVersion = :bv, buildStrategy = :bs, projectName = :pn, schemaVersion = :sv" \
|
|
@@ -496,6 +498,11 @@ DJEOF
|
|
|
496
498
|
echo "${DEPLOYMENT_JSON}" | python3 -c "import sys,json; print(json.dumps(json.load(sys.stdin), indent=2))" 2>/dev/null || echo "${DEPLOYMENT_JSON}"
|
|
497
499
|
|
|
498
500
|
if [ "${CI_MODE}" = true ]; then
|
|
501
|
+
# Strip capacity reservation ARN for CI — force on-demand deployment
|
|
502
|
+
# CI projects must never use reserved capacity (reservations are account-specific
|
|
503
|
+
# and time-bound; CI replay should always target on-demand instances)
|
|
504
|
+
unset CAPACITY_RESERVATION_ARN 2>/dev/null || true
|
|
505
|
+
|
|
499
506
|
echo ""
|
|
500
507
|
echo "⚠️ CI Integration is experimental and currently only tested for"
|
|
501
508
|
echo " SageMaker Real-Time Inference endpoints."
|
|
@@ -507,7 +514,7 @@ DJEOF
|
|
|
507
514
|
echo "๐ configId: ${CONFIG_ID}"
|
|
508
515
|
|
|
509
516
|
# Check if CI_Table exists before writing
|
|
510
|
-
if ! aws dynamodb describe-table --table-name "${CI_TABLE_NAME}" &>/dev/null; then
|
|
517
|
+
if ! aws dynamodb describe-table --table-name "${CI_TABLE_NAME}" --region "${AWS_REGION}" &>/dev/null; then
|
|
511
518
|
echo ""
|
|
512
519
|
echo "⚠️ CI infrastructure not provisioned. Run 'ml-container-creator bootstrap' with CI enabled."
|
|
513
520
|
echo " Skipping CI table write."
|
package/templates/do/run
CHANGED
|
@@ -68,6 +68,16 @@ if [ -n "${MODEL_DIR:-}" ]; then
|
|
|
68
68
|
fi
|
|
69
69
|
fi
|
|
70
70
|
|
|
71
|
+
# --- Secrets Manager resolution (runtime) ---
|
|
72
|
+
if [ -n "${HF_TOKEN_ARN:-}" ]; then
|
|
73
|
+
echo "๐ Resolving HuggingFace token from Secrets Manager..."
|
|
74
|
+
HF_TOKEN=$(aws secretsmanager get-secret-value --secret-id "${HF_TOKEN_ARN}" --query SecretString --output text) || {
|
|
75
|
+
echo "โ Failed to resolve HuggingFace token from Secrets Manager"
|
|
76
|
+
exit 3
|
|
77
|
+
}
|
|
78
|
+
export HF_TOKEN
|
|
79
|
+
fi
|
|
80
|
+
|
|
71
81
|
# Prepare environment variables
|
|
72
82
|
ENV_VARS=""
|
|
73
83
|
<% if (framework === 'transformers') { %>
|
|
package/templates/triton/Dockerfile
CHANGED
|
@@ -122,6 +122,11 @@ EXPOSE 8080
|
|
|
122
122
|
# --http-port=8080: SageMaker requires port 8080
|
|
123
123
|
# --model-repository: Path to model repository
|
|
124
124
|
# --strict-model-config=false: Allow Triton to auto-complete config for some backends
|
|
125
|
+
|
|
126
|
+
# CUDA compatibility: ensure compat libs are on LD_LIBRARY_PATH for newer SageMaker AMIs
|
|
127
|
+
# (NVIDIA Container Toolkit 1.17.4+ no longer auto-mounts these)
|
|
128
|
+
ENV LD_LIBRARY_PATH="/usr/local/cuda/compat:${LD_LIBRARY_PATH:-}"
|
|
129
|
+
|
|
125
130
|
ENTRYPOINT ["tritonserver", \
|
|
126
131
|
"--http-port=8080", \
|
|
127
132
|
"--model-repository=/opt/ml/model/model_repository", \
|