@aws/ml-container-creator 0.2.6 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/bin/cli.js +38 -2
  2. package/config/bootstrap-stack.json +94 -1
  3. package/config/defaults.json +1 -1
  4. package/infra/ci-harness/package-lock.json +22 -9
  5. package/package.json +3 -1
  6. package/servers/instance-sizer/index.js +45 -8
  7. package/servers/instance-sizer/lib/instance-ranker.js +140 -11
  8. package/servers/instance-sizer/lib/model-resolver.js +10 -6
  9. package/servers/instance-sizer/lib/quota-resolver.js +368 -0
  10. package/servers/instance-sizer/package.json +2 -0
  11. package/servers/lib/catalogs/instances.json +527 -12
  12. package/servers/lib/catalogs/model-servers.json +298 -20
  13. package/servers/lib/catalogs/model-sizes.json +27 -0
  14. package/servers/lib/catalogs/models.json +101 -0
  15. package/servers/lib/schemas/image-catalog.schema.json +15 -1
  16. package/servers/model-picker/index.js +2 -1
  17. package/src/app.js +96 -2
  18. package/src/lib/architecture-sync.js +171 -0
  19. package/src/lib/arn-detection.js +22 -0
  20. package/src/lib/bootstrap-command-handler.js +178 -3
  21. package/src/lib/cli-handler.js +2 -2
  22. package/src/lib/config-manager.js +121 -1
  23. package/src/lib/cross-cutting-checker.js +119 -0
  24. package/src/lib/deployment-entry-schema.js +1 -2
  25. package/src/lib/prompt-runner.js +514 -20
  26. package/src/lib/prompts.js +67 -5
  27. package/src/lib/registry-command-handler.js +236 -0
  28. package/src/lib/schema-sync.js +31 -0
  29. package/src/lib/secret-classification.js +56 -0
  30. package/src/lib/secrets-command-handler.js +550 -0
  31. package/src/lib/template-manager.js +49 -1
  32. package/src/lib/validate-runner.js +174 -2
  33. package/src/lib/validation-report.js +8 -1
  34. package/src/prompt-adapter.js +3 -2
  35. package/templates/Dockerfile +10 -2
  36. package/templates/code/cuda_compat.sh +22 -0
  37. package/templates/code/serve +3 -0
  38. package/templates/code/start_server.sh +3 -0
  39. package/templates/diffusors/Dockerfile +2 -1
  40. package/templates/diffusors/serve +3 -0
  41. package/templates/do/README.md +33 -0
  42. package/templates/do/benchmark +646 -0
  43. package/templates/do/build +22 -0
  44. package/templates/do/clean +86 -0
  45. package/templates/do/config +41 -6
  46. package/templates/do/deploy +66 -6
  47. package/templates/do/logs +18 -3
  48. package/templates/do/register +8 -1
  49. package/templates/do/run +10 -0
  50. package/templates/triton/Dockerfile +5 -0
@@ -740,6 +740,49 @@ case "${CLEANUP_TARGET}" in
740
740
  codebuild)
741
741
  clean_codebuild
742
742
  ;;
743
+ <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
744
+ benchmark)
745
+ echo "๐Ÿงน Cleaning benchmark resources..."
746
+ WORKLOAD_CONFIG_NAME="${PROJECT_NAME}-benchmark-config"
747
+
748
+ # Delete workload config if exists
749
+ if aws sagemaker describe-ai-workload-config \
750
+ --ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
751
+ --region "$AWS_REGION" 2>/dev/null; then
752
+ aws sagemaker delete-ai-workload-config \
753
+ --ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
754
+ --region "$AWS_REGION"
755
+ echo " โœ“ Deleted workload config: $WORKLOAD_CONFIG_NAME"
756
+ fi
757
+
758
+ # Delete terminal benchmark jobs matching project prefix
759
+ aws sagemaker list-ai-benchmark-jobs \
760
+ --name-contains "${PROJECT_NAME}-benchmark-" \
761
+ --region "$AWS_REGION" \
762
+ --query 'AIBenchmarkJobs[?AIBenchmarkJobStatus!=`InProgress`].AIBenchmarkJobName' \
763
+ --output text | tr '\t' '\n' | while read -r job; do
764
+ [ -z "$job" ] && continue
765
+ aws sagemaker delete-ai-benchmark-job \
766
+ --ai-benchmark-job-name "$job" \
767
+ --region "$AWS_REGION"
768
+ echo " โœ“ Deleted benchmark job: $job"
769
+ done
770
+
771
+ # Delete local benchmark results
772
+ if [ -d "${SCRIPT_DIR}/../benchmarks" ]; then
773
+ read -p "Delete local benchmark results? (Y/n) " CONFIRM_DELETE
774
+ CONFIRM_DELETE="${CONFIRM_DELETE:-Y}"
775
+ if [[ "${CONFIRM_DELETE}" =~ ^[Yy]$ ]]; then
776
+ rm -rf "${SCRIPT_DIR}/../benchmarks"
777
+ echo " โœ“ Deleted local benchmarks/ directory"
778
+ else
779
+ echo " โญ Skipped local benchmarks/ deletion"
780
+ fi
781
+ fi
782
+
783
+ echo "โœ… Benchmark cleanup complete"
784
+ ;;
785
+ <% } %>
743
786
  all)
744
787
  echo "๐Ÿงน Performing complete cleanup"
745
788
  echo ""
@@ -790,6 +833,49 @@ case "${CLEANUP_TARGET}" in
790
833
  CLEANED_ITEMS+=("CodeBuild resources")
791
834
  fi
792
835
 
836
+ <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
837
+ echo ""
838
+
839
+ # Clean benchmark resources
840
+ WORKLOAD_CONFIG_NAME="${PROJECT_NAME}-benchmark-config"
841
+
842
+ # Delete workload config if exists
843
+ if aws sagemaker describe-ai-workload-config \
844
+ --ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
845
+ --region "$AWS_REGION" 2>/dev/null; then
846
+ aws sagemaker delete-ai-workload-config \
847
+ --ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
848
+ --region "$AWS_REGION"
849
+ echo " โœ“ Deleted workload config: $WORKLOAD_CONFIG_NAME"
850
+ fi
851
+
852
+ # Delete terminal benchmark jobs matching project prefix
853
+ aws sagemaker list-ai-benchmark-jobs \
854
+ --name-contains "${PROJECT_NAME}-benchmark-" \
855
+ --region "$AWS_REGION" \
856
+ --query 'AIBenchmarkJobs[?AIBenchmarkJobStatus!=`InProgress`].AIBenchmarkJobName' \
857
+ --output text | tr '\t' '\n' | while read -r job; do
858
+ [ -z "$job" ] && continue
859
+ aws sagemaker delete-ai-benchmark-job \
860
+ --ai-benchmark-job-name "$job" \
861
+ --region "$AWS_REGION"
862
+ echo " โœ“ Deleted benchmark job: $job"
863
+ done
864
+
865
+ # Delete local benchmark results
866
+ if [ -d "${SCRIPT_DIR}/../benchmarks" ]; then
867
+ read -p "Delete local benchmark results? (Y/n) " CONFIRM_DELETE
868
+ CONFIRM_DELETE="${CONFIRM_DELETE:-Y}"
869
+ if [[ "${CONFIRM_DELETE}" =~ ^[Yy]$ ]]; then
870
+ rm -rf "${SCRIPT_DIR}/../benchmarks"
871
+ echo " โœ“ Deleted local benchmarks/ directory"
872
+ else
873
+ echo " โญ Skipped local benchmarks/ deletion"
874
+ fi
875
+ fi
876
+
877
+ CLEANED_ITEMS+=("Benchmark resources")
878
+ <% } %>
793
879
  # Display summary
794
880
  echo ""
795
881
  echo "โœ… Cleanup complete!"
@@ -30,6 +30,9 @@ export INSTANCE_TYPE="<%= instanceType %>"
30
30
  <% if (inferenceAmiVersion) { %>
31
31
  export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
32
32
  <% } %>
33
+ <% if (typeof capacityReservationArn !== 'undefined' && capacityReservationArn) { %>
34
+ export CAPACITY_RESERVATION_ARN="<%= capacityReservationArn %>"
35
+ <% } %>
33
36
  <% } %>
34
37
 
35
38
  <% if (deploymentTarget === 'async-inference') { %>
@@ -126,6 +129,8 @@ export IC_MEMORY_SIZE="<%= icMemorySize %>"
126
129
  <% } %>
127
130
  <% if (typeof icGpuCount !== 'undefined' && icGpuCount != null) { %>
128
131
  export IC_GPU_COUNT="<%= icGpuCount %>"
132
+ <% } else { %>
133
+ export IC_GPU_COUNT="${IC_GPU_COUNT:-1}"
129
134
  <% } %>
130
135
  <% if (typeof icCopyCount !== 'undefined' && icCopyCount != null) { %>
131
136
  export IC_COPY_COUNT="<%= icCopyCount %>"
@@ -151,17 +156,29 @@ export <%= key %>=${<%= key %>:-<%= value %>}
151
156
  # Framework-specific configuration
152
157
  <% if (framework === 'transformers') { %>
153
158
  export MODEL_NAME="<%= modelName %>"
154
- <% if (hfToken) { %>
159
+ # Secrets Manager integration: when an ARN is configured, do-scripts resolve the
160
+ # secret at the appropriate stage (build-time or runtime). When a plaintext value
161
+ # is configured, it is exported directly. The _ARN suffix signals resolution is needed.
162
+ <% if (typeof hfTokenArn !== 'undefined' && hfTokenArn) { %>
163
+ export HF_TOKEN_ARN="<%= hfTokenArn %>"
164
+ <% } else if (hfToken) { %>
155
165
  export HF_TOKEN="<%= hfToken %>"
156
166
  <% } %>
157
- <% if (ngcApiKey) { %>
167
+ <% if (typeof ngcTokenArn !== 'undefined' && ngcTokenArn) { %>
168
+ export NGC_API_KEY_ARN="<%= ngcTokenArn %>"
169
+ <% } else if (ngcApiKey) { %>
158
170
  export NGC_API_KEY="<%= ngcApiKey %>"
159
171
  <% } %>
160
172
  <% } %>
161
173
 
162
174
  <% if (framework === 'diffusors') { %>
163
175
  export MODEL_NAME="<%= modelName %>"
164
- <% if (hfToken) { %>
176
+ # Secrets Manager integration: when an ARN is configured, do-scripts resolve the
177
+ # secret at the appropriate stage (build-time or runtime). When a plaintext value
178
+ # is configured, it is exported directly. The _ARN suffix signals resolution is needed.
179
+ <% if (typeof hfTokenArn !== 'undefined' && hfTokenArn) { %>
180
+ export HF_TOKEN_ARN="<%= hfTokenArn %>"
181
+ <% } else if (hfToken) { %>
165
182
  export HF_TOKEN="<%= hfToken %>"
166
183
  <% } %>
167
184
  <% } %>
@@ -174,6 +191,26 @@ export MODEL_FORMAT="<%= modelFormat %>"
174
191
  export ROLE_ARN="<%= roleArn %>"
175
192
  <% } %>
176
193
 
194
+ <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
195
+ # SageMaker AI Benchmarking configuration
196
+ export BENCHMARK_CONCURRENCY="<%= benchmarkConcurrency %>"
197
+ export BENCHMARK_INPUT_TOKENS_MEAN="<%= benchmarkInputTokensMean %>"
198
+ export BENCHMARK_OUTPUT_TOKENS_MEAN="<%= benchmarkOutputTokensMean %>"
199
+ export BENCHMARK_STREAMING="<%= benchmarkStreaming %>"
200
+ <% if (benchmarkRequestCount) { %>
201
+ export BENCHMARK_REQUEST_COUNT="<%= benchmarkRequestCount %>"
202
+ <% } else { %>
203
+ export BENCHMARK_REQUEST_COUNT=""
204
+ <% } %>
205
+ <% if (benchmarkS3OutputPath) { %>
206
+ export BENCHMARK_S3_OUTPUT_PATH="<%= benchmarkS3OutputPath %>"
207
+ <% } else { %>
208
+ export BENCHMARK_S3_OUTPUT_PATH="s3://ml-container-creator-benchmark-${AWS_REGION}-$(aws sts get-caller-identity --query Account --output text)/${PROJECT_NAME}/"
209
+ <% } %>
210
+ export BENCHMARK_JOB_NAME=""
211
+ export BENCHMARK_WORKLOAD_CONFIG_NAME=""
212
+ <% } %>
213
+
177
214
  <% if (orderedEnvVars && orderedEnvVars.length > 0) { %>
178
215
  # Runtime environment variables (from catalog)
179
216
  <% orderedEnvVars.forEach(({ key, value }) => { %>
@@ -181,9 +218,7 @@ export <%= key %>=${<%= key %>:-<%= value %>}
181
218
  <% }); %>
182
219
  <% } %>
183
220
 
184
- <% if (baseImage) { %>
185
- export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage %>}
186
- <% } %>
221
+ export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage || '' %>}
187
222
 
188
223
  # Allow environment variable overrides
189
224
  export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
@@ -95,6 +95,41 @@ fi
95
95
  echo "โœ… ECR image found: ${ECR_REPOSITORY}:${PROJECT_NAME}-latest"
96
96
  IMAGE_TAG="${PROJECT_NAME}-latest"
97
97
 
98
+ # ============================================================
99
+ # Shared: Resolve secrets for container environment
100
+ # ============================================================
101
+ CONTAINER_ENV_JSON=""
102
+
103
+ if [ -n "${HF_TOKEN_ARN:-}" ]; then
104
+ echo "๐Ÿ” Resolving HuggingFace token from Secrets Manager..."
105
+ RESOLVED_HF_TOKEN=$(aws secretsmanager get-secret-value --secret-id "${HF_TOKEN_ARN}" --query SecretString --output text --region "${AWS_REGION}") || {
106
+ echo "โŒ Failed to resolve HuggingFace token from Secrets Manager"
107
+ exit 3
108
+ }
109
+ CONTAINER_ENV_JSON="\"HF_TOKEN\":\"${RESOLVED_HF_TOKEN}\""
110
+ elif [ -n "${HF_TOKEN:-}" ]; then
111
+ CONTAINER_ENV_JSON="\"HF_TOKEN\":\"${HF_TOKEN}\""
112
+ fi
113
+
114
+ if [ -n "${NGC_API_KEY_ARN:-}" ]; then
115
+ echo "๐Ÿ” Resolving NGC API key from Secrets Manager..."
116
+ RESOLVED_NGC_KEY=$(aws secretsmanager get-secret-value --secret-id "${NGC_API_KEY_ARN}" --query SecretString --output text --region "${AWS_REGION}") || {
117
+ echo "โŒ Failed to resolve NGC API key from Secrets Manager"
118
+ exit 3
119
+ }
120
+ if [ -n "${CONTAINER_ENV_JSON}" ]; then
121
+ CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"NGC_API_KEY\":\"${RESOLVED_NGC_KEY}\""
122
+ else
123
+ CONTAINER_ENV_JSON="\"NGC_API_KEY\":\"${RESOLVED_NGC_KEY}\""
124
+ fi
125
+ elif [ -n "${NGC_API_KEY:-}" ]; then
126
+ if [ -n "${CONTAINER_ENV_JSON}" ]; then
127
+ CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"NGC_API_KEY\":\"${NGC_API_KEY}\""
128
+ else
129
+ CONTAINER_ENV_JSON="\"NGC_API_KEY\":\"${NGC_API_KEY}\""
130
+ fi
131
+ fi
132
+
98
133
  <% if (deploymentTarget === 'realtime-inference') { %>
99
134
  # ============================================================
100
135
  # SageMaker Real-Time Inference Deployment (Inference Components)
@@ -301,6 +336,11 @@ if [ -z "${SKIP_TO}" ]; then
301
336
  echo " AMI version: ${INFERENCE_AMI_VERSION}"
302
337
  fi
303
338
 
339
+ if [ -n "${CAPACITY_RESERVATION_ARN:-}" ]; then
340
+ VARIANT_JSON="${VARIANT_JSON},\"CapacityReservationConfig\":{\"CapacityReservationPreference\":\"capacity-reservations-only\",\"MlReservationArn\":\"${CAPACITY_RESERVATION_ARN}\"}"
341
+ echo " โš ๏ธ Capacity reservation (experimental): ${CAPACITY_RESERVATION_ARN}"
342
+ fi
343
+
304
344
  VARIANT_JSON="${VARIANT_JSON}}]"
305
345
 
306
346
  echo "โš™๏ธ Creating endpoint configuration: ${ENDPOINT_CONFIG_NAME}"
@@ -400,20 +440,25 @@ if [ -z "${SKIP_TO}" ] || [ "${SKIP_TO}" = "create_ic" ] || [ "${SKIP_TO}" = "wa
400
440
  _update_config_var "INFERENCE_COMPONENT_NAME" "${IC_NAME}"
401
441
  fi
402
442
 
443
+ # Build container spec JSON
444
+ CONTAINER_SPEC="{\"Image\":\"${ECR_REPOSITORY}:${IMAGE_TAG}\""
445
+ if [ -n "${CONTAINER_ENV_JSON}" ]; then
446
+ CONTAINER_SPEC="${CONTAINER_SPEC},\"Environment\":{${CONTAINER_ENV_JSON}}"
447
+ fi
448
+ CONTAINER_SPEC="${CONTAINER_SPEC}}"
449
+
403
450
  echo "๐Ÿ“ฆ Creating inference component: ${IC_NAME}"
404
451
  if ! aws sagemaker create-inference-component \
405
452
  --inference-component-name "${IC_NAME}" \
406
453
  --endpoint-name "${ENDPOINT_NAME}" \
407
454
  --variant-name "AllTraffic" \
408
455
  --specification "{
409
- \"Container\": {
410
- \"Image\": \"${ECR_REPOSITORY}:${IMAGE_TAG}\"
411
- },
456
+ \"Container\": ${CONTAINER_SPEC},
412
457
  \"StartupParameters\": {
413
458
  \"ContainerStartupHealthCheckTimeoutInSeconds\": 900
414
459
  },
415
460
  \"ComputeResourceRequirements\": {
416
- \"NumberOfAcceleratorDevicesRequired\": 1,
461
+ \"NumberOfAcceleratorDevicesRequired\": ${IC_GPU_COUNT},
417
462
  \"MinMemoryRequiredInMb\": 1024
418
463
  }
419
464
  }" \
@@ -767,10 +812,17 @@ if [ -z "${SKIP_TO}" ]; then
767
812
  _update_config_var "SAGEMAKER_MODEL_NAME" "${MODEL_NAME_SM}"
768
813
 
769
814
  # Step 1: Create SageMaker model
815
+ # Build primary container spec
816
+ PRIMARY_CONTAINER="{\"Image\":\"${ECR_REPOSITORY}:${IMAGE_TAG}\""
817
+ if [ -n "${CONTAINER_ENV_JSON}" ]; then
818
+ PRIMARY_CONTAINER="${PRIMARY_CONTAINER},\"Environment\":{${CONTAINER_ENV_JSON}}"
819
+ fi
820
+ PRIMARY_CONTAINER="${PRIMARY_CONTAINER}}"
821
+
770
822
  echo "๐Ÿ“ฆ Creating SageMaker model: ${MODEL_NAME_SM}"
771
823
  if ! aws sagemaker create-model \
772
824
  --model-name "${MODEL_NAME_SM}" \
773
- --primary-container "{\"Image\":\"${ECR_REPOSITORY}:${IMAGE_TAG}\"}" \
825
+ --primary-container "${PRIMARY_CONTAINER}" \
774
826
  --execution-role-arn "${ROLE_ARN}" \
775
827
  --region "${AWS_REGION}"; then
776
828
 
@@ -1361,9 +1413,17 @@ _update_config_var "SAGEMAKER_MODEL_NAME" "${MODEL_NAME_SM}"
1361
1413
 
1362
1414
  # Step 1: Create SageMaker model
1363
1415
  echo "๐Ÿ“ฆ Creating SageMaker model: ${MODEL_NAME_SM}"
1416
+
1417
+ # Build primary container spec
1418
+ BATCH_PRIMARY_CONTAINER="{\"Image\":\"${ECR_REPOSITORY}:${IMAGE_TAG}\""
1419
+ if [ -n "${CONTAINER_ENV_JSON}" ]; then
1420
+ BATCH_PRIMARY_CONTAINER="${BATCH_PRIMARY_CONTAINER},\"Environment\":{${CONTAINER_ENV_JSON}}"
1421
+ fi
1422
+ BATCH_PRIMARY_CONTAINER="${BATCH_PRIMARY_CONTAINER}}"
1423
+
1364
1424
  if ! aws sagemaker create-model \
1365
1425
  --model-name "${MODEL_NAME_SM}" \
1366
- --primary-container "{\"Image\":\"${ECR_REPOSITORY}:${IMAGE_TAG}\"}" \
1426
+ --primary-container "${BATCH_PRIMARY_CONTAINER}" \
1367
1427
  --execution-role-arn "${ROLE_ARN}" \
1368
1428
  --region "${AWS_REGION}"; then
1369
1429
 
package/templates/do/logs CHANGED
@@ -51,11 +51,15 @@ echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”
51
51
  echo ""
52
52
 
53
53
  # Wait for log group to exist before tailing
54
- MAX_WAIT=300
54
+ MAX_WAIT=900
55
55
  INTERVAL=10
56
56
  ELAPSED=0
57
57
 
58
+ # Try IC-specific log group first, fall back to endpoint log group
59
+ FALLBACK_LOG_GROUP="/aws/sagemaker/Endpoints/${ENDPOINT}"
60
+
58
61
  while true; do
62
+ # Check IC-specific log group
59
63
  if aws logs describe-log-groups \
60
64
  --log-group-name-prefix "${LOG_GROUP}" \
61
65
  --region "${AWS_REGION}" \
@@ -64,6 +68,17 @@ while true; do
64
68
  break
65
69
  fi
66
70
 
71
+ # Check endpoint-level log group as fallback
72
+ if aws logs describe-log-groups \
73
+ --log-group-name-prefix "${FALLBACK_LOG_GROUP}" \
74
+ --region "${AWS_REGION}" \
75
+ --query "logGroups[?logGroupName=='${FALLBACK_LOG_GROUP}'].logGroupName" \
76
+ --output text 2>/dev/null | grep -q "${FALLBACK_LOG_GROUP}"; then
77
+ LOG_GROUP="${FALLBACK_LOG_GROUP}"
78
+ echo " โ„น๏ธ Using endpoint log group: ${LOG_GROUP}"
79
+ break
80
+ fi
81
+
67
82
  if [ "${ELAPSED}" -ge "${MAX_WAIT}" ]; then
68
83
  echo "โŒ Timed out after ${MAX_WAIT}s waiting for log group: ${LOG_GROUP}"
69
84
  echo ""
@@ -123,7 +138,7 @@ echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”
123
138
  echo ""
124
139
 
125
140
  # Wait for log group to exist before tailing
126
- MAX_WAIT=300
141
+ MAX_WAIT=900
127
142
  INTERVAL=10
128
143
  ELAPSED=0
129
144
 
@@ -195,7 +210,7 @@ echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”
195
210
  echo ""
196
211
 
197
212
  # Wait for log group to exist before tailing
198
- MAX_WAIT=300
213
+ MAX_WAIT=900
199
214
  INTERVAL=10
200
215
  ELAPSED=0
201
216
 
@@ -393,6 +393,7 @@ CJEOF
393
393
 
394
394
  # Try put-item with condition (new record)
395
395
  if aws dynamodb put-item \
396
+ --region "${AWS_REGION}" \
396
397
  --table-name "${CI_TABLE_NAME}" \
397
398
  --item "{
398
399
  \"configId\": {\"S\": \"${config_id}\"},
@@ -412,6 +413,7 @@ CJEOF
412
413
  else
413
414
  # Record already exists — update it (reset testStatus, update configJson, preserve createdAt)
414
415
  if aws dynamodb update-item \
416
+ --region "${AWS_REGION}" \
415
417
  --table-name "${CI_TABLE_NAME}" \
416
418
  --key "{\"configId\": {\"S\": \"${config_id}\"}}" \
417
419
  --update-expression "SET configJson = :cj, testStatus = :ts, deploymentConfig = :dc, baseImage = :bi, baseImageVersion = :bv, buildStrategy = :bs, projectName = :pn, schemaVersion = :sv" \
@@ -496,6 +498,11 @@ DJEOF
496
498
  echo "${DEPLOYMENT_JSON}" | python3 -c "import sys,json; print(json.dumps(json.load(sys.stdin), indent=2))" 2>/dev/null || echo "${DEPLOYMENT_JSON}"
497
499
 
498
500
  if [ "${CI_MODE}" = true ]; then
501
+ # Strip capacity reservation ARN for CI — force on-demand deployment
502
+ # CI projects must never use reserved capacity (reservations are account-specific
503
+ # and time-bound; CI replay should always target on-demand instances)
504
+ unset CAPACITY_RESERVATION_ARN 2>/dev/null || true
505
+
499
506
  echo ""
500
507
  echo "โš ๏ธ CI Integration is experimental and currently only tested for"
501
508
  echo " SageMaker Real-Time Inference endpoints."
@@ -507,7 +514,7 @@ DJEOF
507
514
  echo "๐Ÿ”‘ configId: ${CONFIG_ID}"
508
515
 
509
516
  # Check if CI_Table exists before writing
510
- if ! aws dynamodb describe-table --table-name "${CI_TABLE_NAME}" &>/dev/null; then
517
+ if ! aws dynamodb describe-table --table-name "${CI_TABLE_NAME}" --region "${AWS_REGION}" &>/dev/null; then
511
518
  echo ""
512
519
  echo "โš ๏ธ CI infrastructure not provisioned. Run 'ml-container-creator bootstrap' with CI enabled."
513
520
  echo " Skipping CI table write."
package/templates/do/run CHANGED
@@ -68,6 +68,16 @@ if [ -n "${MODEL_DIR:-}" ]; then
68
68
  fi
69
69
  fi
70
70
 
71
+ # --- Secrets Manager resolution (runtime) ---
72
+ if [ -n "${HF_TOKEN_ARN:-}" ]; then
73
+ echo "๐Ÿ” Resolving HuggingFace token from Secrets Manager..."
74
+ HF_TOKEN=$(aws secretsmanager get-secret-value --secret-id "${HF_TOKEN_ARN}" --query SecretString --output text) || {
75
+ echo "โŒ Failed to resolve HuggingFace token from Secrets Manager"
76
+ exit 3
77
+ }
78
+ export HF_TOKEN
79
+ fi
80
+
71
81
  # Prepare environment variables
72
82
  ENV_VARS=""
73
83
  <% if (framework === 'transformers') { %>
@@ -122,6 +122,11 @@ EXPOSE 8080
122
122
  # --http-port=8080: SageMaker requires port 8080
123
123
  # --model-repository: Path to model repository
124
124
  # --strict-model-config=false: Allow Triton to auto-complete config for some backends
125
+
126
+ # CUDA compatibility: ensure compat libs are on LD_LIBRARY_PATH for newer SageMaker AMIs
127
+ # (NVIDIA Container Toolkit 1.17.4+ no longer auto-mounts these)
128
+ ENV LD_LIBRARY_PATH="/usr/local/cuda/compat:${LD_LIBRARY_PATH:-}"
129
+
125
130
  ENTRYPOINT ["tritonserver", \
126
131
  "--http-port=8080", \
127
132
  "--model-repository=/opt/ml/model/model_repository", \