@aws/ml-container-creator 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/bin/cli.js +31 -137
  2. package/config/parameter-schema-v2.json +2065 -0
  3. package/package.json +6 -3
  4. package/servers/lib/catalogs/jumpstart-public.json +101 -16
  5. package/servers/lib/catalogs/models.json +182 -26
  6. package/src/app.js +6 -389
  7. package/src/lib/bootstrap-command-handler.js +75 -1078
  8. package/src/lib/bootstrap-profile-manager.js +634 -0
  9. package/src/lib/bootstrap-provisioners.js +421 -0
  10. package/src/lib/config-loader.js +405 -0
  11. package/src/lib/config-manager.js +59 -1668
  12. package/src/lib/config-mcp-client.js +118 -0
  13. package/src/lib/config-validator.js +634 -0
  14. package/src/lib/cuda-resolver.js +140 -0
  15. package/src/lib/e2e-catalog-validator.js +251 -3
  16. package/src/lib/e2e-ci-recorder.js +103 -0
  17. package/src/lib/generated/cli-options.js +471 -0
  18. package/src/lib/generated/parameter-matrix.js +671 -0
  19. package/src/lib/generated/validation-rules.js +202 -0
  20. package/src/lib/marketplace-flow.js +276 -0
  21. package/src/lib/mcp-query-runner.js +768 -0
  22. package/src/lib/parameter-schema-validator.js +62 -18
  23. package/src/lib/prompt-runner.js +41 -1504
  24. package/src/lib/prompts/feature-prompts.js +172 -0
  25. package/src/lib/prompts/index.js +48 -0
  26. package/src/lib/prompts/infrastructure-prompts.js +690 -0
  27. package/src/lib/prompts/model-prompts.js +552 -0
  28. package/src/lib/prompts/project-prompts.js +70 -0
  29. package/src/lib/prompts.js +2 -1446
  30. package/src/lib/registry-command-handler.js +135 -3
  31. package/src/lib/secrets-prompt-runner.js +251 -0
  32. package/src/lib/template-variable-resolver.js +398 -0
  33. package/templates/code/serve +5 -134
  34. package/templates/code/serve.d/lmi.ejs +19 -0
  35. package/templates/code/serve.d/sglang.ejs +47 -0
  36. package/templates/code/serve.d/tensorrt-llm.ejs +53 -0
  37. package/templates/code/serve.d/vllm.ejs +48 -0
  38. package/templates/do/clean +1 -1387
  39. package/templates/do/clean.d/async-inference.ejs +508 -0
  40. package/templates/do/clean.d/batch-transform.ejs +512 -0
  41. package/templates/do/clean.d/hyperpod-eks.ejs +481 -0
  42. package/templates/do/clean.d/managed-inference.ejs +1043 -0
  43. package/templates/do/deploy +1 -1766
  44. package/templates/do/deploy.d/async-inference.ejs +501 -0
  45. package/templates/do/deploy.d/batch-transform.ejs +529 -0
  46. package/templates/do/deploy.d/hyperpod-eks.ejs +339 -0
  47. package/templates/do/deploy.d/managed-inference.ejs +726 -0
  48. package/config/parameter-schema.json +0 -88
@@ -1,1387 +1 @@
1
- #!/bin/bash
2
- # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
- # SPDX-License-Identifier: Apache-2.0
4
-
5
- set -e
6
- set -u
7
- set -o pipefail
8
-
9
- # Source configuration
10
- SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
- source "${SCRIPT_DIR}/config"
12
-
13
- # Parse arguments
14
- CLEANUP_TARGET=""
15
- CLEANUP_ARG=""
16
- FORCE_CLEAN=false
17
-
18
- for arg in "$@"; do
19
- case "$arg" in
20
- --force) FORCE_CLEAN=true ;;
21
- -*) ;; # ignore other flags
22
- *)
23
- if [ -z "${CLEANUP_TARGET}" ]; then
24
- CLEANUP_TARGET="$arg"
25
- elif [ -z "${CLEANUP_ARG}" ]; then
26
- CLEANUP_ARG="$arg"
27
- fi
28
- ;;
29
- esac
30
- done
31
-
32
- # Function to display usage
33
- show_usage() {
34
- <% if (deploymentTarget === 'realtime-inference') { %>
35
- echo "Usage: ./do/clean [local|ecr|endpoint|ic <name>|adapter <name>|adapters|codebuild|all]"
36
- <% } else if (deploymentTarget === 'async-inference') { %>
37
- echo "Usage: ./do/clean [local|ecr|endpoint|codebuild|all]"
38
- <% } else if (deploymentTarget === 'batch-transform') { %>
39
- echo "Usage: ./do/clean [local|ecr|batch|codebuild|all]"
40
- <% } else if (deploymentTarget === 'hyperpod-eks') { %>
41
- echo "Usage: ./do/clean [local|ecr|hyperpod|codebuild|all]"
42
- <% } %>
43
- echo ""
44
- echo "Cleanup targets:"
45
- echo " local - Remove local Docker images"
46
- echo " ecr - Remove images from Amazon ECR"
47
- <% if (deploymentTarget === 'realtime-inference') { %>
48
- echo " endpoint - Delete SageMaker endpoint, configuration, and inference components"
49
- echo " ic <name> - Delete a single inference component (does not touch the endpoint)"
50
- <% if (typeof enableLora !== 'undefined' && enableLora) { %>
51
- echo " adapter <name> - Delete a single LoRA adapter (synonym for do/adapter remove)"
52
- echo " adapters - Remove ALL LoRA adapters (keeps base IC and endpoint running)"
53
- <% } %>
54
- <% } else if (deploymentTarget === 'async-inference') { %>
55
- echo " endpoint - Delete SageMaker async endpoint, configuration, and inference component"
56
- <% } else if (deploymentTarget === 'batch-transform') { %>
57
- echo " batch - Stop running transform job and delete SageMaker model"
58
- <% } else if (deploymentTarget === 'hyperpod-eks') { %>
59
- echo " hyperpod - Delete HyperPod EKS deployment and services"
60
- <% } %>
61
- echo " codebuild - Delete CodeBuild project, IAM role, and S3 source artifacts"
62
- echo " all - Perform all cleanup operations"
63
- echo ""
64
- echo "Examples:"
65
- echo " ./do/clean local # Remove local Docker images only"
66
- <% if (deploymentTarget === 'realtime-inference') { %>
67
- echo " ./do/clean endpoint # Delete SageMaker resources only"
68
- echo " ./do/clean ic llama # Delete a single inference component"
69
- <% if (typeof enableLora !== 'undefined' && enableLora) { %>
70
- echo " ./do/clean adapter ectsum # Delete a single LoRA adapter"
71
- echo " ./do/clean adapters # Remove all LoRA adapters"
72
- <% } %>
73
- <% } else if (deploymentTarget === 'async-inference') { %>
74
- echo " ./do/clean endpoint # Delete SageMaker async resources only"
75
- <% } else if (deploymentTarget === 'batch-transform') { %>
76
- echo " ./do/clean batch # Stop transform job and delete model"
77
- <% } else if (deploymentTarget === 'hyperpod-eks') { %>
78
- echo " ./do/clean hyperpod # Delete HyperPod EKS resources only"
79
- <% } %>
80
- echo " ./do/clean codebuild # Delete CodeBuild project and rebuild fresh"
81
- echo " ./do/clean all # Clean up everything"
82
- }
83
-
84
- # Function to confirm action (skipped when --force is set)
85
- confirm_action() {
86
- local message="$1"
87
- if [ "${FORCE_CLEAN}" = true ]; then
88
- return 0
89
- fi
90
- echo ""
91
- echo "⚠️ ${message}"
92
- read -p " Are you sure? (yes/no): " -r
93
- echo ""
94
- if [[ ! $REPLY =~ ^[Yy][Ee][Ss]$ ]]; then
95
- echo "❌ Operation cancelled"
96
- return 1
97
- fi
98
- return 0
99
- }
100
-
101
- # Function to clean local Docker images
102
- clean_local() {
103
- echo "🧹 Cleaning local Docker images"
104
- echo " Project: ${PROJECT_NAME}"
105
-
106
- # Build list of image patterns to clean
107
- # Pattern 1: locally built images (e.g., my-project:latest)
108
- # Pattern 2: ECR-pulled images (e.g., <account>.dkr.ecr.<region>.amazonaws.com/<repo>:<project>-latest)
109
- LOCAL_PATTERN="^${PROJECT_NAME}:"
110
- ECR_PATTERN="\.dkr\.ecr\..*\.amazonaws\.com/${ECR_REPOSITORY_NAME}:${PROJECT_NAME}-"
111
-
112
- # Check if any matching images exist
113
- if ! docker images --format "{{.Repository}}:{{.Tag}}" | grep -qE "${LOCAL_PATTERN}|${ECR_PATTERN}"; then
114
- echo "ℹ️ No local images found for ${PROJECT_NAME}"
115
- return 0
116
- fi
117
-
118
- # List images to be removed
119
- echo ""
120
- echo "Images to be removed:"
121
- docker images --format "{{.Repository}}:{{.Tag}}" | grep -E "${LOCAL_PATTERN}|${ECR_PATTERN}" | while read -r image; do
122
- echo " ${image}"
123
- done
124
-
125
- if ! confirm_action "This will remove all local Docker images for ${PROJECT_NAME}"; then
126
- return 1
127
- fi
128
-
129
- # Remove images
130
- echo "🗑️ Removing local images..."
131
- docker images --format "{{.Repository}}:{{.Tag}}" | grep -E "${LOCAL_PATTERN}|${ECR_PATTERN}" | while read -r image; do
132
- echo " Removing: ${image}"
133
- docker rmi "${image}" || echo " ⚠️ Failed to remove ${image}"
134
- done
135
-
136
- echo "✅ Local images cleaned"
137
- }
138
-
139
- # Function to clean ECR images
140
- clean_ecr() {
141
- echo "🧹 Cleaning ECR images"
142
- echo " Repository: ${ECR_REPOSITORY_NAME}"
143
- echo " Region: ${AWS_REGION}"
144
-
145
- # Validate AWS credentials
146
- if ! aws sts get-caller-identity &> /dev/null; then
147
- echo "❌ AWS credentials not configured"
148
- echo " Run: aws configure"
149
- exit 4
150
- fi
151
-
152
- # Get AWS account ID
153
- AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
154
-
155
- # Check if repository exists
156
- if ! aws ecr describe-repositories \
157
- --repository-names "${ECR_REPOSITORY_NAME}" \
158
- --region "${AWS_REGION}" &> /dev/null; then
159
- echo "ℹ️ ECR repository ${ECR_REPOSITORY_NAME} does not exist"
160
- return 0
161
- fi
162
-
163
- # List images in repository
164
- echo ""
165
- echo "Checking for images in repository..."
166
-
167
- if ! IMAGE_IDS=$(aws ecr list-images \
168
- --repository-name "${ECR_REPOSITORY_NAME}" \
169
- --region "${AWS_REGION}" \
170
- --query "imageIds[?starts_with(imageTag, '${PROJECT_NAME}-')].[imageTag]" \
171
- --output text 2>&1); then
172
- echo "ℹ️ No images found for project: ${PROJECT_NAME}"
173
- return 0
174
- fi
175
-
176
- if [ -z "${IMAGE_IDS}" ] || [ "${IMAGE_IDS}" = "None" ]; then
177
- echo "ℹ️ No images found for project: ${PROJECT_NAME}"
178
- return 0
179
- fi
180
-
181
- # Display images
182
- echo "Images for project ${PROJECT_NAME}:"
183
- echo "${IMAGE_IDS}" | while read -r tag; do
184
- if [ -n "${tag}" ] && [ "${tag}" != "None" ]; then
185
- echo " - ${tag}"
186
- fi
187
- done
188
-
189
- if ! confirm_action "This will remove all images from ECR repository ${ECR_REPOSITORY_NAME}"; then
190
- return 1
191
- fi
192
-
193
- # Remove images
194
- echo "🗑️ Removing ECR images..."
195
-
196
- # Only delete images tagged with this project's name (not all images in the shared repo)
197
- IMAGE_IDS_JSON=$(aws ecr list-images \
198
- --repository-name "${ECR_REPOSITORY_NAME}" \
199
- --region "${AWS_REGION}" \
200
- --query "imageIds[?starts_with(imageTag, '${PROJECT_NAME}-')]" \
201
- --output json)
202
-
203
- if [ "${IMAGE_IDS_JSON}" != "[]" ] && [ -n "${IMAGE_IDS_JSON}" ]; then
204
- if aws ecr batch-delete-image \
205
- --repository-name "${ECR_REPOSITORY_NAME}" \
206
- --region "${AWS_REGION}" \
207
- --image-ids "${IMAGE_IDS_JSON}" &> /dev/null; then
208
- echo "✅ ECR images removed for project: ${PROJECT_NAME}"
209
- else
210
- echo "❌ Failed to remove some ECR images"
211
- return 1
212
- fi
213
- else
214
- echo "ℹ️ No images to remove for project: ${PROJECT_NAME}"
215
- fi
216
- }
217
-
218
- <% if (deploymentTarget === 'realtime-inference') { %>
219
- # Function to clean SageMaker endpoint and inference components
220
- clean_endpoint() {
221
- echo "🧹 Cleaning SageMaker resources"
222
- echo " Project: ${PROJECT_NAME}"
223
- echo " Region: ${AWS_REGION}"
224
-
225
- # Validate AWS credentials
226
- if ! aws sts get-caller-identity &> /dev/null; then
227
- echo "❌ AWS credentials not configured"
228
- echo " Run: aws configure"
229
- exit 4
230
- fi
231
-
232
- AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
233
-
234
- # Use names from config (set by do/deploy) or argument
235
- local EP_NAME="${ENDPOINT_NAME:-}"
236
- local IC_NAME="${INFERENCE_COMPONENT_NAME:-}"
237
- local EPC_NAME="${ENDPOINT_CONFIG_NAME:-}"
238
-
239
- if [ -z "${EP_NAME}" ]; then
240
- echo "❌ No endpoint name found"
241
- echo " Run ./do/deploy first, or set ENDPOINT_NAME in do/config"
242
- return 1
243
- fi
244
-
245
- # External endpoint: only remove inference components, not the endpoint itself
246
- if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
247
- echo ""
248
- echo "⚠️ Endpoint is external — only removing inference components"
249
- echo " Endpoint ${EP_NAME} will NOT be deleted (managed externally)."
250
- echo ""
251
-
252
- <% if (typeof enableLora !== 'undefined' && enableLora) { %>
253
- # Delete adapter ICs first (adapters depend on base ICs)
254
- if [ -d "${SCRIPT_DIR}/adapters" ]; then
255
- local ADAPTER_COUNT=0
256
- for adapter_conf in "${SCRIPT_DIR}"/adapters/*.conf; do
257
- [ -f "${adapter_conf}" ] || continue
258
- ADAPTER_COUNT=$((ADAPTER_COUNT + 1))
259
- done
260
-
261
- if [ "${ADAPTER_COUNT}" -gt 0 ]; then
262
- echo "🔌 Deleting ${ADAPTER_COUNT} LoRA adapter(s) first..."
263
- for adapter_conf in "${SCRIPT_DIR}"/adapters/*.conf; do
264
- [ -f "${adapter_conf}" ] || continue
265
- local adapter_ic_name=""
266
- adapter_ic_name=$(grep "^export ADAPTER_IC_NAME=" "${adapter_conf}" 2>/dev/null | sed 's/^export ADAPTER_IC_NAME="//' | sed 's/"$//' || echo "")
267
- local adapter_display_name
268
- adapter_display_name=$(basename "${adapter_conf}" .conf)
269
-
270
- if [ -n "${adapter_ic_name}" ]; then
271
- echo "🗑️ Deleting adapter: ${adapter_display_name} (${adapter_ic_name})"
272
- if aws sagemaker delete-inference-component \
273
- --inference-component-name "${adapter_ic_name}" \
274
- --region "${AWS_REGION}" 2>/dev/null; then
275
- echo "⏳ Waiting for adapter deletion..."
276
- aws sagemaker wait inference-component-deleted \
277
- --inference-component-name "${adapter_ic_name}" \
278
- --region "${AWS_REGION}" 2>/dev/null || sleep 15
279
- echo "✅ Adapter deleted: ${adapter_display_name}"
280
-
281
- # Mark adapter IC as deleted in manifest (non-blocking)
282
- ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${adapter_ic_name}" 2>/dev/null || true
283
- else
284
- echo "⚠️ Failed to delete adapter IC: ${adapter_ic_name} (may already be gone)"
285
- fi
286
- fi
287
-
288
- # Remove adapter conf file
289
- rm -f "${adapter_conf}"
290
- done
291
- echo "✅ All adapters deleted"
292
- echo ""
293
- fi
294
- fi
295
-
296
- <% } %>
297
- # Iterate do/ic/*.conf and delete each IC owned by this project
298
- local IC_DELETED=0
299
- if [ -d "${SCRIPT_DIR}/ic" ]; then
300
- for conf in "${SCRIPT_DIR}"/ic/*.conf; do
301
- [ -f "${conf}" ] || continue
302
- local ic_deployed_name=""
303
- if grep -q "^export IC_DEPLOYED_NAME=" "${conf}" 2>/dev/null; then
304
- ic_deployed_name=$(grep "^export IC_DEPLOYED_NAME=" "${conf}" | sed 's/^export IC_DEPLOYED_NAME="//' | sed 's/"$//')
305
- fi
306
- if [ -n "${ic_deployed_name}" ]; then
307
- echo "🗑️ Deleting inference component: ${ic_deployed_name}"
308
- if aws sagemaker delete-inference-component \
309
- --inference-component-name "${ic_deployed_name}" \
310
- --region "${AWS_REGION}" 2>/dev/null; then
311
- echo "⏳ Waiting for inference component deletion..."
312
- aws sagemaker wait inference-component-deleted \
313
- --inference-component-name "${ic_deployed_name}" \
314
- --region "${AWS_REGION}" 2>/dev/null || sleep 15
315
- echo "✅ Inference component deleted: ${ic_deployed_name}"
316
-
317
- # Mark inference component as deleted in manifest (non-blocking)
318
- ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${ic_deployed_name}" 2>/dev/null || true
319
-
320
- # Clear deployed state from config
321
- sed -i.bak '/^export IC_DEPLOYED_NAME=/d;/^export IC_DEPLOYED_AT=/d' "${conf}"
322
- rm -f "${conf}.bak"
323
- else
324
- echo "⚠️ Failed to delete inference component: ${ic_deployed_name}"
325
- fi
326
- IC_DELETED=$((IC_DELETED + 1))
327
- fi
328
- done
329
- fi
330
-
331
- # Also handle legacy single IC from config
332
- if [ -n "${IC_NAME}" ]; then
333
- if aws sagemaker describe-inference-component \
334
- --inference-component-name "${IC_NAME}" \
335
- --region "${AWS_REGION}" &> /dev/null; then
336
- echo "🗑️ Deleting inference component: ${IC_NAME}"
337
- if aws sagemaker delete-inference-component \
338
- --inference-component-name "${IC_NAME}" \
339
- --region "${AWS_REGION}" 2>/dev/null; then
340
- echo "⏳ Waiting for inference component deletion..."
341
- aws sagemaker wait inference-component-deleted \
342
- --inference-component-name "${IC_NAME}" \
343
- --region "${AWS_REGION}" 2>/dev/null || sleep 15
344
- echo "✅ Inference component deleted: ${IC_NAME}"
345
-
346
- # Mark inference component as deleted in manifest (non-blocking)
347
- ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${IC_NAME}" 2>/dev/null || true
348
- fi
349
- IC_DELETED=$((IC_DELETED + 1))
350
- fi
351
- fi
352
-
353
- if [ "${IC_DELETED}" -eq 0 ]; then
354
- echo "ℹ️ No deployed inference components found to clean"
355
- fi
356
-
357
- echo "✅ External endpoint cleanup complete (endpoint preserved)"
358
- return 0
359
- fi
360
-
361
- echo ""
362
- echo "Checking for SageMaker resources..."
363
-
364
- local ENDPOINT_EXISTS=false
365
-
366
- if aws sagemaker describe-endpoint \
367
- --endpoint-name "${EP_NAME}" \
368
- --region "${AWS_REGION}" &> /dev/null; then
369
- ENDPOINT_EXISTS=true
370
- echo " ✓ Endpoint: ${EP_NAME}"
371
- else
372
- echo "ℹ️ Endpoint not found: ${EP_NAME}"
373
- return 0
374
- fi
375
-
376
- # Count ICs to be deleted (multi-IC path)
377
- local IC_COUNT=0
378
- local IC_NAMES_TO_DELETE=()
379
- local IC_CONFS_TO_CLEAN=()
380
-
381
- if [ -d "${SCRIPT_DIR}/ic" ]; then
382
- for conf in "${SCRIPT_DIR}"/ic/*.conf; do
383
- [ -f "${conf}" ] || continue
384
- local ic_deployed_name=""
385
- if grep -q "^export IC_DEPLOYED_NAME=" "${conf}" 2>/dev/null; then
386
- ic_deployed_name=$(grep "^export IC_DEPLOYED_NAME=" "${conf}" | sed 's/^export IC_DEPLOYED_NAME="//' | sed 's/"$//')
387
- fi
388
- if [ -n "${ic_deployed_name}" ]; then
389
- IC_NAMES_TO_DELETE+=("${ic_deployed_name}")
390
- IC_CONFS_TO_CLEAN+=("${conf}")
391
- IC_COUNT=$((IC_COUNT + 1))
392
- echo " ✓ Inference component: ${ic_deployed_name}"
393
- fi
394
- done
395
- fi
396
-
397
- # Legacy: check single IC from config (no do/ic/ directory)
398
- local IC_EXISTS=false
399
- if [ "${IC_COUNT}" -eq 0 ] && [ -n "${IC_NAME}" ]; then
400
- if aws sagemaker describe-inference-component \
401
- --inference-component-name "${IC_NAME}" \
402
- --region "${AWS_REGION}" &> /dev/null; then
403
- IC_EXISTS=true
404
- IC_COUNT=1
405
- echo " ✓ Inference component: ${IC_NAME}"
406
- fi
407
- fi
408
-
409
- # Confirmation with IC count
410
- local confirm_msg="Delete ${IC_COUNT} inference component"
411
- if [ "${IC_COUNT}" -ne 1 ]; then
412
- confirm_msg="${confirm_msg}s"
413
- fi
414
- confirm_msg="${confirm_msg} and endpoint?"
415
-
416
- if ! confirm_action "${confirm_msg}"; then
417
- return 1
418
- fi
419
-
420
- <% if (typeof enableLora !== 'undefined' && enableLora) { %>
421
- # Delete adapter ICs first (adapters depend on base ICs)
422
- if [ -d "${SCRIPT_DIR}/adapters" ]; then
423
- local ADAPTER_COUNT=0
424
- for adapter_conf in "${SCRIPT_DIR}"/adapters/*.conf; do
425
- [ -f "${adapter_conf}" ] || continue
426
- ADAPTER_COUNT=$((ADAPTER_COUNT + 1))
427
- done
428
-
429
- if [ "${ADAPTER_COUNT}" -gt 0 ]; then
430
- echo ""
431
- echo "🔌 Deleting ${ADAPTER_COUNT} LoRA adapter(s) first..."
432
- for adapter_conf in "${SCRIPT_DIR}"/adapters/*.conf; do
433
- [ -f "${adapter_conf}" ] || continue
434
- local adapter_ic_name=""
435
- adapter_ic_name=$(grep "^export ADAPTER_IC_NAME=" "${adapter_conf}" 2>/dev/null | sed 's/^export ADAPTER_IC_NAME="//' | sed 's/"$//' || echo "")
436
- local adapter_display_name
437
- adapter_display_name=$(basename "${adapter_conf}" .conf)
438
-
439
- if [ -n "${adapter_ic_name}" ]; then
440
- echo "🗑️ Deleting adapter: ${adapter_display_name} (${adapter_ic_name})"
441
- if aws sagemaker delete-inference-component \
442
- --inference-component-name "${adapter_ic_name}" \
443
- --region "${AWS_REGION}" 2>/dev/null; then
444
- echo "⏳ Waiting for adapter deletion..."
445
- aws sagemaker wait inference-component-deleted \
446
- --inference-component-name "${adapter_ic_name}" \
447
- --region "${AWS_REGION}" 2>/dev/null || sleep 15
448
- echo "✅ Adapter deleted: ${adapter_display_name}"
449
-
450
- # Mark adapter IC as deleted in manifest (non-blocking)
451
- ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${adapter_ic_name}" 2>/dev/null || true
452
- else
453
- echo "⚠️ Failed to delete adapter IC: ${adapter_ic_name} (may already be gone)"
454
- fi
455
- fi
456
-
457
- # Remove adapter conf file
458
- rm -f "${adapter_conf}"
459
- done
460
- echo "✅ All adapters deleted"
461
- echo ""
462
- fi
463
- fi
464
-
465
- <% } %>
466
- # Delete inference components first (must be deleted before endpoint)
467
- if [ ${#IC_NAMES_TO_DELETE[@]} -gt 0 ]; then
468
- # Multi-IC path: iterate do/ic/*.conf
469
- local idx=0
470
- for ic_deployed_name in "${IC_NAMES_TO_DELETE[@]}"; do
471
- local conf="${IC_CONFS_TO_CLEAN[$idx]}"
472
- echo "🗑️ Deleting inference component: ${ic_deployed_name}"
473
- if aws sagemaker delete-inference-component \
474
- --inference-component-name "${ic_deployed_name}" \
475
- --region "${AWS_REGION}" 2>/dev/null; then
476
- echo "⏳ Waiting for inference component deletion..."
477
- aws sagemaker wait inference-component-deleted \
478
- --inference-component-name "${ic_deployed_name}" \
479
- --region "${AWS_REGION}" 2>/dev/null || sleep 15
480
- echo "✅ Inference component deleted: ${ic_deployed_name}"
481
-
482
- # Mark inference component as deleted in manifest (non-blocking)
483
- ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${ic_deployed_name}" 2>/dev/null || true
484
-
485
- # Clear deployed state from config
486
- sed -i.bak '/^export IC_DEPLOYED_NAME=/d;/^export IC_DEPLOYED_AT=/d' "${conf}"
487
- rm -f "${conf}.bak"
488
- else
489
- echo "❌ Failed to delete inference component: ${ic_deployed_name}"
490
- fi
491
- idx=$((idx + 1))
492
- done
493
- elif [ "${IC_EXISTS}" = true ]; then
494
- # Legacy single IC path
495
- echo "🗑️ Deleting inference component: ${IC_NAME}"
496
- if aws sagemaker delete-inference-component \
497
- --inference-component-name "${IC_NAME}" \
498
- --region "${AWS_REGION}" &> /dev/null; then
499
- echo "⏳ Waiting for inference component deletion..."
500
- aws sagemaker wait inference-component-deleted \
501
- --inference-component-name "${IC_NAME}" \
502
- --region "${AWS_REGION}" 2>/dev/null || sleep 15
503
- echo "✅ Inference component deleted"
504
-
505
- # Mark inference component as deleted in manifest (non-blocking)
506
- ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${IC_NAME}" 2>/dev/null || true
507
- else
508
- echo "❌ Failed to delete inference component"
509
- fi
510
- fi
511
-
512
- # Delete endpoint
513
- echo "🗑️ Deleting endpoint: ${EP_NAME}"
514
- if aws sagemaker delete-endpoint \
515
- --endpoint-name "${EP_NAME}" \
516
- --region "${AWS_REGION}" &> /dev/null; then
517
- echo "✅ Endpoint deleted"
518
- echo "⏳ Waiting for endpoint deletion..."
519
- aws sagemaker wait endpoint-deleted \
520
- --endpoint-name "${EP_NAME}" \
521
- --region "${AWS_REGION}" 2>/dev/null || sleep 10
522
-
523
- # Mark endpoint as deleted in manifest (non-blocking)
524
- ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint/${EP_NAME}" 2>/dev/null || true
525
- else
526
- echo "❌ Failed to delete endpoint"
527
- fi
528
-
529
- # Delete endpoint configuration
530
- if [ -n "${EPC_NAME}" ]; then
531
- echo "🗑️ Deleting endpoint configuration: ${EPC_NAME}"
532
- if aws sagemaker delete-endpoint-config \
533
- --endpoint-config-name "${EPC_NAME}" \
534
- --region "${AWS_REGION}" &> /dev/null; then
535
- echo "✅ Endpoint configuration deleted"
536
-
537
- # Mark endpoint config as deleted in manifest (non-blocking)
538
- ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint-config/${EPC_NAME}" 2>/dev/null || true
539
- else
540
- echo "❌ Failed to delete endpoint configuration"
541
- fi
542
- fi
543
-
544
- # Remove saved names from config
545
- if grep -q "^export ENDPOINT_NAME=" "${SCRIPT_DIR}/config" 2>/dev/null; then
546
- sed -i.bak '/^# Last deployed resources/d;/^export ENDPOINT_NAME=/d;/^export ENDPOINT_CONFIG_NAME=/d;/^export INFERENCE_COMPONENT_NAME=/d' "${SCRIPT_DIR}/config"
547
- rm -f "${SCRIPT_DIR}/config.bak"
548
- fi
549
-
550
- echo "✅ SageMaker resources cleaned"
551
- }
552
-
553
- # Function to clean a single inference component by name
554
- clean_ic() {
555
- local ic_name="$1"
556
- echo "🧹 Cleaning inference component: ${ic_name}"
557
- echo " Project: ${PROJECT_NAME}"
558
- echo " Region: ${AWS_REGION}"
559
-
560
- # Validate IC name argument
561
- if [ -z "${ic_name}" ]; then
562
- echo "❌ IC name required"
563
- echo " Usage: ./do/clean ic <name>"
564
- return 1
565
- fi
566
-
567
- # Check that the IC config file exists
568
- local ic_conf="${SCRIPT_DIR}/ic/${ic_name}.conf"
569
- if [ ! -f "${ic_conf}" ]; then
570
- echo "❌ IC config not found: do/ic/${ic_name}.conf"
571
- echo " Available ICs:"
572
- if [ -d "${SCRIPT_DIR}/ic" ]; then
573
- for conf in "${SCRIPT_DIR}"/ic/*.conf; do
574
- [ -f "${conf}" ] || continue
575
- echo " - $(basename "${conf}" .conf)"
576
- done
577
- else
578
- echo " (none)"
579
- fi
580
- return 1
581
- fi
582
-
583
- # Validate AWS credentials
584
- if ! aws sts get-caller-identity &> /dev/null; then
585
- echo "❌ AWS credentials not configured"
586
- echo " Run: aws configure"
587
- exit 4
588
- fi
589
-
590
- AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
591
-
592
- # Look up IC_DEPLOYED_NAME from the config file
593
- local ic_deployed_name=""
594
- if grep -q "^export IC_DEPLOYED_NAME=" "${ic_conf}" 2>/dev/null; then
595
- ic_deployed_name=$(grep "^export IC_DEPLOYED_NAME=" "${ic_conf}" | sed 's/^export IC_DEPLOYED_NAME="//' | sed 's/"$//')
596
- fi
597
-
598
- if [ -z "${ic_deployed_name}" ]; then
599
- echo "ℹ️ IC '${ic_name}' has not been deployed (no IC_DEPLOYED_NAME in config)"
600
- return 0
601
- fi
602
-
603
- echo " Deployed as: ${ic_deployed_name}"
604
-
605
- if ! confirm_action "This will delete inference component '${ic_deployed_name}'"; then
606
- return 1
607
- fi
608
-
609
- # Delete the inference component
610
- echo "🗑️ Deleting inference component: ${ic_deployed_name}"
611
- if aws sagemaker delete-inference-component \
612
- --inference-component-name "${ic_deployed_name}" \
613
- --region "${AWS_REGION}" 2>/dev/null; then
614
- echo "⏳ Waiting for inference component deletion..."
615
- aws sagemaker wait inference-component-deleted \
616
- --inference-component-name "${ic_deployed_name}" \
617
- --region "${AWS_REGION}" 2>/dev/null || sleep 15
618
- echo "✅ Inference component deleted: ${ic_deployed_name}"
619
-
620
- # Mark inference component as deleted in manifest (non-blocking)
621
- ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${ic_deployed_name}" 2>/dev/null || true
622
-
623
- # Clear deployed state from config
624
- sed -i.bak '/^export IC_DEPLOYED_NAME=/d;/^export IC_DEPLOYED_AT=/d' "${ic_conf}"
625
- rm -f "${ic_conf}.bak"
626
- else
627
- echo "❌ Failed to delete inference component: ${ic_deployed_name}"
628
- return 1
629
- fi
630
-
631
- echo "✅ Inference component '${ic_name}' cleaned"
632
- }
633
-
634
- <% if (typeof enableLora !== 'undefined' && enableLora) { %>
635
- # Function to clean a single LoRA adapter by name (synonym for do/adapter remove)
636
- clean_adapter() {
637
- local adapter_name="$1"
638
- echo "🧹 Cleaning LoRA adapter: ${adapter_name}"
639
- echo " Project: ${PROJECT_NAME}"
640
- echo " Region: ${AWS_REGION}"
641
-
642
- # Validate adapter name argument
643
- if [ -z "${adapter_name}" ]; then
644
- echo "❌ Adapter name required"
645
- echo " Usage: ./do/clean adapter <name>"
646
- return 1
647
- fi
648
-
649
- # Check that the adapter config file exists
650
- local adapter_conf="${SCRIPT_DIR}/adapters/${adapter_name}.conf"
651
- if [ ! -f "${adapter_conf}" ]; then
652
- echo "❌ Adapter config not found: do/adapters/${adapter_name}.conf"
653
- echo " Available adapters:"
654
- if [ -d "${SCRIPT_DIR}/adapters" ]; then
655
- for conf in "${SCRIPT_DIR}"/adapters/*.conf; do
656
- [ -f "${conf}" ] || continue
657
- echo " - $(basename "${conf}" .conf)"
658
- done
659
- else
660
- echo " (none)"
661
- fi
662
- return 1
663
- fi
664
-
665
- # Validate AWS credentials
666
- if ! aws sts get-caller-identity &> /dev/null; then
667
- echo "❌ AWS credentials not configured"
668
- echo " Run: aws configure"
669
- exit 4
670
- fi
671
-
672
- AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
673
-
674
- # Read ADAPTER_IC_NAME from the config file
675
- local adapter_ic_name=""
676
- adapter_ic_name=$(grep "^export ADAPTER_IC_NAME=" "${adapter_conf}" 2>/dev/null | sed 's/^export ADAPTER_IC_NAME="//' | sed 's/"$//')
677
-
678
- if [ -z "${adapter_ic_name}" ]; then
679
- echo "⚠️ No ADAPTER_IC_NAME found in do/adapters/${adapter_name}.conf"
680
- echo " Removing local config file."
681
- rm -f "${adapter_conf}"
682
- return 0
683
- fi
684
-
685
- echo " Adapter IC: ${adapter_ic_name}"
686
-
687
- if ! confirm_action "This will delete LoRA adapter '${adapter_name}' (IC: ${adapter_ic_name})"; then
688
- return 1
689
- fi
690
-
691
- # Delete the adapter inference component
692
- echo "🗑️ Deleting adapter inference component: ${adapter_ic_name}"
693
- if aws sagemaker delete-inference-component \
694
- --inference-component-name "${adapter_ic_name}" \
695
- --region "${AWS_REGION}" 2>/dev/null; then
696
- echo "⏳ Waiting for adapter IC deletion..."
697
- aws sagemaker wait inference-component-deleted \
698
- --inference-component-name "${adapter_ic_name}" \
699
- --region "${AWS_REGION}" 2>/dev/null || sleep 15
700
- echo "✅ Adapter IC deleted: ${adapter_ic_name}"
701
-
702
- # Mark adapter IC as deleted in manifest (non-blocking)
703
- ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${adapter_ic_name}" 2>/dev/null || true
704
- else
705
- echo "⚠️ Failed to delete adapter IC: ${adapter_ic_name} (may already be gone)"
706
- fi
707
-
708
- # Remove local conf file
709
- rm -f "${adapter_conf}"
710
- echo "✅ Removed: do/adapters/${adapter_name}.conf"
711
-
712
- echo "✅ Adapter '${adapter_name}' cleaned"
713
- }
714
-
715
- # Function to clean ALL LoRA adapters (keeps base IC and endpoint running)
716
- clean_adapters() {
717
- echo "🧹 Cleaning all LoRA adapters"
718
- echo " Project: ${PROJECT_NAME}"
719
- echo " Region: ${AWS_REGION}"
720
-
721
- # Check if adapters directory exists and has conf files
722
- if [ ! -d "${SCRIPT_DIR}/adapters" ]; then
723
- echo "ℹ️ No adapters directory found"
724
- return 0
725
- fi
726
-
727
- local ADAPTER_COUNT=0
728
- local ADAPTER_NAMES=()
729
- for conf in "${SCRIPT_DIR}"/adapters/*.conf; do
730
- [ -f "${conf}" ] || continue
731
- ADAPTER_COUNT=$((ADAPTER_COUNT + 1))
732
- ADAPTER_NAMES+=("$(basename "${conf}" .conf)")
733
- done
734
-
735
- if [ "${ADAPTER_COUNT}" -eq 0 ]; then
736
- echo "ℹ️ No adapters found to clean"
737
- return 0
738
- fi
739
-
740
- echo ""
741
- echo "Adapters to be removed (${ADAPTER_COUNT}):"
742
- for name in "${ADAPTER_NAMES[@]}"; do
743
- echo " • ${name}"
744
- done
745
-
746
- if ! confirm_action "This will delete ${ADAPTER_COUNT} LoRA adapter(s). Base IC and endpoint will remain running."; then
747
- return 1
748
- fi
749
-
750
- # Validate AWS credentials
751
- if ! aws sts get-caller-identity &> /dev/null; then
752
- echo "❌ AWS credentials not configured"
753
- echo " Run: aws configure"
754
- exit 4
755
- fi
756
-
757
- AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
758
-
759
- # Delete each adapter
760
- local DELETED=0
761
- for adapter_conf in "${SCRIPT_DIR}"/adapters/*.conf; do
762
- [ -f "${adapter_conf}" ] || continue
763
- local adapter_ic_name=""
764
- adapter_ic_name=$(grep "^export ADAPTER_IC_NAME=" "${adapter_conf}" 2>/dev/null | sed 's/^export ADAPTER_IC_NAME="//' | sed 's/"$//' || echo "")
765
- local adapter_display_name
766
- adapter_display_name=$(basename "${adapter_conf}" .conf)
767
-
768
- if [ -n "${adapter_ic_name}" ]; then
769
- echo "🗑️ Deleting adapter: ${adapter_display_name} (${adapter_ic_name})"
770
- if aws sagemaker delete-inference-component \
771
- --inference-component-name "${adapter_ic_name}" \
772
- --region "${AWS_REGION}" 2>/dev/null; then
773
- echo "⏳ Waiting for adapter deletion..."
774
- aws sagemaker wait inference-component-deleted \
775
- --inference-component-name "${adapter_ic_name}" \
776
- --region "${AWS_REGION}" 2>/dev/null || sleep 15
777
- echo "✅ Adapter deleted: ${adapter_display_name}"
778
-
779
- # Mark adapter IC as deleted in manifest (non-blocking)
780
- ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${adapter_ic_name}" 2>/dev/null || true
781
- else
782
- echo "⚠️ Failed to delete adapter IC: ${adapter_ic_name} (may already be gone)"
783
- fi
784
- fi
785
-
786
- # Remove adapter conf file
787
- rm -f "${adapter_conf}"
788
- DELETED=$((DELETED + 1))
789
- done
790
-
791
- echo ""
792
- echo "✅ All adapters cleaned (${DELETED} removed)"
793
- echo " Base IC and endpoint remain running."
794
- }
795
- <% } %>
796
- <% } else if (deploymentTarget === 'async-inference') { %>
797
- # Function to clean SageMaker async endpoint and model
798
- clean_endpoint() {
799
- echo "🧹 Cleaning SageMaker async resources"
800
- echo " Project: ${PROJECT_NAME}"
801
- echo " Region: ${AWS_REGION}"
802
-
803
- # Validate AWS credentials
804
- if ! aws sts get-caller-identity &> /dev/null; then
805
- echo "❌ AWS credentials not configured"
806
- echo " Run: aws configure"
807
- exit 4
808
- fi
809
-
810
- AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
811
-
812
- # Use names from config (set by do/deploy) or argument
813
- local EP_NAME="${ENDPOINT_NAME:-}"
814
- local EPC_NAME="${ENDPOINT_CONFIG_NAME:-}"
815
- local SM_MODEL_NAME="${SAGEMAKER_MODEL_NAME:-}"
816
-
817
- if [ -z "${EP_NAME}" ]; then
818
- echo "❌ No endpoint name found"
819
- echo " Run ./do/deploy first, or set ENDPOINT_NAME in do/config"
820
- return 1
821
- fi
822
-
823
- echo ""
824
- echo "Checking for SageMaker resources..."
825
-
826
- local ENDPOINT_EXISTS=false
827
-
828
- if aws sagemaker describe-endpoint \
829
- --endpoint-name "${EP_NAME}" \
830
- --region "${AWS_REGION}" &> /dev/null; then
831
- ENDPOINT_EXISTS=true
832
- echo " ✓ Endpoint: ${EP_NAME}"
833
- else
834
- echo "ℹ️ Endpoint not found: ${EP_NAME}"
835
- return 0
836
- fi
837
-
838
- if ! confirm_action "This will delete the SageMaker async endpoint and model"; then
839
- return 1
840
- fi
841
-
842
- # Delete endpoint
843
- echo "🗑️ Deleting endpoint: ${EP_NAME}"
844
- if aws sagemaker delete-endpoint \
845
- --endpoint-name "${EP_NAME}" \
846
- --region "${AWS_REGION}" &> /dev/null; then
847
- echo "✅ Endpoint deleted"
848
- echo "⏳ Waiting for endpoint deletion..."
849
- aws sagemaker wait endpoint-deleted \
850
- --endpoint-name "${EP_NAME}" \
851
- --region "${AWS_REGION}" 2>/dev/null || sleep 10
852
-
853
- # Mark endpoint as deleted in manifest (non-blocking)
854
- ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint/${EP_NAME}" 2>/dev/null || true
855
- else
856
- echo "❌ Failed to delete endpoint"
857
- fi
858
-
859
- # Delete endpoint configuration
860
- if [ -n "${EPC_NAME}" ]; then
861
- echo "🗑️ Deleting endpoint configuration: ${EPC_NAME}"
862
- if aws sagemaker delete-endpoint-config \
863
- --endpoint-config-name "${EPC_NAME}" \
864
- --region "${AWS_REGION}" &> /dev/null; then
865
- echo "✅ Endpoint configuration deleted"
866
-
867
- # Mark endpoint config as deleted in manifest (non-blocking)
868
- ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint-config/${EPC_NAME}" 2>/dev/null || true
869
- else
870
- echo "❌ Failed to delete endpoint configuration"
871
- fi
872
- fi
873
-
874
- # Delete SageMaker model (async uses classic model-based flow)
875
- if [ -n "${SM_MODEL_NAME}" ]; then
876
- echo "🗑️ Deleting SageMaker model: ${SM_MODEL_NAME}"
877
- if aws sagemaker delete-model \
878
- --model-name "${SM_MODEL_NAME}" \
879
- --region "${AWS_REGION}" &> /dev/null; then
880
- echo "✅ SageMaker model deleted"
881
-
882
- # Mark model as deleted in manifest (non-blocking)
883
- ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:model/${SM_MODEL_NAME}" 2>/dev/null || true
884
- else
885
- echo "❌ Failed to delete SageMaker model"
886
- fi
887
- fi
888
-
889
- # Remove saved names from config
890
- if grep -q "^export ENDPOINT_NAME=" "${SCRIPT_DIR}/config" 2>/dev/null; then
891
- sed -i.bak '/^# Last deployed resources/d;/^export ENDPOINT_NAME=/d;/^export ENDPOINT_CONFIG_NAME=/d;/^export SAGEMAKER_MODEL_NAME=/d' "${SCRIPT_DIR}/config"
892
- rm -f "${SCRIPT_DIR}/config.bak"
893
- fi
894
-
895
- echo "✅ SageMaker async resources cleaned"
896
- }
897
- <% } else if (deploymentTarget === 'batch-transform') { %>
898
- # Function to clean SageMaker managed inference batch resources
899
- clean_batch() {
900
- echo "🧹 Cleaning SageMaker managed inference batch resources"
901
- echo " Project: ${PROJECT_NAME}"
902
- echo " Region: ${AWS_REGION}"
903
-
904
- # Validate AWS credentials
905
- if ! aws sts get-caller-identity &> /dev/null; then
906
- echo "❌ AWS credentials not configured"
907
- echo " Run: aws configure"
908
- exit 4
909
- fi
910
-
911
- # Use names from config (set by do/deploy)
912
- local TJ_NAME="${TRANSFORM_JOB_NAME:-}"
913
- local SM_MODEL_NAME="${SAGEMAKER_MODEL_NAME:-}"
914
-
915
- if [ -z "${TJ_NAME}" ] && [ -z "${SM_MODEL_NAME}" ]; then
916
- echo "❌ No transform job or model name found"
917
- echo " Run ./do/deploy first, or set TRANSFORM_JOB_NAME in do/config"
918
- return 1
919
- fi
920
-
921
- echo ""
922
- echo "Checking for SageMaker resources..."
923
-
924
- # Check transform job status
925
- local JOB_EXISTS=false
926
- local JOB_STATUS=""
927
- if [ -n "${TJ_NAME}" ]; then
928
- JOB_STATUS=$(aws sagemaker describe-transform-job \
929
- --transform-job-name "${TJ_NAME}" \
930
- --region "${AWS_REGION}" \
931
- --query 'TransformJobStatus' \
932
- --output text 2>/dev/null || echo "")
933
- if [ -n "${JOB_STATUS}" ]; then
934
- JOB_EXISTS=true
935
- echo " ✓ Transform job: ${TJ_NAME} (${JOB_STATUS})"
936
- else
937
- echo "ℹ️ Transform job not found: ${TJ_NAME}"
938
- fi
939
- fi
940
-
941
- # Check model
942
- local MODEL_EXISTS=false
943
- if [ -n "${SM_MODEL_NAME}" ]; then
944
- if aws sagemaker describe-model \
945
- --model-name "${SM_MODEL_NAME}" \
946
- --region "${AWS_REGION}" &> /dev/null; then
947
- MODEL_EXISTS=true
948
- echo " ✓ SageMaker model: ${SM_MODEL_NAME}"
949
- else
950
- echo "ℹ️ SageMaker model not found: ${SM_MODEL_NAME}"
951
- fi
952
- fi
953
-
954
- if [ "${JOB_EXISTS}" = false ] && [ "${MODEL_EXISTS}" = false ]; then
955
- echo "ℹ️ No batch transform resources found to clean"
956
- return 0
957
- fi
958
-
959
- if ! confirm_action "This will stop the transform job (if running) and delete the SageMaker model"; then
960
- return 1
961
- fi
962
-
963
- # Stop transform job if in progress
964
- if [ "${JOB_EXISTS}" = true ] && [ "${JOB_STATUS}" = "InProgress" ]; then
965
- echo "🗑️ Stopping transform job: ${TJ_NAME}"
966
- if aws sagemaker stop-transform-job \
967
- --transform-job-name "${TJ_NAME}" \
968
- --region "${AWS_REGION}" &> /dev/null; then
969
- echo "⏳ Waiting for transform job to stop..."
970
- aws sagemaker wait transform-job-completed-or-stopped \
971
- --transform-job-name "${TJ_NAME}" \
972
- --region "${AWS_REGION}" 2>/dev/null || sleep 15
973
- echo "✅ Transform job stopped"
974
-
975
- # Mark transform job as deleted in manifest (non-blocking)
976
- AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
977
- ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:transform-job/${TJ_NAME}" 2>/dev/null || true
978
- else
979
- echo "❌ Failed to stop transform job"
980
- fi
981
- fi
982
-
983
- # Delete SageMaker model
984
- if [ "${MODEL_EXISTS}" = true ]; then
985
- echo "🗑️ Deleting SageMaker model: ${SM_MODEL_NAME}"
986
- if aws sagemaker delete-model \
987
- --model-name "${SM_MODEL_NAME}" \
988
- --region "${AWS_REGION}" &> /dev/null; then
989
- echo "✅ SageMaker model deleted"
990
- else
991
- echo "❌ Failed to delete SageMaker model"
992
- fi
993
- fi
994
-
995
- # Remove saved names from config
996
- if grep -q "^export TRANSFORM_JOB_NAME=" "${SCRIPT_DIR}/config" 2>/dev/null; then
997
- sed -i.bak '/^# Last deployed resources/d;/^export TRANSFORM_JOB_NAME=/d;/^export SAGEMAKER_MODEL_NAME=/d' "${SCRIPT_DIR}/config"
998
- rm -f "${SCRIPT_DIR}/config.bak"
999
- fi
1000
-
1001
- echo "✅ SageMaker managed inference batch resources cleaned"
1002
- }
1003
- <% } else if (deploymentTarget === 'hyperpod-eks') { %>
1004
- # Function to clean HyperPod EKS deployment
1005
- clean_hyperpod() {
1006
- echo "🧹 Cleaning HyperPod EKS resources"
1007
- echo " Cluster: ${HYPERPOD_CLUSTER_NAME}"
1008
- echo " Namespace: ${HYPERPOD_NAMESPACE}"
1009
-
1010
- # Validate AWS credentials
1011
- if ! aws sts get-caller-identity &> /dev/null; then
1012
- echo "❌ AWS credentials not configured"
1013
- echo " Run: aws configure"
1014
- exit 4
1015
- fi
1016
-
1017
- # Get kubeconfig for HyperPod cluster
1018
- echo "🔑 Configuring kubectl for HyperPod cluster..."
1019
- KUBECONFIG_PATH="${HOME}/.kube/hyperpod-${HYPERPOD_CLUSTER_NAME}"
1020
-
1021
- EKS_CLUSTER_ARN=$(aws sagemaker describe-cluster \
1022
- --cluster-name "${HYPERPOD_CLUSTER_NAME}" \
1023
- --region "${AWS_REGION}" \
1024
- --query "Orchestrator.Eks.ClusterArn" \
1025
- --output text 2>&1) || {
1026
- echo "❌ Failed to describe HyperPod cluster: ${HYPERPOD_CLUSTER_NAME}"
1027
- echo " Check that the cluster exists and you have permission to access it"
1028
- return 1
1029
- }
1030
-
1031
- EKS_CLUSTER_NAME=$(echo "${EKS_CLUSTER_ARN}" | awk -F'/' '{print $NF}')
1032
-
1033
- if ! aws eks update-kubeconfig \
1034
- --name "${EKS_CLUSTER_NAME}" \
1035
- --region "${AWS_REGION}" \
1036
- --kubeconfig "${KUBECONFIG_PATH}" 2>&1; then
1037
- echo "❌ Failed to configure kubectl for EKS cluster: ${EKS_CLUSTER_NAME}"
1038
- return 1
1039
- fi
1040
-
1041
- export KUBECONFIG="${KUBECONFIG_PATH}"
1042
-
1043
- if ! confirm_action "This will delete the HyperPod deployment in namespace ${HYPERPOD_NAMESPACE}"; then
1044
- return 1
1045
- fi
1046
-
1047
- # Delete Kubernetes resources
1048
- echo "🗑️ Deleting Kubernetes resources..."
1049
- AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
1050
- export AWS_ACCOUNT_ID
1051
- DELETE_FAILED=false
1052
- for manifest in hyperpod/*.yaml; do
1053
- # Skip files that contain no Kubernetes objects (e.g. comment-only PVC stubs)
1054
- RENDERED=$(envsubst < "${manifest}")
1055
- if echo "${RENDERED}" | grep -q '^kind:'; then
1056
- if ! echo "${RENDERED}" | kubectl delete -n "${HYPERPOD_NAMESPACE}" --ignore-not-found -f - 2>&1; then
1057
- DELETE_FAILED=true
1058
- fi
1059
- fi
1060
- done
1061
- if [ "${DELETE_FAILED}" = true ]; then
1062
- echo "❌ Failed to delete some Kubernetes resources"
1063
- echo " You may need to manually clean up:"
1064
- echo " kubectl get all -n ${HYPERPOD_NAMESPACE}"
1065
- return 1
1066
- fi
1067
-
1068
- # Mark k8s resources as deleted in manifest (non-blocking)
1069
- ./do/manifest delete --id "${HYPERPOD_NAMESPACE}/${PROJECT_NAME}" 2>/dev/null || true
1070
-
1071
- echo "✅ HyperPod EKS resources cleaned"
1072
- }
1073
- <% } %>
1074
-
1075
- # Function to clean CodeBuild project and related resources
1076
- clean_codebuild() {
1077
- echo "🧹 Cleaning CodeBuild resources"
1078
- echo " Project: ${CODEBUILD_PROJECT_NAME:-not set}"
1079
- echo " Region: ${AWS_REGION}"
1080
-
1081
- if [ -z "${CODEBUILD_PROJECT_NAME:-}" ]; then
1082
- echo "ℹ️ No CodeBuild project name configured (build target may not be codebuild)"
1083
- return 0
1084
- fi
1085
-
1086
- # Validate AWS credentials
1087
- if ! aws sts get-caller-identity &> /dev/null; then
1088
- echo "❌ AWS credentials not configured"
1089
- echo " Run: aws configure"
1090
- exit 4
1091
- fi
1092
-
1093
- AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
1094
-
1095
- # Check if project exists
1096
- PROJECT_CHECK=$(aws codebuild batch-get-projects \
1097
- --names "${CODEBUILD_PROJECT_NAME}" \
1098
- --region "${AWS_REGION}" \
1099
- --query 'projects[0].name' \
1100
- --output text 2>/dev/null)
1101
-
1102
- if [ "$PROJECT_CHECK" = "None" ] || [ -z "$PROJECT_CHECK" ] || [ "$PROJECT_CHECK" = "null" ]; then
1103
- echo "ℹ️ CodeBuild project not found: ${CODEBUILD_PROJECT_NAME}"
1104
- return 0
1105
- fi
1106
-
1107
- echo ""
1108
- echo "Resources to be removed:"
1109
- echo " • CodeBuild project: ${CODEBUILD_PROJECT_NAME}"
1110
-
1111
- # Check for service role
1112
- ROLE_NAME="${CODEBUILD_PROJECT_NAME}-service-role"
1113
- ROLE_EXISTS=false
1114
- if aws iam get-role --role-name "${ROLE_NAME}" &> /dev/null; then
1115
- ROLE_EXISTS=true
1116
- echo " • IAM service role: ${ROLE_NAME}"
1117
- fi
1118
-
1119
- # Check for S3 source bucket
1120
- S3_BUCKET="codebuild-source-${AWS_ACCOUNT_ID}-${AWS_REGION}"
1121
- S3_PREFIX="${PROJECT_NAME}/"
1122
- S3_EXISTS=false
1123
- if aws s3api head-bucket --bucket "$S3_BUCKET" --region "${AWS_REGION}" &> /dev/null; then
1124
- S3_COUNT=$(aws s3 ls "s3://$S3_BUCKET/$S3_PREFIX" --region "${AWS_REGION}" 2>/dev/null | wc -l | tr -d ' ')
1125
- if [ "$S3_COUNT" -gt 0 ]; then
1126
- S3_EXISTS=true
1127
- echo " • S3 source artifacts: s3://$S3_BUCKET/$S3_PREFIX ($S3_COUNT objects)"
1128
- fi
1129
- fi
1130
-
1131
- if ! confirm_action "This will delete the CodeBuild project and associated resources"; then
1132
- return 1
1133
- fi
1134
-
1135
- # Delete CodeBuild project
1136
- echo "🗑️ Deleting CodeBuild project: ${CODEBUILD_PROJECT_NAME}"
1137
- if aws codebuild delete-project \
1138
- --name "${CODEBUILD_PROJECT_NAME}" \
1139
- --region "${AWS_REGION}" &> /dev/null; then
1140
- echo "✅ CodeBuild project deleted"
1141
-
1142
- # Mark CodeBuild project as deleted in manifest (non-blocking)
1143
- ./do/manifest delete --id "arn:aws:codebuild:${AWS_REGION}:${AWS_ACCOUNT_ID}:project/${CODEBUILD_PROJECT_NAME}" 2>/dev/null || true
1144
- else
1145
- echo "❌ Failed to delete CodeBuild project"
1146
- fi
1147
-
1148
- # Delete IAM service role
1149
- if [ "$ROLE_EXISTS" = true ]; then
1150
- echo "🗑️ Deleting IAM service role: ${ROLE_NAME}"
1151
- # Remove inline policies first
1152
- POLICIES=$(aws iam list-role-policies --role-name "${ROLE_NAME}" --query 'PolicyNames' --output text 2>/dev/null || echo "")
1153
- for policy in $POLICIES; do
1154
- aws iam delete-role-policy --role-name "${ROLE_NAME}" --policy-name "$policy" 2>/dev/null || true
1155
- done
1156
- if aws iam delete-role --role-name "${ROLE_NAME}" &> /dev/null; then
1157
- echo "✅ IAM service role deleted"
1158
-
1159
- # Mark IAM role as deleted in manifest (non-blocking)
1160
- ./do/manifest delete --id "arn:aws:iam::${AWS_ACCOUNT_ID}:role/${ROLE_NAME}" 2>/dev/null || true
1161
- else
1162
- echo "❌ Failed to delete IAM service role"
1163
- fi
1164
- fi
1165
-
1166
- # Delete S3 source artifacts
1167
- if [ "$S3_EXISTS" = true ]; then
1168
- echo "🗑️ Deleting S3 source artifacts: s3://$S3_BUCKET/$S3_PREFIX"
1169
- if aws s3 rm "s3://$S3_BUCKET/$S3_PREFIX" --recursive --region "${AWS_REGION}" &> /dev/null; then
1170
- echo "✅ S3 source artifacts deleted"
1171
- else
1172
- echo "❌ Failed to delete S3 source artifacts"
1173
- fi
1174
- fi
1175
-
1176
- echo "✅ CodeBuild resources cleaned"
1177
- }
1178
-
1179
- # Main script logic
1180
- echo "🧹 Cleanup script for ${PROJECT_NAME}"
1181
- echo ""
1182
-
1183
- if [ -z "${CLEANUP_TARGET}" ]; then
1184
- <% if (deploymentTarget === 'batch-transform') { %>
1185
- CLEANUP_TARGET="batch"
1186
- <% } else if (deploymentTarget === 'hyperpod-eks') { %>
1187
- show_usage
1188
- exit 0
1189
- <% } else { %>
1190
- show_usage
1191
- exit 0
1192
- <% } %>
1193
- fi
1194
-
1195
- case "${CLEANUP_TARGET}" in
1196
- local)
1197
- clean_local
1198
- ;;
1199
- ecr)
1200
- clean_ecr
1201
- ;;
1202
- <% if (deploymentTarget === 'realtime-inference') { %>
1203
- endpoint)
1204
- clean_endpoint
1205
- ;;
1206
- ic)
1207
- clean_ic "${CLEANUP_ARG}"
1208
- ;;
1209
- <% if (typeof enableLora !== 'undefined' && enableLora) { %>
1210
- adapter)
1211
- clean_adapter "${CLEANUP_ARG}"
1212
- ;;
1213
- adapters)
1214
- clean_adapters
1215
- ;;
1216
- <% } %>
1217
- <% } else if (deploymentTarget === 'async-inference') { %>
1218
- endpoint)
1219
- clean_endpoint
1220
- ;;
1221
- <% } else if (deploymentTarget === 'batch-transform') { %>
1222
- batch)
1223
- clean_batch
1224
- ;;
1225
- <% } else if (deploymentTarget === 'hyperpod-eks') { %>
1226
- hyperpod)
1227
- clean_hyperpod
1228
- ;;
1229
- <% } %>
1230
- codebuild)
1231
- clean_codebuild
1232
- ;;
1233
- <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
1234
- benchmark)
1235
- echo "🧹 Cleaning benchmark resources..."
1236
- WORKLOAD_CONFIG_NAME="${PROJECT_NAME}-benchmark-config"
1237
-
1238
- # Delete workload config if exists
1239
- if aws sagemaker describe-ai-workload-config \
1240
- --ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
1241
- --region "$AWS_REGION" 2>/dev/null; then
1242
- aws sagemaker delete-ai-workload-config \
1243
- --ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
1244
- --region "$AWS_REGION"
1245
- echo " ✓ Deleted workload config: $WORKLOAD_CONFIG_NAME"
1246
- fi
1247
-
1248
- # Delete terminal benchmark jobs matching project prefix
1249
- aws sagemaker list-ai-benchmark-jobs \
1250
- --name-contains "${PROJECT_NAME}-benchmark-" \
1251
- --region "$AWS_REGION" \
1252
- --query 'AIBenchmarkJobs[?AIBenchmarkJobStatus!=`InProgress`].AIBenchmarkJobName' \
1253
- --output text | tr '\t' '\n' | while read -r job; do
1254
- [ -z "$job" ] && continue
1255
- aws sagemaker delete-ai-benchmark-job \
1256
- --ai-benchmark-job-name "$job" \
1257
- --region "$AWS_REGION"
1258
- echo " ✓ Deleted benchmark job: $job"
1259
- done
1260
-
1261
- # Delete local benchmark results
1262
- if [ -d "${SCRIPT_DIR}/../benchmarks" ]; then
1263
- read -p "Delete local benchmark results? (Y/n) " CONFIRM_DELETE
1264
- CONFIRM_DELETE="${CONFIRM_DELETE:-Y}"
1265
- if [[ "${CONFIRM_DELETE}" =~ ^[Yy]$ ]]; then
1266
- rm -rf "${SCRIPT_DIR}/../benchmarks"
1267
- echo " ✓ Deleted local benchmarks/ directory"
1268
- else
1269
- echo " ⏭ Skipped local benchmarks/ deletion"
1270
- fi
1271
- fi
1272
-
1273
- echo "✅ Benchmark cleanup complete"
1274
- ;;
1275
- <% } %>
1276
- all)
1277
- echo "🧹 Performing complete cleanup"
1278
- echo ""
1279
-
1280
- # Track what was cleaned
1281
- CLEANED_ITEMS=()
1282
-
1283
- # Clean local images
1284
- if clean_local; then
1285
- CLEANED_ITEMS+=("Local Docker images")
1286
- fi
1287
-
1288
- echo ""
1289
-
1290
- # Clean ECR images
1291
- if clean_ecr; then
1292
- CLEANED_ITEMS+=("ECR images")
1293
- fi
1294
-
1295
- echo ""
1296
-
1297
- <% if (deploymentTarget === 'realtime-inference') { %>
1298
- # Clean SageMaker resources
1299
- if clean_endpoint; then
1300
- CLEANED_ITEMS+=("SageMaker resources")
1301
- fi
1302
- <% } else if (deploymentTarget === 'async-inference') { %>
1303
- # Clean SageMaker async resources
1304
- if clean_endpoint; then
1305
- CLEANED_ITEMS+=("SageMaker async resources")
1306
- fi
1307
- <% } else if (deploymentTarget === 'batch-transform') { %>
1308
- # Clean SageMaker managed inference batch resources
1309
- if clean_batch; then
1310
- CLEANED_ITEMS+=("SageMaker managed inference batch resources")
1311
- fi
1312
- <% } else if (deploymentTarget === 'hyperpod-eks') { %>
1313
- # Clean HyperPod EKS resources
1314
- if clean_hyperpod; then
1315
- CLEANED_ITEMS+=("HyperPod EKS resources")
1316
- fi
1317
- <% } %>
1318
-
1319
- echo ""
1320
-
1321
- # Clean CodeBuild resources
1322
- if clean_codebuild; then
1323
- CLEANED_ITEMS+=("CodeBuild resources")
1324
- fi
1325
-
1326
- <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
1327
- echo ""
1328
-
1329
- # Clean benchmark resources
1330
- WORKLOAD_CONFIG_NAME="${PROJECT_NAME}-benchmark-config"
1331
-
1332
- # Delete workload config if exists
1333
- if aws sagemaker describe-ai-workload-config \
1334
- --ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
1335
- --region "$AWS_REGION" 2>/dev/null; then
1336
- aws sagemaker delete-ai-workload-config \
1337
- --ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
1338
- --region "$AWS_REGION"
1339
- echo " ✓ Deleted workload config: $WORKLOAD_CONFIG_NAME"
1340
- fi
1341
-
1342
- # Delete terminal benchmark jobs matching project prefix
1343
- aws sagemaker list-ai-benchmark-jobs \
1344
- --name-contains "${PROJECT_NAME}-benchmark-" \
1345
- --region "$AWS_REGION" \
1346
- --query 'AIBenchmarkJobs[?AIBenchmarkJobStatus!=`InProgress`].AIBenchmarkJobName' \
1347
- --output text | tr '\t' '\n' | while read -r job; do
1348
- [ -z "$job" ] && continue
1349
- aws sagemaker delete-ai-benchmark-job \
1350
- --ai-benchmark-job-name "$job" \
1351
- --region "$AWS_REGION"
1352
- echo " ✓ Deleted benchmark job: $job"
1353
- done
1354
-
1355
- # Delete local benchmark results
1356
- if [ -d "${SCRIPT_DIR}/../benchmarks" ]; then
1357
- read -p "Delete local benchmark results? (Y/n) " CONFIRM_DELETE
1358
- CONFIRM_DELETE="${CONFIRM_DELETE:-Y}"
1359
- if [[ "${CONFIRM_DELETE}" =~ ^[Yy]$ ]]; then
1360
- rm -rf "${SCRIPT_DIR}/../benchmarks"
1361
- echo " ✓ Deleted local benchmarks/ directory"
1362
- else
1363
- echo " ⏭ Skipped local benchmarks/ deletion"
1364
- fi
1365
- fi
1366
-
1367
- CLEANED_ITEMS+=("Benchmark resources")
1368
- <% } %>
1369
- # Display summary
1370
- echo ""
1371
- echo "✅ Cleanup complete!"
1372
- echo ""
1373
- echo "Summary of cleaned resources:"
1374
- for item in "${CLEANED_ITEMS[@]}"; do
1375
- echo " ✓ ${item}"
1376
- done
1377
- ;;
1378
- *)
1379
- echo "❌ Unknown cleanup target: ${CLEANUP_TARGET}"
1380
- echo ""
1381
- show_usage
1382
- exit 1
1383
- ;;
1384
- esac
1385
-
1386
- echo ""
1387
- echo "Cleanup finished!"
1
+ <%- include('clean.d/' + (deploymentTarget === 'realtime-inference' ? 'managed-inference' : deploymentTarget)) %>