@aws/ml-container-creator 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/bin/cli.js +5 -2
  2. package/config/bootstrap-stack.json +86 -7
  3. package/config/defaults.json +1 -1
  4. package/infra/ci-harness/buildspec.yml +60 -0
  5. package/package.json +3 -1
  6. package/servers/README.md +41 -1
  7. package/servers/instance-sizer/index.js +42 -2
  8. package/servers/instance-sizer/lib/instance-ranker.js +114 -10
  9. package/servers/instance-sizer/lib/quota-resolver.js +368 -0
  10. package/servers/instance-sizer/package.json +2 -0
  11. package/servers/lib/catalogs/instances.json +527 -12
  12. package/servers/lib/catalogs/model-servers.json +15 -15
  13. package/servers/lib/catalogs/model-sizes.json +27 -0
  14. package/servers/lib/catalogs/models.json +71 -0
  15. package/servers/lib/schemas/image-catalog.schema.json +9 -1
  16. package/src/app.js +109 -3
  17. package/src/lib/bootstrap-command-handler.js +96 -3
  18. package/src/lib/cli-handler.js +2 -2
  19. package/src/lib/config-manager.js +117 -1
  20. package/src/lib/deployment-entry-schema.js +16 -0
  21. package/src/lib/prompt-runner.js +270 -12
  22. package/src/lib/prompts.js +288 -6
  23. package/src/lib/registry-command-handler.js +12 -0
  24. package/src/lib/schema-sync.js +31 -0
  25. package/src/lib/template-manager.js +49 -1
  26. package/src/lib/validate-runner.js +125 -2
  27. package/templates/Dockerfile +22 -2
  28. package/templates/code/cuda_compat.sh +22 -0
  29. package/templates/code/serve +3 -0
  30. package/templates/code/serving.properties +14 -0
  31. package/templates/code/start_server.sh +3 -0
  32. package/templates/diffusors/Dockerfile +2 -1
  33. package/templates/diffusors/serve +3 -0
  34. package/templates/do/README.md +33 -0
  35. package/templates/do/adapter +1214 -0
  36. package/templates/do/adapters/.gitkeep +2 -0
  37. package/templates/do/add-ic +130 -0
  38. package/templates/do/benchmark +718 -0
  39. package/templates/do/clean +593 -17
  40. package/templates/do/config +49 -4
  41. package/templates/do/deploy +513 -362
  42. package/templates/do/ic/default.conf +32 -0
  43. package/templates/do/lib/endpoint-config.sh +216 -0
  44. package/templates/do/lib/inference-component.sh +167 -0
  45. package/templates/do/lib/secrets.sh +44 -0
  46. package/templates/do/lib/wait.sh +131 -0
  47. package/templates/do/logs +107 -27
  48. package/templates/do/optimize +528 -0
  49. package/templates/do/register +119 -2
  50. package/templates/do/status +337 -0
  51. package/templates/do/test +80 -28
  52. package/templates/triton/Dockerfile +5 -0
@@ -12,20 +12,27 @@ source "${SCRIPT_DIR}/config"
12
12
 
13
13
  # Parse arguments
14
14
  CLEANUP_TARGET=""
15
+ CLEANUP_ARG=""
15
16
  FORCE_CLEAN=false
16
17
 
17
18
  for arg in "$@"; do
18
19
  case "$arg" in
19
20
  --force) FORCE_CLEAN=true ;;
20
21
  -*) ;; # ignore other flags
21
- *) CLEANUP_TARGET="$arg" ;;
22
+ *)
23
+ if [ -z "${CLEANUP_TARGET}" ]; then
24
+ CLEANUP_TARGET="$arg"
25
+ elif [ -z "${CLEANUP_ARG}" ]; then
26
+ CLEANUP_ARG="$arg"
27
+ fi
28
+ ;;
22
29
  esac
23
30
  done
24
31
 
25
32
  # Function to display usage
26
33
  show_usage() {
27
34
  <% if (deploymentTarget === 'realtime-inference') { %>
28
- echo "Usage: ./do/clean [local|ecr|endpoint|codebuild|all]"
35
+ echo "Usage: ./do/clean [local|ecr|endpoint|ic <name>|adapter <name>|adapters|codebuild|all]"
29
36
  <% } else if (deploymentTarget === 'async-inference') { %>
30
37
  echo "Usage: ./do/clean [local|ecr|endpoint|codebuild|all]"
31
38
  <% } else if (deploymentTarget === 'batch-transform') { %>
@@ -38,7 +45,12 @@ show_usage() {
38
45
  echo " local - Remove local Docker images"
39
46
  echo " ecr - Remove images from Amazon ECR"
40
47
  <% if (deploymentTarget === 'realtime-inference') { %>
41
- echo " endpoint - Delete SageMaker endpoint, configuration, and model"
48
+ echo " endpoint - Delete SageMaker endpoint, configuration, and inference components"
49
+ echo " ic <name> - Delete a single inference component (does not touch the endpoint)"
50
+ <% if (typeof enableLora !== 'undefined' && enableLora) { %>
51
+ echo " adapter <name> - Delete a single LoRA adapter (synonym for do/adapter remove)"
52
+ echo " adapters - Remove ALL LoRA adapters (keeps base IC and endpoint running)"
53
+ <% } %>
42
54
  <% } else if (deploymentTarget === 'async-inference') { %>
43
55
  echo " endpoint - Delete SageMaker async endpoint, configuration, and inference component"
44
56
  <% } else if (deploymentTarget === 'batch-transform') { %>
@@ -53,6 +65,11 @@ show_usage() {
53
65
  echo " ./do/clean local # Remove local Docker images only"
54
66
  <% if (deploymentTarget === 'realtime-inference') { %>
55
67
  echo " ./do/clean endpoint # Delete SageMaker resources only"
68
+ echo " ./do/clean ic llama # Delete a single inference component"
69
+ <% if (typeof enableLora !== 'undefined' && enableLora) { %>
70
+ echo " ./do/clean adapter ectsum # Delete a single LoRA adapter"
71
+ echo " ./do/clean adapters # Remove all LoRA adapters"
72
+ <% } %>
56
73
  <% } else if (deploymentTarget === 'async-inference') { %>
57
74
  echo " ./do/clean endpoint # Delete SageMaker async resources only"
58
75
  <% } else if (deploymentTarget === 'batch-transform') { %>
@@ -150,19 +167,19 @@ clean_ecr() {
150
167
  if ! IMAGE_IDS=$(aws ecr list-images \
151
168
  --repository-name "${ECR_REPOSITORY_NAME}" \
152
169
  --region "${AWS_REGION}" \
153
- --query 'imageIds[*].[imageTag]' \
170
+ --query "imageIds[?starts_with(imageTag, '${PROJECT_NAME}-')].[imageTag]" \
154
171
  --output text 2>&1); then
155
- echo "โ„น๏ธ No images found in repository"
172
+ echo "โ„น๏ธ No images found for project: ${PROJECT_NAME}"
156
173
  return 0
157
174
  fi
158
175
 
159
176
  if [ -z "${IMAGE_IDS}" ] || [ "${IMAGE_IDS}" = "None" ]; then
160
- echo "โ„น๏ธ No images found in repository"
177
+ echo "โ„น๏ธ No images found for project: ${PROJECT_NAME}"
161
178
  return 0
162
179
  fi
163
180
 
164
181
  # Display images
165
- echo "Images in repository:"
182
+ echo "Images for project ${PROJECT_NAME}:"
166
183
  echo "${IMAGE_IDS}" | while read -r tag; do
167
184
  if [ -n "${tag}" ] && [ "${tag}" != "None" ]; then
168
185
  echo " - ${tag}"
@@ -176,25 +193,25 @@ clean_ecr() {
176
193
  # Remove images
177
194
  echo "๐Ÿ—‘๏ธ Removing ECR images..."
178
195
 
179
- # Get image IDs in JSON format for batch delete
196
+ # Only delete images tagged with this project's name (not all images in the shared repo)
180
197
  IMAGE_IDS_JSON=$(aws ecr list-images \
181
198
  --repository-name "${ECR_REPOSITORY_NAME}" \
182
199
  --region "${AWS_REGION}" \
183
- --query 'imageIds' \
200
+ --query "imageIds[?starts_with(imageTag, '${PROJECT_NAME}-')]" \
184
201
  --output json)
185
202
 
186
- if [ "${IMAGE_IDS_JSON}" != "[]" ]; then
203
+ if [ "${IMAGE_IDS_JSON}" != "[]" ] && [ -n "${IMAGE_IDS_JSON}" ]; then
187
204
  if aws ecr batch-delete-image \
188
205
  --repository-name "${ECR_REPOSITORY_NAME}" \
189
206
  --region "${AWS_REGION}" \
190
207
  --image-ids "${IMAGE_IDS_JSON}" &> /dev/null; then
191
- echo "โœ… ECR images removed"
208
+ echo "โœ… ECR images removed for project: ${PROJECT_NAME}"
192
209
  else
193
210
  echo "โŒ Failed to remove some ECR images"
194
211
  return 1
195
212
  fi
196
213
  else
197
- echo "โ„น๏ธ No images to remove"
214
+ echo "โ„น๏ธ No images to remove for project: ${PROJECT_NAME}"
198
215
  fi
199
216
  }
200
217
 
@@ -225,6 +242,122 @@ clean_endpoint() {
225
242
  return 1
226
243
  fi
227
244
 
245
+ # External endpoint: only remove inference components, not the endpoint itself
246
+ if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
247
+ echo ""
248
+ echo "โš ๏ธ Endpoint is external โ€” only removing inference components"
249
+ echo " Endpoint ${EP_NAME} will NOT be deleted (managed externally)."
250
+ echo ""
251
+
252
+ <% if (typeof enableLora !== 'undefined' && enableLora) { %>
253
+ # Delete adapter ICs first (adapters depend on base ICs)
254
+ if [ -d "${SCRIPT_DIR}/adapters" ]; then
255
+ local ADAPTER_COUNT=0
256
+ for adapter_conf in "${SCRIPT_DIR}"/adapters/*.conf; do
257
+ [ -f "${adapter_conf}" ] || continue
258
+ ADAPTER_COUNT=$((ADAPTER_COUNT + 1))
259
+ done
260
+
261
+ if [ "${ADAPTER_COUNT}" -gt 0 ]; then
262
+ echo "๐Ÿ”Œ Deleting ${ADAPTER_COUNT} LoRA adapter(s) first..."
263
+ for adapter_conf in "${SCRIPT_DIR}"/adapters/*.conf; do
264
+ [ -f "${adapter_conf}" ] || continue
265
+ local adapter_ic_name=""
266
+ adapter_ic_name=$(grep "^export ADAPTER_IC_NAME=" "${adapter_conf}" 2>/dev/null | sed 's/^export ADAPTER_IC_NAME="//' | sed 's/"$//' || echo "")
267
+ local adapter_display_name
268
+ adapter_display_name=$(basename "${adapter_conf}" .conf)
269
+
270
+ if [ -n "${adapter_ic_name}" ]; then
271
+ echo "๐Ÿ—‘๏ธ Deleting adapter: ${adapter_display_name} (${adapter_ic_name})"
272
+ if aws sagemaker delete-inference-component \
273
+ --inference-component-name "${adapter_ic_name}" \
274
+ --region "${AWS_REGION}" 2>/dev/null; then
275
+ echo "โณ Waiting for adapter deletion..."
276
+ aws sagemaker wait inference-component-deleted \
277
+ --inference-component-name "${adapter_ic_name}" \
278
+ --region "${AWS_REGION}" 2>/dev/null || sleep 15
279
+ echo "โœ… Adapter deleted: ${adapter_display_name}"
280
+
281
+ # Mark adapter IC as deleted in manifest (non-blocking)
282
+ ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${adapter_ic_name}" 2>/dev/null || true
283
+ else
284
+ echo "โš ๏ธ Failed to delete adapter IC: ${adapter_ic_name} (may already be gone)"
285
+ fi
286
+ fi
287
+
288
+ # Remove adapter conf file
289
+ rm -f "${adapter_conf}"
290
+ done
291
+ echo "โœ… All adapters deleted"
292
+ echo ""
293
+ fi
294
+ fi
295
+
296
+ <% } %>
297
+ # Iterate do/ic/*.conf and delete each IC owned by this project
298
+ local IC_DELETED=0
299
+ if [ -d "${SCRIPT_DIR}/ic" ]; then
300
+ for conf in "${SCRIPT_DIR}"/ic/*.conf; do
301
+ [ -f "${conf}" ] || continue
302
+ local ic_deployed_name=""
303
+ if grep -q "^export IC_DEPLOYED_NAME=" "${conf}" 2>/dev/null; then
304
+ ic_deployed_name=$(grep "^export IC_DEPLOYED_NAME=" "${conf}" | sed 's/^export IC_DEPLOYED_NAME="//' | sed 's/"$//')
305
+ fi
306
+ if [ -n "${ic_deployed_name}" ]; then
307
+ echo "๐Ÿ—‘๏ธ Deleting inference component: ${ic_deployed_name}"
308
+ if aws sagemaker delete-inference-component \
309
+ --inference-component-name "${ic_deployed_name}" \
310
+ --region "${AWS_REGION}" 2>/dev/null; then
311
+ echo "โณ Waiting for inference component deletion..."
312
+ aws sagemaker wait inference-component-deleted \
313
+ --inference-component-name "${ic_deployed_name}" \
314
+ --region "${AWS_REGION}" 2>/dev/null || sleep 15
315
+ echo "โœ… Inference component deleted: ${ic_deployed_name}"
316
+
317
+ # Mark inference component as deleted in manifest (non-blocking)
318
+ ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${ic_deployed_name}" 2>/dev/null || true
319
+
320
+ # Clear deployed state from config
321
+ sed -i.bak '/^export IC_DEPLOYED_NAME=/d;/^export IC_DEPLOYED_AT=/d' "${conf}"
322
+ rm -f "${conf}.bak"
323
+ else
324
+ echo "โš ๏ธ Failed to delete inference component: ${ic_deployed_name}"
325
+ fi
326
+ IC_DELETED=$((IC_DELETED + 1))
327
+ fi
328
+ done
329
+ fi
330
+
331
+ # Also handle legacy single IC from config
332
+ if [ -n "${IC_NAME}" ]; then
333
+ if aws sagemaker describe-inference-component \
334
+ --inference-component-name "${IC_NAME}" \
335
+ --region "${AWS_REGION}" &> /dev/null; then
336
+ echo "๐Ÿ—‘๏ธ Deleting inference component: ${IC_NAME}"
337
+ if aws sagemaker delete-inference-component \
338
+ --inference-component-name "${IC_NAME}" \
339
+ --region "${AWS_REGION}" 2>/dev/null; then
340
+ echo "โณ Waiting for inference component deletion..."
341
+ aws sagemaker wait inference-component-deleted \
342
+ --inference-component-name "${IC_NAME}" \
343
+ --region "${AWS_REGION}" 2>/dev/null || sleep 15
344
+ echo "โœ… Inference component deleted: ${IC_NAME}"
345
+
346
+ # Mark inference component as deleted in manifest (non-blocking)
347
+ ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${IC_NAME}" 2>/dev/null || true
348
+ fi
349
+ IC_DELETED=$((IC_DELETED + 1))
350
+ fi
351
+ fi
352
+
353
+ if [ "${IC_DELETED}" -eq 0 ]; then
354
+ echo "โ„น๏ธ No deployed inference components found to clean"
355
+ fi
356
+
357
+ echo "โœ… External endpoint cleanup complete (endpoint preserved)"
358
+ return 0
359
+ fi
360
+
228
361
  echo ""
229
362
  echo "Checking for SageMaker resources..."
230
363
 
@@ -240,23 +373,125 @@ clean_endpoint() {
240
373
  return 0
241
374
  fi
242
375
 
243
- # Check for inference component
376
+ # Count ICs to be deleted (multi-IC path)
377
+ local IC_COUNT=0
378
+ local IC_NAMES_TO_DELETE=()
379
+ local IC_CONFS_TO_CLEAN=()
380
+
381
+ if [ -d "${SCRIPT_DIR}/ic" ]; then
382
+ for conf in "${SCRIPT_DIR}"/ic/*.conf; do
383
+ [ -f "${conf}" ] || continue
384
+ local ic_deployed_name=""
385
+ if grep -q "^export IC_DEPLOYED_NAME=" "${conf}" 2>/dev/null; then
386
+ ic_deployed_name=$(grep "^export IC_DEPLOYED_NAME=" "${conf}" | sed 's/^export IC_DEPLOYED_NAME="//' | sed 's/"$//')
387
+ fi
388
+ if [ -n "${ic_deployed_name}" ]; then
389
+ IC_NAMES_TO_DELETE+=("${ic_deployed_name}")
390
+ IC_CONFS_TO_CLEAN+=("${conf}")
391
+ IC_COUNT=$((IC_COUNT + 1))
392
+ echo " โœ“ Inference component: ${ic_deployed_name}"
393
+ fi
394
+ done
395
+ fi
396
+
397
+ # Legacy: check single IC from config (no do/ic/ directory)
244
398
  local IC_EXISTS=false
245
- if [ -n "${IC_NAME}" ]; then
399
+ if [ "${IC_COUNT}" -eq 0 ] && [ -n "${IC_NAME}" ]; then
246
400
  if aws sagemaker describe-inference-component \
247
401
  --inference-component-name "${IC_NAME}" \
248
402
  --region "${AWS_REGION}" &> /dev/null; then
249
403
  IC_EXISTS=true
404
+ IC_COUNT=1
250
405
  echo " โœ“ Inference component: ${IC_NAME}"
251
406
  fi
252
407
  fi
253
408
 
254
- if ! confirm_action "This will delete the SageMaker endpoint and inference component(s)"; then
409
+ # Confirmation with IC count
410
+ local confirm_msg="Delete ${IC_COUNT} inference component"
411
+ if [ "${IC_COUNT}" -ne 1 ]; then
412
+ confirm_msg="${confirm_msg}s"
413
+ fi
414
+ confirm_msg="${confirm_msg} and endpoint?"
415
+
416
+ if ! confirm_action "${confirm_msg}"; then
255
417
  return 1
256
418
  fi
257
419
 
258
- # Delete inference component first (must be deleted before endpoint)
259
- if [ "${IC_EXISTS}" = true ]; then
420
+ <% if (typeof enableLora !== 'undefined' && enableLora) { %>
421
+ # Delete adapter ICs first (adapters depend on base ICs)
422
+ if [ -d "${SCRIPT_DIR}/adapters" ]; then
423
+ local ADAPTER_COUNT=0
424
+ for adapter_conf in "${SCRIPT_DIR}"/adapters/*.conf; do
425
+ [ -f "${adapter_conf}" ] || continue
426
+ ADAPTER_COUNT=$((ADAPTER_COUNT + 1))
427
+ done
428
+
429
+ if [ "${ADAPTER_COUNT}" -gt 0 ]; then
430
+ echo ""
431
+ echo "๐Ÿ”Œ Deleting ${ADAPTER_COUNT} LoRA adapter(s) first..."
432
+ for adapter_conf in "${SCRIPT_DIR}"/adapters/*.conf; do
433
+ [ -f "${adapter_conf}" ] || continue
434
+ local adapter_ic_name=""
435
+ adapter_ic_name=$(grep "^export ADAPTER_IC_NAME=" "${adapter_conf}" 2>/dev/null | sed 's/^export ADAPTER_IC_NAME="//' | sed 's/"$//' || echo "")
436
+ local adapter_display_name
437
+ adapter_display_name=$(basename "${adapter_conf}" .conf)
438
+
439
+ if [ -n "${adapter_ic_name}" ]; then
440
+ echo "๐Ÿ—‘๏ธ Deleting adapter: ${adapter_display_name} (${adapter_ic_name})"
441
+ if aws sagemaker delete-inference-component \
442
+ --inference-component-name "${adapter_ic_name}" \
443
+ --region "${AWS_REGION}" 2>/dev/null; then
444
+ echo "โณ Waiting for adapter deletion..."
445
+ aws sagemaker wait inference-component-deleted \
446
+ --inference-component-name "${adapter_ic_name}" \
447
+ --region "${AWS_REGION}" 2>/dev/null || sleep 15
448
+ echo "โœ… Adapter deleted: ${adapter_display_name}"
449
+
450
+ # Mark adapter IC as deleted in manifest (non-blocking)
451
+ ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${adapter_ic_name}" 2>/dev/null || true
452
+ else
453
+ echo "โš ๏ธ Failed to delete adapter IC: ${adapter_ic_name} (may already be gone)"
454
+ fi
455
+ fi
456
+
457
+ # Remove adapter conf file
458
+ rm -f "${adapter_conf}"
459
+ done
460
+ echo "โœ… All adapters deleted"
461
+ echo ""
462
+ fi
463
+ fi
464
+
465
+ <% } %>
466
+ # Delete inference components first (must be deleted before endpoint)
467
+ if [ ${#IC_NAMES_TO_DELETE[@]} -gt 0 ]; then
468
+ # Multi-IC path: iterate do/ic/*.conf
469
+ local idx=0
470
+ for ic_deployed_name in "${IC_NAMES_TO_DELETE[@]}"; do
471
+ local conf="${IC_CONFS_TO_CLEAN[$idx]}"
472
+ echo "๐Ÿ—‘๏ธ Deleting inference component: ${ic_deployed_name}"
473
+ if aws sagemaker delete-inference-component \
474
+ --inference-component-name "${ic_deployed_name}" \
475
+ --region "${AWS_REGION}" 2>/dev/null; then
476
+ echo "โณ Waiting for inference component deletion..."
477
+ aws sagemaker wait inference-component-deleted \
478
+ --inference-component-name "${ic_deployed_name}" \
479
+ --region "${AWS_REGION}" 2>/dev/null || sleep 15
480
+ echo "โœ… Inference component deleted: ${ic_deployed_name}"
481
+
482
+ # Mark inference component as deleted in manifest (non-blocking)
483
+ ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${ic_deployed_name}" 2>/dev/null || true
484
+
485
+ # Clear deployed state from config
486
+ sed -i.bak '/^export IC_DEPLOYED_NAME=/d;/^export IC_DEPLOYED_AT=/d' "${conf}"
487
+ rm -f "${conf}.bak"
488
+ else
489
+ echo "โŒ Failed to delete inference component: ${ic_deployed_name}"
490
+ fi
491
+ idx=$((idx + 1))
492
+ done
493
+ elif [ "${IC_EXISTS}" = true ]; then
494
+ # Legacy single IC path
260
495
  echo "๐Ÿ—‘๏ธ Deleting inference component: ${IC_NAME}"
261
496
  if aws sagemaker delete-inference-component \
262
497
  --inference-component-name "${IC_NAME}" \
@@ -314,6 +549,250 @@ clean_endpoint() {
314
549
 
315
550
  echo "โœ… SageMaker resources cleaned"
316
551
  }
552
+
553
# Delete a single inference component (IC), selected by its local config name.
#
# Globals:
#   SCRIPT_DIR, PROJECT_NAME, AWS_REGION  (read)
#   AWS_ACCOUNT_ID                        (written from `aws sts get-caller-identity`)
# Arguments:
#   $1 - IC name; its config is looked up at ${SCRIPT_DIR}/ic/<name>.conf
# Returns:
#   0 when the IC was deleted, or when the config records no deployment;
#   1 on a missing name, unknown config, declined confirmation, or failed delete.
#   Exits the whole script with status 4 when the STS identity call fails.
clean_ic() {
  local ic_name="$1"
  echo "🧹 Cleaning inference component: ${ic_name}"
  echo " Project: ${PROJECT_NAME}"
  echo " Region: ${AWS_REGION}"

  # Validate IC name argument
  if [ -z "${ic_name}" ]; then
    echo "❌ IC name required"
    echo " Usage: ./do/clean ic <name>"
    return 1
  fi

  # The IC must have a local config file; list the known ones to help the user.
  local ic_conf="${SCRIPT_DIR}/ic/${ic_name}.conf"
  if [ ! -f "${ic_conf}" ]; then
    echo "❌ IC config not found: do/ic/${ic_name}.conf"
    echo " Available ICs:"
    if [ -d "${SCRIPT_DIR}/ic" ]; then
      local conf
      for conf in "${SCRIPT_DIR}"/ic/*.conf; do
        [ -f "${conf}" ] || continue
        echo " - $(basename "${conf}" .conf)"
      done
    else
      echo " (none)"
    fi
    return 1
  fi

  # Validate AWS credentials and capture the account id with one STS call.
  if ! AWS_ACCOUNT_ID=$(aws sts get-caller-identity \
    --query Account --output text 2> /dev/null); then
    echo "❌ AWS credentials not configured"
    echo " Run: aws configure"
    exit 4
  fi

  # Read IC_DEPLOYED_NAME from the config. If the key appears more than once,
  # the last assignment wins (as it would if the file were sourced); a pair of
  # surrounding double quotes is optional.
  local ic_deployed_name
  ic_deployed_name=$(sed -n 's/^export IC_DEPLOYED_NAME=//p' "${ic_conf}" 2> /dev/null | tail -n 1)
  ic_deployed_name=${ic_deployed_name#\"}
  ic_deployed_name=${ic_deployed_name%\"}

  if [ -z "${ic_deployed_name}" ]; then
    echo "ℹ️ IC '${ic_name}' has not been deployed (no IC_DEPLOYED_NAME in config)"
    return 0
  fi

  echo " Deployed as: ${ic_deployed_name}"

  if ! confirm_action "This will delete inference component '${ic_deployed_name}'"; then
    return 1
  fi

  # Delete the inference component
  echo "🗑️ Deleting inference component: ${ic_deployed_name}"
  if ! aws sagemaker delete-inference-component \
    --inference-component-name "${ic_deployed_name}" \
    --region "${AWS_REGION}" 2> /dev/null; then
    echo "❌ Failed to delete inference component: ${ic_deployed_name}"
    return 1
  fi

  # NOTE(review): confirm that the installed AWS CLI provides this waiter; on
  # any waiter failure the code falls back to a fixed 15 second pause.
  echo "⏳ Waiting for inference component deletion..."
  aws sagemaker wait inference-component-deleted \
    --inference-component-name "${ic_deployed_name}" \
    --region "${AWS_REGION}" 2> /dev/null || sleep 15
  echo "✅ Inference component deleted: ${ic_deployed_name}"

  # Mark inference component as deleted in manifest (non-blocking). The helper
  # is addressed through SCRIPT_DIR so this works from any working directory.
  "${SCRIPT_DIR}/manifest" delete \
    --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${ic_deployed_name}" \
    2> /dev/null || true

  # Clear deployed state from config (-i.bak form works with GNU and BSD sed)
  sed -i.bak '/^export IC_DEPLOYED_NAME=/d;/^export IC_DEPLOYED_AT=/d' "${ic_conf}"
  rm -f "${ic_conf}.bak"

  echo "✅ Inference component '${ic_name}' cleaned"
}
633
+
634
+ <% if (typeof enableLora !== 'undefined' && enableLora) { %>
635
# Delete a single LoRA adapter by name (synonym for do/adapter remove).
#
# Globals:
#   SCRIPT_DIR, PROJECT_NAME, AWS_REGION  (read)
#   AWS_ACCOUNT_ID                        (written from `aws sts get-caller-identity`)
# Arguments:
#   $1 - adapter name; its config is looked up at ${SCRIPT_DIR}/adapters/<name>.conf
# Returns:
#   0 when the local config was removed (even if the remote delete call failed,
#   since the adapter IC may already be gone); 1 on a missing name, unknown
#   config, or declined confirmation.
#   Exits the whole script with status 4 when the STS identity call fails.
clean_adapter() {
  local adapter_name="$1"
  echo "🧹 Cleaning LoRA adapter: ${adapter_name}"
  echo " Project: ${PROJECT_NAME}"
  echo " Region: ${AWS_REGION}"

  # Validate adapter name argument
  if [ -z "${adapter_name}" ]; then
    echo "❌ Adapter name required"
    echo " Usage: ./do/clean adapter <name>"
    return 1
  fi

  # The adapter must have a local config file; list the known ones otherwise.
  local adapter_conf="${SCRIPT_DIR}/adapters/${adapter_name}.conf"
  if [ ! -f "${adapter_conf}" ]; then
    echo "❌ Adapter config not found: do/adapters/${adapter_name}.conf"
    echo " Available adapters:"
    if [ -d "${SCRIPT_DIR}/adapters" ]; then
      local conf
      for conf in "${SCRIPT_DIR}"/adapters/*.conf; do
        [ -f "${conf}" ] || continue
        echo " - $(basename "${conf}" .conf)"
      done
    else
      echo " (none)"
    fi
    return 1
  fi

  # Validate AWS credentials and capture the account id with one STS call.
  if ! AWS_ACCOUNT_ID=$(aws sts get-caller-identity \
    --query Account --output text 2> /dev/null); then
    echo "❌ AWS credentials not configured"
    echo " Run: aws configure"
    exit 4
  fi

  # Read ADAPTER_IC_NAME from the config. `sed -n` exits 0 when nothing
  # matches, so a config without the key reaches the branch below instead of
  # aborting the script when errexit/pipefail are active.
  local adapter_ic_name
  adapter_ic_name=$(sed -n 's/^export ADAPTER_IC_NAME=//p' "${adapter_conf}" 2> /dev/null | tail -n 1)
  adapter_ic_name=${adapter_ic_name#\"}
  adapter_ic_name=${adapter_ic_name%\"}

  if [ -z "${adapter_ic_name}" ]; then
    echo "⚠️ No ADAPTER_IC_NAME found in do/adapters/${adapter_name}.conf"
    echo " Removing local config file."
    rm -f "${adapter_conf}"
    return 0
  fi

  echo " Adapter IC: ${adapter_ic_name}"

  if ! confirm_action "This will delete LoRA adapter '${adapter_name}' (IC: ${adapter_ic_name})"; then
    return 1
  fi

  # Delete the adapter inference component
  echo "🗑️ Deleting adapter inference component: ${adapter_ic_name}"
  if aws sagemaker delete-inference-component \
    --inference-component-name "${adapter_ic_name}" \
    --region "${AWS_REGION}" 2> /dev/null; then
    # NOTE(review): confirm that the installed AWS CLI provides this waiter; on
    # any waiter failure the code falls back to a fixed 15 second pause.
    echo "⏳ Waiting for adapter IC deletion..."
    aws sagemaker wait inference-component-deleted \
      --inference-component-name "${adapter_ic_name}" \
      --region "${AWS_REGION}" 2> /dev/null || sleep 15
    echo "✅ Adapter IC deleted: ${adapter_ic_name}"

    # Mark adapter IC as deleted in manifest (non-blocking). The helper is
    # addressed through SCRIPT_DIR so this works from any working directory.
    "${SCRIPT_DIR}/manifest" delete \
      --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${adapter_ic_name}" \
      2> /dev/null || true
  else
    echo "⚠️ Failed to delete adapter IC: ${adapter_ic_name} (may already be gone)"
  fi

  # Remove local conf file regardless of the remote outcome
  rm -f "${adapter_conf}"
  echo "✅ Removed: do/adapters/${adapter_name}.conf"

  echo "✅ Adapter '${adapter_name}' cleaned"
}
714
+
715
# Delete ALL LoRA adapters that have a config under ${SCRIPT_DIR}/adapters,
# leaving the base IC and the endpoint untouched.
#
# Globals:
#   SCRIPT_DIR, PROJECT_NAME, AWS_REGION  (read)
#   AWS_ACCOUNT_ID                        (written from `aws sts get-caller-identity`)
# Arguments:
#   none
# Returns:
#   0 when there was nothing to clean or the listed adapters were processed;
#   1 when the confirmation is declined.
#   Exits the whole script with status 4 when the STS identity call fails.
clean_adapters() {
  echo "🧹 Cleaning all LoRA adapters"
  echo " Project: ${PROJECT_NAME}"
  echo " Region: ${AWS_REGION}"

  # Check if adapters directory exists and has conf files
  if [ ! -d "${SCRIPT_DIR}/adapters" ]; then
    echo "ℹ️ No adapters directory found"
    return 0
  fi

  # Snapshot the config files once. The same list is shown to the user and
  # then deleted, so nothing that was not listed gets removed.
  local conf
  local ADAPTER_CONFS=()
  for conf in "${SCRIPT_DIR}"/adapters/*.conf; do
    [ -f "${conf}" ] || continue
    ADAPTER_CONFS+=("${conf}")
  done
  local ADAPTER_COUNT=${#ADAPTER_CONFS[@]}

  if [ "${ADAPTER_COUNT}" -eq 0 ]; then
    echo "ℹ️ No adapters found to clean"
    return 0
  fi

  echo ""
  echo "Adapters to be removed (${ADAPTER_COUNT}):"
  for conf in "${ADAPTER_CONFS[@]}"; do
    echo " • $(basename "${conf}" .conf)"
  done

  if ! confirm_action "This will delete ${ADAPTER_COUNT} LoRA adapter(s). Base IC and endpoint will remain running."; then
    return 1
  fi

  # Validate AWS credentials and capture the account id with one STS call.
  if ! AWS_ACCOUNT_ID=$(aws sts get-caller-identity \
    --query Account --output text 2> /dev/null); then
    echo "❌ AWS credentials not configured"
    echo " Run: aws configure"
    exit 4
  fi

  # Delete each adapter that was listed above
  local DELETED=0
  local adapter_conf adapter_ic_name adapter_display_name
  for adapter_conf in "${ADAPTER_CONFS[@]}"; do
    # Skip entries that disappeared while waiting for confirmation.
    [ -f "${adapter_conf}" ] || continue

    # `sed -n` exits 0 on no match, so a config without the key is simply
    # treated as "no remote IC recorded".
    adapter_ic_name=$(sed -n 's/^export ADAPTER_IC_NAME=//p' "${adapter_conf}" 2> /dev/null | tail -n 1)
    adapter_ic_name=${adapter_ic_name#\"}
    adapter_ic_name=${adapter_ic_name%\"}
    adapter_display_name=$(basename "${adapter_conf}" .conf)

    if [ -n "${adapter_ic_name}" ]; then
      echo "🗑️ Deleting adapter: ${adapter_display_name} (${adapter_ic_name})"
      if aws sagemaker delete-inference-component \
        --inference-component-name "${adapter_ic_name}" \
        --region "${AWS_REGION}" 2> /dev/null; then
        # NOTE(review): confirm that the installed AWS CLI provides this
        # waiter; on any waiter failure the code falls back to a 15s pause.
        echo "⏳ Waiting for adapter deletion..."
        aws sagemaker wait inference-component-deleted \
          --inference-component-name "${adapter_ic_name}" \
          --region "${AWS_REGION}" 2> /dev/null || sleep 15
        echo "✅ Adapter deleted: ${adapter_display_name}"

        # Mark adapter IC as deleted in manifest (non-blocking). The helper is
        # addressed through SCRIPT_DIR so this works from any working directory.
        "${SCRIPT_DIR}/manifest" delete \
          --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${adapter_ic_name}" \
          2> /dev/null || true
      else
        echo "⚠️ Failed to delete adapter IC: ${adapter_ic_name} (may already be gone)"
      fi
    fi

    # Remove adapter conf file regardless of the remote outcome
    rm -f "${adapter_conf}"
    DELETED=$((DELETED + 1))
  done

  echo ""
  echo "✅ All adapters cleaned (${DELETED} removed)"
  echo " Base IC and endpoint remain running."
}
795
+ <% } %>
317
796
  <% } else if (deploymentTarget === 'async-inference') { %>
318
797
  # Function to clean SageMaker async endpoint and model
319
798
  clean_endpoint() {
@@ -724,6 +1203,17 @@ case "${CLEANUP_TARGET}" in
724
1203
  endpoint)
725
1204
  clean_endpoint
726
1205
  ;;
1206
+ ic)
1207
+ clean_ic "${CLEANUP_ARG}"
1208
+ ;;
1209
+ <% if (typeof enableLora !== 'undefined' && enableLora) { %>
1210
+ adapter)
1211
+ clean_adapter "${CLEANUP_ARG}"
1212
+ ;;
1213
+ adapters)
1214
+ clean_adapters
1215
+ ;;
1216
+ <% } %>
727
1217
  <% } else if (deploymentTarget === 'async-inference') { %>
728
1218
  endpoint)
729
1219
  clean_endpoint
@@ -740,6 +1230,49 @@ case "${CLEANUP_TARGET}" in
740
1230
  codebuild)
741
1231
  clean_codebuild
742
1232
  ;;
1233
+ <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
1234
+ benchmark)
1235
+ echo "๐Ÿงน Cleaning benchmark resources..."
1236
+ WORKLOAD_CONFIG_NAME="${PROJECT_NAME}-benchmark-config"
1237
+
1238
+ # Delete workload config if exists
1239
+ if aws sagemaker describe-ai-workload-config \
1240
+ --ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
1241
+ --region "$AWS_REGION" 2>/dev/null; then
1242
+ aws sagemaker delete-ai-workload-config \
1243
+ --ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
1244
+ --region "$AWS_REGION"
1245
+ echo " โœ“ Deleted workload config: $WORKLOAD_CONFIG_NAME"
1246
+ fi
1247
+
1248
+ # Delete terminal benchmark jobs matching project prefix
1249
+ aws sagemaker list-ai-benchmark-jobs \
1250
+ --name-contains "${PROJECT_NAME}-benchmark-" \
1251
+ --region "$AWS_REGION" \
1252
+ --query 'AIBenchmarkJobs[?AIBenchmarkJobStatus!=`InProgress`].AIBenchmarkJobName' \
1253
+ --output text | tr '\t' '\n' | while read -r job; do
1254
+ [ -z "$job" ] && continue
1255
+ aws sagemaker delete-ai-benchmark-job \
1256
+ --ai-benchmark-job-name "$job" \
1257
+ --region "$AWS_REGION"
1258
+ echo " โœ“ Deleted benchmark job: $job"
1259
+ done
1260
+
1261
+ # Delete local benchmark results
1262
+ if [ -d "${SCRIPT_DIR}/../benchmarks" ]; then
1263
+ read -p "Delete local benchmark results? (Y/n) " CONFIRM_DELETE
1264
+ CONFIRM_DELETE="${CONFIRM_DELETE:-Y}"
1265
+ if [[ "${CONFIRM_DELETE}" =~ ^[Yy]$ ]]; then
1266
+ rm -rf "${SCRIPT_DIR}/../benchmarks"
1267
+ echo " โœ“ Deleted local benchmarks/ directory"
1268
+ else
1269
+ echo " โญ Skipped local benchmarks/ deletion"
1270
+ fi
1271
+ fi
1272
+
1273
+ echo "โœ… Benchmark cleanup complete"
1274
+ ;;
1275
+ <% } %>
743
1276
  all)
744
1277
  echo "๐Ÿงน Performing complete cleanup"
745
1278
  echo ""
@@ -790,6 +1323,49 @@ case "${CLEANUP_TARGET}" in
790
1323
  CLEANED_ITEMS+=("CodeBuild resources")
791
1324
  fi
792
1325
 
1326
+ <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
1327
+ echo ""
1328
+
1329
+ # Clean benchmark resources
1330
+ WORKLOAD_CONFIG_NAME="${PROJECT_NAME}-benchmark-config"
1331
+
1332
+ # Delete workload config if exists
1333
+ if aws sagemaker describe-ai-workload-config \
1334
+ --ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
1335
+ --region "$AWS_REGION" 2>/dev/null; then
1336
+ aws sagemaker delete-ai-workload-config \
1337
+ --ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
1338
+ --region "$AWS_REGION"
1339
+ echo " โœ“ Deleted workload config: $WORKLOAD_CONFIG_NAME"
1340
+ fi
1341
+
1342
+ # Delete terminal benchmark jobs matching project prefix
1343
+ aws sagemaker list-ai-benchmark-jobs \
1344
+ --name-contains "${PROJECT_NAME}-benchmark-" \
1345
+ --region "$AWS_REGION" \
1346
+ --query 'AIBenchmarkJobs[?AIBenchmarkJobStatus!=`InProgress`].AIBenchmarkJobName' \
1347
+ --output text | tr '\t' '\n' | while read -r job; do
1348
+ [ -z "$job" ] && continue
1349
+ aws sagemaker delete-ai-benchmark-job \
1350
+ --ai-benchmark-job-name "$job" \
1351
+ --region "$AWS_REGION"
1352
+ echo " โœ“ Deleted benchmark job: $job"
1353
+ done
1354
+
1355
+ # Delete local benchmark results
1356
+ if [ -d "${SCRIPT_DIR}/../benchmarks" ]; then
1357
+ read -p "Delete local benchmark results? (Y/n) " CONFIRM_DELETE
1358
+ CONFIRM_DELETE="${CONFIRM_DELETE:-Y}"
1359
+ if [[ "${CONFIRM_DELETE}" =~ ^[Yy]$ ]]; then
1360
+ rm -rf "${SCRIPT_DIR}/../benchmarks"
1361
+ echo " โœ“ Deleted local benchmarks/ directory"
1362
+ else
1363
+ echo " โญ Skipped local benchmarks/ deletion"
1364
+ fi
1365
+ fi
1366
+
1367
+ CLEANED_ITEMS+=("Benchmark resources")
1368
+ <% } %>
793
1369
  # Display summary
794
1370
  echo ""
795
1371
  echo "โœ… Cleanup complete!"