@aws/ml-container-creator 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. package/LICENSE +202 -0
  2. package/LICENSE-THIRD-PARTY +68620 -0
  3. package/NOTICE +2 -0
  4. package/README.md +106 -0
  5. package/bin/cli.js +365 -0
  6. package/config/defaults.json +32 -0
  7. package/config/presets/transformers-djl.json +26 -0
  8. package/config/presets/transformers-gpu.json +24 -0
  9. package/config/presets/transformers-lmi.json +27 -0
  10. package/package.json +129 -0
  11. package/servers/README.md +419 -0
  12. package/servers/base-image-picker/catalogs/model-servers.json +1191 -0
  13. package/servers/base-image-picker/catalogs/python-slim.json +38 -0
  14. package/servers/base-image-picker/catalogs/triton-backends.json +51 -0
  15. package/servers/base-image-picker/catalogs/triton.json +38 -0
  16. package/servers/base-image-picker/index.js +495 -0
  17. package/servers/base-image-picker/manifest.json +17 -0
  18. package/servers/base-image-picker/package.json +15 -0
  19. package/servers/hyperpod-cluster-picker/LICENSE +202 -0
  20. package/servers/hyperpod-cluster-picker/index.js +424 -0
  21. package/servers/hyperpod-cluster-picker/manifest.json +14 -0
  22. package/servers/hyperpod-cluster-picker/package.json +17 -0
  23. package/servers/instance-recommender/LICENSE +202 -0
  24. package/servers/instance-recommender/catalogs/instances.json +852 -0
  25. package/servers/instance-recommender/index.js +284 -0
  26. package/servers/instance-recommender/manifest.json +16 -0
  27. package/servers/instance-recommender/package.json +15 -0
  28. package/servers/lib/LICENSE +202 -0
  29. package/servers/lib/bedrock-client.js +160 -0
  30. package/servers/lib/custom-validators.js +46 -0
  31. package/servers/lib/dynamic-resolver.js +36 -0
  32. package/servers/lib/package.json +11 -0
  33. package/servers/lib/schemas/image-catalog.schema.json +185 -0
  34. package/servers/lib/schemas/instances.schema.json +124 -0
  35. package/servers/lib/schemas/manifest.schema.json +64 -0
  36. package/servers/lib/schemas/model-catalog.schema.json +91 -0
  37. package/servers/lib/schemas/regions.schema.json +26 -0
  38. package/servers/lib/schemas/triton-backends.schema.json +51 -0
  39. package/servers/model-picker/catalogs/jumpstart-public.json +66 -0
  40. package/servers/model-picker/catalogs/popular-diffusors.json +88 -0
  41. package/servers/model-picker/catalogs/popular-transformers.json +226 -0
  42. package/servers/model-picker/index.js +1693 -0
  43. package/servers/model-picker/manifest.json +18 -0
  44. package/servers/model-picker/package.json +20 -0
  45. package/servers/region-picker/LICENSE +202 -0
  46. package/servers/region-picker/catalogs/regions.json +263 -0
  47. package/servers/region-picker/index.js +230 -0
  48. package/servers/region-picker/manifest.json +16 -0
  49. package/servers/region-picker/package.json +15 -0
  50. package/src/app.js +1007 -0
  51. package/src/copy-tpl.js +77 -0
  52. package/src/lib/accelerator-validator.js +39 -0
  53. package/src/lib/asset-manager.js +385 -0
  54. package/src/lib/aws-profile-parser.js +181 -0
  55. package/src/lib/bootstrap-command-handler.js +1647 -0
  56. package/src/lib/bootstrap-config.js +238 -0
  57. package/src/lib/ci-register-helpers.js +124 -0
  58. package/src/lib/ci-report-helpers.js +158 -0
  59. package/src/lib/ci-stage-helpers.js +268 -0
  60. package/src/lib/cli-handler.js +529 -0
  61. package/src/lib/comment-generator.js +544 -0
  62. package/src/lib/community-reports-validator.js +91 -0
  63. package/src/lib/config-manager.js +2106 -0
  64. package/src/lib/configuration-exporter.js +204 -0
  65. package/src/lib/configuration-manager.js +695 -0
  66. package/src/lib/configuration-matcher.js +221 -0
  67. package/src/lib/cpu-validator.js +36 -0
  68. package/src/lib/cuda-validator.js +57 -0
  69. package/src/lib/deployment-config-resolver.js +103 -0
  70. package/src/lib/deployment-entry-schema.js +125 -0
  71. package/src/lib/deployment-registry.js +598 -0
  72. package/src/lib/docker-introspection-validator.js +51 -0
  73. package/src/lib/engine-prefix-resolver.js +60 -0
  74. package/src/lib/huggingface-client.js +172 -0
  75. package/src/lib/key-value-parser.js +37 -0
  76. package/src/lib/known-flags-validator.js +200 -0
  77. package/src/lib/manifest-cli.js +280 -0
  78. package/src/lib/mcp-client.js +303 -0
  79. package/src/lib/mcp-command-handler.js +532 -0
  80. package/src/lib/neuron-validator.js +80 -0
  81. package/src/lib/parameter-schema-validator.js +284 -0
  82. package/src/lib/prompt-runner.js +1349 -0
  83. package/src/lib/prompts.js +1138 -0
  84. package/src/lib/registry-command-handler.js +519 -0
  85. package/src/lib/registry-loader.js +198 -0
  86. package/src/lib/rocm-validator.js +80 -0
  87. package/src/lib/schema-validator.js +157 -0
  88. package/src/lib/sensitive-redactor.js +59 -0
  89. package/src/lib/template-engine.js +156 -0
  90. package/src/lib/template-manager.js +341 -0
  91. package/src/lib/validation-engine.js +314 -0
  92. package/src/prompt-adapter.js +63 -0
  93. package/templates/Dockerfile +300 -0
  94. package/templates/IAM_PERMISSIONS.md +84 -0
  95. package/templates/MIGRATION.md +488 -0
  96. package/templates/PROJECT_README.md +439 -0
  97. package/templates/TEMPLATE_SYSTEM.md +243 -0
  98. package/templates/buildspec.yml +64 -0
  99. package/templates/code/chat_template.jinja +1 -0
  100. package/templates/code/flask/gunicorn_config.py +35 -0
  101. package/templates/code/flask/wsgi.py +10 -0
  102. package/templates/code/model_handler.py +387 -0
  103. package/templates/code/serve +300 -0
  104. package/templates/code/serve.py +175 -0
  105. package/templates/code/serving.properties +105 -0
  106. package/templates/code/start_server.py +39 -0
  107. package/templates/code/start_server.sh +39 -0
  108. package/templates/diffusors/Dockerfile +72 -0
  109. package/templates/diffusors/patch_image_api.py +35 -0
  110. package/templates/diffusors/serve +115 -0
  111. package/templates/diffusors/start_server.sh +114 -0
  112. package/templates/do/.gitkeep +1 -0
  113. package/templates/do/README.md +541 -0
  114. package/templates/do/build +83 -0
  115. package/templates/do/ci +681 -0
  116. package/templates/do/clean +811 -0
  117. package/templates/do/config +260 -0
  118. package/templates/do/deploy +1560 -0
  119. package/templates/do/export +306 -0
  120. package/templates/do/logs +319 -0
  121. package/templates/do/manifest +12 -0
  122. package/templates/do/push +119 -0
  123. package/templates/do/register +580 -0
  124. package/templates/do/run +113 -0
  125. package/templates/do/submit +417 -0
  126. package/templates/do/test +1147 -0
  127. package/templates/hyperpod/configmap.yaml +24 -0
  128. package/templates/hyperpod/deployment.yaml +71 -0
  129. package/templates/hyperpod/pvc.yaml +42 -0
  130. package/templates/hyperpod/service.yaml +17 -0
  131. package/templates/nginx-diffusors.conf +74 -0
  132. package/templates/nginx-predictors.conf +47 -0
  133. package/templates/nginx-tensorrt.conf +74 -0
  134. package/templates/requirements.txt +61 -0
  135. package/templates/sample_model/test_inference.py +123 -0
  136. package/templates/sample_model/train_abalone.py +252 -0
  137. package/templates/test/test_endpoint.sh +79 -0
  138. package/templates/test/test_local_image.sh +80 -0
  139. package/templates/test/test_model_handler.py +180 -0
  140. package/templates/triton/Dockerfile +128 -0
  141. package/templates/triton/config.pbtxt +163 -0
  142. package/templates/triton/model.py +130 -0
  143. package/templates/triton/requirements.txt +11 -0
@@ -0,0 +1,811 @@
1
+ #!/bin/bash
2
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ set -e
6
+ set -u
7
+ set -o pipefail
8
+
9
+ # Source configuration
10
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
+ source "${SCRIPT_DIR}/config"
12
+
13
+ # Parse arguments
14
+ CLEANUP_TARGET=""
15
+ FORCE_CLEAN=false
16
+
17
+ for arg in "$@"; do
18
+ case "$arg" in
19
+ --force) FORCE_CLEAN=true ;;
20
+ -*) ;; # ignore other flags
21
+ *) CLEANUP_TARGET="$arg" ;;
22
+ esac
23
+ done
24
+
25
+ # Function to display usage
26
+ show_usage() {
27
+ <% if (deploymentTarget === 'managed-inference') { %>
28
+ echo "Usage: ./do/clean [local|ecr|endpoint|codebuild|all]"
29
+ <% } else if (deploymentTarget === 'async-inference') { %>
30
+ echo "Usage: ./do/clean [local|ecr|endpoint|codebuild|all]"
31
+ <% } else if (deploymentTarget === 'batch-transform') { %>
32
+ echo "Usage: ./do/clean [local|ecr|batch|codebuild|all]"
33
+ <% } else if (deploymentTarget === 'hyperpod-eks') { %>
34
+ echo "Usage: ./do/clean [local|ecr|hyperpod|codebuild|all]"
35
+ <% } %>
36
+ echo ""
37
+ echo "Cleanup targets:"
38
+ echo " local - Remove local Docker images"
39
+ echo " ecr - Remove images from Amazon ECR"
40
+ <% if (deploymentTarget === 'managed-inference') { %>
41
+ echo " endpoint - Delete SageMaker endpoint, configuration, and model"
42
+ <% } else if (deploymentTarget === 'async-inference') { %>
43
+ echo " endpoint - Delete SageMaker async endpoint, configuration, and inference component"
44
+ <% } else if (deploymentTarget === 'batch-transform') { %>
45
+ echo " batch - Stop running transform job and delete SageMaker model"
46
+ <% } else if (deploymentTarget === 'hyperpod-eks') { %>
47
+ echo " hyperpod - Delete HyperPod EKS deployment and services"
48
+ <% } %>
49
+ echo " codebuild - Delete CodeBuild project, IAM role, and S3 source artifacts"
50
+ echo " all - Perform all cleanup operations"
51
+ echo ""
52
+ echo "Examples:"
53
+ echo " ./do/clean local # Remove local Docker images only"
54
+ <% if (deploymentTarget === 'managed-inference') { %>
55
+ echo " ./do/clean endpoint # Delete SageMaker resources only"
56
+ <% } else if (deploymentTarget === 'async-inference') { %>
57
+ echo " ./do/clean endpoint # Delete SageMaker async resources only"
58
+ <% } else if (deploymentTarget === 'batch-transform') { %>
59
+ echo " ./do/clean batch # Stop transform job and delete model"
60
+ <% } else if (deploymentTarget === 'hyperpod-eks') { %>
61
+ echo " ./do/clean hyperpod # Delete HyperPod EKS resources only"
62
+ <% } %>
63
+ echo " ./do/clean codebuild # Delete CodeBuild project and rebuild fresh"
64
+ echo " ./do/clean all # Clean up everything"
65
+ }
66
+
67
+ # Function to confirm action (skipped when --force is set)
68
+ confirm_action() {
69
+ local message="$1"
70
+ if [ "${FORCE_CLEAN}" = true ]; then
71
+ return 0
72
+ fi
73
+ echo ""
74
+ echo "โš ๏ธ ${message}"
75
+ read -p " Are you sure? (yes/no): " -r
76
+ echo ""
77
+ if [[ ! $REPLY =~ ^[Yy][Ee][Ss]$ ]]; then
78
+ echo "โŒ Operation cancelled"
79
+ return 1
80
+ fi
81
+ return 0
82
+ }
83
+
84
+ # Function to clean local Docker images
85
+ clean_local() {
86
+ echo "๐Ÿงน Cleaning local Docker images"
87
+ echo " Project: ${PROJECT_NAME}"
88
+
89
+ # Build list of image patterns to clean
90
+ # Pattern 1: locally built images (e.g., my-project:latest)
91
+ # Pattern 2: ECR-pulled images (e.g., <account>.dkr.ecr.<region>.amazonaws.com/<repo>:<project>-latest)
92
+ LOCAL_PATTERN="^${PROJECT_NAME}:"
93
+ ECR_PATTERN="\.dkr\.ecr\..*\.amazonaws\.com/${ECR_REPOSITORY_NAME}:${PROJECT_NAME}-"
94
+
95
+ # Check if any matching images exist
96
+ if ! docker images --format "{{.Repository}}:{{.Tag}}" | grep -qE "${LOCAL_PATTERN}|${ECR_PATTERN}"; then
97
+ echo "โ„น๏ธ No local images found for ${PROJECT_NAME}"
98
+ return 0
99
+ fi
100
+
101
+ # List images to be removed
102
+ echo ""
103
+ echo "Images to be removed:"
104
+ docker images --format "{{.Repository}}:{{.Tag}}" | grep -E "${LOCAL_PATTERN}|${ECR_PATTERN}" | while read -r image; do
105
+ echo " ${image}"
106
+ done
107
+
108
+ if ! confirm_action "This will remove all local Docker images for ${PROJECT_NAME}"; then
109
+ return 1
110
+ fi
111
+
112
+ # Remove images
113
+ echo "๐Ÿ—‘๏ธ Removing local images..."
114
+ docker images --format "{{.Repository}}:{{.Tag}}" | grep -E "${LOCAL_PATTERN}|${ECR_PATTERN}" | while read -r image; do
115
+ echo " Removing: ${image}"
116
+ docker rmi "${image}" || echo " โš ๏ธ Failed to remove ${image}"
117
+ done
118
+
119
+ echo "โœ… Local images cleaned"
120
+ }
121
+
122
+ # Function to clean ECR images
123
+ clean_ecr() {
124
+ echo "๐Ÿงน Cleaning ECR images"
125
+ echo " Repository: ${ECR_REPOSITORY_NAME}"
126
+ echo " Region: ${AWS_REGION}"
127
+
128
+ # Validate AWS credentials
129
+ if ! aws sts get-caller-identity &> /dev/null; then
130
+ echo "โŒ AWS credentials not configured"
131
+ echo " Run: aws configure"
132
+ exit 4
133
+ fi
134
+
135
+ # Get AWS account ID
136
+ AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
137
+
138
+ # Check if repository exists
139
+ if ! aws ecr describe-repositories \
140
+ --repository-names "${ECR_REPOSITORY_NAME}" \
141
+ --region "${AWS_REGION}" &> /dev/null; then
142
+ echo "โ„น๏ธ ECR repository ${ECR_REPOSITORY_NAME} does not exist"
143
+ return 0
144
+ fi
145
+
146
+ # List images in repository
147
+ echo ""
148
+ echo "Checking for images in repository..."
149
+
150
+ if ! IMAGE_IDS=$(aws ecr list-images \
151
+ --repository-name "${ECR_REPOSITORY_NAME}" \
152
+ --region "${AWS_REGION}" \
153
+ --query 'imageIds[*].[imageTag]' \
154
+ --output text 2>&1); then
155
+ echo "โ„น๏ธ No images found in repository"
156
+ return 0
157
+ fi
158
+
159
+ if [ -z "${IMAGE_IDS}" ] || [ "${IMAGE_IDS}" = "None" ]; then
160
+ echo "โ„น๏ธ No images found in repository"
161
+ return 0
162
+ fi
163
+
164
+ # Display images
165
+ echo "Images in repository:"
166
+ echo "${IMAGE_IDS}" | while read -r tag; do
167
+ if [ -n "${tag}" ] && [ "${tag}" != "None" ]; then
168
+ echo " - ${tag}"
169
+ fi
170
+ done
171
+
172
+ if ! confirm_action "This will remove all images from ECR repository ${ECR_REPOSITORY_NAME}"; then
173
+ return 1
174
+ fi
175
+
176
+ # Remove images
177
+ echo "๐Ÿ—‘๏ธ Removing ECR images..."
178
+
179
+ # Get image IDs in JSON format for batch delete
180
+ IMAGE_IDS_JSON=$(aws ecr list-images \
181
+ --repository-name "${ECR_REPOSITORY_NAME}" \
182
+ --region "${AWS_REGION}" \
183
+ --query 'imageIds' \
184
+ --output json)
185
+
186
+ if [ "${IMAGE_IDS_JSON}" != "[]" ]; then
187
+ if aws ecr batch-delete-image \
188
+ --repository-name "${ECR_REPOSITORY_NAME}" \
189
+ --region "${AWS_REGION}" \
190
+ --image-ids "${IMAGE_IDS_JSON}" &> /dev/null; then
191
+ echo "โœ… ECR images removed"
192
+ else
193
+ echo "โŒ Failed to remove some ECR images"
194
+ return 1
195
+ fi
196
+ else
197
+ echo "โ„น๏ธ No images to remove"
198
+ fi
199
+ }
200
+
201
+ <% if (deploymentTarget === 'managed-inference') { %>
202
+ # Function to clean SageMaker endpoint and inference components
203
+ clean_endpoint() {
204
+ echo "๐Ÿงน Cleaning SageMaker resources"
205
+ echo " Project: ${PROJECT_NAME}"
206
+ echo " Region: ${AWS_REGION}"
207
+
208
+ # Validate AWS credentials
209
+ if ! aws sts get-caller-identity &> /dev/null; then
210
+ echo "โŒ AWS credentials not configured"
211
+ echo " Run: aws configure"
212
+ exit 4
213
+ fi
214
+
215
+ AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
216
+
217
+ # Use names from config (set by do/deploy) or argument
218
+ local EP_NAME="${ENDPOINT_NAME:-}"
219
+ local IC_NAME="${INFERENCE_COMPONENT_NAME:-}"
220
+ local EPC_NAME="${ENDPOINT_CONFIG_NAME:-}"
221
+
222
+ if [ -z "${EP_NAME}" ]; then
223
+ echo "โŒ No endpoint name found"
224
+ echo " Run ./do/deploy first, or set ENDPOINT_NAME in do/config"
225
+ return 1
226
+ fi
227
+
228
+ echo ""
229
+ echo "Checking for SageMaker resources..."
230
+
231
+ local ENDPOINT_EXISTS=false
232
+
233
+ if aws sagemaker describe-endpoint \
234
+ --endpoint-name "${EP_NAME}" \
235
+ --region "${AWS_REGION}" &> /dev/null; then
236
+ ENDPOINT_EXISTS=true
237
+ echo " โœ“ Endpoint: ${EP_NAME}"
238
+ else
239
+ echo "โ„น๏ธ Endpoint not found: ${EP_NAME}"
240
+ return 0
241
+ fi
242
+
243
+ # Check for inference component
244
+ local IC_EXISTS=false
245
+ if [ -n "${IC_NAME}" ]; then
246
+ if aws sagemaker describe-inference-component \
247
+ --inference-component-name "${IC_NAME}" \
248
+ --region "${AWS_REGION}" &> /dev/null; then
249
+ IC_EXISTS=true
250
+ echo " โœ“ Inference component: ${IC_NAME}"
251
+ fi
252
+ fi
253
+
254
+ if ! confirm_action "This will delete the SageMaker endpoint and inference component(s)"; then
255
+ return 1
256
+ fi
257
+
258
+ # Delete inference component first (must be deleted before endpoint)
259
+ if [ "${IC_EXISTS}" = true ]; then
260
+ echo "๐Ÿ—‘๏ธ Deleting inference component: ${IC_NAME}"
261
+ if aws sagemaker delete-inference-component \
262
+ --inference-component-name "${IC_NAME}" \
263
+ --region "${AWS_REGION}" &> /dev/null; then
264
+ echo "โณ Waiting for inference component deletion..."
265
+ aws sagemaker wait inference-component-deleted \
266
+ --inference-component-name "${IC_NAME}" \
267
+ --region "${AWS_REGION}" 2>/dev/null || sleep 15
268
+ echo "โœ… Inference component deleted"
269
+
270
+ # Mark inference component as deleted in manifest (non-blocking)
271
+ ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${IC_NAME}" 2>/dev/null || true
272
+ else
273
+ echo "โŒ Failed to delete inference component"
274
+ fi
275
+ fi
276
+
277
+ # Delete endpoint
278
+ echo "๐Ÿ—‘๏ธ Deleting endpoint: ${EP_NAME}"
279
+ if aws sagemaker delete-endpoint \
280
+ --endpoint-name "${EP_NAME}" \
281
+ --region "${AWS_REGION}" &> /dev/null; then
282
+ echo "โœ… Endpoint deleted"
283
+ echo "โณ Waiting for endpoint deletion..."
284
+ aws sagemaker wait endpoint-deleted \
285
+ --endpoint-name "${EP_NAME}" \
286
+ --region "${AWS_REGION}" 2>/dev/null || sleep 10
287
+
288
+ # Mark endpoint as deleted in manifest (non-blocking)
289
+ ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint/${EP_NAME}" 2>/dev/null || true
290
+ else
291
+ echo "โŒ Failed to delete endpoint"
292
+ fi
293
+
294
+ # Delete endpoint configuration
295
+ if [ -n "${EPC_NAME}" ]; then
296
+ echo "๐Ÿ—‘๏ธ Deleting endpoint configuration: ${EPC_NAME}"
297
+ if aws sagemaker delete-endpoint-config \
298
+ --endpoint-config-name "${EPC_NAME}" \
299
+ --region "${AWS_REGION}" &> /dev/null; then
300
+ echo "โœ… Endpoint configuration deleted"
301
+
302
+ # Mark endpoint config as deleted in manifest (non-blocking)
303
+ ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint-config/${EPC_NAME}" 2>/dev/null || true
304
+ else
305
+ echo "โŒ Failed to delete endpoint configuration"
306
+ fi
307
+ fi
308
+
309
+ # Remove saved names from config
310
+ if grep -q "^export ENDPOINT_NAME=" "${SCRIPT_DIR}/config" 2>/dev/null; then
311
+ sed -i.bak '/^# Last deployed resources/d;/^export ENDPOINT_NAME=/d;/^export ENDPOINT_CONFIG_NAME=/d;/^export INFERENCE_COMPONENT_NAME=/d' "${SCRIPT_DIR}/config"
312
+ rm -f "${SCRIPT_DIR}/config.bak"
313
+ fi
314
+
315
+ echo "โœ… SageMaker resources cleaned"
316
+ }
317
+ <% } else if (deploymentTarget === 'async-inference') { %>
318
+ # Function to clean SageMaker async endpoint and model
319
+ clean_endpoint() {
320
+ echo "๐Ÿงน Cleaning SageMaker async resources"
321
+ echo " Project: ${PROJECT_NAME}"
322
+ echo " Region: ${AWS_REGION}"
323
+
324
+ # Validate AWS credentials
325
+ if ! aws sts get-caller-identity &> /dev/null; then
326
+ echo "โŒ AWS credentials not configured"
327
+ echo " Run: aws configure"
328
+ exit 4
329
+ fi
330
+
331
+ AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
332
+
333
+ # Use names from config (set by do/deploy) or argument
334
+ local EP_NAME="${ENDPOINT_NAME:-}"
335
+ local EPC_NAME="${ENDPOINT_CONFIG_NAME:-}"
336
+ local SM_MODEL_NAME="${SAGEMAKER_MODEL_NAME:-}"
337
+
338
+ if [ -z "${EP_NAME}" ]; then
339
+ echo "โŒ No endpoint name found"
340
+ echo " Run ./do/deploy first, or set ENDPOINT_NAME in do/config"
341
+ return 1
342
+ fi
343
+
344
+ echo ""
345
+ echo "Checking for SageMaker resources..."
346
+
347
+ local ENDPOINT_EXISTS=false
348
+
349
+ if aws sagemaker describe-endpoint \
350
+ --endpoint-name "${EP_NAME}" \
351
+ --region "${AWS_REGION}" &> /dev/null; then
352
+ ENDPOINT_EXISTS=true
353
+ echo " โœ“ Endpoint: ${EP_NAME}"
354
+ else
355
+ echo "โ„น๏ธ Endpoint not found: ${EP_NAME}"
356
+ return 0
357
+ fi
358
+
359
+ if ! confirm_action "This will delete the SageMaker async endpoint and model"; then
360
+ return 1
361
+ fi
362
+
363
+ # Delete endpoint
364
+ echo "๐Ÿ—‘๏ธ Deleting endpoint: ${EP_NAME}"
365
+ if aws sagemaker delete-endpoint \
366
+ --endpoint-name "${EP_NAME}" \
367
+ --region "${AWS_REGION}" &> /dev/null; then
368
+ echo "โœ… Endpoint deleted"
369
+ echo "โณ Waiting for endpoint deletion..."
370
+ aws sagemaker wait endpoint-deleted \
371
+ --endpoint-name "${EP_NAME}" \
372
+ --region "${AWS_REGION}" 2>/dev/null || sleep 10
373
+
374
+ # Mark endpoint as deleted in manifest (non-blocking)
375
+ ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint/${EP_NAME}" 2>/dev/null || true
376
+ else
377
+ echo "โŒ Failed to delete endpoint"
378
+ fi
379
+
380
+ # Delete endpoint configuration
381
+ if [ -n "${EPC_NAME}" ]; then
382
+ echo "๐Ÿ—‘๏ธ Deleting endpoint configuration: ${EPC_NAME}"
383
+ if aws sagemaker delete-endpoint-config \
384
+ --endpoint-config-name "${EPC_NAME}" \
385
+ --region "${AWS_REGION}" &> /dev/null; then
386
+ echo "โœ… Endpoint configuration deleted"
387
+
388
+ # Mark endpoint config as deleted in manifest (non-blocking)
389
+ ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint-config/${EPC_NAME}" 2>/dev/null || true
390
+ else
391
+ echo "โŒ Failed to delete endpoint configuration"
392
+ fi
393
+ fi
394
+
395
+ # Delete SageMaker model (async uses classic model-based flow)
396
+ if [ -n "${SM_MODEL_NAME}" ]; then
397
+ echo "๐Ÿ—‘๏ธ Deleting SageMaker model: ${SM_MODEL_NAME}"
398
+ if aws sagemaker delete-model \
399
+ --model-name "${SM_MODEL_NAME}" \
400
+ --region "${AWS_REGION}" &> /dev/null; then
401
+ echo "โœ… SageMaker model deleted"
402
+
403
+ # Mark model as deleted in manifest (non-blocking)
404
+ ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:model/${SM_MODEL_NAME}" 2>/dev/null || true
405
+ else
406
+ echo "โŒ Failed to delete SageMaker model"
407
+ fi
408
+ fi
409
+
410
+ # Remove saved names from config
411
+ if grep -q "^export ENDPOINT_NAME=" "${SCRIPT_DIR}/config" 2>/dev/null; then
412
+ sed -i.bak '/^# Last deployed resources/d;/^export ENDPOINT_NAME=/d;/^export ENDPOINT_CONFIG_NAME=/d;/^export SAGEMAKER_MODEL_NAME=/d' "${SCRIPT_DIR}/config"
413
+ rm -f "${SCRIPT_DIR}/config.bak"
414
+ fi
415
+
416
+ echo "โœ… SageMaker async resources cleaned"
417
+ }
418
+ <% } else if (deploymentTarget === 'batch-transform') { %>
419
+ # Function to clean SageMaker managed inference batch resources
420
+ clean_batch() {
421
+ echo "๐Ÿงน Cleaning SageMaker managed inference batch resources"
422
+ echo " Project: ${PROJECT_NAME}"
423
+ echo " Region: ${AWS_REGION}"
424
+
425
+ # Validate AWS credentials
426
+ if ! aws sts get-caller-identity &> /dev/null; then
427
+ echo "โŒ AWS credentials not configured"
428
+ echo " Run: aws configure"
429
+ exit 4
430
+ fi
431
+
432
+ # Use names from config (set by do/deploy)
433
+ local TJ_NAME="${TRANSFORM_JOB_NAME:-}"
434
+ local SM_MODEL_NAME="${SAGEMAKER_MODEL_NAME:-}"
435
+
436
+ if [ -z "${TJ_NAME}" ] && [ -z "${SM_MODEL_NAME}" ]; then
437
+ echo "โŒ No transform job or model name found"
438
+ echo " Run ./do/deploy first, or set TRANSFORM_JOB_NAME in do/config"
439
+ return 1
440
+ fi
441
+
442
+ echo ""
443
+ echo "Checking for SageMaker resources..."
444
+
445
+ # Check transform job status
446
+ local JOB_EXISTS=false
447
+ local JOB_STATUS=""
448
+ if [ -n "${TJ_NAME}" ]; then
449
+ JOB_STATUS=$(aws sagemaker describe-transform-job \
450
+ --transform-job-name "${TJ_NAME}" \
451
+ --region "${AWS_REGION}" \
452
+ --query 'TransformJobStatus' \
453
+ --output text 2>/dev/null || echo "")
454
+ if [ -n "${JOB_STATUS}" ]; then
455
+ JOB_EXISTS=true
456
+ echo " โœ“ Transform job: ${TJ_NAME} (${JOB_STATUS})"
457
+ else
458
+ echo "โ„น๏ธ Transform job not found: ${TJ_NAME}"
459
+ fi
460
+ fi
461
+
462
+ # Check model
463
+ local MODEL_EXISTS=false
464
+ if [ -n "${SM_MODEL_NAME}" ]; then
465
+ if aws sagemaker describe-model \
466
+ --model-name "${SM_MODEL_NAME}" \
467
+ --region "${AWS_REGION}" &> /dev/null; then
468
+ MODEL_EXISTS=true
469
+ echo " โœ“ SageMaker model: ${SM_MODEL_NAME}"
470
+ else
471
+ echo "โ„น๏ธ SageMaker model not found: ${SM_MODEL_NAME}"
472
+ fi
473
+ fi
474
+
475
+ if [ "${JOB_EXISTS}" = false ] && [ "${MODEL_EXISTS}" = false ]; then
476
+ echo "โ„น๏ธ No batch transform resources found to clean"
477
+ return 0
478
+ fi
479
+
480
+ if ! confirm_action "This will stop the transform job (if running) and delete the SageMaker model"; then
481
+ return 1
482
+ fi
483
+
484
+ # Stop transform job if in progress
485
+ if [ "${JOB_EXISTS}" = true ] && [ "${JOB_STATUS}" = "InProgress" ]; then
486
+ echo "๐Ÿ—‘๏ธ Stopping transform job: ${TJ_NAME}"
487
+ if aws sagemaker stop-transform-job \
488
+ --transform-job-name "${TJ_NAME}" \
489
+ --region "${AWS_REGION}" &> /dev/null; then
490
+ echo "โณ Waiting for transform job to stop..."
491
+ aws sagemaker wait transform-job-completed-or-stopped \
492
+ --transform-job-name "${TJ_NAME}" \
493
+ --region "${AWS_REGION}" 2>/dev/null || sleep 15
494
+ echo "โœ… Transform job stopped"
495
+
496
+ # Mark transform job as deleted in manifest (non-blocking)
497
+ AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
498
+ ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:transform-job/${TJ_NAME}" 2>/dev/null || true
499
+ else
500
+ echo "โŒ Failed to stop transform job"
501
+ fi
502
+ fi
503
+
504
+ # Delete SageMaker model
505
+ if [ "${MODEL_EXISTS}" = true ]; then
506
+ echo "๐Ÿ—‘๏ธ Deleting SageMaker model: ${SM_MODEL_NAME}"
507
+ if aws sagemaker delete-model \
508
+ --model-name "${SM_MODEL_NAME}" \
509
+ --region "${AWS_REGION}" &> /dev/null; then
510
+ echo "โœ… SageMaker model deleted"
511
+ else
512
+ echo "โŒ Failed to delete SageMaker model"
513
+ fi
514
+ fi
515
+
516
+ # Remove saved names from config
517
+ if grep -q "^export TRANSFORM_JOB_NAME=" "${SCRIPT_DIR}/config" 2>/dev/null; then
518
+ sed -i.bak '/^# Last deployed resources/d;/^export TRANSFORM_JOB_NAME=/d;/^export SAGEMAKER_MODEL_NAME=/d' "${SCRIPT_DIR}/config"
519
+ rm -f "${SCRIPT_DIR}/config.bak"
520
+ fi
521
+
522
+ echo "โœ… SageMaker managed inference batch resources cleaned"
523
+ }
524
+ <% } else if (deploymentTarget === 'hyperpod-eks') { %>
525
+ # Function to clean HyperPod EKS deployment
526
+ clean_hyperpod() {
527
+ echo "๐Ÿงน Cleaning HyperPod EKS resources"
528
+ echo " Cluster: ${HYPERPOD_CLUSTER_NAME}"
529
+ echo " Namespace: ${HYPERPOD_NAMESPACE}"
530
+
531
+ # Validate AWS credentials
532
+ if ! aws sts get-caller-identity &> /dev/null; then
533
+ echo "โŒ AWS credentials not configured"
534
+ echo " Run: aws configure"
535
+ exit 4
536
+ fi
537
+
538
+ # Get kubeconfig for HyperPod cluster
539
+ echo "๐Ÿ”‘ Configuring kubectl for HyperPod cluster..."
540
+ KUBECONFIG_PATH="${HOME}/.kube/hyperpod-${HYPERPOD_CLUSTER_NAME}"
541
+
542
+ EKS_CLUSTER_ARN=$(aws sagemaker describe-cluster \
543
+ --cluster-name "${HYPERPOD_CLUSTER_NAME}" \
544
+ --region "${AWS_REGION}" \
545
+ --query "Orchestrator.Eks.ClusterArn" \
546
+ --output text 2>&1) || {
547
+ echo "โŒ Failed to describe HyperPod cluster: ${HYPERPOD_CLUSTER_NAME}"
548
+ echo " Check that the cluster exists and you have permission to access it"
549
+ return 1
550
+ }
551
+
552
+ EKS_CLUSTER_NAME=$(echo "${EKS_CLUSTER_ARN}" | awk -F'/' '{print $NF}')
553
+
554
+ if ! aws eks update-kubeconfig \
555
+ --name "${EKS_CLUSTER_NAME}" \
556
+ --region "${AWS_REGION}" \
557
+ --kubeconfig "${KUBECONFIG_PATH}" 2>&1; then
558
+ echo "โŒ Failed to configure kubectl for EKS cluster: ${EKS_CLUSTER_NAME}"
559
+ return 1
560
+ fi
561
+
562
+ export KUBECONFIG="${KUBECONFIG_PATH}"
563
+
564
+ if ! confirm_action "This will delete the HyperPod deployment in namespace ${HYPERPOD_NAMESPACE}"; then
565
+ return 1
566
+ fi
567
+
568
+ # Delete Kubernetes resources
569
+ echo "๐Ÿ—‘๏ธ Deleting Kubernetes resources..."
570
+ AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
571
+ export AWS_ACCOUNT_ID
572
+ DELETE_FAILED=false
573
+ for manifest in hyperpod/*.yaml; do
574
+ # Skip files that contain no Kubernetes objects (e.g. comment-only PVC stubs)
575
+ RENDERED=$(envsubst < "${manifest}")
576
+ if echo "${RENDERED}" | grep -q '^kind:'; then
577
+ if ! echo "${RENDERED}" | kubectl delete -n "${HYPERPOD_NAMESPACE}" --ignore-not-found -f - 2>&1; then
578
+ DELETE_FAILED=true
579
+ fi
580
+ fi
581
+ done
582
+ if [ "${DELETE_FAILED}" = true ]; then
583
+ echo "โŒ Failed to delete some Kubernetes resources"
584
+ echo " You may need to manually clean up:"
585
+ echo " kubectl get all -n ${HYPERPOD_NAMESPACE}"
586
+ return 1
587
+ fi
588
+
589
+ # Mark k8s resources as deleted in manifest (non-blocking)
590
+ ./do/manifest delete --id "${HYPERPOD_NAMESPACE}/${PROJECT_NAME}" 2>/dev/null || true
591
+
592
+ echo "โœ… HyperPod EKS resources cleaned"
593
+ }
594
+ <% } %>
595
+
596
+ # Function to clean CodeBuild project and related resources
597
+ clean_codebuild() {
598
+ echo "๐Ÿงน Cleaning CodeBuild resources"
599
+ echo " Project: ${CODEBUILD_PROJECT_NAME:-not set}"
600
+ echo " Region: ${AWS_REGION}"
601
+
602
+ if [ -z "${CODEBUILD_PROJECT_NAME:-}" ]; then
603
+ echo "โ„น๏ธ No CodeBuild project name configured (build target may not be codebuild)"
604
+ return 0
605
+ fi
606
+
607
+ # Validate AWS credentials
608
+ if ! aws sts get-caller-identity &> /dev/null; then
609
+ echo "โŒ AWS credentials not configured"
610
+ echo " Run: aws configure"
611
+ exit 4
612
+ fi
613
+
614
+ AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
615
+
616
+ # Check if project exists
617
+ PROJECT_CHECK=$(aws codebuild batch-get-projects \
618
+ --names "${CODEBUILD_PROJECT_NAME}" \
619
+ --region "${AWS_REGION}" \
620
+ --query 'projects[0].name' \
621
+ --output text 2>/dev/null)
622
+
623
+ if [ "$PROJECT_CHECK" = "None" ] || [ -z "$PROJECT_CHECK" ] || [ "$PROJECT_CHECK" = "null" ]; then
624
+ echo "โ„น๏ธ CodeBuild project not found: ${CODEBUILD_PROJECT_NAME}"
625
+ return 0
626
+ fi
627
+
628
+ echo ""
629
+ echo "Resources to be removed:"
630
+ echo " โ€ข CodeBuild project: ${CODEBUILD_PROJECT_NAME}"
631
+
632
+ # Check for service role
633
+ ROLE_NAME="${CODEBUILD_PROJECT_NAME}-service-role"
634
+ ROLE_EXISTS=false
635
+ if aws iam get-role --role-name "${ROLE_NAME}" &> /dev/null; then
636
+ ROLE_EXISTS=true
637
+ echo " โ€ข IAM service role: ${ROLE_NAME}"
638
+ fi
639
+
640
+ # Check for S3 source bucket
641
+ S3_BUCKET="codebuild-source-${AWS_ACCOUNT_ID}-${AWS_REGION}"
642
+ S3_PREFIX="${PROJECT_NAME}/"
643
+ S3_EXISTS=false
644
+ if aws s3api head-bucket --bucket "$S3_BUCKET" --region "${AWS_REGION}" &> /dev/null; then
645
+ S3_COUNT=$(aws s3 ls "s3://$S3_BUCKET/$S3_PREFIX" --region "${AWS_REGION}" 2>/dev/null | wc -l | tr -d ' ')
646
+ if [ "$S3_COUNT" -gt 0 ]; then
647
+ S3_EXISTS=true
648
+ echo " โ€ข S3 source artifacts: s3://$S3_BUCKET/$S3_PREFIX ($S3_COUNT objects)"
649
+ fi
650
+ fi
651
+
652
+ if ! confirm_action "This will delete the CodeBuild project and associated resources"; then
653
+ return 1
654
+ fi
655
+
656
+ # Delete CodeBuild project
657
+ echo "๐Ÿ—‘๏ธ Deleting CodeBuild project: ${CODEBUILD_PROJECT_NAME}"
658
+ if aws codebuild delete-project \
659
+ --name "${CODEBUILD_PROJECT_NAME}" \
660
+ --region "${AWS_REGION}" &> /dev/null; then
661
+ echo "โœ… CodeBuild project deleted"
662
+
663
+ # Mark CodeBuild project as deleted in manifest (non-blocking)
664
+ ./do/manifest delete --id "arn:aws:codebuild:${AWS_REGION}:${AWS_ACCOUNT_ID}:project/${CODEBUILD_PROJECT_NAME}" 2>/dev/null || true
665
+ else
666
+ echo "โŒ Failed to delete CodeBuild project"
667
+ fi
668
+
669
+ # Delete IAM service role
670
+ if [ "$ROLE_EXISTS" = true ]; then
671
+ echo "๐Ÿ—‘๏ธ Deleting IAM service role: ${ROLE_NAME}"
672
+ # Remove inline policies first
673
+ POLICIES=$(aws iam list-role-policies --role-name "${ROLE_NAME}" --query 'PolicyNames' --output text 2>/dev/null || echo "")
674
+ for policy in $POLICIES; do
675
+ aws iam delete-role-policy --role-name "${ROLE_NAME}" --policy-name "$policy" 2>/dev/null || true
676
+ done
677
+ if aws iam delete-role --role-name "${ROLE_NAME}" &> /dev/null; then
678
+ echo "โœ… IAM service role deleted"
679
+
680
+ # Mark IAM role as deleted in manifest (non-blocking)
681
+ ./do/manifest delete --id "arn:aws:iam::${AWS_ACCOUNT_ID}:role/${ROLE_NAME}" 2>/dev/null || true
682
+ else
683
+ echo "โŒ Failed to delete IAM service role"
684
+ fi
685
+ fi
686
+
687
+ # Delete S3 source artifacts
688
+ if [ "$S3_EXISTS" = true ]; then
689
+ echo "๐Ÿ—‘๏ธ Deleting S3 source artifacts: s3://$S3_BUCKET/$S3_PREFIX"
690
+ if aws s3 rm "s3://$S3_BUCKET/$S3_PREFIX" --recursive --region "${AWS_REGION}" &> /dev/null; then
691
+ echo "โœ… S3 source artifacts deleted"
692
+ else
693
+ echo "โŒ Failed to delete S3 source artifacts"
694
+ fi
695
+ fi
696
+
697
+ echo "โœ… CodeBuild resources cleaned"
698
+ }
699
+
700
+ # Main script logic
701
+ echo "๐Ÿงน Cleanup script for ${PROJECT_NAME}"
702
+ echo ""
703
+
704
+ if [ -z "${CLEANUP_TARGET}" ]; then
705
+ <% if (deploymentTarget === 'batch-transform') { %>
706
+ CLEANUP_TARGET="batch"
707
+ <% } else if (deploymentTarget === 'hyperpod-eks') { %>
708
+ show_usage
709
+ exit 0
710
+ <% } else { %>
711
+ show_usage
712
+ exit 0
713
+ <% } %>
714
+ fi
715
+
716
+ case "${CLEANUP_TARGET}" in
717
+ local)
718
+ clean_local
719
+ ;;
720
+ ecr)
721
+ clean_ecr
722
+ ;;
723
+ <% if (deploymentTarget === 'managed-inference') { %>
724
+ endpoint)
725
+ clean_endpoint
726
+ ;;
727
+ <% } else if (deploymentTarget === 'async-inference') { %>
728
+ endpoint)
729
+ clean_endpoint
730
+ ;;
731
+ <% } else if (deploymentTarget === 'batch-transform') { %>
732
+ batch)
733
+ clean_batch
734
+ ;;
735
+ <% } else if (deploymentTarget === 'hyperpod-eks') { %>
736
+ hyperpod)
737
+ clean_hyperpod
738
+ ;;
739
+ <% } %>
740
+ codebuild)
741
+ clean_codebuild
742
+ ;;
743
+ all)
744
+ echo "๐Ÿงน Performing complete cleanup"
745
+ echo ""
746
+
747
+ # Track what was cleaned
748
+ CLEANED_ITEMS=()
749
+
750
+ # Clean local images
751
+ if clean_local; then
752
+ CLEANED_ITEMS+=("Local Docker images")
753
+ fi
754
+
755
+ echo ""
756
+
757
+ # Clean ECR images
758
+ if clean_ecr; then
759
+ CLEANED_ITEMS+=("ECR images")
760
+ fi
761
+
762
+ echo ""
763
+
764
+ <% if (deploymentTarget === 'managed-inference') { %>
765
+ # Clean SageMaker resources
766
+ if clean_endpoint; then
767
+ CLEANED_ITEMS+=("SageMaker resources")
768
+ fi
769
+ <% } else if (deploymentTarget === 'async-inference') { %>
770
+ # Clean SageMaker async resources
771
+ if clean_endpoint; then
772
+ CLEANED_ITEMS+=("SageMaker async resources")
773
+ fi
774
+ <% } else if (deploymentTarget === 'batch-transform') { %>
775
+ # Clean SageMaker managed inference batch resources
776
+ if clean_batch; then
777
+ CLEANED_ITEMS+=("SageMaker managed inference batch resources")
778
+ fi
779
+ <% } else if (deploymentTarget === 'hyperpod-eks') { %>
780
+ # Clean HyperPod EKS resources
781
+ if clean_hyperpod; then
782
+ CLEANED_ITEMS+=("HyperPod EKS resources")
783
+ fi
784
+ <% } %>
785
+
786
+ echo ""
787
+
788
+ # Clean CodeBuild resources
789
+ if clean_codebuild; then
790
+ CLEANED_ITEMS+=("CodeBuild resources")
791
+ fi
792
+
793
+ # Display summary
794
+ echo ""
795
+ echo "โœ… Cleanup complete!"
796
+ echo ""
797
+ echo "Summary of cleaned resources:"
798
+ for item in "${CLEANED_ITEMS[@]}"; do
799
+ echo " โœ“ ${item}"
800
+ done
801
+ ;;
802
+ *)
803
+ echo "โŒ Unknown cleanup target: ${CLEANUP_TARGET}"
804
+ echo ""
805
+ show_usage
806
+ exit 1
807
+ ;;
808
+ esac
809
+
810
+ echo ""
811
+ echo "Cleanup finished!"