@aws/ml-container-creator 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +31 -137
- package/config/parameter-schema-v2.json +2065 -0
- package/package.json +6 -3
- package/servers/lib/catalogs/jumpstart-public.json +101 -16
- package/servers/lib/catalogs/models.json +182 -26
- package/src/app.js +6 -389
- package/src/lib/bootstrap-command-handler.js +75 -1078
- package/src/lib/bootstrap-profile-manager.js +634 -0
- package/src/lib/bootstrap-provisioners.js +421 -0
- package/src/lib/config-loader.js +405 -0
- package/src/lib/config-manager.js +59 -1668
- package/src/lib/config-mcp-client.js +118 -0
- package/src/lib/config-validator.js +634 -0
- package/src/lib/cuda-resolver.js +140 -0
- package/src/lib/e2e-catalog-validator.js +251 -3
- package/src/lib/e2e-ci-recorder.js +103 -0
- package/src/lib/generated/cli-options.js +471 -0
- package/src/lib/generated/parameter-matrix.js +671 -0
- package/src/lib/generated/validation-rules.js +202 -0
- package/src/lib/marketplace-flow.js +276 -0
- package/src/lib/mcp-query-runner.js +768 -0
- package/src/lib/parameter-schema-validator.js +62 -18
- package/src/lib/prompt-runner.js +41 -1504
- package/src/lib/prompts/feature-prompts.js +172 -0
- package/src/lib/prompts/index.js +48 -0
- package/src/lib/prompts/infrastructure-prompts.js +690 -0
- package/src/lib/prompts/model-prompts.js +552 -0
- package/src/lib/prompts/project-prompts.js +70 -0
- package/src/lib/prompts.js +2 -1446
- package/src/lib/registry-command-handler.js +135 -3
- package/src/lib/secrets-prompt-runner.js +251 -0
- package/src/lib/template-variable-resolver.js +398 -0
- package/templates/code/serve +5 -134
- package/templates/code/serve.d/lmi.ejs +19 -0
- package/templates/code/serve.d/sglang.ejs +47 -0
- package/templates/code/serve.d/tensorrt-llm.ejs +53 -0
- package/templates/code/serve.d/vllm.ejs +48 -0
- package/templates/do/clean +1 -1387
- package/templates/do/clean.d/async-inference.ejs +508 -0
- package/templates/do/clean.d/batch-transform.ejs +512 -0
- package/templates/do/clean.d/hyperpod-eks.ejs +481 -0
- package/templates/do/clean.d/managed-inference.ejs +1043 -0
- package/templates/do/deploy +1 -1766
- package/templates/do/deploy.d/async-inference.ejs +501 -0
- package/templates/do/deploy.d/batch-transform.ejs +529 -0
- package/templates/do/deploy.d/hyperpod-eks.ejs +339 -0
- package/templates/do/deploy.d/managed-inference.ejs +726 -0
- package/config/parameter-schema.json +0 -88
package/templates/do/clean
CHANGED
|
@@ -1,1387 +1 @@
|
|
|
1
|
-
|
|
2
|
-
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
3
|
-
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
|
|
5
|
-
# Strict mode: abort on command failure (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Load the project configuration that sits next to this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
. "${SCRIPT_DIR}/config"
|
|
12
|
-
|
|
13
|
-
# Command-line parsing.
#   --force        sets FORCE_CLEAN=true
#   -<anything>    any other dash-prefixed word is silently ignored
#   1st bare word  becomes CLEANUP_TARGET
#   2nd bare word  becomes CLEANUP_ARG
# Bare words after the second are dropped.
CLEANUP_TARGET=""
CLEANUP_ARG=""
FORCE_CLEAN=false

for arg in "$@"; do
    if [[ "${arg}" == "--force" ]]; then
        FORCE_CLEAN=true
    elif [[ "${arg}" == -* ]]; then
        : # ignore other flags
    elif [[ -z "${CLEANUP_TARGET}" ]]; then
        CLEANUP_TARGET="${arg}"
    elif [[ -z "${CLEANUP_ARG}" ]]; then
        CLEANUP_ARG="${arg}"
    fi
done
|
|
31
|
-
|
|
32
|
-
# Function to display usage
|
|
33
|
-
show_usage() {
|
|
34
|
-
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
35
|
-
echo "Usage: ./do/clean [local|ecr|endpoint|ic <name>|adapter <name>|adapters|codebuild|all]"
|
|
36
|
-
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
37
|
-
echo "Usage: ./do/clean [local|ecr|endpoint|codebuild|all]"
|
|
38
|
-
<% } else if (deploymentTarget === 'batch-transform') { %>
|
|
39
|
-
echo "Usage: ./do/clean [local|ecr|batch|codebuild|all]"
|
|
40
|
-
<% } else if (deploymentTarget === 'hyperpod-eks') { %>
|
|
41
|
-
echo "Usage: ./do/clean [local|ecr|hyperpod|codebuild|all]"
|
|
42
|
-
<% } %>
|
|
43
|
-
echo ""
|
|
44
|
-
echo "Cleanup targets:"
|
|
45
|
-
echo " local - Remove local Docker images"
|
|
46
|
-
echo " ecr - Remove images from Amazon ECR"
|
|
47
|
-
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
48
|
-
echo " endpoint - Delete SageMaker endpoint, configuration, and inference components"
|
|
49
|
-
echo " ic <name> - Delete a single inference component (does not touch the endpoint)"
|
|
50
|
-
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
51
|
-
echo " adapter <name> - Delete a single LoRA adapter (synonym for do/adapter remove)"
|
|
52
|
-
echo " adapters - Remove ALL LoRA adapters (keeps base IC and endpoint running)"
|
|
53
|
-
<% } %>
|
|
54
|
-
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
55
|
-
echo " endpoint - Delete SageMaker async endpoint, configuration, and inference component"
|
|
56
|
-
<% } else if (deploymentTarget === 'batch-transform') { %>
|
|
57
|
-
echo " batch - Stop running transform job and delete SageMaker model"
|
|
58
|
-
<% } else if (deploymentTarget === 'hyperpod-eks') { %>
|
|
59
|
-
echo " hyperpod - Delete HyperPod EKS deployment and services"
|
|
60
|
-
<% } %>
|
|
61
|
-
echo " codebuild - Delete CodeBuild project, IAM role, and S3 source artifacts"
|
|
62
|
-
echo " all - Perform all cleanup operations"
|
|
63
|
-
echo ""
|
|
64
|
-
echo "Examples:"
|
|
65
|
-
echo " ./do/clean local # Remove local Docker images only"
|
|
66
|
-
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
67
|
-
echo " ./do/clean endpoint # Delete SageMaker resources only"
|
|
68
|
-
echo " ./do/clean ic llama # Delete a single inference component"
|
|
69
|
-
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
70
|
-
echo " ./do/clean adapter ectsum # Delete a single LoRA adapter"
|
|
71
|
-
echo " ./do/clean adapters # Remove all LoRA adapters"
|
|
72
|
-
<% } %>
|
|
73
|
-
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
74
|
-
echo " ./do/clean endpoint # Delete SageMaker async resources only"
|
|
75
|
-
<% } else if (deploymentTarget === 'batch-transform') { %>
|
|
76
|
-
echo " ./do/clean batch # Stop transform job and delete model"
|
|
77
|
-
<% } else if (deploymentTarget === 'hyperpod-eks') { %>
|
|
78
|
-
echo " ./do/clean hyperpod # Delete HyperPod EKS resources only"
|
|
79
|
-
<% } %>
|
|
80
|
-
echo " ./do/clean codebuild # Delete CodeBuild project and rebuild fresh"
|
|
81
|
-
echo " ./do/clean all # Clean up everything"
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
# Ask the user to confirm a destructive action.
#
# Globals:   FORCE_CLEAN (read) - when "true" the prompt is skipped
# Arguments: $1 - description of what is about to happen
# Outputs:   warning and cancellation messages on stdout
# Returns:   0 when confirmed (or forced), 1 otherwise
#
# Only a full "yes" (any letter case) confirms. The answer is read into a
# named variable so that `read` strips surrounding whitespace; " yes " is
# therefore accepted instead of being rejected as in a raw $REPLY compare.
# When stdin is closed or empty (non-interactive run) the function refuses
# and points at --force rather than failing with a bare "cancelled".
confirm_action() {
    local message="$1"
    local answer=""

    if [ "${FORCE_CLEAN:-false}" = true ]; then
        return 0
    fi

    echo ""
    echo "⚠️ ${message}"

    # `read` returns non-zero at EOF; a partial last line may still be in
    # ${answer}, so only treat it as "no input" when nothing was read.
    if ! read -r -p " Are you sure? (yes/no): " answer && [ -z "${answer}" ]; then
        echo ""
        echo "❌ Operation cancelled (no input received; use --force to skip confirmation)"
        return 1
    fi
    echo ""

    if [[ ! "${answer}" =~ ^[Yy][Ee][Ss]$ ]]; then
        echo "❌ Operation cancelled"
        return 1
    fi
    return 0
}
|
|
100
|
-
|
|
101
|
-
# Remove this project's local Docker images.
#
# Two kinds of image references are selected:
#   1. locally built images:  <PROJECT_NAME>:<tag>
#   2. ECR-pulled images:     ...dkr.ecr.<region>.amazonaws.com/<ECR_REPOSITORY_NAME>:<PROJECT_NAME>-<tag>
#
# Globals:   PROJECT_NAME, ECR_REPOSITORY_NAME (read)
# Outputs:   progress on stdout; a warning on stderr if Docker cannot be queried
# Returns:   0 on success or when nothing matches, 1 when the user declines
#
# The image list is captured once and matched with literal prefix/glob
# comparisons:
#   - `docker images | grep -q` under `set -o pipefail` can fail spuriously
#     when grep exits early, which made existing images look absent;
#   - project/repository names are no longer interpreted as regex syntax
#     (e.g. "my.proj" used to match "myxproj");
#   - exactly the images shown to the user are the ones removed.
clean_local() {
    echo "🧹 Cleaning local Docker images"
    echo " Project: ${PROJECT_NAME}"

    local image_listing
    if ! image_listing=$(docker images --format "{{.Repository}}:{{.Tag}}" 2> /dev/null); then
        # Best effort, as before: an unavailable Docker is not fatal here.
        echo "⚠️ Unable to list Docker images (is Docker installed and running?)" >&2
        echo "ℹ️ No local images found for ${PROJECT_NAME}"
        return 0
    fi

    local -a matched_images=()
    local image
    while IFS= read -r image; do
        [ -n "${image}" ] || continue
        if [[ "${image}" == "${PROJECT_NAME}:"* ]] ||
            [[ "${image}" == *.dkr.ecr.*".amazonaws.com/${ECR_REPOSITORY_NAME}:${PROJECT_NAME}-"* ]]; then
            matched_images+=("${image}")
        fi
    done <<< "${image_listing}"

    if [ "${#matched_images[@]}" -eq 0 ]; then
        echo "ℹ️ No local images found for ${PROJECT_NAME}"
        return 0
    fi

    # List images to be removed
    echo ""
    echo "Images to be removed:"
    for image in "${matched_images[@]}"; do
        echo " ${image}"
    done

    if ! confirm_action "This will remove all local Docker images for ${PROJECT_NAME}"; then
        return 1
    fi

    # Remove images; a single failure is reported but does not stop the loop.
    echo "🗑️ Removing local images..."
    for image in "${matched_images[@]}"; do
        echo " Removing: ${image}"
        docker rmi "${image}" || echo " ⚠️ Failed to remove ${image}"
    done

    echo "✅ Local images cleaned"
}
|
|
138
|
-
|
|
139
|
-
# Remove this project's images from the (possibly shared) ECR repository.
#
# Only tags starting with "<PROJECT_NAME>-" are touched; other projects'
# images in the same repository are left alone.
#
# Globals:   ECR_REPOSITORY_NAME, AWS_REGION, PROJECT_NAME (read)
#            AWS_ACCOUNT_ID (written)
# Outputs:   progress on stdout, listing errors on stderr
# Returns:   0 on success / nothing to do, 1 on user decline or AWS failure
# Exits:     4 when AWS credentials are not configured
#
# Fixes relative to the previous implementation:
#   - The JMESPath filter guards `starts_with` with `imageTag &&`. Untagged
#     images have no imageTag, and starts_with on a null value is a type
#     error, which used to surface as "No images found".
#   - A failed list-images call is reported as an error instead of being
#     treated as an empty repository, and stderr is no longer mixed into
#     the tag list.
#   - Deletion is issued in batches of at most 100 image IDs, the documented
#     per-request limit of batch-delete-image.
#   - The confirmation text now states that only this project's images are
#     removed.
clean_ecr() {
    echo "🧹 Cleaning ECR images"
    echo " Repository: ${ECR_REPOSITORY_NAME}"
    echo " Region: ${AWS_REGION}"

    # Validate AWS credentials
    if ! aws sts get-caller-identity &> /dev/null; then
        echo "❌ AWS credentials not configured"
        echo " Run: aws configure"
        exit 4
    fi

    # Intentionally global, as before.
    AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)

    # Check if repository exists
    if ! aws ecr describe-repositories \
        --repository-names "${ECR_REPOSITORY_NAME}" \
        --region "${AWS_REGION}" &> /dev/null; then
        echo "ℹ️ ECR repository ${ECR_REPOSITORY_NAME} does not exist"
        return 0
    fi

    echo ""
    echo "Checking for images in repository..."

    local tag_query="imageIds[?imageTag && starts_with(imageTag, '${PROJECT_NAME}-')].[imageTag]"
    local tag_listing
    if ! tag_listing=$(aws ecr list-images \
        --repository-name "${ECR_REPOSITORY_NAME}" \
        --region "${AWS_REGION}" \
        --query "${tag_query}" \
        --output text); then
        echo "❌ Failed to list images in ECR repository ${ECR_REPOSITORY_NAME}" >&2
        return 1
    fi

    # One tag per line in text output; "None" is the CLI's rendering of null.
    local -a project_tags=()
    local tag
    while IFS= read -r tag; do
        if [ -n "${tag}" ] && [ "${tag}" != "None" ]; then
            project_tags+=("${tag}")
        fi
    done <<< "${tag_listing}"

    if [ "${#project_tags[@]}" -eq 0 ]; then
        echo "ℹ️ No images found for project: ${PROJECT_NAME}"
        return 0
    fi

    # Display images
    echo "Images for project ${PROJECT_NAME}:"
    for tag in "${project_tags[@]}"; do
        echo " - ${tag}"
    done

    if ! confirm_action "This will remove all ${PROJECT_NAME} images from ECR repository ${ECR_REPOSITORY_NAME}"; then
        return 1
    fi

    echo "🗑️ Removing ECR images..."

    # Delete by tag, in chunks that respect the per-request ID limit.
    local -r batch_size=100
    local -a batch_ids
    local offset=0
    local failed=false
    while [ "${offset}" -lt "${#project_tags[@]}" ]; do
        batch_ids=()
        for tag in "${project_tags[@]:offset:batch_size}"; do
            batch_ids+=("imageTag=${tag}")
        done
        if ! aws ecr batch-delete-image \
            --repository-name "${ECR_REPOSITORY_NAME}" \
            --region "${AWS_REGION}" \
            --image-ids "${batch_ids[@]}" &> /dev/null; then
            failed=true
        fi
        offset=$((offset + batch_size))
    done

    if [ "${failed}" = true ]; then
        echo "❌ Failed to remove some ECR images"
        return 1
    fi
    echo "✅ ECR images removed for project: ${PROJECT_NAME}"
}
|
|
217
|
-
|
|
218
|
-
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
219
|
-
# Function to clean SageMaker endpoint and inference components
|
|
220
|
-
clean_endpoint() {
|
|
221
|
-
echo "🧹 Cleaning SageMaker resources"
|
|
222
|
-
echo " Project: ${PROJECT_NAME}"
|
|
223
|
-
echo " Region: ${AWS_REGION}"
|
|
224
|
-
|
|
225
|
-
# Validate AWS credentials
|
|
226
|
-
if ! aws sts get-caller-identity &> /dev/null; then
|
|
227
|
-
echo "❌ AWS credentials not configured"
|
|
228
|
-
echo " Run: aws configure"
|
|
229
|
-
exit 4
|
|
230
|
-
fi
|
|
231
|
-
|
|
232
|
-
AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
|
|
233
|
-
|
|
234
|
-
# Use names from config (set by do/deploy) or argument
|
|
235
|
-
local EP_NAME="${ENDPOINT_NAME:-}"
|
|
236
|
-
local IC_NAME="${INFERENCE_COMPONENT_NAME:-}"
|
|
237
|
-
local EPC_NAME="${ENDPOINT_CONFIG_NAME:-}"
|
|
238
|
-
|
|
239
|
-
if [ -z "${EP_NAME}" ]; then
|
|
240
|
-
echo "❌ No endpoint name found"
|
|
241
|
-
echo " Run ./do/deploy first, or set ENDPOINT_NAME in do/config"
|
|
242
|
-
return 1
|
|
243
|
-
fi
|
|
244
|
-
|
|
245
|
-
# External endpoint: only remove inference components, not the endpoint itself
|
|
246
|
-
if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
|
|
247
|
-
echo ""
|
|
248
|
-
echo "⚠️ Endpoint is external — only removing inference components"
|
|
249
|
-
echo " Endpoint ${EP_NAME} will NOT be deleted (managed externally)."
|
|
250
|
-
echo ""
|
|
251
|
-
|
|
252
|
-
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
253
|
-
# Delete adapter ICs first (adapters depend on base ICs)
|
|
254
|
-
if [ -d "${SCRIPT_DIR}/adapters" ]; then
|
|
255
|
-
local ADAPTER_COUNT=0
|
|
256
|
-
for adapter_conf in "${SCRIPT_DIR}"/adapters/*.conf; do
|
|
257
|
-
[ -f "${adapter_conf}" ] || continue
|
|
258
|
-
ADAPTER_COUNT=$((ADAPTER_COUNT + 1))
|
|
259
|
-
done
|
|
260
|
-
|
|
261
|
-
if [ "${ADAPTER_COUNT}" -gt 0 ]; then
|
|
262
|
-
echo "🔌 Deleting ${ADAPTER_COUNT} LoRA adapter(s) first..."
|
|
263
|
-
for adapter_conf in "${SCRIPT_DIR}"/adapters/*.conf; do
|
|
264
|
-
[ -f "${adapter_conf}" ] || continue
|
|
265
|
-
local adapter_ic_name=""
|
|
266
|
-
adapter_ic_name=$(grep "^export ADAPTER_IC_NAME=" "${adapter_conf}" 2>/dev/null | sed 's/^export ADAPTER_IC_NAME="//' | sed 's/"$//' || echo "")
|
|
267
|
-
local adapter_display_name
|
|
268
|
-
adapter_display_name=$(basename "${adapter_conf}" .conf)
|
|
269
|
-
|
|
270
|
-
if [ -n "${adapter_ic_name}" ]; then
|
|
271
|
-
echo "🗑️ Deleting adapter: ${adapter_display_name} (${adapter_ic_name})"
|
|
272
|
-
if aws sagemaker delete-inference-component \
|
|
273
|
-
--inference-component-name "${adapter_ic_name}" \
|
|
274
|
-
--region "${AWS_REGION}" 2>/dev/null; then
|
|
275
|
-
echo "⏳ Waiting for adapter deletion..."
|
|
276
|
-
aws sagemaker wait inference-component-deleted \
|
|
277
|
-
--inference-component-name "${adapter_ic_name}" \
|
|
278
|
-
--region "${AWS_REGION}" 2>/dev/null || sleep 15
|
|
279
|
-
echo "✅ Adapter deleted: ${adapter_display_name}"
|
|
280
|
-
|
|
281
|
-
# Mark adapter IC as deleted in manifest (non-blocking)
|
|
282
|
-
./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${adapter_ic_name}" 2>/dev/null || true
|
|
283
|
-
else
|
|
284
|
-
echo "⚠️ Failed to delete adapter IC: ${adapter_ic_name} (may already be gone)"
|
|
285
|
-
fi
|
|
286
|
-
fi
|
|
287
|
-
|
|
288
|
-
# Remove adapter conf file
|
|
289
|
-
rm -f "${adapter_conf}"
|
|
290
|
-
done
|
|
291
|
-
echo "✅ All adapters deleted"
|
|
292
|
-
echo ""
|
|
293
|
-
fi
|
|
294
|
-
fi
|
|
295
|
-
|
|
296
|
-
<% } %>
|
|
297
|
-
# Iterate do/ic/*.conf and delete each IC owned by this project
|
|
298
|
-
local IC_DELETED=0
|
|
299
|
-
if [ -d "${SCRIPT_DIR}/ic" ]; then
|
|
300
|
-
for conf in "${SCRIPT_DIR}"/ic/*.conf; do
|
|
301
|
-
[ -f "${conf}" ] || continue
|
|
302
|
-
local ic_deployed_name=""
|
|
303
|
-
if grep -q "^export IC_DEPLOYED_NAME=" "${conf}" 2>/dev/null; then
|
|
304
|
-
ic_deployed_name=$(grep "^export IC_DEPLOYED_NAME=" "${conf}" | sed 's/^export IC_DEPLOYED_NAME="//' | sed 's/"$//')
|
|
305
|
-
fi
|
|
306
|
-
if [ -n "${ic_deployed_name}" ]; then
|
|
307
|
-
echo "🗑️ Deleting inference component: ${ic_deployed_name}"
|
|
308
|
-
if aws sagemaker delete-inference-component \
|
|
309
|
-
--inference-component-name "${ic_deployed_name}" \
|
|
310
|
-
--region "${AWS_REGION}" 2>/dev/null; then
|
|
311
|
-
echo "⏳ Waiting for inference component deletion..."
|
|
312
|
-
aws sagemaker wait inference-component-deleted \
|
|
313
|
-
--inference-component-name "${ic_deployed_name}" \
|
|
314
|
-
--region "${AWS_REGION}" 2>/dev/null || sleep 15
|
|
315
|
-
echo "✅ Inference component deleted: ${ic_deployed_name}"
|
|
316
|
-
|
|
317
|
-
# Mark inference component as deleted in manifest (non-blocking)
|
|
318
|
-
./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${ic_deployed_name}" 2>/dev/null || true
|
|
319
|
-
|
|
320
|
-
# Clear deployed state from config
|
|
321
|
-
sed -i.bak '/^export IC_DEPLOYED_NAME=/d;/^export IC_DEPLOYED_AT=/d' "${conf}"
|
|
322
|
-
rm -f "${conf}.bak"
|
|
323
|
-
else
|
|
324
|
-
echo "⚠️ Failed to delete inference component: ${ic_deployed_name}"
|
|
325
|
-
fi
|
|
326
|
-
IC_DELETED=$((IC_DELETED + 1))
|
|
327
|
-
fi
|
|
328
|
-
done
|
|
329
|
-
fi
|
|
330
|
-
|
|
331
|
-
# Also handle legacy single IC from config
|
|
332
|
-
if [ -n "${IC_NAME}" ]; then
|
|
333
|
-
if aws sagemaker describe-inference-component \
|
|
334
|
-
--inference-component-name "${IC_NAME}" \
|
|
335
|
-
--region "${AWS_REGION}" &> /dev/null; then
|
|
336
|
-
echo "🗑️ Deleting inference component: ${IC_NAME}"
|
|
337
|
-
if aws sagemaker delete-inference-component \
|
|
338
|
-
--inference-component-name "${IC_NAME}" \
|
|
339
|
-
--region "${AWS_REGION}" 2>/dev/null; then
|
|
340
|
-
echo "⏳ Waiting for inference component deletion..."
|
|
341
|
-
aws sagemaker wait inference-component-deleted \
|
|
342
|
-
--inference-component-name "${IC_NAME}" \
|
|
343
|
-
--region "${AWS_REGION}" 2>/dev/null || sleep 15
|
|
344
|
-
echo "✅ Inference component deleted: ${IC_NAME}"
|
|
345
|
-
|
|
346
|
-
# Mark inference component as deleted in manifest (non-blocking)
|
|
347
|
-
./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${IC_NAME}" 2>/dev/null || true
|
|
348
|
-
fi
|
|
349
|
-
IC_DELETED=$((IC_DELETED + 1))
|
|
350
|
-
fi
|
|
351
|
-
fi
|
|
352
|
-
|
|
353
|
-
if [ "${IC_DELETED}" -eq 0 ]; then
|
|
354
|
-
echo "ℹ️ No deployed inference components found to clean"
|
|
355
|
-
fi
|
|
356
|
-
|
|
357
|
-
echo "✅ External endpoint cleanup complete (endpoint preserved)"
|
|
358
|
-
return 0
|
|
359
|
-
fi
|
|
360
|
-
|
|
361
|
-
echo ""
|
|
362
|
-
echo "Checking for SageMaker resources..."
|
|
363
|
-
|
|
364
|
-
local ENDPOINT_EXISTS=false
|
|
365
|
-
|
|
366
|
-
if aws sagemaker describe-endpoint \
|
|
367
|
-
--endpoint-name "${EP_NAME}" \
|
|
368
|
-
--region "${AWS_REGION}" &> /dev/null; then
|
|
369
|
-
ENDPOINT_EXISTS=true
|
|
370
|
-
echo " ✓ Endpoint: ${EP_NAME}"
|
|
371
|
-
else
|
|
372
|
-
echo "ℹ️ Endpoint not found: ${EP_NAME}"
|
|
373
|
-
return 0
|
|
374
|
-
fi
|
|
375
|
-
|
|
376
|
-
# Count ICs to be deleted (multi-IC path)
|
|
377
|
-
local IC_COUNT=0
|
|
378
|
-
local IC_NAMES_TO_DELETE=()
|
|
379
|
-
local IC_CONFS_TO_CLEAN=()
|
|
380
|
-
|
|
381
|
-
if [ -d "${SCRIPT_DIR}/ic" ]; then
|
|
382
|
-
for conf in "${SCRIPT_DIR}"/ic/*.conf; do
|
|
383
|
-
[ -f "${conf}" ] || continue
|
|
384
|
-
local ic_deployed_name=""
|
|
385
|
-
if grep -q "^export IC_DEPLOYED_NAME=" "${conf}" 2>/dev/null; then
|
|
386
|
-
ic_deployed_name=$(grep "^export IC_DEPLOYED_NAME=" "${conf}" | sed 's/^export IC_DEPLOYED_NAME="//' | sed 's/"$//')
|
|
387
|
-
fi
|
|
388
|
-
if [ -n "${ic_deployed_name}" ]; then
|
|
389
|
-
IC_NAMES_TO_DELETE+=("${ic_deployed_name}")
|
|
390
|
-
IC_CONFS_TO_CLEAN+=("${conf}")
|
|
391
|
-
IC_COUNT=$((IC_COUNT + 1))
|
|
392
|
-
echo " ✓ Inference component: ${ic_deployed_name}"
|
|
393
|
-
fi
|
|
394
|
-
done
|
|
395
|
-
fi
|
|
396
|
-
|
|
397
|
-
# Legacy: check single IC from config (no do/ic/ directory)
|
|
398
|
-
local IC_EXISTS=false
|
|
399
|
-
if [ "${IC_COUNT}" -eq 0 ] && [ -n "${IC_NAME}" ]; then
|
|
400
|
-
if aws sagemaker describe-inference-component \
|
|
401
|
-
--inference-component-name "${IC_NAME}" \
|
|
402
|
-
--region "${AWS_REGION}" &> /dev/null; then
|
|
403
|
-
IC_EXISTS=true
|
|
404
|
-
IC_COUNT=1
|
|
405
|
-
echo " ✓ Inference component: ${IC_NAME}"
|
|
406
|
-
fi
|
|
407
|
-
fi
|
|
408
|
-
|
|
409
|
-
# Confirmation with IC count
|
|
410
|
-
local confirm_msg="Delete ${IC_COUNT} inference component"
|
|
411
|
-
if [ "${IC_COUNT}" -ne 1 ]; then
|
|
412
|
-
confirm_msg="${confirm_msg}s"
|
|
413
|
-
fi
|
|
414
|
-
confirm_msg="${confirm_msg} and endpoint?"
|
|
415
|
-
|
|
416
|
-
if ! confirm_action "${confirm_msg}"; then
|
|
417
|
-
return 1
|
|
418
|
-
fi
|
|
419
|
-
|
|
420
|
-
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
421
|
-
# Delete adapter ICs first (adapters depend on base ICs)
|
|
422
|
-
if [ -d "${SCRIPT_DIR}/adapters" ]; then
|
|
423
|
-
local ADAPTER_COUNT=0
|
|
424
|
-
for adapter_conf in "${SCRIPT_DIR}"/adapters/*.conf; do
|
|
425
|
-
[ -f "${adapter_conf}" ] || continue
|
|
426
|
-
ADAPTER_COUNT=$((ADAPTER_COUNT + 1))
|
|
427
|
-
done
|
|
428
|
-
|
|
429
|
-
if [ "${ADAPTER_COUNT}" -gt 0 ]; then
|
|
430
|
-
echo ""
|
|
431
|
-
echo "🔌 Deleting ${ADAPTER_COUNT} LoRA adapter(s) first..."
|
|
432
|
-
for adapter_conf in "${SCRIPT_DIR}"/adapters/*.conf; do
|
|
433
|
-
[ -f "${adapter_conf}" ] || continue
|
|
434
|
-
local adapter_ic_name=""
|
|
435
|
-
adapter_ic_name=$(grep "^export ADAPTER_IC_NAME=" "${adapter_conf}" 2>/dev/null | sed 's/^export ADAPTER_IC_NAME="//' | sed 's/"$//' || echo "")
|
|
436
|
-
local adapter_display_name
|
|
437
|
-
adapter_display_name=$(basename "${adapter_conf}" .conf)
|
|
438
|
-
|
|
439
|
-
if [ -n "${adapter_ic_name}" ]; then
|
|
440
|
-
echo "🗑️ Deleting adapter: ${adapter_display_name} (${adapter_ic_name})"
|
|
441
|
-
if aws sagemaker delete-inference-component \
|
|
442
|
-
--inference-component-name "${adapter_ic_name}" \
|
|
443
|
-
--region "${AWS_REGION}" 2>/dev/null; then
|
|
444
|
-
echo "⏳ Waiting for adapter deletion..."
|
|
445
|
-
aws sagemaker wait inference-component-deleted \
|
|
446
|
-
--inference-component-name "${adapter_ic_name}" \
|
|
447
|
-
--region "${AWS_REGION}" 2>/dev/null || sleep 15
|
|
448
|
-
echo "✅ Adapter deleted: ${adapter_display_name}"
|
|
449
|
-
|
|
450
|
-
# Mark adapter IC as deleted in manifest (non-blocking)
|
|
451
|
-
./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${adapter_ic_name}" 2>/dev/null || true
|
|
452
|
-
else
|
|
453
|
-
echo "⚠️ Failed to delete adapter IC: ${adapter_ic_name} (may already be gone)"
|
|
454
|
-
fi
|
|
455
|
-
fi
|
|
456
|
-
|
|
457
|
-
# Remove adapter conf file
|
|
458
|
-
rm -f "${adapter_conf}"
|
|
459
|
-
done
|
|
460
|
-
echo "✅ All adapters deleted"
|
|
461
|
-
echo ""
|
|
462
|
-
fi
|
|
463
|
-
fi
|
|
464
|
-
|
|
465
|
-
<% } %>
|
|
466
|
-
# Delete inference components first (must be deleted before endpoint)
|
|
467
|
-
if [ ${#IC_NAMES_TO_DELETE[@]} -gt 0 ]; then
|
|
468
|
-
# Multi-IC path: iterate do/ic/*.conf
|
|
469
|
-
local idx=0
|
|
470
|
-
for ic_deployed_name in "${IC_NAMES_TO_DELETE[@]}"; do
|
|
471
|
-
local conf="${IC_CONFS_TO_CLEAN[$idx]}"
|
|
472
|
-
echo "🗑️ Deleting inference component: ${ic_deployed_name}"
|
|
473
|
-
if aws sagemaker delete-inference-component \
|
|
474
|
-
--inference-component-name "${ic_deployed_name}" \
|
|
475
|
-
--region "${AWS_REGION}" 2>/dev/null; then
|
|
476
|
-
echo "⏳ Waiting for inference component deletion..."
|
|
477
|
-
aws sagemaker wait inference-component-deleted \
|
|
478
|
-
--inference-component-name "${ic_deployed_name}" \
|
|
479
|
-
--region "${AWS_REGION}" 2>/dev/null || sleep 15
|
|
480
|
-
echo "✅ Inference component deleted: ${ic_deployed_name}"
|
|
481
|
-
|
|
482
|
-
# Mark inference component as deleted in manifest (non-blocking)
|
|
483
|
-
./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${ic_deployed_name}" 2>/dev/null || true
|
|
484
|
-
|
|
485
|
-
# Clear deployed state from config
|
|
486
|
-
sed -i.bak '/^export IC_DEPLOYED_NAME=/d;/^export IC_DEPLOYED_AT=/d' "${conf}"
|
|
487
|
-
rm -f "${conf}.bak"
|
|
488
|
-
else
|
|
489
|
-
echo "❌ Failed to delete inference component: ${ic_deployed_name}"
|
|
490
|
-
fi
|
|
491
|
-
idx=$((idx + 1))
|
|
492
|
-
done
|
|
493
|
-
elif [ "${IC_EXISTS}" = true ]; then
|
|
494
|
-
# Legacy single IC path
|
|
495
|
-
echo "🗑️ Deleting inference component: ${IC_NAME}"
|
|
496
|
-
if aws sagemaker delete-inference-component \
|
|
497
|
-
--inference-component-name "${IC_NAME}" \
|
|
498
|
-
--region "${AWS_REGION}" &> /dev/null; then
|
|
499
|
-
echo "⏳ Waiting for inference component deletion..."
|
|
500
|
-
aws sagemaker wait inference-component-deleted \
|
|
501
|
-
--inference-component-name "${IC_NAME}" \
|
|
502
|
-
--region "${AWS_REGION}" 2>/dev/null || sleep 15
|
|
503
|
-
echo "✅ Inference component deleted"
|
|
504
|
-
|
|
505
|
-
# Mark inference component as deleted in manifest (non-blocking)
|
|
506
|
-
./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${IC_NAME}" 2>/dev/null || true
|
|
507
|
-
else
|
|
508
|
-
echo "❌ Failed to delete inference component"
|
|
509
|
-
fi
|
|
510
|
-
fi
|
|
511
|
-
|
|
512
|
-
# Delete endpoint
|
|
513
|
-
echo "🗑️ Deleting endpoint: ${EP_NAME}"
|
|
514
|
-
if aws sagemaker delete-endpoint \
|
|
515
|
-
--endpoint-name "${EP_NAME}" \
|
|
516
|
-
--region "${AWS_REGION}" &> /dev/null; then
|
|
517
|
-
echo "✅ Endpoint deleted"
|
|
518
|
-
echo "⏳ Waiting for endpoint deletion..."
|
|
519
|
-
aws sagemaker wait endpoint-deleted \
|
|
520
|
-
--endpoint-name "${EP_NAME}" \
|
|
521
|
-
--region "${AWS_REGION}" 2>/dev/null || sleep 10
|
|
522
|
-
|
|
523
|
-
# Mark endpoint as deleted in manifest (non-blocking)
|
|
524
|
-
./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint/${EP_NAME}" 2>/dev/null || true
|
|
525
|
-
else
|
|
526
|
-
echo "❌ Failed to delete endpoint"
|
|
527
|
-
fi
|
|
528
|
-
|
|
529
|
-
# Delete endpoint configuration
|
|
530
|
-
if [ -n "${EPC_NAME}" ]; then
|
|
531
|
-
echo "🗑️ Deleting endpoint configuration: ${EPC_NAME}"
|
|
532
|
-
if aws sagemaker delete-endpoint-config \
|
|
533
|
-
--endpoint-config-name "${EPC_NAME}" \
|
|
534
|
-
--region "${AWS_REGION}" &> /dev/null; then
|
|
535
|
-
echo "✅ Endpoint configuration deleted"
|
|
536
|
-
|
|
537
|
-
# Mark endpoint config as deleted in manifest (non-blocking)
|
|
538
|
-
./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint-config/${EPC_NAME}" 2>/dev/null || true
|
|
539
|
-
else
|
|
540
|
-
echo "❌ Failed to delete endpoint configuration"
|
|
541
|
-
fi
|
|
542
|
-
fi
|
|
543
|
-
|
|
544
|
-
# Remove saved names from config
|
|
545
|
-
if grep -q "^export ENDPOINT_NAME=" "${SCRIPT_DIR}/config" 2>/dev/null; then
|
|
546
|
-
sed -i.bak '/^# Last deployed resources/d;/^export ENDPOINT_NAME=/d;/^export ENDPOINT_CONFIG_NAME=/d;/^export INFERENCE_COMPONENT_NAME=/d' "${SCRIPT_DIR}/config"
|
|
547
|
-
rm -f "${SCRIPT_DIR}/config.bak"
|
|
548
|
-
fi
|
|
549
|
-
|
|
550
|
-
echo "✅ SageMaker resources cleaned"
|
|
551
|
-
}
|
|
552
|
-
|
|
553
|
-
# Delete a single inference component, identified by its do/ic/<name>.conf.
# The endpoint itself is not touched.
#
# Globals:   PROJECT_NAME, AWS_REGION, SCRIPT_DIR (read); AWS_ACCOUNT_ID (written)
# Arguments: $1 - IC config name (basename of do/ic/<name>.conf)
# Returns:   0 when deleted or when the IC was never deployed,
#            1 on missing/unknown name, user decline, or failed delete
# Exits:     4 when AWS credentials are not configured
#
# Changes from the previous version:
#   - "$1" is read with a default so that a call without arguments reaches
#     the "IC name required" message instead of aborting under `set -u`.
#   - The manifest tool is invoked via ${SCRIPT_DIR} rather than the
#     cwd-relative "./do/manifest", so the manifest update also happens when
#     the script is run from outside the project root.
#   - The loop variable used for listing configs is now local.
clean_ic() {
    local ic_name="${1:-}"
    echo "🧹 Cleaning inference component: ${ic_name}"
    echo " Project: ${PROJECT_NAME}"
    echo " Region: ${AWS_REGION}"

    # Validate IC name argument
    if [ -z "${ic_name}" ]; then
        echo "❌ IC name required"
        echo " Usage: ./do/clean ic <name>"
        return 1
    fi

    # Check that the IC config file exists
    local ic_conf="${SCRIPT_DIR}/ic/${ic_name}.conf"
    if [ ! -f "${ic_conf}" ]; then
        echo "❌ IC config not found: do/ic/${ic_name}.conf"
        echo " Available ICs:"
        if [ -d "${SCRIPT_DIR}/ic" ]; then
            local conf
            for conf in "${SCRIPT_DIR}"/ic/*.conf; do
                [ -f "${conf}" ] || continue
                echo " - $(basename "${conf}" .conf)"
            done
        else
            echo " (none)"
        fi
        return 1
    fi

    # Validate AWS credentials
    if ! aws sts get-caller-identity &> /dev/null; then
        echo "❌ AWS credentials not configured"
        echo " Run: aws configure"
        exit 4
    fi

    AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)

    # Look up IC_DEPLOYED_NAME from the config file
    local ic_deployed_name=""
    if grep -q "^export IC_DEPLOYED_NAME=" "${ic_conf}" 2> /dev/null; then
        ic_deployed_name=$(grep "^export IC_DEPLOYED_NAME=" "${ic_conf}" |
            sed 's/^export IC_DEPLOYED_NAME="//' |
            sed 's/"$//')
    fi

    if [ -z "${ic_deployed_name}" ]; then
        echo "ℹ️ IC '${ic_name}' has not been deployed (no IC_DEPLOYED_NAME in config)"
        return 0
    fi

    echo " Deployed as: ${ic_deployed_name}"

    if ! confirm_action "This will delete inference component '${ic_deployed_name}'"; then
        return 1
    fi

    # Delete the inference component
    echo "🗑️ Deleting inference component: ${ic_deployed_name}"
    if ! aws sagemaker delete-inference-component \
        --inference-component-name "${ic_deployed_name}" \
        --region "${AWS_REGION}" 2> /dev/null; then
        echo "❌ Failed to delete inference component: ${ic_deployed_name}"
        return 1
    fi

    echo "⏳ Waiting for inference component deletion..."
    # If the waiter errors out, fall back to a fixed pause.
    aws sagemaker wait inference-component-deleted \
        --inference-component-name "${ic_deployed_name}" \
        --region "${AWS_REGION}" 2> /dev/null || sleep 15
    echo "✅ Inference component deleted: ${ic_deployed_name}"

    # Mark inference component as deleted in manifest (non-blocking)
    "${SCRIPT_DIR}/manifest" delete \
        --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${ic_deployed_name}" \
        2> /dev/null || true

    # Clear deployed state from config
    sed -i.bak '/^export IC_DEPLOYED_NAME=/d;/^export IC_DEPLOYED_AT=/d' "${ic_conf}"
    rm -f "${ic_conf}.bak"

    echo "✅ Inference component '${ic_name}' cleaned"
}
|
|
633
|
-
|
|
634
|
-
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
635
|
-
# Clean a single LoRA adapter by name (synonym for do/adapter remove).
#
# Arguments:
#   $1 - adapter name; its settings are read from
#        ${SCRIPT_DIR}/adapters/<name>.conf
# Globals:
#   PROJECT_NAME, AWS_REGION, SCRIPT_DIR (read); AWS_ACCOUNT_ID (written)
# Returns:
#   0 when the local conf was removed (also when the remote delete call failed,
#     which is treated as "may already be gone", or when the conf carried no
#     ADAPTER_IC_NAME)
#   1 when the name is missing, the conf file does not exist, or the user
#     declines the confirmation
#   Exits the whole script with status 4 when AWS credentials are not usable.
clean_adapter() {
    local adapter_name="${1:-}"
    echo "🧹 Cleaning LoRA adapter: ${adapter_name}"
    echo " Project: ${PROJECT_NAME}"
    echo " Region: ${AWS_REGION}"

    # Validate adapter name argument
    if [ -z "${adapter_name}" ]; then
        echo "❌ Adapter name required"
        echo " Usage: ./do/clean adapter <name>"
        return 1
    fi

    # Check that the adapter config file exists; list the known ones otherwise
    local adapter_conf="${SCRIPT_DIR}/adapters/${adapter_name}.conf"
    if [ ! -f "${adapter_conf}" ]; then
        echo "❌ Adapter config not found: do/adapters/${adapter_name}.conf"
        echo " Available adapters:"
        if [ -d "${SCRIPT_DIR}/adapters" ]; then
            local conf
            for conf in "${SCRIPT_DIR}"/adapters/*.conf; do
                # An unmatched glob stays literal; skip it
                [ -f "${conf}" ] || continue
                echo " - $(basename "${conf}" .conf)"
            done
        else
            echo " (none)"
        fi
        return 1
    fi

    # Validate AWS credentials
    if ! aws sts get-caller-identity &> /dev/null; then
        echo "❌ AWS credentials not configured"
        echo " Run: aws configure"
        exit 4
    fi

    AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)

    # Read ADAPTER_IC_NAME from the config file. The trailing `|| echo ""`
    # mirrors clean_adapters: when grep finds no match the pipeline is
    # non-zero under pipefail, and without the guard errexit would abort the
    # script before the "no ADAPTER_IC_NAME" branch below could run.
    local adapter_ic_name=""
    adapter_ic_name=$(grep "^export ADAPTER_IC_NAME=" "${adapter_conf}" 2>/dev/null \
        | sed -e 's/^export ADAPTER_IC_NAME="//' -e 's/"$//' || echo "")

    if [ -z "${adapter_ic_name}" ]; then
        echo "⚠️ No ADAPTER_IC_NAME found in do/adapters/${adapter_name}.conf"
        echo " Removing local config file."
        rm -f "${adapter_conf}"
        return 0
    fi

    echo " Adapter IC: ${adapter_ic_name}"

    if ! confirm_action "This will delete LoRA adapter '${adapter_name}' (IC: ${adapter_ic_name})"; then
        return 1
    fi

    # Delete the adapter inference component
    echo "🗑️ Deleting adapter inference component: ${adapter_ic_name}"
    if aws sagemaker delete-inference-component \
        --inference-component-name "${adapter_ic_name}" \
        --region "${AWS_REGION}" 2>/dev/null; then
        echo "⏳ Waiting for adapter IC deletion..."
        # Fall back to a fixed pause when the waiter call fails
        aws sagemaker wait inference-component-deleted \
            --inference-component-name "${adapter_ic_name}" \
            --region "${AWS_REGION}" 2>/dev/null || sleep 15
        echo "✅ Adapter IC deleted: ${adapter_ic_name}"

        # Mark adapter IC as deleted in manifest (non-blocking)
        ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${adapter_ic_name}" 2>/dev/null || true
    else
        echo "⚠️ Failed to delete adapter IC: ${adapter_ic_name} (may already be gone)"
    fi

    # Remove local conf file
    rm -f "${adapter_conf}"
    echo "✅ Removed: do/adapters/${adapter_name}.conf"

    echo "✅ Adapter '${adapter_name}' cleaned"
}
|
|
714
|
-
|
|
715
|
-
# Remove every LoRA adapter that has a conf file under ${SCRIPT_DIR}/adapters.
# The base inference component and the endpoint are left running.
#
# Globals:
#   PROJECT_NAME, AWS_REGION, SCRIPT_DIR (read); AWS_ACCOUNT_ID (written)
# Returns:
#   0 when there was nothing to clean or all conf files were processed,
#   1 when the user declines the confirmation.
#   Exits the whole script with status 4 when AWS credentials are not usable.
clean_adapters() {
    echo "🧹 Cleaning all LoRA adapters"
    echo " Project: ${PROJECT_NAME}"
    echo " Region: ${AWS_REGION}"

    # Nothing to do without an adapters directory
    local adapters_dir="${SCRIPT_DIR}/adapters"
    if ! [ -d "${adapters_dir}" ]; then
        echo "ℹ️ No adapters directory found"
        return 0
    fi

    # Gather the adapter names from the conf files present right now
    local found_names=()
    for conf in "${adapters_dir}"/*.conf; do
        if [ -f "${conf}" ]; then
            found_names+=("$(basename "${conf}" .conf)")
        fi
    done

    local total="${#found_names[@]}"
    if [ "${total}" -eq 0 ]; then
        echo "ℹ️ No adapters found to clean"
        return 0
    fi

    # Show the user what is about to go away
    echo ""
    echo "Adapters to be removed (${total}):"
    for name in "${found_names[@]}"; do
        echo " • ${name}"
    done

    if ! confirm_action "This will delete ${total} LoRA adapter(s). Base IC and endpoint will remain running."; then
        return 1
    fi

    # Validate AWS credentials
    if ! aws sts get-caller-identity &> /dev/null; then
        echo "❌ AWS credentials not configured"
        echo " Run: aws configure"
        exit 4
    fi

    AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)

    # Walk the conf files again and delete each adapter
    local removed=0
    local ic_name label
    for adapter_conf in "${adapters_dir}"/*.conf; do
        [ -f "${adapter_conf}" ] || continue

        ic_name=$(grep "^export ADAPTER_IC_NAME=" "${adapter_conf}" 2>/dev/null | sed 's/^export ADAPTER_IC_NAME="//' | sed 's/"$//' || echo "")
        label=$(basename "${adapter_conf}" .conf)

        # Only conf files that name an inference component need a remote delete
        if [ -n "${ic_name}" ]; then
            echo "🗑️ Deleting adapter: ${label} (${ic_name})"
            if ! aws sagemaker delete-inference-component \
                --inference-component-name "${ic_name}" \
                --region "${AWS_REGION}" 2>/dev/null; then
                echo "⚠️ Failed to delete adapter IC: ${ic_name} (may already be gone)"
            else
                echo "⏳ Waiting for adapter deletion..."
                aws sagemaker wait inference-component-deleted \
                    --inference-component-name "${ic_name}" \
                    --region "${AWS_REGION}" 2>/dev/null || sleep 15
                echo "✅ Adapter deleted: ${label}"

                # Mark adapter IC as deleted in manifest (non-blocking)
                ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${ic_name}" 2>/dev/null || true
            fi
        fi

        # The local conf is dropped regardless of the remote outcome
        rm -f "${adapter_conf}"
        removed=$((removed + 1))
    done

    echo ""
    echo "✅ All adapters cleaned (${removed} removed)"
    echo " Base IC and endpoint remain running."
}
|
|
795
|
-
<% } %>
|
|
796
|
-
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
797
|
-
# Clean the SageMaker async endpoint, its endpoint configuration and its model.
#
# Globals:
#   PROJECT_NAME, AWS_REGION, SCRIPT_DIR (read)
#   ENDPOINT_NAME, ENDPOINT_CONFIG_NAME, SAGEMAKER_MODEL_NAME (read, optional)
#   AWS_ACCOUNT_ID (written)
# Returns:
#   0 when the endpoint does not exist or every requested deletion succeeded
#   1 when no endpoint name is known, the user declines, or any deletion
#     failed (the saved names stay in ${SCRIPT_DIR}/config so the cleanup can
#     be retried)
#   Exits the whole script with status 4 when AWS credentials are not usable.
clean_endpoint() {
    echo "🧹 Cleaning SageMaker async resources"
    echo " Project: ${PROJECT_NAME}"
    echo " Region: ${AWS_REGION}"

    # Validate AWS credentials
    if ! aws sts get-caller-identity &> /dev/null; then
        echo "❌ AWS credentials not configured"
        echo " Run: aws configure"
        exit 4
    fi

    AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)

    # Use names from config (set by do/deploy) or argument
    local EP_NAME="${ENDPOINT_NAME:-}"
    local EPC_NAME="${ENDPOINT_CONFIG_NAME:-}"
    local SM_MODEL_NAME="${SAGEMAKER_MODEL_NAME:-}"
    # Flipped to true by any failed deletion; decides the final status
    local cleanup_failed=false

    if [ -z "${EP_NAME}" ]; then
        echo "❌ No endpoint name found"
        echo " Run ./do/deploy first, or set ENDPOINT_NAME in do/config"
        return 1
    fi

    echo ""
    echo "Checking for SageMaker resources..."

    if aws sagemaker describe-endpoint \
        --endpoint-name "${EP_NAME}" \
        --region "${AWS_REGION}" &> /dev/null; then
        echo " ✓ Endpoint: ${EP_NAME}"
    else
        echo "ℹ️ Endpoint not found: ${EP_NAME}"
        return 0
    fi

    if ! confirm_action "This will delete the SageMaker async endpoint and model"; then
        return 1
    fi

    # Delete endpoint
    echo "🗑️ Deleting endpoint: ${EP_NAME}"
    if aws sagemaker delete-endpoint \
        --endpoint-name "${EP_NAME}" \
        --region "${AWS_REGION}" &> /dev/null; then
        echo "✅ Endpoint deleted"
        echo "⏳ Waiting for endpoint deletion..."
        # Fall back to a fixed pause when the waiter call fails
        aws sagemaker wait endpoint-deleted \
            --endpoint-name "${EP_NAME}" \
            --region "${AWS_REGION}" 2>/dev/null || sleep 10

        # Mark endpoint as deleted in manifest (non-blocking)
        ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint/${EP_NAME}" 2>/dev/null || true
    else
        echo "❌ Failed to delete endpoint"
        cleanup_failed=true
    fi

    # Delete endpoint configuration
    if [ -n "${EPC_NAME}" ]; then
        echo "🗑️ Deleting endpoint configuration: ${EPC_NAME}"
        if aws sagemaker delete-endpoint-config \
            --endpoint-config-name "${EPC_NAME}" \
            --region "${AWS_REGION}" &> /dev/null; then
            echo "✅ Endpoint configuration deleted"

            # Mark endpoint config as deleted in manifest (non-blocking)
            ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint-config/${EPC_NAME}" 2>/dev/null || true
        else
            echo "❌ Failed to delete endpoint configuration"
            cleanup_failed=true
        fi
    fi

    # Delete SageMaker model (async uses classic model-based flow)
    if [ -n "${SM_MODEL_NAME}" ]; then
        echo "🗑️ Deleting SageMaker model: ${SM_MODEL_NAME}"
        if aws sagemaker delete-model \
            --model-name "${SM_MODEL_NAME}" \
            --region "${AWS_REGION}" &> /dev/null; then
            echo "✅ SageMaker model deleted"

            # Mark model as deleted in manifest (non-blocking)
            ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:model/${SM_MODEL_NAME}" 2>/dev/null || true
        else
            echo "❌ Failed to delete SageMaker model"
            cleanup_failed=true
        fi
    fi

    # Keep the saved names when something is still left behind; erasing them
    # would drop the only local record of the resources that need a retry.
    if [ "${cleanup_failed}" = true ]; then
        echo "❌ Failed to delete some SageMaker async resources"
        echo " Saved resource names were kept in do/config so cleanup can be retried"
        return 1
    fi

    # Remove saved names from config
    if grep -q "^export ENDPOINT_NAME=" "${SCRIPT_DIR}/config" 2>/dev/null; then
        sed -i.bak '/^# Last deployed resources/d;/^export ENDPOINT_NAME=/d;/^export ENDPOINT_CONFIG_NAME=/d;/^export SAGEMAKER_MODEL_NAME=/d' "${SCRIPT_DIR}/config"
        rm -f "${SCRIPT_DIR}/config.bak"
    fi

    echo "✅ SageMaker async resources cleaned"
}
|
|
897
|
-
<% } else if (deploymentTarget === 'batch-transform') { %>
|
|
898
|
-
# Clean SageMaker managed inference batch resources: stop the transform job
# when it is still in progress and delete the SageMaker model.
#
# Globals:
#   PROJECT_NAME, AWS_REGION, SCRIPT_DIR (read)
#   TRANSFORM_JOB_NAME, SAGEMAKER_MODEL_NAME (read, optional)
#   AWS_ACCOUNT_ID (written)
# Returns:
#   0 when nothing was found to clean or every action succeeded
#   1 when neither name is known, the user declines, or stopping the job /
#     deleting the model failed (saved names then stay in
#     ${SCRIPT_DIR}/config so the cleanup can be retried)
#   Exits the whole script with status 4 when AWS credentials are not usable.
clean_batch() {
    echo "🧹 Cleaning SageMaker managed inference batch resources"
    echo " Project: ${PROJECT_NAME}"
    echo " Region: ${AWS_REGION}"

    # Validate AWS credentials
    if ! aws sts get-caller-identity &> /dev/null; then
        echo "❌ AWS credentials not configured"
        echo " Run: aws configure"
        exit 4
    fi

    # Resolved once up front, like the other clean_* functions, so every
    # manifest update below can build its ARN.
    AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)

    # Use names from config (set by do/deploy)
    local TJ_NAME="${TRANSFORM_JOB_NAME:-}"
    local SM_MODEL_NAME="${SAGEMAKER_MODEL_NAME:-}"
    # Flipped to true by any failed action; decides the final status
    local cleanup_failed=false

    if [ -z "${TJ_NAME}" ] && [ -z "${SM_MODEL_NAME}" ]; then
        echo "❌ No transform job or model name found"
        echo " Run ./do/deploy first, or set TRANSFORM_JOB_NAME in do/config"
        return 1
    fi

    echo ""
    echo "Checking for SageMaker resources..."

    # Check transform job status; an empty status means the job was not found
    local JOB_EXISTS=false
    local JOB_STATUS=""
    if [ -n "${TJ_NAME}" ]; then
        JOB_STATUS=$(aws sagemaker describe-transform-job \
            --transform-job-name "${TJ_NAME}" \
            --region "${AWS_REGION}" \
            --query 'TransformJobStatus' \
            --output text 2>/dev/null || echo "")
        if [ -n "${JOB_STATUS}" ]; then
            JOB_EXISTS=true
            echo " ✓ Transform job: ${TJ_NAME} (${JOB_STATUS})"
        else
            echo "ℹ️ Transform job not found: ${TJ_NAME}"
        fi
    fi

    # Check model
    local MODEL_EXISTS=false
    if [ -n "${SM_MODEL_NAME}" ]; then
        if aws sagemaker describe-model \
            --model-name "${SM_MODEL_NAME}" \
            --region "${AWS_REGION}" &> /dev/null; then
            MODEL_EXISTS=true
            echo " ✓ SageMaker model: ${SM_MODEL_NAME}"
        else
            echo "ℹ️ SageMaker model not found: ${SM_MODEL_NAME}"
        fi
    fi

    if [ "${JOB_EXISTS}" = false ] && [ "${MODEL_EXISTS}" = false ]; then
        echo "ℹ️ No batch transform resources found to clean"
        return 0
    fi

    if ! confirm_action "This will stop the transform job (if running) and delete the SageMaker model"; then
        return 1
    fi

    # Stop transform job if in progress
    if [ "${JOB_EXISTS}" = true ] && [ "${JOB_STATUS}" = "InProgress" ]; then
        echo "🗑️ Stopping transform job: ${TJ_NAME}"
        if aws sagemaker stop-transform-job \
            --transform-job-name "${TJ_NAME}" \
            --region "${AWS_REGION}" &> /dev/null; then
            echo "⏳ Waiting for transform job to stop..."
            # Fall back to a fixed pause when the waiter call fails
            aws sagemaker wait transform-job-completed-or-stopped \
                --transform-job-name "${TJ_NAME}" \
                --region "${AWS_REGION}" 2>/dev/null || sleep 15
            echo "✅ Transform job stopped"

            # Mark transform job as deleted in manifest (non-blocking)
            ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:transform-job/${TJ_NAME}" 2>/dev/null || true
        else
            echo "❌ Failed to stop transform job"
            cleanup_failed=true
        fi
    fi

    # Delete SageMaker model
    if [ "${MODEL_EXISTS}" = true ]; then
        echo "🗑️ Deleting SageMaker model: ${SM_MODEL_NAME}"
        if aws sagemaker delete-model \
            --model-name "${SM_MODEL_NAME}" \
            --region "${AWS_REGION}" &> /dev/null; then
            echo "✅ SageMaker model deleted"

            # Mark model as deleted in manifest (non-blocking), matching what
            # the async endpoint cleanup does for its model.
            ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:model/${SM_MODEL_NAME}" 2>/dev/null || true
        else
            echo "❌ Failed to delete SageMaker model"
            cleanup_failed=true
        fi
    fi

    # Keep the saved names when something is still left behind; erasing them
    # would drop the only local record of the resources that need a retry.
    if [ "${cleanup_failed}" = true ]; then
        echo "❌ Failed to clean some SageMaker batch resources"
        echo " Saved resource names were kept in do/config so cleanup can be retried"
        return 1
    fi

    # Remove saved names from config
    if grep -q "^export TRANSFORM_JOB_NAME=" "${SCRIPT_DIR}/config" 2>/dev/null; then
        sed -i.bak '/^# Last deployed resources/d;/^export TRANSFORM_JOB_NAME=/d;/^export SAGEMAKER_MODEL_NAME=/d' "${SCRIPT_DIR}/config"
        rm -f "${SCRIPT_DIR}/config.bak"
    fi

    echo "✅ SageMaker managed inference batch resources cleaned"
}
|
|
1003
|
-
<% } else if (deploymentTarget === 'hyperpod-eks') { %>
|
|
1004
|
-
# Clean the HyperPod EKS deployment: point kubectl at the cluster and delete
# every Kubernetes object rendered from hyperpod/*.yaml.
#
# Globals:
#   HYPERPOD_CLUSTER_NAME, HYPERPOD_NAMESPACE, AWS_REGION, PROJECT_NAME,
#   HOME (read)
#   KUBECONFIG_PATH, EKS_CLUSTER_ARN, EKS_CLUSTER_NAME (written)
#   KUBECONFIG, AWS_ACCOUNT_ID (written and exported)
# Returns:
#   0 when all rendered manifests were deleted (or there were none)
#   1 when the cluster cannot be resolved, kubectl cannot be configured, the
#     user declines, or any kubectl delete fails
#   Exits the whole script with status 4 when AWS credentials are not usable.
clean_hyperpod() {
    echo "🧹 Cleaning HyperPod EKS resources"
    echo " Cluster: ${HYPERPOD_CLUSTER_NAME}"
    echo " Namespace: ${HYPERPOD_NAMESPACE}"

    # Validate AWS credentials
    if ! aws sts get-caller-identity &> /dev/null; then
        echo "❌ AWS credentials not configured"
        echo " Run: aws configure"
        exit 4
    fi

    # Get kubeconfig for HyperPod cluster
    echo "🔑 Configuring kubectl for HyperPod cluster..."
    KUBECONFIG_PATH="${HOME}/.kube/hyperpod-${HYPERPOD_CLUSTER_NAME}"

    # stderr is deliberately not captured: the variable must hold only the
    # ARN, and the AWS error text stays visible to the user on failure.
    EKS_CLUSTER_ARN=$(aws sagemaker describe-cluster \
        --cluster-name "${HYPERPOD_CLUSTER_NAME}" \
        --region "${AWS_REGION}" \
        --query "Orchestrator.Eks.ClusterArn" \
        --output text) || {
        echo "❌ Failed to describe HyperPod cluster: ${HYPERPOD_CLUSTER_NAME}"
        echo " Check that the cluster exists and you have permission to access it"
        return 1
    }

    # With --output text a missing value is printed as "None"; that is not a
    # usable cluster ARN.
    if [ -z "${EKS_CLUSTER_ARN}" ] || [ "${EKS_CLUSTER_ARN}" = "None" ]; then
        echo "❌ Failed to describe HyperPod cluster: ${HYPERPOD_CLUSTER_NAME}"
        echo " The cluster did not report an EKS cluster ARN"
        return 1
    fi

    # The EKS cluster name is the last path segment of the ARN
    EKS_CLUSTER_NAME=$(echo "${EKS_CLUSTER_ARN}" | awk -F'/' '{print $NF}')

    if ! aws eks update-kubeconfig \
        --name "${EKS_CLUSTER_NAME}" \
        --region "${AWS_REGION}" \
        --kubeconfig "${KUBECONFIG_PATH}" 2>&1; then
        echo "❌ Failed to configure kubectl for EKS cluster: ${EKS_CLUSTER_NAME}"
        return 1
    fi

    export KUBECONFIG="${KUBECONFIG_PATH}"

    if ! confirm_action "This will delete the HyperPod deployment in namespace ${HYPERPOD_NAMESPACE}"; then
        return 1
    fi

    # Delete Kubernetes resources
    echo "🗑️ Deleting Kubernetes resources..."
    AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
    # Exported so envsubst can substitute it into the manifests
    export AWS_ACCOUNT_ID

    local DELETE_FAILED=false
    local manifest
    local RENDERED
    for manifest in hyperpod/*.yaml; do
        # An unmatched glob stays literal; skip it instead of feeding a
        # non-existent path to envsubst.
        [ -f "${manifest}" ] || continue

        # Skip files that contain no Kubernetes objects (e.g. comment-only PVC stubs)
        RENDERED=$(envsubst < "${manifest}")
        if echo "${RENDERED}" | grep -q '^kind:'; then
            if ! echo "${RENDERED}" | kubectl delete -n "${HYPERPOD_NAMESPACE}" --ignore-not-found -f - 2>&1; then
                DELETE_FAILED=true
            fi
        fi
    done

    if [ "${DELETE_FAILED}" = true ]; then
        echo "❌ Failed to delete some Kubernetes resources"
        echo " You may need to manually clean up:"
        echo " kubectl get all -n ${HYPERPOD_NAMESPACE}"
        return 1
    fi

    # Mark k8s resources as deleted in manifest (non-blocking)
    ./do/manifest delete --id "${HYPERPOD_NAMESPACE}/${PROJECT_NAME}" 2>/dev/null || true

    echo "✅ HyperPod EKS resources cleaned"
}
|
|
1073
|
-
<% } %>
|
|
1074
|
-
|
|
1075
|
-
# Clean the CodeBuild project and the resources created alongside it: the
# "<project>-service-role" IAM role (with its inline policies) and the source
# artifacts under s3://codebuild-source-<account>-<region>/<PROJECT_NAME>/.
#
# Globals:
#   CODEBUILD_PROJECT_NAME (read, optional), PROJECT_NAME, AWS_REGION (read)
#   AWS_ACCOUNT_ID (written)
# Returns:
#   0 when no project is configured, the project does not exist, or every
#     deletion succeeded
#   1 when the user declines or any deletion failed
#   Exits the whole script with status 4 when AWS credentials are not usable.
clean_codebuild() {
    echo "🧹 Cleaning CodeBuild resources"
    echo " Project: ${CODEBUILD_PROJECT_NAME:-not set}"
    echo " Region: ${AWS_REGION}"

    if [ -z "${CODEBUILD_PROJECT_NAME:-}" ]; then
        echo "ℹ️ No CodeBuild project name configured (build target may not be codebuild)"
        return 0
    fi

    # Validate AWS credentials
    if ! aws sts get-caller-identity &> /dev/null; then
        echo "❌ AWS credentials not configured"
        echo " Run: aws configure"
        exit 4
    fi

    AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)

    # Check if project exists. The `|| echo ""` keeps a failing lookup from
    # aborting the script under errexit; it is then reported as "not found".
    local project_check
    project_check=$(aws codebuild batch-get-projects \
        --names "${CODEBUILD_PROJECT_NAME}" \
        --region "${AWS_REGION}" \
        --query 'projects[0].name' \
        --output text 2>/dev/null || echo "")

    if [ "${project_check}" = "None" ] || [ -z "${project_check}" ] || [ "${project_check}" = "null" ]; then
        echo "ℹ️ CodeBuild project not found: ${CODEBUILD_PROJECT_NAME}"
        return 0
    fi

    echo ""
    echo "Resources to be removed:"
    echo " • CodeBuild project: ${CODEBUILD_PROJECT_NAME}"

    # Check for service role
    local role_name="${CODEBUILD_PROJECT_NAME}-service-role"
    local role_exists=false
    if aws iam get-role --role-name "${role_name}" &> /dev/null; then
        role_exists=true
        echo " • IAM service role: ${role_name}"
    fi

    # Check for S3 source bucket
    local s3_bucket="codebuild-source-${AWS_ACCOUNT_ID}-${AWS_REGION}"
    local s3_prefix="${PROJECT_NAME}/"
    local s3_exists=false
    local s3_count=0
    if aws s3api head-bucket --bucket "${s3_bucket}" --region "${AWS_REGION}" &> /dev/null; then
        # A non-zero status from `aws s3 ls` would fail the whole pipeline
        # under pipefail; `|| true` lets the zero count from wc stand.
        s3_count=$(aws s3 ls "s3://${s3_bucket}/${s3_prefix}" --region "${AWS_REGION}" 2>/dev/null | wc -l | tr -d ' ' || true)
        if [ "${s3_count:-0}" -gt 0 ]; then
            s3_exists=true
            echo " • S3 source artifacts: s3://${s3_bucket}/${s3_prefix} (${s3_count} objects)"
        fi
    fi

    if ! confirm_action "This will delete the CodeBuild project and associated resources"; then
        return 1
    fi

    # Flipped to true by any failed deletion; decides the final status
    local cleanup_failed=false

    # Delete CodeBuild project
    echo "🗑️ Deleting CodeBuild project: ${CODEBUILD_PROJECT_NAME}"
    if aws codebuild delete-project \
        --name "${CODEBUILD_PROJECT_NAME}" \
        --region "${AWS_REGION}" &> /dev/null; then
        echo "✅ CodeBuild project deleted"

        # Mark CodeBuild project as deleted in manifest (non-blocking)
        ./do/manifest delete --id "arn:aws:codebuild:${AWS_REGION}:${AWS_ACCOUNT_ID}:project/${CODEBUILD_PROJECT_NAME}" 2>/dev/null || true
    else
        echo "❌ Failed to delete CodeBuild project"
        cleanup_failed=true
    fi

    # Delete IAM service role
    if [ "${role_exists}" = true ]; then
        echo "🗑️ Deleting IAM service role: ${role_name}"
        # Remove inline policies first
        local policies policy
        policies=$(aws iam list-role-policies --role-name "${role_name}" --query 'PolicyNames' --output text 2>/dev/null || echo "")
        # Unquoted on purpose: the text output is a whitespace-separated list
        for policy in ${policies}; do
            aws iam delete-role-policy --role-name "${role_name}" --policy-name "${policy}" 2>/dev/null || true
        done
        if aws iam delete-role --role-name "${role_name}" &> /dev/null; then
            echo "✅ IAM service role deleted"

            # Mark IAM role as deleted in manifest (non-blocking)
            ./do/manifest delete --id "arn:aws:iam::${AWS_ACCOUNT_ID}:role/${role_name}" 2>/dev/null || true
        else
            echo "❌ Failed to delete IAM service role"
            cleanup_failed=true
        fi
    fi

    # Delete S3 source artifacts
    if [ "${s3_exists}" = true ]; then
        echo "🗑️ Deleting S3 source artifacts: s3://${s3_bucket}/${s3_prefix}"
        if aws s3 rm "s3://${s3_bucket}/${s3_prefix}" --recursive --region "${AWS_REGION}" &> /dev/null; then
            echo "✅ S3 source artifacts deleted"
        else
            echo "❌ Failed to delete S3 source artifacts"
            cleanup_failed=true
        fi
    fi

    # Report partial cleanup honestly instead of claiming success
    if [ "${cleanup_failed}" = true ]; then
        echo "❌ Failed to delete some CodeBuild resources"
        return 1
    fi

    echo "✅ CodeBuild resources cleaned"
}
|
|
1178
|
-
|
|
1179
|
-
# Main script logic
|
|
1180
|
-
echo "🧹 Cleanup script for ${PROJECT_NAME}"
|
|
1181
|
-
echo ""
|
|
1182
|
-
|
|
1183
|
-
if [ -z "${CLEANUP_TARGET}" ]; then
|
|
1184
|
-
<% if (deploymentTarget === 'batch-transform') { %>
|
|
1185
|
-
CLEANUP_TARGET="batch"
|
|
1186
|
-
<% } else if (deploymentTarget === 'hyperpod-eks') { %>
|
|
1187
|
-
show_usage
|
|
1188
|
-
exit 0
|
|
1189
|
-
<% } else { %>
|
|
1190
|
-
show_usage
|
|
1191
|
-
exit 0
|
|
1192
|
-
<% } %>
|
|
1193
|
-
fi
|
|
1194
|
-
|
|
1195
|
-
case "${CLEANUP_TARGET}" in
|
|
1196
|
-
local)
|
|
1197
|
-
clean_local
|
|
1198
|
-
;;
|
|
1199
|
-
ecr)
|
|
1200
|
-
clean_ecr
|
|
1201
|
-
;;
|
|
1202
|
-
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
1203
|
-
endpoint)
|
|
1204
|
-
clean_endpoint
|
|
1205
|
-
;;
|
|
1206
|
-
ic)
|
|
1207
|
-
clean_ic "${CLEANUP_ARG}"
|
|
1208
|
-
;;
|
|
1209
|
-
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
1210
|
-
adapter)
|
|
1211
|
-
clean_adapter "${CLEANUP_ARG}"
|
|
1212
|
-
;;
|
|
1213
|
-
adapters)
|
|
1214
|
-
clean_adapters
|
|
1215
|
-
;;
|
|
1216
|
-
<% } %>
|
|
1217
|
-
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
1218
|
-
endpoint)
|
|
1219
|
-
clean_endpoint
|
|
1220
|
-
;;
|
|
1221
|
-
<% } else if (deploymentTarget === 'batch-transform') { %>
|
|
1222
|
-
batch)
|
|
1223
|
-
clean_batch
|
|
1224
|
-
;;
|
|
1225
|
-
<% } else if (deploymentTarget === 'hyperpod-eks') { %>
|
|
1226
|
-
hyperpod)
|
|
1227
|
-
clean_hyperpod
|
|
1228
|
-
;;
|
|
1229
|
-
<% } %>
|
|
1230
|
-
codebuild)
|
|
1231
|
-
clean_codebuild
|
|
1232
|
-
;;
|
|
1233
|
-
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
1234
|
-
benchmark)
|
|
1235
|
-
echo "🧹 Cleaning benchmark resources..."
|
|
1236
|
-
WORKLOAD_CONFIG_NAME="${PROJECT_NAME}-benchmark-config"
|
|
1237
|
-
|
|
1238
|
-
# Delete workload config if exists
|
|
1239
|
-
if aws sagemaker describe-ai-workload-config \
|
|
1240
|
-
--ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
|
|
1241
|
-
--region "$AWS_REGION" 2>/dev/null; then
|
|
1242
|
-
aws sagemaker delete-ai-workload-config \
|
|
1243
|
-
--ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
|
|
1244
|
-
--region "$AWS_REGION"
|
|
1245
|
-
echo " ✓ Deleted workload config: $WORKLOAD_CONFIG_NAME"
|
|
1246
|
-
fi
|
|
1247
|
-
|
|
1248
|
-
# Delete terminal benchmark jobs matching project prefix
|
|
1249
|
-
aws sagemaker list-ai-benchmark-jobs \
|
|
1250
|
-
--name-contains "${PROJECT_NAME}-benchmark-" \
|
|
1251
|
-
--region "$AWS_REGION" \
|
|
1252
|
-
--query 'AIBenchmarkJobs[?AIBenchmarkJobStatus!=`InProgress`].AIBenchmarkJobName' \
|
|
1253
|
-
--output text | tr '\t' '\n' | while read -r job; do
|
|
1254
|
-
[ -z "$job" ] && continue
|
|
1255
|
-
aws sagemaker delete-ai-benchmark-job \
|
|
1256
|
-
--ai-benchmark-job-name "$job" \
|
|
1257
|
-
--region "$AWS_REGION"
|
|
1258
|
-
echo " ✓ Deleted benchmark job: $job"
|
|
1259
|
-
done
|
|
1260
|
-
|
|
1261
|
-
# Delete local benchmark results
|
|
1262
|
-
if [ -d "${SCRIPT_DIR}/../benchmarks" ]; then
|
|
1263
|
-
read -p "Delete local benchmark results? (Y/n) " CONFIRM_DELETE
|
|
1264
|
-
CONFIRM_DELETE="${CONFIRM_DELETE:-Y}"
|
|
1265
|
-
if [[ "${CONFIRM_DELETE}" =~ ^[Yy]$ ]]; then
|
|
1266
|
-
rm -rf "${SCRIPT_DIR}/../benchmarks"
|
|
1267
|
-
echo " ✓ Deleted local benchmarks/ directory"
|
|
1268
|
-
else
|
|
1269
|
-
echo " ⏭ Skipped local benchmarks/ deletion"
|
|
1270
|
-
fi
|
|
1271
|
-
fi
|
|
1272
|
-
|
|
1273
|
-
echo "✅ Benchmark cleanup complete"
|
|
1274
|
-
;;
|
|
1275
|
-
<% } %>
|
|
1276
|
-
all)
|
|
1277
|
-
echo "🧹 Performing complete cleanup"
|
|
1278
|
-
echo ""
|
|
1279
|
-
|
|
1280
|
-
# Track what was cleaned
|
|
1281
|
-
CLEANED_ITEMS=()
|
|
1282
|
-
|
|
1283
|
-
# Clean local images
|
|
1284
|
-
if clean_local; then
|
|
1285
|
-
CLEANED_ITEMS+=("Local Docker images")
|
|
1286
|
-
fi
|
|
1287
|
-
|
|
1288
|
-
echo ""
|
|
1289
|
-
|
|
1290
|
-
# Clean ECR images
|
|
1291
|
-
if clean_ecr; then
|
|
1292
|
-
CLEANED_ITEMS+=("ECR images")
|
|
1293
|
-
fi
|
|
1294
|
-
|
|
1295
|
-
echo ""
|
|
1296
|
-
|
|
1297
|
-
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
1298
|
-
# Clean SageMaker resources
|
|
1299
|
-
if clean_endpoint; then
|
|
1300
|
-
CLEANED_ITEMS+=("SageMaker resources")
|
|
1301
|
-
fi
|
|
1302
|
-
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
1303
|
-
# Clean SageMaker async resources
|
|
1304
|
-
if clean_endpoint; then
|
|
1305
|
-
CLEANED_ITEMS+=("SageMaker async resources")
|
|
1306
|
-
fi
|
|
1307
|
-
<% } else if (deploymentTarget === 'batch-transform') { %>
|
|
1308
|
-
# Clean SageMaker managed inference batch resources
|
|
1309
|
-
if clean_batch; then
|
|
1310
|
-
CLEANED_ITEMS+=("SageMaker managed inference batch resources")
|
|
1311
|
-
fi
|
|
1312
|
-
<% } else if (deploymentTarget === 'hyperpod-eks') { %>
|
|
1313
|
-
# Clean HyperPod EKS resources
|
|
1314
|
-
if clean_hyperpod; then
|
|
1315
|
-
CLEANED_ITEMS+=("HyperPod EKS resources")
|
|
1316
|
-
fi
|
|
1317
|
-
<% } %>
|
|
1318
|
-
|
|
1319
|
-
echo ""
|
|
1320
|
-
|
|
1321
|
-
# Clean CodeBuild resources
|
|
1322
|
-
if clean_codebuild; then
|
|
1323
|
-
CLEANED_ITEMS+=("CodeBuild resources")
|
|
1324
|
-
fi
|
|
1325
|
-
|
|
1326
|
-
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
1327
|
-
echo ""
|
|
1328
|
-
|
|
1329
|
-
# Clean benchmark resources
|
|
1330
|
-
WORKLOAD_CONFIG_NAME="${PROJECT_NAME}-benchmark-config"
|
|
1331
|
-
|
|
1332
|
-
# Delete workload config if exists
|
|
1333
|
-
if aws sagemaker describe-ai-workload-config \
|
|
1334
|
-
--ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
|
|
1335
|
-
--region "$AWS_REGION" 2>/dev/null; then
|
|
1336
|
-
aws sagemaker delete-ai-workload-config \
|
|
1337
|
-
--ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
|
|
1338
|
-
--region "$AWS_REGION"
|
|
1339
|
-
echo " ✓ Deleted workload config: $WORKLOAD_CONFIG_NAME"
|
|
1340
|
-
fi
|
|
1341
|
-
|
|
1342
|
-
# Delete terminal benchmark jobs matching project prefix
|
|
1343
|
-
aws sagemaker list-ai-benchmark-jobs \
|
|
1344
|
-
--name-contains "${PROJECT_NAME}-benchmark-" \
|
|
1345
|
-
--region "$AWS_REGION" \
|
|
1346
|
-
--query 'AIBenchmarkJobs[?AIBenchmarkJobStatus!=`InProgress`].AIBenchmarkJobName' \
|
|
1347
|
-
--output text | tr '\t' '\n' | while read -r job; do
|
|
1348
|
-
[ -z "$job" ] && continue
|
|
1349
|
-
aws sagemaker delete-ai-benchmark-job \
|
|
1350
|
-
--ai-benchmark-job-name "$job" \
|
|
1351
|
-
--region "$AWS_REGION"
|
|
1352
|
-
echo " ✓ Deleted benchmark job: $job"
|
|
1353
|
-
done
|
|
1354
|
-
|
|
1355
|
-
# Delete local benchmark results
|
|
1356
|
-
if [ -d "${SCRIPT_DIR}/../benchmarks" ]; then
|
|
1357
|
-
read -p "Delete local benchmark results? (Y/n) " CONFIRM_DELETE
|
|
1358
|
-
CONFIRM_DELETE="${CONFIRM_DELETE:-Y}"
|
|
1359
|
-
if [[ "${CONFIRM_DELETE}" =~ ^[Yy]$ ]]; then
|
|
1360
|
-
rm -rf "${SCRIPT_DIR}/../benchmarks"
|
|
1361
|
-
echo " ✓ Deleted local benchmarks/ directory"
|
|
1362
|
-
else
|
|
1363
|
-
echo " ⏭ Skipped local benchmarks/ deletion"
|
|
1364
|
-
fi
|
|
1365
|
-
fi
|
|
1366
|
-
|
|
1367
|
-
CLEANED_ITEMS+=("Benchmark resources")
|
|
1368
|
-
<% } %>
|
|
1369
|
-
# Display summary
|
|
1370
|
-
echo ""
|
|
1371
|
-
echo "✅ Cleanup complete!"
|
|
1372
|
-
echo ""
|
|
1373
|
-
echo "Summary of cleaned resources:"
|
|
1374
|
-
for item in "${CLEANED_ITEMS[@]}"; do
|
|
1375
|
-
echo " ✓ ${item}"
|
|
1376
|
-
done
|
|
1377
|
-
;;
|
|
1378
|
-
*)
|
|
1379
|
-
echo "❌ Unknown cleanup target: ${CLEANUP_TARGET}"
|
|
1380
|
-
echo ""
|
|
1381
|
-
show_usage
|
|
1382
|
-
exit 1
|
|
1383
|
-
;;
|
|
1384
|
-
esac
|
|
1385
|
-
|
|
1386
|
-
echo ""
|
|
1387
|
-
echo "Cleanup finished!"
|
|
1
|
+
<%- include('clean.d/' + (deploymentTarget === 'realtime-inference' ? 'managed-inference' : deploymentTarget)) %>
|