@aws/ml-container-creator 0.8.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE-THIRD-PARTY +50760 -16218
- package/bin/cli.js +31 -137
- package/package.json +7 -2
- package/servers/lib/catalogs/instances.json +52 -1275
- package/servers/lib/catalogs/models.json +0 -132
- package/servers/lib/catalogs/popular-diffusors.json +1 -110
- package/src/app.js +29 -2
- package/src/lib/config-manager.js +17 -0
- package/src/lib/generated/cli-options.js +467 -0
- package/src/lib/generated/validation-rules.js +202 -0
- package/src/lib/mcp-client.js +16 -1
- package/src/lib/mcp-command-handler.js +10 -2
- package/src/lib/prompt-runner.js +16 -2
- package/src/lib/train-config-parser.js +136 -0
- package/src/lib/train-config-persistence.js +143 -0
- package/src/lib/train-config-validator.js +112 -0
- package/src/lib/train-feedback.js +46 -0
- package/src/lib/train-idempotency.js +97 -0
- package/src/lib/train-request-builder.js +120 -0
- package/templates/code/serve +5 -134
- package/templates/code/serve.d/lmi.ejs +19 -0
- package/templates/code/serve.d/sglang.ejs +47 -0
- package/templates/code/serve.d/tensorrt-llm.ejs +53 -0
- package/templates/code/serve.d/vllm.ejs +48 -0
- package/templates/do/.train_build_request.py +141 -0
- package/templates/do/.train_poll_parser.py +135 -0
- package/templates/do/.train_status_parser.py +187 -0
- package/templates/do/clean +1 -1387
- package/templates/do/clean.d/async-inference.ejs +508 -0
- package/templates/do/clean.d/batch-transform.ejs +512 -0
- package/templates/do/clean.d/hyperpod-eks.ejs +481 -0
- package/templates/do/clean.d/managed-inference.ejs +1043 -0
- package/templates/do/deploy +1 -1766
- package/templates/do/deploy.d/async-inference.ejs +501 -0
- package/templates/do/deploy.d/batch-transform.ejs +529 -0
- package/templates/do/deploy.d/hyperpod-eks.ejs +339 -0
- package/templates/do/deploy.d/managed-inference.ejs +726 -0
- package/templates/do/lib/feedback.sh +41 -0
- package/templates/do/train +786 -0
- package/templates/do/training/config.yaml +140 -0
- package/templates/do/training/train.py +463 -0
|
@@ -0,0 +1,1043 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
# Strict mode: stop on a failing command, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -euo pipefail

# Load project settings from the config file located next to this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/config"

# Command-line parsing.
#   --force           skip interactive confirmation prompts
#   any other -flag   accepted and ignored
#   1st bare word     cleanup target
#   2nd bare word     argument for that target
#   further words     ignored
CLEANUP_TARGET=""
CLEANUP_ARG=""
FORCE_CLEAN=false

for arg; do
    if [[ "${arg}" == "--force" ]]; then
        FORCE_CLEAN=true
    elif [[ "${arg}" == -* ]]; then
        : # ignore other flags
    elif [[ -z "${CLEANUP_TARGET}" ]]; then
        CLEANUP_TARGET="${arg}"
    elif [[ -z "${CLEANUP_ARG}" ]]; then
        CLEANUP_ARG="${arg}"
    fi
done
|
|
31
|
+
|
|
32
|
+
# Function to display usage
|
|
33
|
+
show_usage() {
|
|
34
|
+
echo "Usage: ./do/clean [local|ecr|endpoint|ic <name>|<% if (typeof enableLora !== 'undefined' && enableLora) { %>adapter <name>|adapters|<% } %>codebuild|all]"
|
|
35
|
+
echo ""
|
|
36
|
+
echo "Cleanup targets:"
|
|
37
|
+
echo " local - Remove local Docker images"
|
|
38
|
+
echo " ecr - Remove images from Amazon ECR"
|
|
39
|
+
echo " endpoint - Delete SageMaker endpoint, configuration, and inference components"
|
|
40
|
+
echo " ic <name> - Delete a single inference component (does not touch the endpoint)"
|
|
41
|
+
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
42
|
+
echo " adapter <name> - Delete a single LoRA adapter (synonym for do/adapter remove)"
|
|
43
|
+
echo " adapters - Remove ALL LoRA adapters (keeps base IC and endpoint running)"
|
|
44
|
+
<% } %>
|
|
45
|
+
echo " codebuild - Delete CodeBuild project, IAM role, and S3 source artifacts"
|
|
46
|
+
echo " all - Perform all cleanup operations"
|
|
47
|
+
echo ""
|
|
48
|
+
echo "Examples:"
|
|
49
|
+
echo " ./do/clean local # Remove local Docker images only"
|
|
50
|
+
echo " ./do/clean endpoint # Delete SageMaker resources only"
|
|
51
|
+
echo " ./do/clean ic llama # Delete a single inference component"
|
|
52
|
+
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
53
|
+
echo " ./do/clean adapter ectsum # Delete a single LoRA adapter"
|
|
54
|
+
echo " ./do/clean adapters # Remove all LoRA adapters"
|
|
55
|
+
<% } %>
|
|
56
|
+
echo " ./do/clean codebuild # Delete CodeBuild project and rebuild fresh"
|
|
57
|
+
echo " ./do/clean all # Clean up everything"
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
# Ask the user to approve a destructive action.
# Globals:   FORCE_CLEAN (read) - when "true" the prompt is skipped
# Arguments: $1 - warning text shown above the prompt
# Returns:   0 when forced or when the reply is "yes" (any letter case),
#            1 for any other reply
confirm_action() {
    local warning_text="$1"

    # --force: approve without prompting.
    if [ "${FORCE_CLEAN}" = true ]; then
        return 0
    fi

    echo ""
    echo "⚠️ ${warning_text}"
    read -p " Are you sure? (yes/no): " -r
    echo ""

    if [[ $REPLY =~ ^[Yy][Ee][Ss]$ ]]; then
        return 0
    fi

    echo "❌ Operation cancelled"
    return 1
}
|
|
76
|
+
|
|
77
|
+
# Remove local Docker images that belong to this project.
#
# An image matches when its "repository:tag" either starts with
# "${PROJECT_NAME}:" or looks like an ECR URI for ${ECR_REPOSITORY_NAME} whose
# tag starts with "${PROJECT_NAME}-".
#
# Globals:   PROJECT_NAME, ECR_REPOSITORY_NAME (read)
#            LOCAL_PATTERN, ECR_PATTERN (written)
# Returns:   0 when nothing matched or after the removal pass,
#            1 when the user declines the confirmation
clean_local() {
    echo "🧹 Cleaning local Docker images"
    echo " Project: ${PROJECT_NAME}"

    # Build list of image patterns to clean
    LOCAL_PATTERN="^${PROJECT_NAME}:"
    ECR_PATTERN="\.dkr\.ecr\..*\.amazonaws\.com/${ECR_REPOSITORY_NAME}:${PROJECT_NAME}-"

    # List the images once and reuse the result for the existence check, the
    # preview and the removal. Piping `docker images` into `grep -q` under
    # `pipefail` can report "no images" when grep exits early and docker gets
    # SIGPIPE; a single snapshot also guarantees that what is shown is exactly
    # what gets removed.
    local all_images matching_images image
    all_images="$(docker images --format "{{.Repository}}:{{.Tag}}")" || all_images=""
    matching_images="$(grep -E "${LOCAL_PATTERN}|${ECR_PATTERN}" <<<"${all_images}" || true)"

    if [ -z "${matching_images}" ]; then
        echo "ℹ️ No local images found for ${PROJECT_NAME}"
        return 0
    fi

    # List images to be removed
    echo ""
    echo "Images to be removed:"
    while read -r image; do
        echo " ${image}"
    done <<<"${matching_images}"

    if ! confirm_action "This will remove all local Docker images for ${PROJECT_NAME}"; then
        return 1
    fi

    # Remove images; a failure on one image does not stop the others.
    echo "🗑️ Removing local images..."
    while read -r image; do
        echo " Removing: ${image}"
        docker rmi "${image}" </dev/null || echo " ⚠️ Failed to remove ${image}"
    done <<<"${matching_images}"

    echo "✅ Local images cleaned"
}
|
|
112
|
+
|
|
113
|
+
# Delete this project's images from the ECR repository.
#
# Only images whose tag starts with "${PROJECT_NAME}-" are selected.
#
# Globals:   ECR_REPOSITORY_NAME, AWS_REGION, PROJECT_NAME (read)
#            AWS_ACCOUNT_ID (written)
# Returns:   0 when the repository or matching images do not exist, or after
#            a successful removal; 1 when listing fails, the user declines,
#            or any image could not be deleted
# Exits:     4 when AWS credentials are not usable
clean_ecr() {
    echo "🧹 Cleaning ECR images"
    echo " Repository: ${ECR_REPOSITORY_NAME}"
    echo " Region: ${AWS_REGION}"

    # Validate AWS credentials
    if ! aws sts get-caller-identity &> /dev/null; then
        echo "❌ AWS credentials not configured"
        echo " Run: aws configure"
        exit 4
    fi

    # Get AWS account ID
    AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)

    # Check if repository exists
    if ! aws ecr describe-repositories \
        --repository-names "${ECR_REPOSITORY_NAME}" \
        --region "${AWS_REGION}" &> /dev/null; then
        echo "ℹ️ ECR repository ${ECR_REPOSITORY_NAME} does not exist"
        return 0
    fi

    # List images in repository
    echo ""
    echo "Checking for images in repository..."

    # One "digest<TAB>tag" row per matching image. The `imageTag &&` guard
    # skips untagged images, for which starts_with() would be handed null.
    # A failed listing is reported as a failure rather than as "no images".
    local listing
    if ! listing=$(aws ecr list-images \
        --repository-name "${ECR_REPOSITORY_NAME}" \
        --region "${AWS_REGION}" \
        --query "imageIds[?imageTag && starts_with(imageTag, '${PROJECT_NAME}-')].[imageDigest, imageTag]" \
        --output text); then
        echo "❌ Failed to list images in ECR repository ${ECR_REPOSITORY_NAME}"
        return 1
    fi

    local -a image_specs=() image_tags=()
    local digest tag
    while IFS=$'\t' read -r digest tag; do
        if [ -z "${tag}" ] || [ "${tag}" = "None" ]; then
            continue
        fi
        image_specs+=("imageDigest=${digest},imageTag=${tag}")
        image_tags+=("${tag}")
    done <<<"${listing}"

    if [ "${#image_specs[@]}" -eq 0 ]; then
        echo "ℹ️ No images found for project: ${PROJECT_NAME}"
        return 0
    fi

    # Display images
    echo "Images for project ${PROJECT_NAME}:"
    for tag in "${image_tags[@]}"; do
        echo " - ${tag}"
    done

    if ! confirm_action "This will remove all images from ECR repository ${ECR_REPOSITORY_NAME}"; then
        return 1
    fi

    # Remove images
    echo "🗑️ Removing ECR images..."

    # batch-delete-image accepts at most 100 image ids per request, and it can
    # exit 0 while still reporting per-image failures, so delete in chunks and
    # inspect the `failures` list of every response.
    local batch_size=100
    local offset=0
    local total="${#image_specs[@]}"
    local failed_codes
    while [ "${offset}" -lt "${total}" ]; do
        if ! failed_codes=$(aws ecr batch-delete-image \
            --repository-name "${ECR_REPOSITORY_NAME}" \
            --region "${AWS_REGION}" \
            --image-ids "${image_specs[@]:${offset}:${batch_size}}" \
            --query 'failures[].failureCode' \
            --output text 2>/dev/null); then
            echo "❌ Failed to remove some ECR images"
            return 1
        fi
        if [ -n "${failed_codes}" ] && [ "${failed_codes}" != "None" ]; then
            echo "❌ Failed to remove some ECR images"
            return 1
        fi
        offset=$((offset + batch_size))
    done

    echo "✅ ECR images removed for project: ${PROJECT_NAME}"
}
|
|
190
|
+
|
|
191
|
+
# Function to clean SageMaker endpoint and inference components
|
|
192
|
+
clean_endpoint() {
|
|
193
|
+
echo "🧹 Cleaning SageMaker resources"
|
|
194
|
+
echo " Project: ${PROJECT_NAME}"
|
|
195
|
+
echo " Region: ${AWS_REGION}"
|
|
196
|
+
|
|
197
|
+
# Validate AWS credentials
|
|
198
|
+
if ! aws sts get-caller-identity &> /dev/null; then
|
|
199
|
+
echo "❌ AWS credentials not configured"
|
|
200
|
+
echo " Run: aws configure"
|
|
201
|
+
exit 4
|
|
202
|
+
fi
|
|
203
|
+
|
|
204
|
+
AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
|
|
205
|
+
|
|
206
|
+
# Use names from config (set by do/deploy) or argument
|
|
207
|
+
local EP_NAME="${ENDPOINT_NAME:-}"
|
|
208
|
+
local IC_NAME="${INFERENCE_COMPONENT_NAME:-}"
|
|
209
|
+
local EPC_NAME="${ENDPOINT_CONFIG_NAME:-}"
|
|
210
|
+
|
|
211
|
+
if [ -z "${EP_NAME}" ]; then
|
|
212
|
+
echo "❌ No endpoint name found"
|
|
213
|
+
echo " Run ./do/deploy first, or set ENDPOINT_NAME in do/config"
|
|
214
|
+
return 1
|
|
215
|
+
fi
|
|
216
|
+
|
|
217
|
+
# External endpoint: only remove inference components, not the endpoint itself
|
|
218
|
+
if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
|
|
219
|
+
echo ""
|
|
220
|
+
echo "⚠️ Endpoint is external — only removing inference components"
|
|
221
|
+
echo " Endpoint ${EP_NAME} will NOT be deleted (managed externally)."
|
|
222
|
+
echo ""
|
|
223
|
+
|
|
224
|
+
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
225
|
+
# Delete adapter ICs first (adapters depend on base ICs)
|
|
226
|
+
if [ -d "${SCRIPT_DIR}/adapters" ]; then
|
|
227
|
+
local ADAPTER_COUNT=0
|
|
228
|
+
for adapter_conf in "${SCRIPT_DIR}"/adapters/*.conf; do
|
|
229
|
+
[ -f "${adapter_conf}" ] || continue
|
|
230
|
+
ADAPTER_COUNT=$((ADAPTER_COUNT + 1))
|
|
231
|
+
done
|
|
232
|
+
|
|
233
|
+
if [ "${ADAPTER_COUNT}" -gt 0 ]; then
|
|
234
|
+
echo "🔌 Deleting ${ADAPTER_COUNT} LoRA adapter(s) first..."
|
|
235
|
+
for adapter_conf in "${SCRIPT_DIR}"/adapters/*.conf; do
|
|
236
|
+
[ -f "${adapter_conf}" ] || continue
|
|
237
|
+
local adapter_ic_name=""
|
|
238
|
+
adapter_ic_name=$(grep "^export ADAPTER_IC_NAME=" "${adapter_conf}" 2>/dev/null | sed 's/^export ADAPTER_IC_NAME="//' | sed 's/"$//' || echo "")
|
|
239
|
+
local adapter_display_name
|
|
240
|
+
adapter_display_name=$(basename "${adapter_conf}" .conf)
|
|
241
|
+
|
|
242
|
+
if [ -n "${adapter_ic_name}" ]; then
|
|
243
|
+
echo "🗑️ Deleting adapter: ${adapter_display_name} (${adapter_ic_name})"
|
|
244
|
+
if aws sagemaker delete-inference-component \
|
|
245
|
+
--inference-component-name "${adapter_ic_name}" \
|
|
246
|
+
--region "${AWS_REGION}" 2>/dev/null; then
|
|
247
|
+
echo "⏳ Waiting for adapter deletion..."
|
|
248
|
+
aws sagemaker wait inference-component-deleted \
|
|
249
|
+
--inference-component-name "${adapter_ic_name}" \
|
|
250
|
+
--region "${AWS_REGION}" 2>/dev/null || sleep 15
|
|
251
|
+
echo "✅ Adapter deleted: ${adapter_display_name}"
|
|
252
|
+
|
|
253
|
+
# Mark adapter IC as deleted in manifest (non-blocking)
|
|
254
|
+
./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${adapter_ic_name}" 2>/dev/null || true
|
|
255
|
+
else
|
|
256
|
+
echo "⚠️ Failed to delete adapter IC: ${adapter_ic_name} (may already be gone)"
|
|
257
|
+
fi
|
|
258
|
+
fi
|
|
259
|
+
|
|
260
|
+
# Remove adapter conf file
|
|
261
|
+
rm -f "${adapter_conf}"
|
|
262
|
+
done
|
|
263
|
+
echo "✅ All adapters deleted"
|
|
264
|
+
echo ""
|
|
265
|
+
fi
|
|
266
|
+
fi
|
|
267
|
+
|
|
268
|
+
<% } %>
|
|
269
|
+
# Iterate do/ic/*.conf and delete each IC owned by this project
|
|
270
|
+
local IC_DELETED=0
|
|
271
|
+
if [ -d "${SCRIPT_DIR}/ic" ]; then
|
|
272
|
+
for conf in "${SCRIPT_DIR}"/ic/*.conf; do
|
|
273
|
+
[ -f "${conf}" ] || continue
|
|
274
|
+
local ic_deployed_name=""
|
|
275
|
+
if grep -q "^export IC_DEPLOYED_NAME=" "${conf}" 2>/dev/null; then
|
|
276
|
+
ic_deployed_name=$(grep "^export IC_DEPLOYED_NAME=" "${conf}" | sed 's/^export IC_DEPLOYED_NAME="//' | sed 's/"$//')
|
|
277
|
+
fi
|
|
278
|
+
if [ -n "${ic_deployed_name}" ]; then
|
|
279
|
+
echo "🗑️ Deleting inference component: ${ic_deployed_name}"
|
|
280
|
+
if aws sagemaker delete-inference-component \
|
|
281
|
+
--inference-component-name "${ic_deployed_name}" \
|
|
282
|
+
--region "${AWS_REGION}" 2>/dev/null; then
|
|
283
|
+
echo "⏳ Waiting for inference component deletion..."
|
|
284
|
+
aws sagemaker wait inference-component-deleted \
|
|
285
|
+
--inference-component-name "${ic_deployed_name}" \
|
|
286
|
+
--region "${AWS_REGION}" 2>/dev/null || sleep 15
|
|
287
|
+
echo "✅ Inference component deleted: ${ic_deployed_name}"
|
|
288
|
+
|
|
289
|
+
# Mark inference component as deleted in manifest (non-blocking)
|
|
290
|
+
./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${ic_deployed_name}" 2>/dev/null || true
|
|
291
|
+
|
|
292
|
+
# Clear deployed state from config
|
|
293
|
+
sed -i.bak '/^export IC_DEPLOYED_NAME=/d;/^export IC_DEPLOYED_AT=/d' "${conf}"
|
|
294
|
+
rm -f "${conf}.bak"
|
|
295
|
+
else
|
|
296
|
+
echo "⚠️ Failed to delete inference component: ${ic_deployed_name}"
|
|
297
|
+
fi
|
|
298
|
+
IC_DELETED=$((IC_DELETED + 1))
|
|
299
|
+
fi
|
|
300
|
+
done
|
|
301
|
+
fi
|
|
302
|
+
|
|
303
|
+
# Also handle legacy single IC from config
|
|
304
|
+
if [ -n "${IC_NAME}" ]; then
|
|
305
|
+
if aws sagemaker describe-inference-component \
|
|
306
|
+
--inference-component-name "${IC_NAME}" \
|
|
307
|
+
--region "${AWS_REGION}" &> /dev/null; then
|
|
308
|
+
echo "🗑️ Deleting inference component: ${IC_NAME}"
|
|
309
|
+
if aws sagemaker delete-inference-component \
|
|
310
|
+
--inference-component-name "${IC_NAME}" \
|
|
311
|
+
--region "${AWS_REGION}" 2>/dev/null; then
|
|
312
|
+
echo "⏳ Waiting for inference component deletion..."
|
|
313
|
+
aws sagemaker wait inference-component-deleted \
|
|
314
|
+
--inference-component-name "${IC_NAME}" \
|
|
315
|
+
--region "${AWS_REGION}" 2>/dev/null || sleep 15
|
|
316
|
+
echo "✅ Inference component deleted: ${IC_NAME}"
|
|
317
|
+
|
|
318
|
+
# Mark inference component as deleted in manifest (non-blocking)
|
|
319
|
+
./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${IC_NAME}" 2>/dev/null || true
|
|
320
|
+
fi
|
|
321
|
+
IC_DELETED=$((IC_DELETED + 1))
|
|
322
|
+
fi
|
|
323
|
+
fi
|
|
324
|
+
|
|
325
|
+
if [ "${IC_DELETED}" -eq 0 ]; then
|
|
326
|
+
echo "ℹ️ No deployed inference components found to clean"
|
|
327
|
+
fi
|
|
328
|
+
|
|
329
|
+
echo "✅ External endpoint cleanup complete (endpoint preserved)"
|
|
330
|
+
return 0
|
|
331
|
+
fi
|
|
332
|
+
|
|
333
|
+
echo ""
|
|
334
|
+
echo "Checking for SageMaker resources..."
|
|
335
|
+
|
|
336
|
+
local ENDPOINT_EXISTS=false
|
|
337
|
+
|
|
338
|
+
if aws sagemaker describe-endpoint \
|
|
339
|
+
--endpoint-name "${EP_NAME}" \
|
|
340
|
+
--region "${AWS_REGION}" &> /dev/null; then
|
|
341
|
+
ENDPOINT_EXISTS=true
|
|
342
|
+
echo " ✓ Endpoint: ${EP_NAME}"
|
|
343
|
+
else
|
|
344
|
+
echo "ℹ️ Endpoint not found: ${EP_NAME}"
|
|
345
|
+
return 0
|
|
346
|
+
fi
|
|
347
|
+
|
|
348
|
+
# Count ICs to be deleted (multi-IC path)
|
|
349
|
+
local IC_COUNT=0
|
|
350
|
+
local IC_NAMES_TO_DELETE=()
|
|
351
|
+
local IC_CONFS_TO_CLEAN=()
|
|
352
|
+
|
|
353
|
+
if [ -d "${SCRIPT_DIR}/ic" ]; then
|
|
354
|
+
for conf in "${SCRIPT_DIR}"/ic/*.conf; do
|
|
355
|
+
[ -f "${conf}" ] || continue
|
|
356
|
+
local ic_deployed_name=""
|
|
357
|
+
if grep -q "^export IC_DEPLOYED_NAME=" "${conf}" 2>/dev/null; then
|
|
358
|
+
ic_deployed_name=$(grep "^export IC_DEPLOYED_NAME=" "${conf}" | sed 's/^export IC_DEPLOYED_NAME="//' | sed 's/"$//')
|
|
359
|
+
fi
|
|
360
|
+
if [ -n "${ic_deployed_name}" ]; then
|
|
361
|
+
IC_NAMES_TO_DELETE+=("${ic_deployed_name}")
|
|
362
|
+
IC_CONFS_TO_CLEAN+=("${conf}")
|
|
363
|
+
IC_COUNT=$((IC_COUNT + 1))
|
|
364
|
+
echo " ✓ Inference component: ${ic_deployed_name}"
|
|
365
|
+
fi
|
|
366
|
+
done
|
|
367
|
+
fi
|
|
368
|
+
|
|
369
|
+
# Legacy: check single IC from config (no do/ic/ directory)
|
|
370
|
+
local IC_EXISTS=false
|
|
371
|
+
if [ "${IC_COUNT}" -eq 0 ] && [ -n "${IC_NAME}" ]; then
|
|
372
|
+
if aws sagemaker describe-inference-component \
|
|
373
|
+
--inference-component-name "${IC_NAME}" \
|
|
374
|
+
--region "${AWS_REGION}" &> /dev/null; then
|
|
375
|
+
IC_EXISTS=true
|
|
376
|
+
IC_COUNT=1
|
|
377
|
+
echo " ✓ Inference component: ${IC_NAME}"
|
|
378
|
+
fi
|
|
379
|
+
fi
|
|
380
|
+
|
|
381
|
+
# Confirmation with IC count
|
|
382
|
+
local confirm_msg="Delete ${IC_COUNT} inference component"
|
|
383
|
+
if [ "${IC_COUNT}" -ne 1 ]; then
|
|
384
|
+
confirm_msg="${confirm_msg}s"
|
|
385
|
+
fi
|
|
386
|
+
confirm_msg="${confirm_msg} and endpoint?"
|
|
387
|
+
|
|
388
|
+
if ! confirm_action "${confirm_msg}"; then
|
|
389
|
+
return 1
|
|
390
|
+
fi
|
|
391
|
+
|
|
392
|
+
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
393
|
+
# Delete adapter ICs first (adapters depend on base ICs)
|
|
394
|
+
if [ -d "${SCRIPT_DIR}/adapters" ]; then
|
|
395
|
+
local ADAPTER_COUNT=0
|
|
396
|
+
for adapter_conf in "${SCRIPT_DIR}"/adapters/*.conf; do
|
|
397
|
+
[ -f "${adapter_conf}" ] || continue
|
|
398
|
+
ADAPTER_COUNT=$((ADAPTER_COUNT + 1))
|
|
399
|
+
done
|
|
400
|
+
|
|
401
|
+
if [ "${ADAPTER_COUNT}" -gt 0 ]; then
|
|
402
|
+
echo ""
|
|
403
|
+
echo "🔌 Deleting ${ADAPTER_COUNT} LoRA adapter(s) first..."
|
|
404
|
+
for adapter_conf in "${SCRIPT_DIR}"/adapters/*.conf; do
|
|
405
|
+
[ -f "${adapter_conf}" ] || continue
|
|
406
|
+
local adapter_ic_name=""
|
|
407
|
+
adapter_ic_name=$(grep "^export ADAPTER_IC_NAME=" "${adapter_conf}" 2>/dev/null | sed 's/^export ADAPTER_IC_NAME="//' | sed 's/"$//' || echo "")
|
|
408
|
+
local adapter_display_name
|
|
409
|
+
adapter_display_name=$(basename "${adapter_conf}" .conf)
|
|
410
|
+
|
|
411
|
+
if [ -n "${adapter_ic_name}" ]; then
|
|
412
|
+
echo "🗑️ Deleting adapter: ${adapter_display_name} (${adapter_ic_name})"
|
|
413
|
+
if aws sagemaker delete-inference-component \
|
|
414
|
+
--inference-component-name "${adapter_ic_name}" \
|
|
415
|
+
--region "${AWS_REGION}" 2>/dev/null; then
|
|
416
|
+
echo "⏳ Waiting for adapter deletion..."
|
|
417
|
+
aws sagemaker wait inference-component-deleted \
|
|
418
|
+
--inference-component-name "${adapter_ic_name}" \
|
|
419
|
+
--region "${AWS_REGION}" 2>/dev/null || sleep 15
|
|
420
|
+
echo "✅ Adapter deleted: ${adapter_display_name}"
|
|
421
|
+
|
|
422
|
+
# Mark adapter IC as deleted in manifest (non-blocking)
|
|
423
|
+
./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${adapter_ic_name}" 2>/dev/null || true
|
|
424
|
+
else
|
|
425
|
+
echo "⚠️ Failed to delete adapter IC: ${adapter_ic_name} (may already be gone)"
|
|
426
|
+
fi
|
|
427
|
+
fi
|
|
428
|
+
|
|
429
|
+
# Remove adapter conf file
|
|
430
|
+
rm -f "${adapter_conf}"
|
|
431
|
+
done
|
|
432
|
+
echo "✅ All adapters deleted"
|
|
433
|
+
echo ""
|
|
434
|
+
fi
|
|
435
|
+
fi
|
|
436
|
+
|
|
437
|
+
<% } %>
|
|
438
|
+
# Delete inference components first (must be deleted before endpoint)
|
|
439
|
+
if [ ${#IC_NAMES_TO_DELETE[@]} -gt 0 ]; then
|
|
440
|
+
# Multi-IC path: iterate do/ic/*.conf
|
|
441
|
+
local idx=0
|
|
442
|
+
for ic_deployed_name in "${IC_NAMES_TO_DELETE[@]}"; do
|
|
443
|
+
local conf="${IC_CONFS_TO_CLEAN[$idx]}"
|
|
444
|
+
echo "🗑️ Deleting inference component: ${ic_deployed_name}"
|
|
445
|
+
if aws sagemaker delete-inference-component \
|
|
446
|
+
--inference-component-name "${ic_deployed_name}" \
|
|
447
|
+
--region "${AWS_REGION}" 2>/dev/null; then
|
|
448
|
+
echo "⏳ Waiting for inference component deletion..."
|
|
449
|
+
aws sagemaker wait inference-component-deleted \
|
|
450
|
+
--inference-component-name "${ic_deployed_name}" \
|
|
451
|
+
--region "${AWS_REGION}" 2>/dev/null || sleep 15
|
|
452
|
+
echo "✅ Inference component deleted: ${ic_deployed_name}"
|
|
453
|
+
|
|
454
|
+
# Mark inference component as deleted in manifest (non-blocking)
|
|
455
|
+
./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${ic_deployed_name}" 2>/dev/null || true
|
|
456
|
+
|
|
457
|
+
# Clear deployed state from config
|
|
458
|
+
sed -i.bak '/^export IC_DEPLOYED_NAME=/d;/^export IC_DEPLOYED_AT=/d' "${conf}"
|
|
459
|
+
rm -f "${conf}.bak"
|
|
460
|
+
else
|
|
461
|
+
echo "❌ Failed to delete inference component: ${ic_deployed_name}"
|
|
462
|
+
fi
|
|
463
|
+
idx=$((idx + 1))
|
|
464
|
+
done
|
|
465
|
+
elif [ "${IC_EXISTS}" = true ]; then
|
|
466
|
+
# Legacy single IC path
|
|
467
|
+
echo "🗑️ Deleting inference component: ${IC_NAME}"
|
|
468
|
+
if aws sagemaker delete-inference-component \
|
|
469
|
+
--inference-component-name "${IC_NAME}" \
|
|
470
|
+
--region "${AWS_REGION}" &> /dev/null; then
|
|
471
|
+
echo "⏳ Waiting for inference component deletion..."
|
|
472
|
+
aws sagemaker wait inference-component-deleted \
|
|
473
|
+
--inference-component-name "${IC_NAME}" \
|
|
474
|
+
--region "${AWS_REGION}" 2>/dev/null || sleep 15
|
|
475
|
+
echo "✅ Inference component deleted"
|
|
476
|
+
|
|
477
|
+
# Mark inference component as deleted in manifest (non-blocking)
|
|
478
|
+
./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${IC_NAME}" 2>/dev/null || true
|
|
479
|
+
else
|
|
480
|
+
echo "❌ Failed to delete inference component"
|
|
481
|
+
fi
|
|
482
|
+
fi
|
|
483
|
+
|
|
484
|
+
# Delete endpoint
|
|
485
|
+
echo "🗑️ Deleting endpoint: ${EP_NAME}"
|
|
486
|
+
if aws sagemaker delete-endpoint \
|
|
487
|
+
--endpoint-name "${EP_NAME}" \
|
|
488
|
+
--region "${AWS_REGION}" &> /dev/null; then
|
|
489
|
+
echo "✅ Endpoint deleted"
|
|
490
|
+
echo "⏳ Waiting for endpoint deletion..."
|
|
491
|
+
aws sagemaker wait endpoint-deleted \
|
|
492
|
+
--endpoint-name "${EP_NAME}" \
|
|
493
|
+
--region "${AWS_REGION}" 2>/dev/null || sleep 10
|
|
494
|
+
|
|
495
|
+
# Mark endpoint as deleted in manifest (non-blocking)
|
|
496
|
+
./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint/${EP_NAME}" 2>/dev/null || true
|
|
497
|
+
else
|
|
498
|
+
echo "❌ Failed to delete endpoint"
|
|
499
|
+
fi
|
|
500
|
+
|
|
501
|
+
# Delete endpoint configuration
|
|
502
|
+
if [ -n "${EPC_NAME}" ]; then
|
|
503
|
+
echo "🗑️ Deleting endpoint configuration: ${EPC_NAME}"
|
|
504
|
+
if aws sagemaker delete-endpoint-config \
|
|
505
|
+
--endpoint-config-name "${EPC_NAME}" \
|
|
506
|
+
--region "${AWS_REGION}" &> /dev/null; then
|
|
507
|
+
echo "✅ Endpoint configuration deleted"
|
|
508
|
+
|
|
509
|
+
# Mark endpoint config as deleted in manifest (non-blocking)
|
|
510
|
+
./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint-config/${EPC_NAME}" 2>/dev/null || true
|
|
511
|
+
else
|
|
512
|
+
echo "❌ Failed to delete endpoint configuration"
|
|
513
|
+
fi
|
|
514
|
+
fi
|
|
515
|
+
|
|
516
|
+
# Remove saved names from config
|
|
517
|
+
if grep -q "^export ENDPOINT_NAME=" "${SCRIPT_DIR}/config" 2>/dev/null; then
|
|
518
|
+
sed -i.bak '/^# Last deployed resources/d;/^export ENDPOINT_NAME=/d;/^export ENDPOINT_CONFIG_NAME=/d;/^export INFERENCE_COMPONENT_NAME=/d' "${SCRIPT_DIR}/config"
|
|
519
|
+
rm -f "${SCRIPT_DIR}/config.bak"
|
|
520
|
+
fi
|
|
521
|
+
|
|
522
|
+
echo "✅ SageMaker resources cleaned"
|
|
523
|
+
}
|
|
524
|
+
|
|
525
|
+
# Delete a single inference component identified by its local config name.
# The endpoint itself is not touched.
#
# Globals:   PROJECT_NAME, AWS_REGION, SCRIPT_DIR (read)
#            AWS_ACCOUNT_ID (written)
# Arguments: $1 - IC name, i.e. the basename of ${SCRIPT_DIR}/ic/<name>.conf
# Returns:   0 when the IC was deleted or was never deployed;
#            1 when the name is missing, the conf file does not exist, the
#            user declines, or the delete request fails
# Exits:     4 when AWS credentials are not usable
clean_ic() {
    # ${1:-} so that a missing argument reaches the friendly check below
    # instead of aborting with "unbound variable" under `set -u`.
    local ic_name="${1:-}"
    local conf

    echo "🧹 Cleaning inference component: ${ic_name}"
    echo " Project: ${PROJECT_NAME}"
    echo " Region: ${AWS_REGION}"

    # Validate IC name argument
    if [ -z "${ic_name}" ]; then
        echo "❌ IC name required"
        echo " Usage: ./do/clean ic <name>"
        return 1
    fi

    # Check that the IC config file exists
    local ic_conf="${SCRIPT_DIR}/ic/${ic_name}.conf"
    if [ ! -f "${ic_conf}" ]; then
        echo "❌ IC config not found: do/ic/${ic_name}.conf"
        echo " Available ICs:"
        if [ -d "${SCRIPT_DIR}/ic" ]; then
            for conf in "${SCRIPT_DIR}"/ic/*.conf; do
                [ -f "${conf}" ] || continue
                echo " - $(basename "${conf}" .conf)"
            done
        else
            echo " (none)"
        fi
        return 1
    fi

    # Validate AWS credentials
    if ! aws sts get-caller-identity &> /dev/null; then
        echo "❌ AWS credentials not configured"
        echo " Run: aws configure"
        exit 4
    fi

    AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)

    # Look up IC_DEPLOYED_NAME from the config file. The grep -q guard keeps
    # the extraction pipeline from failing under pipefail when there is no
    # such line.
    local ic_deployed_name=""
    if grep -q "^export IC_DEPLOYED_NAME=" "${ic_conf}" 2>/dev/null; then
        ic_deployed_name=$(grep "^export IC_DEPLOYED_NAME=" "${ic_conf}" | sed 's/^export IC_DEPLOYED_NAME="//' | sed 's/"$//')
    fi

    if [ -z "${ic_deployed_name}" ]; then
        echo "ℹ️ IC '${ic_name}' has not been deployed (no IC_DEPLOYED_NAME in config)"
        return 0
    fi

    echo " Deployed as: ${ic_deployed_name}"

    if ! confirm_action "This will delete inference component '${ic_deployed_name}'"; then
        return 1
    fi

    # Delete the inference component
    echo "🗑️ Deleting inference component: ${ic_deployed_name}"
    if aws sagemaker delete-inference-component \
        --inference-component-name "${ic_deployed_name}" \
        --region "${AWS_REGION}" 2>/dev/null; then
        echo "⏳ Waiting for inference component deletion..."
        # If the waiter fails, fall back to a fixed pause.
        aws sagemaker wait inference-component-deleted \
            --inference-component-name "${ic_deployed_name}" \
            --region "${AWS_REGION}" 2>/dev/null || sleep 15
        echo "✅ Inference component deleted: ${ic_deployed_name}"

        # Mark inference component as deleted in manifest (non-blocking)
        ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${ic_deployed_name}" 2>/dev/null || true

        # Clear deployed state from config
        sed -i.bak '/^export IC_DEPLOYED_NAME=/d;/^export IC_DEPLOYED_AT=/d' "${ic_conf}"
        rm -f "${ic_conf}.bak"
    else
        echo "❌ Failed to delete inference component: ${ic_deployed_name}"
        return 1
    fi

    echo "✅ Inference component '${ic_name}' cleaned"
}
|
|
605
|
+
|
|
606
|
+
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
607
|
+
# Delete a single LoRA adapter by name (synonym for do/adapter remove) and
# remove its local conf file.
#
# Globals:   PROJECT_NAME, AWS_REGION, SCRIPT_DIR (read)
#            AWS_ACCOUNT_ID (written)
# Arguments: $1 - adapter name, i.e. the basename of
#                 ${SCRIPT_DIR}/adapters/<name>.conf
# Returns:   0 after the conf file has been removed (even when the AWS delete
#            request failed, which is reported as a warning);
#            1 when the name is missing, the conf file does not exist, or the
#            user declines
# Exits:     4 when AWS credentials are not usable
clean_adapter() {
    # ${1:-} so that a missing argument reaches the friendly check below
    # instead of aborting with "unbound variable" under `set -u`.
    local adapter_name="${1:-}"
    local conf

    echo "🧹 Cleaning LoRA adapter: ${adapter_name}"
    echo " Project: ${PROJECT_NAME}"
    echo " Region: ${AWS_REGION}"

    # Validate adapter name argument
    if [ -z "${adapter_name}" ]; then
        echo "❌ Adapter name required"
        echo " Usage: ./do/clean adapter <name>"
        return 1
    fi

    # Check that the adapter config file exists
    local adapter_conf="${SCRIPT_DIR}/adapters/${adapter_name}.conf"
    if [ ! -f "${adapter_conf}" ]; then
        echo "❌ Adapter config not found: do/adapters/${adapter_name}.conf"
        echo " Available adapters:"
        if [ -d "${SCRIPT_DIR}/adapters" ]; then
            for conf in "${SCRIPT_DIR}"/adapters/*.conf; do
                [ -f "${conf}" ] || continue
                echo " - $(basename "${conf}" .conf)"
            done
        else
            echo " (none)"
        fi
        return 1
    fi

    # Validate AWS credentials
    if ! aws sts get-caller-identity &> /dev/null; then
        echo "❌ AWS credentials not configured"
        echo " Run: aws configure"
        exit 4
    fi

    AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)

    # Read ADAPTER_IC_NAME from the config file. With `set -e` and `pipefail`
    # a grep that matches nothing fails the whole assignment and would abort
    # the script before the empty-name branch below; `|| true` keeps that
    # branch reachable.
    local adapter_ic_name=""
    adapter_ic_name=$(grep "^export ADAPTER_IC_NAME=" "${adapter_conf}" 2>/dev/null | sed 's/^export ADAPTER_IC_NAME="//' | sed 's/"$//' || true)

    if [ -z "${adapter_ic_name}" ]; then
        echo "⚠️ No ADAPTER_IC_NAME found in do/adapters/${adapter_name}.conf"
        echo " Removing local config file."
        rm -f "${adapter_conf}"
        return 0
    fi

    echo " Adapter IC: ${adapter_ic_name}"

    if ! confirm_action "This will delete LoRA adapter '${adapter_name}' (IC: ${adapter_ic_name})"; then
        return 1
    fi

    # Delete the adapter inference component
    echo "🗑️ Deleting adapter inference component: ${adapter_ic_name}"
    if aws sagemaker delete-inference-component \
        --inference-component-name "${adapter_ic_name}" \
        --region "${AWS_REGION}" 2>/dev/null; then
        echo "⏳ Waiting for adapter IC deletion..."
        # If the waiter fails, fall back to a fixed pause.
        aws sagemaker wait inference-component-deleted \
            --inference-component-name "${adapter_ic_name}" \
            --region "${AWS_REGION}" 2>/dev/null || sleep 15
        echo "✅ Adapter IC deleted: ${adapter_ic_name}"

        # Mark adapter IC as deleted in manifest (non-blocking)
        ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${adapter_ic_name}" 2>/dev/null || true
    else
        echo "⚠️ Failed to delete adapter IC: ${adapter_ic_name} (may already be gone)"
    fi

    # Remove local conf file (done whether or not the AWS delete succeeded)
    rm -f "${adapter_conf}"
    echo "✅ Removed: do/adapters/${adapter_name}.conf"

    echo "✅ Adapter '${adapter_name}' cleaned"
}
|
|
686
|
+
|
|
687
|
+
# Function to clean ALL LoRA adapters (keeps base IC and endpoint running)
#
# Every do/adapters/*.conf file is treated as one adapter. For each adapter the
# inference component named by its ADAPTER_IC_NAME entry is deleted (when the
# entry is present) and the conf file is then removed.
#
# Globals read:    PROJECT_NAME, AWS_REGION, SCRIPT_DIR
# Globals written: AWS_ACCOUNT_ID
# Returns: 0 when there is nothing to clean or after the cleanup pass,
#          1 when confirm_action declines.
# Exits:   4 when AWS credentials are not usable.
clean_adapters() {
    # Loop/scratch variables are local so they cannot clobber same-named
    # variables of the surrounding script.
    local conf name adapter_conf adapter_ic_name adapter_display_name

    echo "🧹 Cleaning all LoRA adapters"
    echo " Project: ${PROJECT_NAME}"
    echo " Region: ${AWS_REGION}"

    # Check if adapters directory exists and has conf files
    if [ ! -d "${SCRIPT_DIR}/adapters" ]; then
        echo "ℹ️ No adapters directory found"
        return 0
    fi

    # Snapshot the conf files once; the same snapshot drives both the listing
    # shown to the user and the deletion pass, so only adapters that were
    # listed at confirmation time are ever removed.
    local ADAPTER_COUNT=0
    local ADAPTER_NAMES=()
    local ADAPTER_CONFS=()
    for conf in "${SCRIPT_DIR}"/adapters/*.conf; do
        # An unmatched glob stays literal; skip anything that is not a file.
        [ -f "${conf}" ] || continue
        ADAPTER_COUNT=$((ADAPTER_COUNT + 1))
        ADAPTER_NAMES+=("$(basename "${conf}" .conf)")
        ADAPTER_CONFS+=("${conf}")
    done

    if [ "${ADAPTER_COUNT}" -eq 0 ]; then
        echo "ℹ️ No adapters found to clean"
        return 0
    fi

    echo ""
    echo "Adapters to be removed (${ADAPTER_COUNT}):"
    for name in "${ADAPTER_NAMES[@]}"; do
        echo " • ${name}"
    done

    if ! confirm_action "This will delete ${ADAPTER_COUNT} LoRA adapter(s). Base IC and endpoint will remain running."; then
        return 1
    fi

    # Validate AWS credentials
    if ! aws sts get-caller-identity &> /dev/null; then
        echo "❌ AWS credentials not configured"
        echo " Run: aws configure"
        exit 4
    fi

    AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)

    # Delete each adapter
    local DELETED=0
    for adapter_conf in "${ADAPTER_CONFS[@]}"; do
        # The file may have been removed since the snapshot was taken.
        [ -f "${adapter_conf}" ] || continue

        # Extract the value of: export ADAPTER_IC_NAME="<value>"
        # The trailing `|| echo ""` keeps a missing entry from failing the
        # assignment; an empty name just skips the remote deletion below.
        adapter_ic_name=$(grep "^export ADAPTER_IC_NAME=" "${adapter_conf}" 2>/dev/null | sed 's/^export ADAPTER_IC_NAME="//' | sed 's/"$//' || echo "")
        adapter_display_name=$(basename "${adapter_conf}" .conf)

        if [ -n "${adapter_ic_name}" ]; then
            echo "🗑️ Deleting adapter: ${adapter_display_name} (${adapter_ic_name})"
            if aws sagemaker delete-inference-component \
                --inference-component-name "${adapter_ic_name}" \
                --region "${AWS_REGION}" 2>/dev/null; then
                echo "⏳ Waiting for adapter deletion..."
                # Fall back to a fixed pause when the waiter call fails.
                aws sagemaker wait inference-component-deleted \
                    --inference-component-name "${adapter_ic_name}" \
                    --region "${AWS_REGION}" 2>/dev/null || sleep 15
                echo "✅ Adapter deleted: ${adapter_display_name}"

                # Mark adapter IC as deleted in manifest (non-blocking)
                ./do/manifest delete --id "arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${adapter_ic_name}" 2>/dev/null || true
            else
                echo "⚠️ Failed to delete adapter IC: ${adapter_ic_name} (may already be gone)"
            fi
        fi

        # Remove adapter conf file
        rm -f "${adapter_conf}"
        DELETED=$((DELETED + 1))
    done

    echo ""
    echo "✅ All adapters cleaned (${DELETED} removed)"
    echo " Base IC and endpoint remain running."
}
|
|
767
|
+
<% } %>
|
|
768
|
+
|
|
769
|
+
# Function to clean CodeBuild project and related resources
#
# Removes, after confirmation:
#   - the CodeBuild project named by CODEBUILD_PROJECT_NAME
#   - the IAM role "<project>-service-role", if it exists
#   - objects under s3://codebuild-source-<account>-<region>/<PROJECT_NAME>/
# Individual deletion failures are reported and the remaining steps still run.
#
# Globals read:    CODEBUILD_PROJECT_NAME, PROJECT_NAME, AWS_REGION
# Globals written: AWS_ACCOUNT_ID
# Returns: 0 when nothing is configured/found or after the cleanup pass,
#          1 when confirm_action declines.
# Exits:   4 when AWS credentials are not usable.
clean_codebuild() {
    # Working variables are local so generic names such as ROLE_NAME or
    # S3_BUCKET do not overwrite same-named variables of the calling script.
    local PROJECT_CHECK ROLE_NAME ROLE_EXISTS
    local S3_BUCKET S3_PREFIX S3_EXISTS S3_COUNT
    local POLICIES ATTACHED_POLICIES policy policy_arn

    echo "🧹 Cleaning CodeBuild resources"
    echo " Project: ${CODEBUILD_PROJECT_NAME:-not set}"
    echo " Region: ${AWS_REGION}"

    if [ -z "${CODEBUILD_PROJECT_NAME:-}" ]; then
        echo "ℹ️ No CodeBuild project name configured (build target may not be codebuild)"
        return 0
    fi

    # Validate AWS credentials
    if ! aws sts get-caller-identity &> /dev/null; then
        echo "❌ AWS credentials not configured"
        echo " Run: aws configure"
        exit 4
    fi

    AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)

    # Check if project exists
    PROJECT_CHECK=$(aws codebuild batch-get-projects \
        --names "${CODEBUILD_PROJECT_NAME}" \
        --region "${AWS_REGION}" \
        --query 'projects[0].name' \
        --output text 2>/dev/null)

    # Text output renders a missing value as "None"; treat empty/"null" alike.
    if [ "$PROJECT_CHECK" = "None" ] || [ -z "$PROJECT_CHECK" ] || [ "$PROJECT_CHECK" = "null" ]; then
        echo "ℹ️ CodeBuild project not found: ${CODEBUILD_PROJECT_NAME}"
        return 0
    fi

    echo ""
    echo "Resources to be removed:"
    echo " • CodeBuild project: ${CODEBUILD_PROJECT_NAME}"

    # Check for service role
    ROLE_NAME="${CODEBUILD_PROJECT_NAME}-service-role"
    ROLE_EXISTS=false
    if aws iam get-role --role-name "${ROLE_NAME}" &> /dev/null; then
        ROLE_EXISTS=true
        echo " • IAM service role: ${ROLE_NAME}"
    fi

    # Check for S3 source bucket
    S3_BUCKET="codebuild-source-${AWS_ACCOUNT_ID}-${AWS_REGION}"
    S3_PREFIX="${PROJECT_NAME}/"
    S3_EXISTS=false
    if aws s3api head-bucket --bucket "$S3_BUCKET" --region "${AWS_REGION}" &> /dev/null; then
        # Counts the listing lines directly under the prefix.
        S3_COUNT=$(aws s3 ls "s3://$S3_BUCKET/$S3_PREFIX" --region "${AWS_REGION}" 2>/dev/null | wc -l | tr -d ' ')
        if [ "$S3_COUNT" -gt 0 ]; then
            S3_EXISTS=true
            echo " • S3 source artifacts: s3://$S3_BUCKET/$S3_PREFIX ($S3_COUNT objects)"
        fi
    fi

    if ! confirm_action "This will delete the CodeBuild project and associated resources"; then
        return 1
    fi

    # Delete CodeBuild project
    echo "🗑️ Deleting CodeBuild project: ${CODEBUILD_PROJECT_NAME}"
    if aws codebuild delete-project \
        --name "${CODEBUILD_PROJECT_NAME}" \
        --region "${AWS_REGION}" &> /dev/null; then
        echo "✅ CodeBuild project deleted"

        # Mark CodeBuild project as deleted in manifest (non-blocking)
        ./do/manifest delete --id "arn:aws:codebuild:${AWS_REGION}:${AWS_ACCOUNT_ID}:project/${CODEBUILD_PROJECT_NAME}" 2>/dev/null || true
    else
        echo "❌ Failed to delete CodeBuild project"
    fi

    # Delete IAM service role
    if [ "$ROLE_EXISTS" = true ]; then
        echo "🗑️ Deleting IAM service role: ${ROLE_NAME}"

        # Remove inline policies first. The text output is a whitespace
        # separated list, so the unquoted expansion is intentional.
        POLICIES=$(aws iam list-role-policies --role-name "${ROLE_NAME}" --query 'PolicyNames' --output text 2>/dev/null || echo "")
        for policy in $POLICIES; do
            if [ "$policy" = "None" ]; then
                continue
            fi
            aws iam delete-role-policy --role-name "${ROLE_NAME}" --policy-name "$policy" 2>/dev/null || true
        done

        # Detach managed policies as well: IAM refuses to delete a role that
        # still has managed policies attached.
        ATTACHED_POLICIES=$(aws iam list-attached-role-policies --role-name "${ROLE_NAME}" --query 'AttachedPolicies[].PolicyArn' --output text 2>/dev/null || echo "")
        for policy_arn in $ATTACHED_POLICIES; do
            if [ "$policy_arn" = "None" ]; then
                continue
            fi
            aws iam detach-role-policy --role-name "${ROLE_NAME}" --policy-arn "$policy_arn" 2>/dev/null || true
        done

        if aws iam delete-role --role-name "${ROLE_NAME}" &> /dev/null; then
            echo "✅ IAM service role deleted"

            # Mark IAM role as deleted in manifest (non-blocking)
            ./do/manifest delete --id "arn:aws:iam::${AWS_ACCOUNT_ID}:role/${ROLE_NAME}" 2>/dev/null || true
        else
            echo "❌ Failed to delete IAM service role"
        fi
    fi

    # Delete S3 source artifacts
    if [ "$S3_EXISTS" = true ]; then
        echo "🗑️ Deleting S3 source artifacts: s3://$S3_BUCKET/$S3_PREFIX"
        if aws s3 rm "s3://$S3_BUCKET/$S3_PREFIX" --recursive --region "${AWS_REGION}" &> /dev/null; then
            echo "✅ S3 source artifacts deleted"
        else
            echo "❌ Failed to delete S3 source artifacts"
        fi
    fi

    echo "✅ CodeBuild resources cleaned"
}
|
|
872
|
+
|
|
873
|
+
# Main script logic
|
|
874
|
+
echo "🧹 Cleanup script for ${PROJECT_NAME}"
|
|
875
|
+
echo ""
|
|
876
|
+
|
|
877
|
+
if [ -z "${CLEANUP_TARGET}" ]; then
|
|
878
|
+
show_usage
|
|
879
|
+
exit 0
|
|
880
|
+
fi
|
|
881
|
+
|
|
882
|
+
case "${CLEANUP_TARGET}" in
|
|
883
|
+
local)
|
|
884
|
+
clean_local
|
|
885
|
+
;;
|
|
886
|
+
ecr)
|
|
887
|
+
clean_ecr
|
|
888
|
+
;;
|
|
889
|
+
endpoint)
|
|
890
|
+
clean_endpoint
|
|
891
|
+
;;
|
|
892
|
+
ic)
|
|
893
|
+
clean_ic "${CLEANUP_ARG}"
|
|
894
|
+
;;
|
|
895
|
+
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
896
|
+
adapter)
|
|
897
|
+
clean_adapter "${CLEANUP_ARG}"
|
|
898
|
+
;;
|
|
899
|
+
adapters)
|
|
900
|
+
clean_adapters
|
|
901
|
+
;;
|
|
902
|
+
<% } %>
|
|
903
|
+
codebuild)
|
|
904
|
+
clean_codebuild
|
|
905
|
+
;;
|
|
906
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
907
|
+
benchmark)
|
|
908
|
+
echo "🧹 Cleaning benchmark resources..."
|
|
909
|
+
WORKLOAD_CONFIG_NAME="${PROJECT_NAME}-benchmark-config"
|
|
910
|
+
|
|
911
|
+
# Delete workload config if exists
|
|
912
|
+
if aws sagemaker describe-ai-workload-config \
|
|
913
|
+
--ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
|
|
914
|
+
--region "$AWS_REGION" 2>/dev/null; then
|
|
915
|
+
aws sagemaker delete-ai-workload-config \
|
|
916
|
+
--ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
|
|
917
|
+
--region "$AWS_REGION"
|
|
918
|
+
echo " ✓ Deleted workload config: $WORKLOAD_CONFIG_NAME"
|
|
919
|
+
fi
|
|
920
|
+
|
|
921
|
+
# Delete terminal benchmark jobs matching project prefix
|
|
922
|
+
aws sagemaker list-ai-benchmark-jobs \
|
|
923
|
+
--name-contains "${PROJECT_NAME}-benchmark-" \
|
|
924
|
+
--region "$AWS_REGION" \
|
|
925
|
+
--query 'AIBenchmarkJobs[?AIBenchmarkJobStatus!=`InProgress`].AIBenchmarkJobName' \
|
|
926
|
+
--output text | tr '\t' '\n' | while read -r job; do
|
|
927
|
+
[ -z "$job" ] && continue
|
|
928
|
+
aws sagemaker delete-ai-benchmark-job \
|
|
929
|
+
--ai-benchmark-job-name "$job" \
|
|
930
|
+
--region "$AWS_REGION"
|
|
931
|
+
echo " ✓ Deleted benchmark job: $job"
|
|
932
|
+
done
|
|
933
|
+
|
|
934
|
+
# Delete local benchmark results
|
|
935
|
+
if [ -d "${SCRIPT_DIR}/../benchmarks" ]; then
|
|
936
|
+
read -p "Delete local benchmark results? (Y/n) " CONFIRM_DELETE
|
|
937
|
+
CONFIRM_DELETE="${CONFIRM_DELETE:-Y}"
|
|
938
|
+
if [[ "${CONFIRM_DELETE}" =~ ^[Yy]$ ]]; then
|
|
939
|
+
rm -rf "${SCRIPT_DIR}/../benchmarks"
|
|
940
|
+
echo " ✓ Deleted local benchmarks/ directory"
|
|
941
|
+
else
|
|
942
|
+
echo " ⏭ Skipped local benchmarks/ deletion"
|
|
943
|
+
fi
|
|
944
|
+
fi
|
|
945
|
+
|
|
946
|
+
echo "✅ Benchmark cleanup complete"
|
|
947
|
+
;;
|
|
948
|
+
<% } %>
|
|
949
|
+
all)
|
|
950
|
+
echo "🧹 Performing complete cleanup"
|
|
951
|
+
echo ""
|
|
952
|
+
|
|
953
|
+
# Track what was cleaned
|
|
954
|
+
CLEANED_ITEMS=()
|
|
955
|
+
|
|
956
|
+
# Clean local images
|
|
957
|
+
if clean_local; then
|
|
958
|
+
CLEANED_ITEMS+=("Local Docker images")
|
|
959
|
+
fi
|
|
960
|
+
|
|
961
|
+
echo ""
|
|
962
|
+
|
|
963
|
+
# Clean ECR images
|
|
964
|
+
if clean_ecr; then
|
|
965
|
+
CLEANED_ITEMS+=("ECR images")
|
|
966
|
+
fi
|
|
967
|
+
|
|
968
|
+
echo ""
|
|
969
|
+
|
|
970
|
+
# Clean SageMaker resources
|
|
971
|
+
if clean_endpoint; then
|
|
972
|
+
CLEANED_ITEMS+=("SageMaker resources")
|
|
973
|
+
fi
|
|
974
|
+
|
|
975
|
+
echo ""
|
|
976
|
+
|
|
977
|
+
# Clean CodeBuild resources
|
|
978
|
+
if clean_codebuild; then
|
|
979
|
+
CLEANED_ITEMS+=("CodeBuild resources")
|
|
980
|
+
fi
|
|
981
|
+
|
|
982
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
983
|
+
echo ""
|
|
984
|
+
|
|
985
|
+
# Clean benchmark resources
|
|
986
|
+
WORKLOAD_CONFIG_NAME="${PROJECT_NAME}-benchmark-config"
|
|
987
|
+
|
|
988
|
+
# Delete workload config if exists
|
|
989
|
+
if aws sagemaker describe-ai-workload-config \
|
|
990
|
+
--ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
|
|
991
|
+
--region "$AWS_REGION" 2>/dev/null; then
|
|
992
|
+
aws sagemaker delete-ai-workload-config \
|
|
993
|
+
--ai-workload-config-name "$WORKLOAD_CONFIG_NAME" \
|
|
994
|
+
--region "$AWS_REGION"
|
|
995
|
+
echo " ✓ Deleted workload config: $WORKLOAD_CONFIG_NAME"
|
|
996
|
+
fi
|
|
997
|
+
|
|
998
|
+
# Delete terminal benchmark jobs matching project prefix
|
|
999
|
+
aws sagemaker list-ai-benchmark-jobs \
|
|
1000
|
+
--name-contains "${PROJECT_NAME}-benchmark-" \
|
|
1001
|
+
--region "$AWS_REGION" \
|
|
1002
|
+
--query 'AIBenchmarkJobs[?AIBenchmarkJobStatus!=`InProgress`].AIBenchmarkJobName' \
|
|
1003
|
+
--output text | tr '\t' '\n' | while read -r job; do
|
|
1004
|
+
[ -z "$job" ] && continue
|
|
1005
|
+
aws sagemaker delete-ai-benchmark-job \
|
|
1006
|
+
--ai-benchmark-job-name "$job" \
|
|
1007
|
+
--region "$AWS_REGION"
|
|
1008
|
+
echo " ✓ Deleted benchmark job: $job"
|
|
1009
|
+
done
|
|
1010
|
+
|
|
1011
|
+
# Delete local benchmark results
|
|
1012
|
+
if [ -d "${SCRIPT_DIR}/../benchmarks" ]; then
|
|
1013
|
+
read -p "Delete local benchmark results? (Y/n) " CONFIRM_DELETE
|
|
1014
|
+
CONFIRM_DELETE="${CONFIRM_DELETE:-Y}"
|
|
1015
|
+
if [[ "${CONFIRM_DELETE}" =~ ^[Yy]$ ]]; then
|
|
1016
|
+
rm -rf "${SCRIPT_DIR}/../benchmarks"
|
|
1017
|
+
echo " ✓ Deleted local benchmarks/ directory"
|
|
1018
|
+
else
|
|
1019
|
+
echo " ⏭ Skipped local benchmarks/ deletion"
|
|
1020
|
+
fi
|
|
1021
|
+
fi
|
|
1022
|
+
|
|
1023
|
+
CLEANED_ITEMS+=("Benchmark resources")
|
|
1024
|
+
<% } %>
|
|
1025
|
+
# Display summary
|
|
1026
|
+
echo ""
|
|
1027
|
+
echo "✅ Cleanup complete!"
|
|
1028
|
+
echo ""
|
|
1029
|
+
echo "Summary of cleaned resources:"
|
|
1030
|
+
for item in "${CLEANED_ITEMS[@]}"; do
|
|
1031
|
+
echo " ✓ ${item}"
|
|
1032
|
+
done
|
|
1033
|
+
;;
|
|
1034
|
+
*)
|
|
1035
|
+
echo "❌ Unknown cleanup target: ${CLEANUP_TARGET}"
|
|
1036
|
+
echo ""
|
|
1037
|
+
show_usage
|
|
1038
|
+
exit 1
|
|
1039
|
+
;;
|
|
1040
|
+
esac
|
|
1041
|
+
|
|
1042
|
+
echo ""
|
|
1043
|
+
echo "Cleanup finished!"
|