@aws/ml-container-creator 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +31 -137
- package/config/parameter-schema-v2.json +2065 -0
- package/package.json +6 -3
- package/servers/lib/catalogs/jumpstart-public.json +101 -16
- package/servers/lib/catalogs/models.json +182 -26
- package/src/app.js +6 -389
- package/src/lib/bootstrap-command-handler.js +75 -1078
- package/src/lib/bootstrap-profile-manager.js +634 -0
- package/src/lib/bootstrap-provisioners.js +421 -0
- package/src/lib/config-loader.js +405 -0
- package/src/lib/config-manager.js +59 -1668
- package/src/lib/config-mcp-client.js +118 -0
- package/src/lib/config-validator.js +634 -0
- package/src/lib/cuda-resolver.js +140 -0
- package/src/lib/e2e-catalog-validator.js +251 -3
- package/src/lib/e2e-ci-recorder.js +103 -0
- package/src/lib/generated/cli-options.js +471 -0
- package/src/lib/generated/parameter-matrix.js +671 -0
- package/src/lib/generated/validation-rules.js +202 -0
- package/src/lib/marketplace-flow.js +276 -0
- package/src/lib/mcp-query-runner.js +768 -0
- package/src/lib/parameter-schema-validator.js +62 -18
- package/src/lib/prompt-runner.js +41 -1504
- package/src/lib/prompts/feature-prompts.js +172 -0
- package/src/lib/prompts/index.js +48 -0
- package/src/lib/prompts/infrastructure-prompts.js +690 -0
- package/src/lib/prompts/model-prompts.js +552 -0
- package/src/lib/prompts/project-prompts.js +70 -0
- package/src/lib/prompts.js +2 -1446
- package/src/lib/registry-command-handler.js +135 -3
- package/src/lib/secrets-prompt-runner.js +251 -0
- package/src/lib/template-variable-resolver.js +398 -0
- package/templates/code/serve +5 -134
- package/templates/code/serve.d/lmi.ejs +19 -0
- package/templates/code/serve.d/sglang.ejs +47 -0
- package/templates/code/serve.d/tensorrt-llm.ejs +53 -0
- package/templates/code/serve.d/vllm.ejs +48 -0
- package/templates/do/clean +1 -1387
- package/templates/do/clean.d/async-inference.ejs +508 -0
- package/templates/do/clean.d/batch-transform.ejs +512 -0
- package/templates/do/clean.d/hyperpod-eks.ejs +481 -0
- package/templates/do/clean.d/managed-inference.ejs +1043 -0
- package/templates/do/deploy +1 -1766
- package/templates/do/deploy.d/async-inference.ejs +501 -0
- package/templates/do/deploy.d/batch-transform.ejs +529 -0
- package/templates/do/deploy.d/hyperpod-eks.ejs +339 -0
- package/templates/do/deploy.d/managed-inference.ejs +726 -0
- package/config/parameter-schema.json +0 -88
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
set -e
|
|
6
|
+
set -u
|
|
7
|
+
set -o pipefail
|
|
8
|
+
|
|
9
|
+
# Parse flags
|
|
10
|
+
# Flag defaults. Each stays at its default unless the matching option below
# is seen; IC_TARGET is only initialised here.
FORCE_NEW=false
FORCE_IC=false
IC_TARGET=""

# Consume the positional parameters one at a time. Recognised flags fall
# through to the single `shift` after the case; --help and unknown options
# leave the script immediately.
until [ "$#" -eq 0 ]; do
    case "$1" in
        -h|--help)
            printf '%s\n' \
                "Usage: ./do/deploy [--force] [--force-ic]" \
                "" \
                "Options:" \
                " --force Create a new endpoint, even if one already exists." \
                " --force-ic Recreate the inference component on the existing endpoint." \
                "" \
                "Without flags, deploy resumes from the last run."
            exit 0
            ;;
        --force)
            FORCE_NEW=true
            ;;
        --force-ic)
            FORCE_IC=true
            ;;
        *)
            printf '%s\n' \
                "❌ Unknown option: $1" \
                " Run ./do/deploy --help for usage."
            exit 1
            ;;
    esac
    shift
done
|
|
37
|
+
|
|
38
|
+
# Source configuration
|
|
39
|
+
# Absolute directory of this script; the `config` file sitting next to it is
# loaded into the current shell.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
. "${SCRIPT_DIR}/config"

# Banner: echo back the settings this run will use.
# NOTE(review): these variables are presumably defined by the sourced config.
printf '%s\n' "🚀 Deploying to AWS"
printf '%s\n' " Project: ${PROJECT_NAME}"
printf '%s\n' " Deployment config: ${DEPLOYMENT_CONFIG}"
printf '%s\n' " Region: ${AWS_REGION}"
printf '%s\n' " Build target: ${BUILD_TARGET}"
printf '%s\n' " Deployment target: ${DEPLOYMENT_TARGET}"
printf '%s\n' " HyperPod cluster: ${HYPERPOD_CLUSTER_NAME}"
printf '%s\n' " Namespace: ${HYPERPOD_NAMESPACE}"
printf '%s\n' " Replicas: ${HYPERPOD_REPLICAS}"
|
|
51
|
+
|
|
52
|
+
# Check AWS credentials
|
|
53
|
+
echo "🔍 Validating AWS credentials..."

# One STS call both proves the credentials work and returns the account ID.
# Previously the identity was fetched twice, and the second (unguarded) call
# could abort the script under `set -e` without printing any guidance.
AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2> /dev/null) || AWS_ACCOUNT_ID=""
if [ -z "${AWS_ACCOUNT_ID}" ]; then
    echo "❌ AWS credentials not configured"
    echo " Run: aws configure"
    echo " Or set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables"
    exit 4
fi
echo "✅ AWS credentials validated (Account: ${AWS_ACCOUNT_ID})"

# Construct ECR repository URL
ECR_REPOSITORY="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPOSITORY_NAME}"
|
|
66
|
+
|
|
67
|
+
# ============================================================
|
|
68
|
+
# Shared: Verify ECR image exists
|
|
69
|
+
# ============================================================
|
|
70
|
+
printf '%s\n' "🔍 Verifying ECR image exists..."

# Look up the "<project>-latest" tag in the repository. The command output is
# discarded; only its exit status matters. A failed lookup prints build
# instructions and exits with status 4.
aws ecr describe-images \
    --repository-name "${ECR_REPOSITORY_NAME}" \
    --image-ids imageTag="${PROJECT_NAME}-latest" \
    --region "${AWS_REGION}" > /dev/null 2>&1 || {
    printf '%s\n' "❌ ECR image not found: ${ECR_REPOSITORY}:${PROJECT_NAME}-latest"
    printf '%s\n' ""
    printf '%s\n' "Please build and push your image first:"
    printf '%s\n' " ./do/submit"
    printf '%s\n' ""
    printf '%s\n' "After the build completes successfully, run this deploy script again."
    exit 4
}

printf '%s\n' "✅ ECR image found: ${ECR_REPOSITORY}:${PROJECT_NAME}-latest"

# Tag used by the rest of the script when referring to the image.
IMAGE_TAG="${PROJECT_NAME}-latest"
|
|
87
|
+
|
|
88
|
+
# ============================================================
|
|
89
|
+
# Shared: Resolve secrets for container environment
|
|
90
|
+
# ============================================================
|
|
91
|
+
# Comma-separated list of "KEY":"value" JSON members (no surrounding braces).
# Stays empty when no secret is configured.
CONTAINER_ENV_JSON=""

# Print $1 with backslashes and double quotes escaped so it can be embedded
# inside a JSON string literal.
# NOTE(review): control characters such as embedded newlines are not escaped.
_json_escape() {
    printf '%s' "$1" | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g'
}

# Append one "KEY":"value" member to CONTAINER_ENV_JSON.
#   $1 - environment variable name
#   $2 - raw value (escaped here)
# A comma separator is inserted only when the list already has an entry.
_add_container_env() {
    local key="$1" value
    value="$(_json_escape "$2")"
    CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON:+${CONTAINER_ENV_JSON},}\"${key}\":\"${value}\""
}

# HuggingFace token: a Secrets Manager ARN takes precedence over a plain
# HF_TOKEN value. A failed lookup exits with status 3.
if [ -n "${HF_TOKEN_ARN:-}" ]; then
    echo "🔐 Resolving HuggingFace token from Secrets Manager..."
    RESOLVED_HF_TOKEN=$(aws secretsmanager get-secret-value --secret-id "${HF_TOKEN_ARN}" --query SecretString --output text --region "${AWS_REGION}") || {
        echo "❌ Failed to resolve HuggingFace token from Secrets Manager"
        exit 3
    }
    _add_container_env "HF_TOKEN" "${RESOLVED_HF_TOKEN}"
elif [ -n "${HF_TOKEN:-}" ]; then
    _add_container_env "HF_TOKEN" "${HF_TOKEN}"
fi

# NGC API key: same precedence and failure handling as above.
if [ -n "${NGC_API_KEY_ARN:-}" ]; then
    echo "🔐 Resolving NGC API key from Secrets Manager..."
    RESOLVED_NGC_KEY=$(aws secretsmanager get-secret-value --secret-id "${NGC_API_KEY_ARN}" --query SecretString --output text --region "${AWS_REGION}") || {
        echo "❌ Failed to resolve NGC API key from Secrets Manager"
        exit 3
    }
    _add_container_env "NGC_API_KEY" "${RESOLVED_NGC_KEY}"
elif [ -n "${NGC_API_KEY:-}" ]; then
    _add_container_env "NGC_API_KEY" "${NGC_API_KEY}"
fi
|
|
122
|
+
|
|
123
|
+
# ============================================================
|
|
124
|
+
# HyperPod EKS Deployment
|
|
125
|
+
# ============================================================
|
|
126
|
+
|
|
127
|
+
# Get kubeconfig for HyperPod cluster
|
|
128
|
+
echo "🔑 Configuring kubectl for HyperPod cluster..."
# Dedicated kubeconfig file per HyperPod cluster.
KUBECONFIG_PATH="${HOME}/.kube/hyperpod-${HYPERPOD_CLUSTER_NAME}"

# Step 1: Describe the HyperPod cluster to get the underlying EKS cluster ARN.
# stderr is folded into the capture so a failed call can be shown below.
EKS_CLUSTER_ARN=$(aws sagemaker describe-cluster \
    --cluster-name "${HYPERPOD_CLUSTER_NAME}" \
    --region "${AWS_REGION}" \
    --query "Orchestrator.Eks.ClusterArn" \
    --output text 2>&1) || {
    echo "❌ Failed to describe HyperPod cluster: ${HYPERPOD_CLUSTER_NAME}"
    echo ""
    echo " Error details:"
    echo " ${EKS_CLUSTER_ARN}"
    echo ""
    echo " Check that:"
    echo " • The cluster name is correct"
    echo " • The cluster exists in region: ${AWS_REGION}"
    echo " • Your IAM user/role has permission to access the cluster"
    echo ""
    echo " Required IAM permissions:"
    echo " • sagemaker:DescribeCluster"
    echo " • eks:DescribeCluster"
    exit 4
}

# A successful call can still produce no usable ARN: with `--output text` a
# null query result is printed as the literal "None". Anything that is not
# shaped like an EKS cluster ARN is rejected here instead of being passed on
# as a cluster name.
case "${EKS_CLUSTER_ARN}" in
    arn:*:eks:*:cluster/*) ;;
    *)
        echo "❌ HyperPod cluster did not report an EKS cluster ARN: ${HYPERPOD_CLUSTER_NAME}"
        echo ""
        echo " Error details:"
        echo " ${EKS_CLUSTER_ARN}"
        echo ""
        echo " Check that the cluster is orchestrated by EKS."
        exit 4
        ;;
esac

# Step 2: Extract the EKS cluster name from the ARN (text after the last '/')
EKS_CLUSTER_NAME="${EKS_CLUSTER_ARN##*/}"
echo " HyperPod cluster: ${HYPERPOD_CLUSTER_NAME}"
echo " EKS cluster: ${EKS_CLUSTER_NAME}"

# Step 3: Update kubeconfig using the EKS cluster
if ! aws eks update-kubeconfig \
    --name "${EKS_CLUSTER_NAME}" \
    --region "${AWS_REGION}" \
    --kubeconfig "${KUBECONFIG_PATH}" 2>&1; then
    echo "❌ Failed to configure kubectl for EKS cluster: ${EKS_CLUSTER_NAME}"
    echo ""
    echo " Required IAM permissions:"
    echo " • eks:DescribeCluster"
    echo " • eks:AccessKubernetesApi"
    exit 4
fi

# Every kubectl call below reads this kubeconfig.
export KUBECONFIG="${KUBECONFIG_PATH}"
echo "✅ Kubeconfig saved to: ${KUBECONFIG_PATH}"

# Verify cluster connectivity
echo "🔍 Verifying cluster connectivity..."
if ! kubectl cluster-info &> /dev/null; then
    echo "❌ Cannot connect to HyperPod cluster"
    echo ""
    echo " Check that:"
    echo " • The cluster is in 'InService' status"
    echo " • Your network can reach the cluster API server"
    echo " • Your IAM credentials are valid"
    exit 4
fi
echo "✅ Connected to HyperPod cluster"
|
|
186
|
+
|
|
187
|
+
# Create namespace if it doesn't exist
|
|
188
|
+
printf '%s\n' "📁 Ensuring namespace exists: ${HYPERPOD_NAMESPACE}"

# Render the Namespace object client-side and feed it to `kubectl apply`.
# A failure here is reported as a warning only; the script keeps going.
kubectl create namespace "${HYPERPOD_NAMESPACE}" --dry-run=client -o yaml \
    | kubectl apply -f - 2>&1 \
    || printf '%s\n' "⚠️ Warning: Could not create/verify namespace"
|
|
192
|
+
|
|
193
|
+
# Apply Kubernetes manifests
|
|
194
|
+
echo "📄 Applying Kubernetes manifests from hyperpod/..."
|
|
195
|
+
|
|
196
|
+
# Substitute shell variables (e.g. ${AWS_ACCOUNT_ID}) in manifests before applying
|
|
197
|
+
export AWS_ACCOUNT_ID
|
|
198
|
+
export ECR_IMAGE="${ECR_REPOSITORY}:${IMAGE_TAG}"
|
|
199
|
+
|
|
200
|
+
APPLY_OUTPUT=""
|
|
201
|
+
APPLY_EXIT_CODE=0
|
|
202
|
+
for manifest in hyperpod/*.yaml; do
|
|
203
|
+
# Skip files that contain no Kubernetes objects (e.g. comment-only PVC stubs)
|
|
204
|
+
RENDERED=$(envsubst < "${manifest}")
|
|
205
|
+
if echo "${RENDERED}" | grep -q '^kind:'; then
|
|
206
|
+
FILE_OUTPUT=$(echo "${RENDERED}" | kubectl apply -n "${HYPERPOD_NAMESPACE}" -f - 2>&1) || {
|
|
207
|
+
APPLY_EXIT_CODE=$?
|
|
208
|
+
}
|
|
209
|
+
APPLY_OUTPUT="${APPLY_OUTPUT}${FILE_OUTPUT}\n"
|
|
210
|
+
fi
|
|
211
|
+
done
|
|
212
|
+
|
|
213
|
+
if [ "${APPLY_EXIT_CODE}" -ne 0 ]; then
|
|
214
|
+
echo ""
|
|
215
|
+
echo "❌ Failed to apply Kubernetes manifests"
|
|
216
|
+
echo ""
|
|
217
|
+
echo " Error details:"
|
|
218
|
+
echo " ${APPLY_OUTPUT}"
|
|
219
|
+
echo ""
|
|
220
|
+
echo " Common issues:"
|
|
221
|
+
echo " • Insufficient node capacity - check available GPU nodes"
|
|
222
|
+
echo " • Resource requests exceed node capacity"
|
|
223
|
+
echo " • RBAC permissions - ensure you have permission to create resources in namespace '${HYPERPOD_NAMESPACE}'"
|
|
224
|
+
echo " • Invalid manifest syntax"
|
|
225
|
+
<% if (fsxVolumeHandle) { %>
|
|
226
|
+
echo " • PVC creation failure - verify the FSx CSI driver is installed on the cluster"
|
|
227
|
+
echo " kubectl get csidriver -o name | grep fsx"
|
|
228
|
+
<% } %>
|
|
229
|
+
echo ""
|
|
230
|
+
echo " Debug commands:"
|
|
231
|
+
echo " kubectl get nodes -o wide"
|
|
232
|
+
echo " kubectl describe nodes"
|
|
233
|
+
echo " kubectl get events -n ${HYPERPOD_NAMESPACE}"
|
|
234
|
+
exit ${APPLY_EXIT_CODE}
|
|
235
|
+
fi
|
|
236
|
+
|
|
237
|
+
echo "✅ Kubernetes manifests applied"
|
|
238
|
+
|
|
239
|
+
# Record k8s deployment and service in manifest (non-blocking)
|
|
240
|
+
# Two records are written, one per resource type. Each entry is
# "<type>:<metadata key that carries the resource name>". stderr is discarded
# and failures are ignored, so this step can never stop the deploy.
for record in "k8s-deployment:deploymentName" "k8s-service:serviceName"; do
    ./do/manifest add \
        --type "${record%%:*}" \
        --id "${HYPERPOD_NAMESPACE}/${PROJECT_NAME}" \
        --project "${PROJECT_NAME}" \
        --meta "{\"namespace\":\"${HYPERPOD_NAMESPACE}\",\"${record#*:}\":\"${PROJECT_NAME}\",\"clusterName\":\"${HYPERPOD_CLUSTER_NAME}\",\"region\":\"${AWS_REGION}\"}" \
        2>/dev/null || true
done
|
|
253
|
+
|
|
254
|
+
# Wait for deployment to be ready
|
|
255
|
+
DEPLOY_TIMEOUT=${DEPLOY_TIMEOUT:-1200}
|
|
256
|
+
echo "⏳ Waiting for deployment to be ready (timeout: ${DEPLOY_TIMEOUT}s)..."
|
|
257
|
+
echo " This may take several minutes for GPU workloads..."
|
|
258
|
+
echo ""
|
|
259
|
+
|
|
260
|
+
# Poll pod status every 30s while rollout is in progress
|
|
261
|
+
(
|
|
262
|
+
while true; do
|
|
263
|
+
sleep 30
|
|
264
|
+
POD_STATUS=$(kubectl get pods -n "${HYPERPOD_NAMESPACE}" -l app=${PROJECT_NAME} \
|
|
265
|
+
--no-headers 2>/dev/null | head -5)
|
|
266
|
+
if [ -n "${POD_STATUS}" ]; then
|
|
267
|
+
echo " 📊 $(date +%H:%M:%S) Pod status:"
|
|
268
|
+
echo "${POD_STATUS}" | while read -r line; do echo " ${line}"; done
|
|
269
|
+
fi
|
|
270
|
+
done
|
|
271
|
+
) &
|
|
272
|
+
STATUS_PID=$!
|
|
273
|
+
trap "kill ${STATUS_PID} 2>/dev/null; wait ${STATUS_PID} 2>/dev/null" EXIT
|
|
274
|
+
|
|
275
|
+
ROLLOUT_OUTPUT=$(kubectl rollout status deployment/${PROJECT_NAME} -n "${HYPERPOD_NAMESPACE}" --timeout=${DEPLOY_TIMEOUT}s 2>&1) || {
|
|
276
|
+
ROLLOUT_EXIT_CODE=$?
|
|
277
|
+
kill ${STATUS_PID} 2>/dev/null
|
|
278
|
+
echo ""
|
|
279
|
+
echo "❌ Deployment failed to become ready within timeout"
|
|
280
|
+
echo ""
|
|
281
|
+
echo " Error details:"
|
|
282
|
+
echo " ${ROLLOUT_OUTPUT}"
|
|
283
|
+
echo ""
|
|
284
|
+
echo " Current pod state:"
|
|
285
|
+
kubectl get pods -n "${HYPERPOD_NAMESPACE}" -l app=${PROJECT_NAME} -o wide 2>/dev/null
|
|
286
|
+
echo ""
|
|
287
|
+
echo " Debug commands:"
|
|
288
|
+
echo " kubectl describe pods -n ${HYPERPOD_NAMESPACE} -l app=${PROJECT_NAME}"
|
|
289
|
+
echo " kubectl logs -n ${HYPERPOD_NAMESPACE} -l app=${PROJECT_NAME} --tail=100"
|
|
290
|
+
echo ""
|
|
291
|
+
echo " Common issues:"
|
|
292
|
+
echo " • Image pull errors - check ECR permissions"
|
|
293
|
+
echo " • Resource scheduling - insufficient GPU nodes"
|
|
294
|
+
echo " • Container crash - check application logs"
|
|
295
|
+
<% if (fsxVolumeHandle) { %>
|
|
296
|
+
echo " • PVC binding errors - verify FSx CSI driver is installed on the cluster"
|
|
297
|
+
echo " kubectl get pvc -n ${HYPERPOD_NAMESPACE}"
|
|
298
|
+
echo " kubectl describe pvc -n ${HYPERPOD_NAMESPACE}"
|
|
299
|
+
echo " kubectl get csidriver -o name | grep fsx"
|
|
300
|
+
<% } %>
|
|
301
|
+
exit ${ROLLOUT_EXIT_CODE}
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
kill ${STATUS_PID} 2>/dev/null
|
|
305
|
+
wait ${STATUS_PID} 2>/dev/null
|
|
306
|
+
|
|
307
|
+
echo "✅ HyperPod EKS deployment complete!"
|
|
308
|
+
echo ""
|
|
309
|
+
echo "📋 Deployment Details:"
|
|
310
|
+
echo " Cluster: ${HYPERPOD_CLUSTER_NAME}"
|
|
311
|
+
echo " Namespace: ${HYPERPOD_NAMESPACE}"
|
|
312
|
+
echo " Deployment: ${PROJECT_NAME}"
|
|
313
|
+
echo " Replicas: ${HYPERPOD_REPLICAS}"
|
|
314
|
+
echo " Image: ${ECR_REPOSITORY}:${IMAGE_TAG}"
|
|
315
|
+
echo ""
|
|
316
|
+
echo "📋 What's next?"
|
|
317
|
+
echo " • Test your deployment: ./do/test"
|
|
318
|
+
echo " • Check pod status: kubectl get pods -n ${HYPERPOD_NAMESPACE}"
|
|
319
|
+
echo " • View pod logs: kubectl logs -n ${HYPERPOD_NAMESPACE} -l app=${PROJECT_NAME}"
|
|
320
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
321
|
+
echo " • Benchmark performance: ./do/benchmark"
|
|
322
|
+
<% } %>
|
|
323
|
+
echo " • Register this deployment: ./do/register"
|
|
324
|
+
echo " • View logs: ./do/logs"
|
|
325
|
+
echo " • Clean up when done: ./do/clean hyperpod"
|
|
326
|
+
|
|
327
|
+
# Write kubeconfig path to config so other scripts can use it (idempotent)
|
|
328
|
+
#######################################
# Set `export NAME="value"` in the config file next to this script.
# Every existing `export NAME=` line is replaced in place; when there is
# none (or the file does not exist yet), a blank line and the export are
# appended.
# The value is written literally. The earlier sed-based substitution broke
# on values containing '|' and mangled '&' or '\'.
# Globals:   SCRIPT_DIR (read)
# Arguments: $1 - variable name
#            $2 - value, stored between double quotes
# NOTE(review): '"', '$' and '`' inside the value are not escaped for the
# double-quoted context - confirm callers only pass plain paths.
#######################################
_update_config_var() {
    local var_name="$1" var_value="$2" config_file="${SCRIPT_DIR}/config"
    local assignment="export ${var_name}=\"${var_value}\""
    local line rewritten="" found=false

    if [ -f "${config_file}" ]; then
        # `|| [ -n ... ]` keeps a final line that lacks a trailing newline.
        while IFS= read -r line || [ -n "${line}" ]; do
            case "${line}" in
                "export ${var_name}="*)
                    line="${assignment}"
                    found=true
                    ;;
            esac
            rewritten+="${line}"$'\n'
        done < "${config_file}"
    fi

    if [ "${found}" = true ]; then
        printf '%s' "${rewritten}" > "${config_file}"
    else
        printf '\n%s\n' "${assignment}" >> "${config_file}"
    fi
}
|
|
338
|
+
|
|
339
|
+
_update_config_var "KUBECONFIG" "${KUBECONFIG_PATH}"
|