@aws/ml-container-creator 0.9.1 → 0.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE-THIRD-PARTY +9304 -0
- package/bin/cli.js +2 -0
- package/config/bootstrap-e2e-stack.json +341 -0
- package/config/bootstrap-stack.json +40 -3
- package/config/parameter-schema-v2.json +2049 -0
- package/config/tune-catalog.json +1781 -0
- package/infra/ci-harness/buildspec.yml +1 -0
- package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
- package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
- package/infra/ci-harness/lib/ci-harness-stack.ts +837 -7
- package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
- package/package.json +53 -68
- package/servers/base-image-picker/index.js +121 -121
- package/servers/e2e-status/index.js +297 -0
- package/servers/e2e-status/manifest.json +14 -0
- package/servers/e2e-status/package.json +15 -0
- package/servers/endpoint-picker/LICENSE +202 -0
- package/servers/endpoint-picker/index.js +536 -0
- package/servers/endpoint-picker/manifest.json +14 -0
- package/servers/endpoint-picker/package.json +18 -0
- package/servers/hyperpod-cluster-picker/index.js +125 -125
- package/servers/instance-sizer/index.js +138 -138
- package/servers/instance-sizer/lib/instance-ranker.js +76 -76
- package/servers/instance-sizer/lib/model-resolver.js +61 -61
- package/servers/instance-sizer/lib/quota-resolver.js +113 -113
- package/servers/instance-sizer/lib/vram-estimator.js +31 -31
- package/servers/lib/bedrock-client.js +38 -38
- package/servers/lib/catalogs/jumpstart-public.json +101 -16
- package/servers/lib/catalogs/model-servers.json +201 -3
- package/servers/lib/catalogs/models.json +182 -26
- package/servers/lib/custom-validators.js +13 -13
- package/servers/lib/dynamic-resolver.js +4 -4
- package/servers/marketplace-picker/index.js +342 -0
- package/servers/marketplace-picker/manifest.json +14 -0
- package/servers/marketplace-picker/package.json +18 -0
- package/servers/model-picker/index.js +382 -382
- package/servers/region-picker/index.js +56 -56
- package/servers/workload-picker/LICENSE +202 -0
- package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
- package/servers/workload-picker/index.js +171 -0
- package/servers/workload-picker/manifest.json +16 -0
- package/servers/workload-picker/package.json +16 -0
- package/src/app.js +4 -390
- package/src/lib/bootstrap-command-handler.js +710 -1148
- package/src/lib/bootstrap-config.js +36 -0
- package/src/lib/bootstrap-profile-manager.js +641 -0
- package/src/lib/bootstrap-provisioners.js +421 -0
- package/src/lib/ci-register-helpers.js +74 -0
- package/src/lib/config-loader.js +408 -0
- package/src/lib/config-manager.js +66 -1685
- package/src/lib/config-mcp-client.js +118 -0
- package/src/lib/config-validator.js +634 -0
- package/src/lib/cuda-resolver.js +149 -0
- package/src/lib/e2e-catalog-validator.js +251 -3
- package/src/lib/e2e-ci-recorder.js +103 -0
- package/src/lib/generated/cli-options.js +315 -311
- package/src/lib/generated/parameter-matrix.js +671 -0
- package/src/lib/generated/validation-rules.js +71 -71
- package/src/lib/marketplace-flow.js +276 -0
- package/src/lib/mcp-query-runner.js +768 -0
- package/src/lib/parameter-schema-validator.js +62 -18
- package/src/lib/path-prover-brain.js +607 -0
- package/src/lib/prompt-runner.js +41 -1504
- package/src/lib/prompts/feature-prompts.js +172 -0
- package/src/lib/prompts/index.js +48 -0
- package/src/lib/prompts/infrastructure-prompts.js +690 -0
- package/src/lib/prompts/model-prompts.js +552 -0
- package/src/lib/prompts/project-prompts.js +82 -0
- package/src/lib/prompts.js +2 -1446
- package/src/lib/registry-command-handler.js +135 -3
- package/src/lib/secrets-prompt-runner.js +251 -0
- package/src/lib/template-variable-resolver.js +422 -0
- package/src/lib/tune-catalog-validator.js +37 -4
- package/templates/Dockerfile +9 -0
- package/templates/code/adapter_sidecar.py +444 -0
- package/templates/code/serve +6 -0
- package/templates/code/serve.d/vllm.ejs +1 -1
- package/templates/do/.benchmark_writer.py +1476 -0
- package/templates/do/.tune_helper.py +982 -57
- package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
- package/templates/do/adapter +149 -0
- package/templates/do/benchmark +639 -85
- package/templates/do/config +108 -5
- package/templates/do/deploy.d/managed-inference.ejs +192 -11
- package/templates/do/optimize +106 -37
- package/templates/do/register +89 -0
- package/templates/do/test +13 -0
- package/templates/do/tune +378 -59
- package/templates/do/validate +44 -4
- package/config/parameter-schema.json +0 -88
package/templates/do/config
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#!/bin/bash
|
|
2
2
|
# do-framework configuration
|
|
3
3
|
# This file is sourced by all do scripts
|
|
4
|
+
# Generated: <%= new Date().toISOString() %>
|
|
4
5
|
|
|
5
6
|
# Project identification
|
|
6
7
|
export PROJECT_NAME="<%= projectName %>"
|
|
@@ -10,16 +11,20 @@ export DEPLOYMENT_CONFIG="<%= deploymentConfig %>"
|
|
|
10
11
|
export FRAMEWORK="<%= framework %>"
|
|
11
12
|
export MODEL_SERVER="<%= modelServer %>"
|
|
12
13
|
|
|
14
|
+
# AWS configuration
|
|
15
|
+
export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
|
|
16
|
+
export ECR_REPOSITORY_NAME="ml-container-creator"
|
|
17
|
+
|
|
13
18
|
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
14
19
|
# LoRA adapter serving
|
|
15
20
|
export ENABLE_LORA=true
|
|
16
21
|
export ADAPTER_S3_BUCKET="mlcc-adapters-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
|
|
22
|
+
<% } else if (framework === 'transformers' || framework === 'diffusors') { %>
|
|
23
|
+
# LoRA adapter serving (uncomment to enable)
|
|
24
|
+
# export ENABLE_LORA=true
|
|
25
|
+
# export ADAPTER_S3_BUCKET="mlcc-adapters-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
|
|
17
26
|
<% } %>
|
|
18
27
|
|
|
19
|
-
# AWS configuration
|
|
20
|
-
export AWS_REGION="<%= awsRegion %>"
|
|
21
|
-
export ECR_REPOSITORY_NAME="ml-container-creator"
|
|
22
|
-
|
|
23
28
|
# Build configuration — WHERE the Docker image gets built
|
|
24
29
|
export BUILD_TARGET="<%= buildTarget %>"
|
|
25
30
|
<% if (buildTarget === 'codebuild') { %>
|
|
@@ -42,14 +47,27 @@ export INSTANCE_TYPE="<%= instanceType %>"
|
|
|
42
47
|
# Instance pools: heterogeneous instance types with priority-based fallback
|
|
43
48
|
# Priority = selection order (1 = preferred, higher = fallback)
|
|
44
49
|
export INSTANCE_POOLS='<%= JSON.stringify(instancePools) %>'
|
|
50
|
+
<% } else { %>
|
|
51
|
+
# Instance pools: heterogeneous instance types with priority-based fallback (uncomment to enable)
|
|
52
|
+
# Format: [{"InstanceType":"ml.g6e.48xlarge","Priority":1},{"InstanceType":"ml.g5.48xlarge","Priority":2}]
|
|
53
|
+
# export INSTANCE_POOLS='[]'
|
|
45
54
|
<% } %>
|
|
46
55
|
<% if (inferenceAmiVersion) { %>
|
|
47
56
|
export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
|
|
57
|
+
<% } else { %>
|
|
58
|
+
# Inference AMI version — auto-resolved from CUDA version (uncomment to override)
|
|
59
|
+
# Valid: al2-ami-sagemaker-inference-gpu-2, al2-ami-sagemaker-inference-gpu-2-1,
|
|
60
|
+
# al2-ami-sagemaker-inference-gpu-3-1, al2023-ami-sagemaker-inference-gpu-4-1
|
|
61
|
+
# export INFERENCE_AMI_VERSION=""
|
|
48
62
|
<% } %>
|
|
49
63
|
<% if (typeof capacityReservationArn !== 'undefined' && capacityReservationArn) { %>
|
|
50
64
|
# Note: Capacity reservations and instance pools (INSTANCE_POOLS) are mutually exclusive.
|
|
51
65
|
# If both are set, the capacity reservation takes precedence and INSTANCE_POOLS is ignored.
|
|
52
66
|
export CAPACITY_RESERVATION_ARN="<%= capacityReservationArn %>"
|
|
67
|
+
<% } else { %>
|
|
68
|
+
# Capacity reservation (uncomment to use reserved capacity)
|
|
69
|
+
# Note: Mutually exclusive with INSTANCE_POOLS — reservation takes precedence.
|
|
70
|
+
# export CAPACITY_RESERVATION_ARN=""
|
|
53
71
|
<% } %>
|
|
54
72
|
<% } %>
|
|
55
73
|
<% } %>
|
|
@@ -59,6 +77,9 @@ export CAPACITY_RESERVATION_ARN="<%= capacityReservationArn %>"
|
|
|
59
77
|
export INSTANCE_TYPE="<%= instanceType %>"
|
|
60
78
|
<% if (inferenceAmiVersion) { %>
|
|
61
79
|
export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
|
|
80
|
+
<% } else { %>
|
|
81
|
+
# Inference AMI version — auto-resolved from CUDA version (uncomment to override)
|
|
82
|
+
# export INFERENCE_AMI_VERSION=""
|
|
62
83
|
<% } %>
|
|
63
84
|
|
|
64
85
|
# Async-specific configuration
|
|
@@ -85,6 +106,9 @@ export ASYNC_SNS_ERROR_TOPIC="arn:aws:sns:${AWS_REGION}:${ACCOUNT_ID}:ml-contain
|
|
|
85
106
|
|
|
86
107
|
<% if (asyncMaxConcurrentInvocations) { %>
|
|
87
108
|
export ASYNC_MAX_CONCURRENT_INVOCATIONS="<%= asyncMaxConcurrentInvocations %>"
|
|
109
|
+
<% } else { %>
|
|
110
|
+
# Max concurrent invocations per instance (uncomment to set)
|
|
111
|
+
# export ASYNC_MAX_CONCURRENT_INVOCATIONS=""
|
|
88
112
|
<% } %>
|
|
89
113
|
<% } %>
|
|
90
114
|
|
|
@@ -95,6 +119,9 @@ export HYPERPOD_NAMESPACE="<%= hyperPodNamespace %>"
|
|
|
95
119
|
export HYPERPOD_REPLICAS="<%= hyperPodReplicas %>"
|
|
96
120
|
<% if (fsxVolumeHandle) { %>
|
|
97
121
|
export FSX_VOLUME_HANDLE="<%= fsxVolumeHandle %>"
|
|
122
|
+
<% } else { %>
|
|
123
|
+
# FSx for Lustre volume for shared model storage (uncomment to enable)
|
|
124
|
+
# export FSX_VOLUME_HANDLE=""
|
|
98
125
|
<% } %>
|
|
99
126
|
<% } %>
|
|
100
127
|
|
|
@@ -121,9 +148,15 @@ export BATCH_STRATEGY="<%= batchStrategy %>"
|
|
|
121
148
|
export BATCH_JOIN_SOURCE="<%= batchJoinSource || 'None' %>"
|
|
122
149
|
<% if (batchMaxConcurrentTransforms) { %>
|
|
123
150
|
export BATCH_MAX_CONCURRENT_TRANSFORMS="<%= batchMaxConcurrentTransforms %>"
|
|
151
|
+
<% } else { %>
|
|
152
|
+
# Max concurrent transforms per instance (uncomment to set)
|
|
153
|
+
# export BATCH_MAX_CONCURRENT_TRANSFORMS=""
|
|
124
154
|
<% } %>
|
|
125
155
|
<% if (batchMaxPayloadInMB) { %>
|
|
126
156
|
export BATCH_MAX_PAYLOAD_IN_MB="<%= batchMaxPayloadInMB %>"
|
|
157
|
+
<% } else { %>
|
|
158
|
+
# Max payload size in MB (uncomment to set, default: 6)
|
|
159
|
+
# export BATCH_MAX_PAYLOAD_IN_MB=""
|
|
127
160
|
<% } %>
|
|
128
161
|
<% } %>
|
|
129
162
|
|
|
@@ -140,6 +173,22 @@ export ENDPOINT_VARIANT_NAME="<%= endpointVariantName %>"
|
|
|
140
173
|
export ENDPOINT_VOLUME_SIZE="<%= endpointVolumeSize %>"
|
|
141
174
|
<% } %>
|
|
142
175
|
|
|
176
|
+
<% if (deploymentTarget === 'realtime-inference' || deploymentTarget === 'async-inference') { %>
|
|
177
|
+
# ─── Endpoint overrides (uncomment to customize) ───────────────────────────────
|
|
178
|
+
<% if (typeof endpointInitialInstanceCount === 'undefined' || endpointInitialInstanceCount == null) { %>
|
|
179
|
+
# export ENDPOINT_INITIAL_INSTANCE_COUNT="1" # Number of instances for the endpoint
|
|
180
|
+
<% } %>
|
|
181
|
+
<% if (typeof endpointDataCapturePercent === 'undefined' || endpointDataCapturePercent == null) { %>
|
|
182
|
+
# export ENDPOINT_DATA_CAPTURE_PERCENT="" # Percentage of requests to capture (0-100)
|
|
183
|
+
<% } %>
|
|
184
|
+
<% if (typeof endpointVariantName === 'undefined' || endpointVariantName == null) { %>
|
|
185
|
+
# export ENDPOINT_VARIANT_NAME="" # Custom variant name (default: AllTraffic)
|
|
186
|
+
<% } %>
|
|
187
|
+
<% if (typeof endpointVolumeSize === 'undefined' || endpointVolumeSize == null) { %>
|
|
188
|
+
# export ENDPOINT_VOLUME_SIZE="" # EBS volume size in GB for model download
|
|
189
|
+
<% } %>
|
|
190
|
+
<% } %>
|
|
191
|
+
|
|
143
192
|
<% if (typeof icCpuCount !== 'undefined' && icCpuCount != null) { %>
|
|
144
193
|
export IC_CPU_COUNT="<%= icCpuCount %>"
|
|
145
194
|
<% } %>
|
|
@@ -158,6 +207,22 @@ export IC_COPY_COUNT="<%= icCopyCount %>"
|
|
|
158
207
|
export IC_MODEL_WEIGHT="<%= icModelWeight %>"
|
|
159
208
|
<% } %>
|
|
160
209
|
|
|
210
|
+
<% if (deploymentTarget === 'realtime-inference' || deploymentTarget === 'async-inference') { %>
|
|
211
|
+
# ─── Inference Component overrides (uncomment to customize) ────────────────────
|
|
212
|
+
<% if (typeof icCpuCount === 'undefined' || icCpuCount == null) { %>
|
|
213
|
+
# export IC_CPU_COUNT="" # CPU cores reserved for this IC
|
|
214
|
+
<% } %>
|
|
215
|
+
<% if (typeof icMemorySize === 'undefined' || icMemorySize == null) { %>
|
|
216
|
+
# export IC_MEMORY_SIZE="" # Memory in MB reserved for this IC
|
|
217
|
+
<% } %>
|
|
218
|
+
<% if (typeof icCopyCount === 'undefined' || icCopyCount == null) { %>
|
|
219
|
+
# export IC_COPY_COUNT="" # Number of model copies (multi-IC scaling)
|
|
220
|
+
<% } %>
|
|
221
|
+
<% if (typeof icModelWeight === 'undefined' || icModelWeight == null) { %>
|
|
222
|
+
# export IC_MODEL_WEIGHT="" # Traffic weight for this IC (0-100)
|
|
223
|
+
<% } %>
|
|
224
|
+
<% } %>
|
|
225
|
+
|
|
161
226
|
<% if (typeof modelEnvVars !== 'undefined' && modelEnvVars && Object.keys(modelEnvVars).length > 0) { %>
|
|
162
227
|
# Model environment variables
|
|
163
228
|
<% Object.entries(modelEnvVars).forEach(([key, value]) => { %>
|
|
@@ -192,7 +257,22 @@ export NGC_API_KEY="<%= ngcApiKey %>"
|
|
|
192
257
|
<% if (deploymentTarget !== 'batch-transform') { %>
|
|
193
258
|
# Managed Model Customization (do/tune)
|
|
194
259
|
export TUNE_SUPPORTED=<%= (typeof tuneSupported !== 'undefined' && tuneSupported) ? 'true' : 'false' %>
|
|
260
|
+
<% if (typeof tuneSupported !== 'undefined' && tuneSupported) { %>
|
|
261
|
+
<% if (typeof tuneModelId !== 'undefined' && tuneModelId) { %>
|
|
262
|
+
# SageMaker AI Managed Fine-Tuning — JumpStart Hub model ID
|
|
263
|
+
# Flow: JumpStart model (tune) → LoRA adapter (S3) → do/adapter add → vLLM
|
|
264
|
+
export TUNE_MODEL_ID="<%= tuneModelId %>"
|
|
265
|
+
<% } else { %>
|
|
266
|
+
# SageMaker AI Managed Fine-Tuning — JumpStart Hub model ID
|
|
267
|
+
# To find your model's Hub ID:
|
|
268
|
+
# aws sagemaker list-hub-contents --hub-name SageMakerPublicHub \
|
|
269
|
+
# --hub-content-type Model --query "HubContentSummaries[].HubContentName"
|
|
270
|
+
# export TUNE_MODEL_ID=""
|
|
271
|
+
<% } %>
|
|
272
|
+
<% } %>
|
|
195
273
|
export TUNE_S3_BUCKET="mlcc-tune-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
|
|
274
|
+
# MLflow App ARN for experiment tracking (set by bootstrap, or override manually)
|
|
275
|
+
# export MLFLOW_APP_ARN=""
|
|
196
276
|
<% } %>
|
|
197
277
|
<% } %>
|
|
198
278
|
|
|
@@ -210,10 +290,17 @@ export HF_TOKEN="<%= hfToken %>"
|
|
|
210
290
|
|
|
211
291
|
<% if (modelFormat) { %>
|
|
212
292
|
export MODEL_FORMAT="<%= modelFormat %>"
|
|
293
|
+
<% } else { %>
|
|
294
|
+
# Model format (uncomment if using quantized models)
|
|
295
|
+
# Valid: pkl, json, keras, safetensors, gguf, awq, gptq
|
|
296
|
+
# export MODEL_FORMAT=""
|
|
213
297
|
<% } %>
|
|
214
298
|
|
|
215
299
|
<% if (roleArn) { %>
|
|
216
300
|
export ROLE_ARN="<%= roleArn %>"
|
|
301
|
+
<% } else { %>
|
|
302
|
+
# IAM execution role for SageMaker (uncomment to override bootstrap role)
|
|
303
|
+
# export ROLE_ARN=""
|
|
217
304
|
<% } %>
|
|
218
305
|
|
|
219
306
|
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
@@ -234,6 +321,23 @@ export BENCHMARK_S3_OUTPUT_PATH="s3://mlcc-benchmark-$(aws sts get-caller-identi
|
|
|
234
321
|
<% } %>
|
|
235
322
|
export BENCHMARK_JOB_NAME=""
|
|
236
323
|
export BENCHMARK_WORKLOAD_CONFIG_NAME=""
|
|
324
|
+
|
|
325
|
+
# CI Benchmark Athena persistence (set automatically from bootstrap --benchmark-infra)
|
|
326
|
+
<% if (typeof ciBenchmarkResultsBucket !== 'undefined' && ciBenchmarkResultsBucket) { %>
|
|
327
|
+
export CI_BENCHMARK_RESULTS_BUCKET="<%= ciBenchmarkResultsBucket %>"
|
|
328
|
+
<% } else { %>
|
|
329
|
+
# export CI_BENCHMARK_RESULTS_BUCKET="" # S3 bucket for Athena Parquet results (set by bootstrap --benchmark-infra)
|
|
330
|
+
<% } %>
|
|
331
|
+
<% } else if (framework === 'transformers' && deploymentTarget !== 'batch-transform') { %>
|
|
332
|
+
# ─── SageMaker AI Benchmarking (uncomment to enable) ──────────────────────────
|
|
333
|
+
# export BENCHMARK_CONCURRENCY="10" # Concurrent requests
|
|
334
|
+
# export BENCHMARK_INPUT_TOKENS_MEAN="550" # Mean input tokens per request
|
|
335
|
+
# export BENCHMARK_OUTPUT_TOKENS_MEAN="150" # Mean output tokens per request
|
|
336
|
+
# export BENCHMARK_STREAMING="true" # Enable streaming
|
|
337
|
+
# export BENCHMARK_REQUEST_COUNT="" # Total requests (empty = auto)
|
|
338
|
+
# export BENCHMARK_S3_OUTPUT_PATH="" # S3 path for results (empty = auto)
|
|
339
|
+
# export BENCHMARK_JOB_NAME="" # Resume/check existing job
|
|
340
|
+
# export BENCHMARK_WORKLOAD_CONFIG_NAME="" # Reuse existing workload config
|
|
237
341
|
<% } %>
|
|
238
342
|
|
|
239
343
|
<% if (orderedEnvVars && orderedEnvVars.length > 0) { %>
|
|
@@ -246,7 +350,6 @@ export <%= key %>=${<%= key %>:-<%= value %>}
|
|
|
246
350
|
export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage || '' %>}
|
|
247
351
|
|
|
248
352
|
# Allow environment variable overrides
|
|
249
|
-
export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
|
|
250
353
|
<% if ((deploymentTarget === 'realtime-inference' && !(typeof existingEndpointName !== 'undefined' && existingEndpointName)) || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
|
|
251
354
|
export INSTANCE_TYPE=${INSTANCE_TYPE:-<%= instanceType %>}
|
|
252
355
|
<% } %>
|
|
@@ -10,9 +10,11 @@ set -o pipefail
|
|
|
10
10
|
FORCE_NEW=false
|
|
11
11
|
FORCE_IC=false
|
|
12
12
|
IC_TARGET=""
|
|
13
|
+
CI_FLAG=false
|
|
13
14
|
while [ $# -gt 0 ]; do
|
|
14
15
|
case "$1" in
|
|
15
16
|
--force) FORCE_NEW=true; shift ;;
|
|
17
|
+
--ci) CI_FLAG=true; shift ;;
|
|
16
18
|
--force-ic)
|
|
17
19
|
FORCE_IC=true
|
|
18
20
|
shift
|
|
@@ -32,13 +34,14 @@ while [ $# -gt 0 ]; do
|
|
|
32
34
|
shift 2
|
|
33
35
|
;;
|
|
34
36
|
--help|-h)
|
|
35
|
-
echo "Usage: ./do/deploy [--force] [--force-ic [<name>]] [--ic <name>]"
|
|
37
|
+
echo "Usage: ./do/deploy [--force] [--force-ic [<name>]] [--ic <name>] [--ci]"
|
|
36
38
|
echo ""
|
|
37
39
|
echo "Options:"
|
|
38
40
|
echo " --force Create a new endpoint and IC, even if one already exists."
|
|
39
41
|
echo " --force-ic Recreate ALL inference components on the existing endpoint."
|
|
40
42
|
echo " --force-ic <name> Recreate only the named IC on the existing endpoint."
|
|
41
43
|
echo " --ic <name> Deploy only the named IC (from do/ic/<name>.conf)."
|
|
44
|
+
echo " --ci Enable CI mode (structured errors, timeouts, idempotency)."
|
|
42
45
|
echo ""
|
|
43
46
|
echo "Without flags, deploy resumes from the last run."
|
|
44
47
|
exit 0
|
|
@@ -51,6 +54,160 @@ while [ $# -gt 0 ]; do
|
|
|
51
54
|
esac
|
|
52
55
|
done
|
|
53
56
|
|
|
57
|
+
# ============================================================
|
|
58
|
+
# CI-Mode Detection and Configuration
|
|
59
|
+
# ============================================================
|
|
60
|
+
# CI mode is activated by CI_MODE=true env var OR --ci flag
|
|
61
|
+
if [ "${CI_MODE:-false}" = "true" ] || [ "${CI_FLAG}" = "true" ]; then
|
|
62
|
+
CI_ACTIVE=true
|
|
63
|
+
else
|
|
64
|
+
CI_ACTIVE=false
|
|
65
|
+
fi
|
|
66
|
+
|
|
67
|
+
# CI-mode timeout configuration (seconds)
|
|
68
|
+
if [ "${CI_ACTIVE}" = "true" ]; then
|
|
69
|
+
CI_DEPLOY_TIMEOUT="${CI_DEPLOY_TIMEOUT_SECONDS:-1200}"
|
|
70
|
+
CI_DEPLOY_START=$(date +%s)
|
|
71
|
+
fi
|
|
72
|
+
|
|
73
|
+
# _ci_emit_error <error_message> <error_type> <retryable>
|
|
74
|
+
# Emits structured JSON error output when in CI mode.
|
|
75
|
+
# In non-CI mode, prints human-readable error and exits.
|
|
76
|
+
_ci_emit_error() {
    # Report a fatal deployment error and terminate the script with status 1.
    #
    # Arguments:
    #   $1  error message (free text; may contain quotes, backslashes, newlines)
    #   $2  error type tag placed verbatim in the "error_type" field
    #   $3  retryable flag, emitted as a bare JSON boolean
    #
    # Globals read: CI_ACTIVE, CI_DEPLOY_START, INSTANCE_TYPE, AWS_REGION.
    #
    # In CI mode a single-line JSON object is written to stdout; otherwise a
    # human-readable message is printed. This function never returns.
    local error_msg="$1"
    local error_type="$2"
    local retryable="$3"
    local elapsed=0

    if [ "${CI_ACTIVE}" != "true" ]; then
        echo "❌ ${error_msg}"
        exit 1
    fi

    # "retryable" is interpolated without quotes, so anything other than a
    # literal JSON boolean would corrupt the document. Default to false.
    if [ "${retryable}" != "true" ]; then
        retryable="false"
    fi

    elapsed=$(( $(date +%s) - CI_DEPLOY_START ))

    # Escape the free-text message so the output stays valid, single-line JSON
    # even when it carries raw AWS CLI output. Backslashes must be handled first
    # so the escapes added afterwards are not themselves re-escaped.
    local json_msg="${error_msg}"
    json_msg=${json_msg//\\/\\\\}
    json_msg=${json_msg//\"/\\\"}
    json_msg=${json_msg//$'\n'/\\n}
    json_msg=${json_msg//$'\r'/\\r}
    json_msg=${json_msg//$'\t'/\\t}

    # printf instead of echo: the payload now contains backslashes, which echo
    # may interpret depending on shell options (e.g. xpg_echo).
    printf '%s\n' "{\"error\":\"${json_msg}\",\"error_type\":\"${error_type}\",\"instance_type\":\"${INSTANCE_TYPE:-unknown}\",\"region\":\"${AWS_REGION:-unknown}\",\"retryable\":${retryable},\"elapsed_seconds\":${elapsed}}"
    exit 1
}
|
|
91
|
+
|
|
92
|
+
# _ci_check_timeout
|
|
93
|
+
# Checks if CI-mode timeout has been exceeded.
|
|
94
|
+
# Emits structured timeout error if so.
|
|
95
|
+
_ci_check_timeout() {
    # Enforce the CI deploy time budget.
    #
    # Outside CI mode this is a no-op. In CI mode it compares the seconds
    # elapsed since CI_DEPLOY_START against CI_DEPLOY_TIMEOUT and, once the
    # budget is reached, hands off to _ci_emit_error with type "timeout" and
    # retryable "true".
    [ "${CI_ACTIVE}" = "true" ] || return 0

    local now
    now=$(date +%s)
    local spent=$(( now - CI_DEPLOY_START ))

    if [ "${spent}" -ge "${CI_DEPLOY_TIMEOUT}" ]; then
        _ci_emit_error "Deployment timed out after ${spent} seconds (limit: ${CI_DEPLOY_TIMEOUT}s)" "timeout" "true"
    fi
}
|
|
103
|
+
|
|
104
|
+
# _ci_create_endpoint_with_retry
|
|
105
|
+
# Wraps CreateEndpoint with exponential backoff for throttling.
|
|
106
|
+
# Base: 5 seconds, max 3 attempts.
|
|
107
|
+
_ci_create_endpoint_with_retry() {
    # Call `aws sagemaker create-endpoint`, retrying on throttling.
    #
    # Arguments:
    #   $1  endpoint name
    #   $2  endpoint config name
    #
    # Up to 3 attempts are made. The wait between throttled attempts starts at
    # 5 seconds and doubles each time. Returns 0 as soon as a call succeeds;
    # every failure path ends in _ci_emit_error.
    local ep_name="$1"
    local ep_config="$2"
    local max_attempts=3
    local backoff=5
    local attempt=0
    local create_output

    while [ "${attempt}" -lt "${max_attempts}" ]; do
        attempt=$(( attempt + 1 ))

        # stderr is merged in so the failure text can be classified below.
        if create_output=$(aws sagemaker create-endpoint \
            --endpoint-name "${ep_name}" \
            --endpoint-config-name "${ep_config}" \
            --region "${AWS_REGION}" 2>&1); then
            return 0
        fi

        if echo "${create_output}" | grep -qi "ThrottlingException"; then
            if [ "${attempt}" -ge "${max_attempts}" ]; then
                # Retry budget exhausted.
                _ci_emit_error "CreateEndpoint throttled after ${max_attempts} attempts" "throttled" "true"
            else
                if [ "${CI_ACTIVE}" = "true" ]; then
                    echo "⏳ Throttled (attempt ${attempt}/${max_attempts}), retrying in ${backoff}s..."
                else
                    echo "⏳ Throttled, retrying in ${backoff}s..."
                fi
                sleep "${backoff}"
                backoff=$(( backoff * 2 ))
            fi
        elif echo "${create_output}" | grep -qi "InsufficientInstanceCapacity"; then
            _ci_emit_error "InsufficientInstanceCapacity: Unable to provision ${INSTANCE_TYPE} in ${AWS_REGION}" "capacity" "true"
        else
            # Any other API failure is reported as non-retryable.
            _ci_emit_error "CreateEndpoint failed: ${create_output}" "api_error" "false"
        fi
    done
}
|
|
143
|
+
|
|
144
|
+
# _ci_handle_existing_endpoint
|
|
145
|
+
# CI-mode idempotent deployment logic.
|
|
146
|
+
# Returns 0 if deployment should be skipped (already InService with matching config).
|
|
147
|
+
# Returns 1 if a fresh deploy should proceed.
|
|
148
|
+
# Handles bad-state cleanup (Failed/OutOfService → delete + recreate).
|
|
149
|
+
_ci_handle_existing_endpoint() {
    # CI-mode idempotency gate for an endpoint recorded in ENDPOINT_NAME.
    #
    # Returns:
    #   0  the endpoint is InService and INFERENCE_COMPONENT_NAME is set and
    #      also InService — the caller should skip deployment. Only the two
    #      statuses are compared; no configuration contents are inspected.
    #   1  a fresh deploy should proceed (no endpoint recorded, unknown or
    #      absent status, or a bad-state endpoint was deleted).
    #
    # Side effects: for Failed/OutOfService endpoints, issues delete-endpoint,
    # polls every 10 seconds until the status lookup comes back empty (at most
    # 300 seconds, also bounded by _ci_check_timeout), then clears ENDPOINT_NAME.
    local ep_name="${ENDPOINT_NAME:-}"
    if [ -z "${ep_name}" ]; then
        return 1  # No existing endpoint, proceed with fresh deploy
    fi

    local ep_status
    ep_status=$(_get_endpoint_status "${ep_name}" 2>/dev/null || echo "")

    case "${ep_status}" in
        InService)
            if [ -n "${INFERENCE_COMPONENT_NAME:-}" ]; then
                local ic_status
                ic_status=$(_get_ic_status "${INFERENCE_COMPONENT_NAME}" 2>/dev/null || echo "")
                if [ "${ic_status}" = "InService" ]; then
                    echo "✅ [CI] Endpoint InService with matching config — skipping deployment"
                    echo " Endpoint: ${ep_name}"
                    echo " Inference Component: ${INFERENCE_COMPONENT_NAME}"
                    return 0
                fi
            fi
            return 1
            ;;
        Failed|OutOfService)
            echo "⚠️ [CI] Endpoint in bad state (${ep_status}): ${ep_name}"
            echo " Deleting endpoint for fresh deployment..."

            # Deletion stays best-effort (the poll below is the real check),
            # but a failed call is surfaced so a permissions or API error is
            # not mistaken for a slow deletion.
            local delete_output
            if ! delete_output=$(aws sagemaker delete-endpoint \
                --endpoint-name "${ep_name}" \
                --region "${AWS_REGION}" 2>&1); then
                echo " ⚠️ [CI] delete-endpoint call failed (will still poll for removal): ${delete_output}"
            fi

            # Wait for deletion to complete
            local delete_start
            delete_start=$(date +%s)
            local delete_timeout=300  # 5 minutes
            local check_status
            local del_elapsed

            while true; do
                _ci_check_timeout
                check_status=$(_get_endpoint_status "${ep_name}" 2>/dev/null || echo "")
                if [ -z "${check_status}" ]; then
                    echo " ✅ Endpoint deleted: ${ep_name}"
                    break
                fi
                del_elapsed=$(( $(date +%s) - delete_start ))
                if [ "${del_elapsed}" -ge "${delete_timeout}" ]; then
                    # Report the last polled status, not the pre-delete one.
                    _ci_emit_error "Endpoint deletion timed out for ${ep_name} (state: ${check_status}, was: ${ep_status})" "endpoint_failed" "true"
                fi
                sleep 10
            done

            # Clear endpoint name so fresh deploy proceeds
            ENDPOINT_NAME=""
            return 1
            ;;
        *)
            return 1  # Unknown/absent state, proceed with fresh deploy
            ;;
    esac
}
|
|
210
|
+
|
|
54
211
|
# Source configuration
|
|
55
212
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
56
213
|
source "${SCRIPT_DIR}/config"
|
|
@@ -193,6 +350,16 @@ fi
|
|
|
193
350
|
# Resolve container secrets (HF_TOKEN, NGC_API_KEY)
|
|
194
351
|
resolve_secrets
|
|
195
352
|
|
|
353
|
+
# ============================================================
|
|
354
|
+
# CI-Mode: Idempotent deployment check (runs before normal idempotency)
|
|
355
|
+
# ============================================================
|
|
356
|
+
if [ "${CI_ACTIVE}" = "true" ] && [ "${FORCE_NEW}" != "true" ]; then
|
|
357
|
+
if _ci_handle_existing_endpoint; then
|
|
358
|
+
# Endpoint already InService with matching config — exit successfully
|
|
359
|
+
exit 0
|
|
360
|
+
fi
|
|
361
|
+
fi
|
|
362
|
+
|
|
196
363
|
# ============================================================
|
|
197
364
|
# Idempotency: check for existing deployment from a previous run
|
|
198
365
|
# ============================================================
|
|
@@ -380,16 +547,20 @@ if [ -z "${SKIP_TO}" ]; then
|
|
|
380
547
|
|
|
381
548
|
# Step 2: Create endpoint
|
|
382
549
|
echo "🚀 Creating endpoint: ${ENDPOINT_NAME}"
|
|
383
|
-
if
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
550
|
+
if [ "${CI_ACTIVE}" = "true" ]; then
|
|
551
|
+
_ci_create_endpoint_with_retry "${ENDPOINT_NAME}" "${ENDPOINT_CONFIG_NAME}"
|
|
552
|
+
else
|
|
553
|
+
if ! aws sagemaker create-endpoint \
|
|
554
|
+
--endpoint-name "${ENDPOINT_NAME}" \
|
|
555
|
+
--endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
|
|
556
|
+
--region "${AWS_REGION}"; then
|
|
557
|
+
|
|
558
|
+
echo "❌ Failed to create endpoint"
|
|
559
|
+
echo " Check that:"
|
|
560
|
+
echo " • Your IAM credentials have sagemaker:CreateEndpoint permission"
|
|
561
|
+
echo " • You have sufficient service quota in region: ${AWS_REGION}"
|
|
562
|
+
exit 4
|
|
563
|
+
fi
|
|
393
564
|
fi
|
|
394
565
|
|
|
395
566
|
echo "✅ Endpoint creation initiated: ${ENDPOINT_NAME}"
|
|
@@ -413,8 +584,18 @@ if [ -z "${SKIP_TO}" ] || [ "${SKIP_TO}" = "wait_endpoint" ]; then
|
|
|
413
584
|
echo " This may take a few minutes..."
|
|
414
585
|
echo " If this times out, re-run ./do/deploy to resume."
|
|
415
586
|
|
|
587
|
+
# CI-mode: check timeout during wait
|
|
588
|
+
if [ "${CI_ACTIVE}" = "true" ]; then
|
|
589
|
+
_ci_check_timeout
|
|
590
|
+
fi
|
|
591
|
+
|
|
416
592
|
wait_endpoint "${ENDPOINT_NAME}"
|
|
417
593
|
|
|
594
|
+
# CI-mode: check timeout after wait completes
|
|
595
|
+
if [ "${CI_ACTIVE}" = "true" ]; then
|
|
596
|
+
_ci_check_timeout
|
|
597
|
+
fi
|
|
598
|
+
|
|
418
599
|
echo "✅ Endpoint is InService: ${ENDPOINT_NAME}"
|
|
419
600
|
fi
|
|
420
601
|
|