@aws/ml-container-creator 0.10.0 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90):
  1. package/LICENSE-THIRD-PARTY +9304 -0
  2. package/bin/cli.js +2 -0
  3. package/config/bootstrap-e2e-stack.json +341 -0
  4. package/config/bootstrap-stack.json +40 -3
  5. package/config/parameter-schema-v2.json +33 -22
  6. package/config/tune-catalog.json +1781 -0
  7. package/infra/ci-harness/buildspec.yml +1 -0
  8. package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
  9. package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
  10. package/infra/ci-harness/lib/ci-harness-stack.ts +851 -7
  11. package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
  12. package/package.json +53 -67
  13. package/servers/base-image-picker/index.js +121 -121
  14. package/servers/e2e-status/index.js +297 -0
  15. package/servers/e2e-status/manifest.json +14 -0
  16. package/servers/e2e-status/package.json +15 -0
  17. package/servers/endpoint-picker/LICENSE +202 -0
  18. package/servers/endpoint-picker/index.js +536 -0
  19. package/servers/endpoint-picker/manifest.json +14 -0
  20. package/servers/endpoint-picker/package.json +18 -0
  21. package/servers/hyperpod-cluster-picker/index.js +125 -125
  22. package/servers/instance-sizer/index.js +166 -153
  23. package/servers/instance-sizer/lib/instance-ranker.js +120 -76
  24. package/servers/instance-sizer/lib/model-resolver.js +61 -61
  25. package/servers/instance-sizer/lib/quota-resolver.js +113 -113
  26. package/servers/instance-sizer/lib/vram-estimator.js +31 -31
  27. package/servers/lib/bedrock-client.js +38 -38
  28. package/servers/lib/catalogs/instances.json +27 -0
  29. package/servers/lib/catalogs/model-servers.json +201 -3
  30. package/servers/lib/custom-validators.js +13 -13
  31. package/servers/lib/dynamic-resolver.js +4 -4
  32. package/servers/marketplace-picker/index.js +342 -0
  33. package/servers/marketplace-picker/manifest.json +14 -0
  34. package/servers/marketplace-picker/package.json +18 -0
  35. package/servers/model-picker/index.js +382 -382
  36. package/servers/region-picker/index.js +56 -56
  37. package/servers/workload-picker/LICENSE +202 -0
  38. package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
  39. package/servers/workload-picker/index.js +171 -0
  40. package/servers/workload-picker/manifest.json +16 -0
  41. package/servers/workload-picker/package.json +16 -0
  42. package/src/app.js +12 -3
  43. package/src/lib/bootstrap-command-handler.js +609 -15
  44. package/src/lib/bootstrap-config.js +36 -0
  45. package/src/lib/bootstrap-profile-manager.js +48 -41
  46. package/src/lib/ci-register-helpers.js +74 -0
  47. package/src/lib/config-loader.js +3 -0
  48. package/src/lib/config-manager.js +7 -0
  49. package/src/lib/config-validator.js +1 -1
  50. package/src/lib/cuda-resolver.js +17 -8
  51. package/src/lib/generated/cli-options.js +319 -314
  52. package/src/lib/generated/parameter-matrix.js +672 -661
  53. package/src/lib/generated/validation-rules.js +76 -72
  54. package/src/lib/path-prover-brain.js +664 -0
  55. package/src/lib/prompts/infrastructure-prompts.js +2 -2
  56. package/src/lib/prompts/model-prompts.js +6 -0
  57. package/src/lib/prompts/project-prompts.js +12 -0
  58. package/src/lib/secrets-prompt-runner.js +4 -0
  59. package/src/lib/template-manager.js +1 -1
  60. package/src/lib/template-variable-resolver.js +87 -1
  61. package/src/lib/tune-catalog-validator.js +37 -4
  62. package/templates/Dockerfile +9 -0
  63. package/templates/code/adapter_sidecar.py +444 -0
  64. package/templates/code/serve +6 -0
  65. package/templates/code/serve.d/vllm.ejs +1 -1
  66. package/templates/do/.benchmark_writer.py +1476 -0
  67. package/templates/do/.tune_helper.py +982 -57
  68. package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
  69. package/templates/do/adapter +154 -0
  70. package/templates/do/benchmark +639 -85
  71. package/templates/do/build +5 -0
  72. package/templates/do/clean.d/async-inference.ejs +5 -0
  73. package/templates/do/clean.d/batch-transform.ejs +5 -0
  74. package/templates/do/clean.d/hyperpod-eks.ejs +5 -0
  75. package/templates/do/clean.d/managed-inference.ejs +5 -0
  76. package/templates/do/config +115 -45
  77. package/templates/do/deploy.d/async-inference.ejs +30 -3
  78. package/templates/do/deploy.d/batch-transform.ejs +29 -3
  79. package/templates/do/deploy.d/hyperpod-eks.ejs +4 -0
  80. package/templates/do/deploy.d/managed-inference.ejs +216 -14
  81. package/templates/do/lib/endpoint-config.sh +1 -1
  82. package/templates/do/lib/profile.sh +44 -0
  83. package/templates/do/optimize +106 -37
  84. package/templates/do/push +5 -0
  85. package/templates/do/register +94 -0
  86. package/templates/do/stage +567 -0
  87. package/templates/do/submit +7 -0
  88. package/templates/do/test +14 -0
  89. package/templates/do/tune +382 -59
  90. package/templates/do/validate +44 -4
@@ -10,9 +10,11 @@ set -o pipefail
10
10
  FORCE_NEW=false
11
11
  FORCE_IC=false
12
12
  IC_TARGET=""
13
+ CI_FLAG=false
13
14
  while [ $# -gt 0 ]; do
14
15
  case "$1" in
15
16
  --force) FORCE_NEW=true; shift ;;
17
+ --ci) CI_FLAG=true; shift ;;
16
18
  --force-ic)
17
19
  FORCE_IC=true
18
20
  shift
@@ -32,13 +34,14 @@ while [ $# -gt 0 ]; do
32
34
  shift 2
33
35
  ;;
34
36
  --help|-h)
35
- echo "Usage: ./do/deploy [--force] [--force-ic [<name>]] [--ic <name>]"
37
+ echo "Usage: ./do/deploy [--force] [--force-ic [<name>]] [--ic <name>] [--ci]"
36
38
  echo ""
37
39
  echo "Options:"
38
40
  echo " --force Create a new endpoint and IC, even if one already exists."
39
41
  echo " --force-ic Recreate ALL inference components on the existing endpoint."
40
42
  echo " --force-ic <name> Recreate only the named IC on the existing endpoint."
41
43
  echo " --ic <name> Deploy only the named IC (from do/ic/<name>.conf)."
44
+ echo " --ci Enable CI mode (structured errors, timeouts, idempotency)."
42
45
  echo ""
43
46
  echo "Without flags, deploy resumes from the last run."
44
47
  exit 0
@@ -51,9 +54,169 @@ while [ $# -gt 0 ]; do
51
54
  esac
52
55
  done
53
56
 
57
+ # ============================================================
58
+ # CI-Mode Detection and Configuration
59
+ # ============================================================
60
+ # CI mode is activated by CI_MODE=true env var OR --ci flag
61
if [ "${CI_MODE:-false}" = "true" ] || [ "${CI_FLAG}" = "true" ]; then
  CI_ACTIVE=true
else
  CI_ACTIVE=false
fi

# CI-mode timeout configuration (seconds).
# CI_DEPLOY_TIMEOUT_SECONDS overrides the 1200 s default. The value is later
# compared with `[ ... -ge ... ]`, which fails on anything that is not a plain
# non-negative integer — and a failing comparison would mean the timeout never
# fires. Reject such values up front and fall back to the default.
if [ "${CI_ACTIVE}" = "true" ]; then
  CI_DEPLOY_TIMEOUT="${CI_DEPLOY_TIMEOUT_SECONDS:-1200}"
  case "${CI_DEPLOY_TIMEOUT}" in
    '' | *[!0-9]*)
      echo "⚠️  Ignoring non-numeric CI_DEPLOY_TIMEOUT_SECONDS='${CI_DEPLOY_TIMEOUT}'; using 1200" >&2
      CI_DEPLOY_TIMEOUT=1200
      ;;
  esac
  # Reference point for all elapsed-time calculations in CI mode.
  CI_DEPLOY_START=$(date +%s)
fi
72
+
73
+ # _ci_emit_error <error_message> <error_type> <retryable>
74
+ # Emits structured JSON error output when in CI mode.
75
+ # In non-CI mode, prints human-readable error and exits.
76
_ci_emit_error() {
  # Report a fatal deployment error and terminate the script with status 1.
  #
  # Arguments:
  #   $1 - human-readable error message (may contain arbitrary text, e.g.
  #        captured AWS CLI output)
  #   $2 - short machine-readable error category
  #   $3 - "true" if the caller may retry; anything else is reported as false
  # Globals read: CI_ACTIVE, CI_DEPLOY_START, INSTANCE_TYPE, AWS_REGION
  # Outputs (stdout):
  #   CI mode     - exactly one line containing a JSON object
  #   non-CI mode - "❌ <message>"
  local error_msg="$1"
  local error_type="$2"
  local retryable="$3"
  local elapsed=0

  if [ "${CI_ACTIVE:-false}" = "true" ]; then
    # JSON booleans must be the bare words true/false; anything else would
    # make the whole line unparseable.
    [ "${retryable}" = "true" ] || retryable="false"

    # Fall back to "now" if the start timestamp was never recorded.
    elapsed=$(( $(date +%s) - ${CI_DEPLOY_START:-$(date +%s)} ))

    # Every string field is escaped: messages frequently embed CLI output that
    # contains quotes, backslashes and newlines.
    printf '{"error":"%s","error_type":"%s","instance_type":"%s","region":"%s","retryable":%s,"elapsed_seconds":%s}\n' \
      "$(_ci_json_escape "${error_msg}")" \
      "$(_ci_json_escape "${error_type}")" \
      "$(_ci_json_escape "${INSTANCE_TYPE:-unknown}")" \
      "$(_ci_json_escape "${AWS_REGION:-unknown}")" \
      "${retryable}" \
      "${elapsed}"
    exit 1
  else
    echo "❌ ${error_msg}"
    exit 1
  fi
}

# _ci_json_escape <string>
# Prints <string> escaped for use inside a double-quoted JSON string.
_ci_json_escape() {
  local s="$1"
  s=${s//\\/\\\\}        # backslash first, so later escapes are not doubled
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  s=${s//[[:cntrl:]]/ }  # remaining control characters are illegal in JSON
  printf '%s' "${s}"
}
91
+
92
+ # _ci_check_timeout
93
+ # Checks if CI-mode timeout has been exceeded.
94
+ # Emits structured timeout error if so.
95
_ci_check_timeout() {
  # Abort via _ci_emit_error (type "timeout", retryable) once the seconds
  # elapsed since CI_DEPLOY_START reach CI_DEPLOY_TIMEOUT.
  # Does nothing unless CI mode is active.
  [ "${CI_ACTIVE}" = "true" ] || return 0

  local elapsed=$(( $(date +%s) - CI_DEPLOY_START ))

  # Still within budget (or the comparison itself failed): carry on.
  [ "${elapsed}" -ge "${CI_DEPLOY_TIMEOUT}" ] || return 0

  _ci_emit_error "Deployment timed out after ${elapsed} seconds (limit: ${CI_DEPLOY_TIMEOUT}s)" "timeout" "true"
}
103
+
104
+ # _ci_create_endpoint_with_retry
105
+ # Wraps CreateEndpoint with exponential backoff for throttling.
106
+ # Base: 5 seconds, max 3 attempts.
107
_ci_create_endpoint_with_retry() {
  # Call `aws sagemaker create-endpoint`, retrying when the API throttles.
  #
  # Arguments:
  #   $1 - endpoint name
  #   $2 - endpoint config name
  # Globals read: AWS_REGION, INSTANCE_TYPE, CI_ACTIVE
  # Returns 0 as soon as one call succeeds. Every failure path other than a
  # retry is reported through _ci_emit_error:
  #   throttled on the final attempt -> "throttled" (retryable)
  #   InsufficientInstanceCapacity   -> "capacity"  (retryable)
  #   anything else                  -> "api_error" (not retryable)
  # Waits 5 s before the second attempt and 10 s before the third.
  local ep_name="$1"
  local ep_config="$2"
  local max_attempts=3
  local delay=5
  local try
  local api_output

  for (( try = 1; try <= max_attempts; try++ )); do
    # stderr is folded into the capture so the error text can be classified.
    if api_output=$(aws sagemaker create-endpoint \
      --endpoint-name "${ep_name}" \
      --endpoint-config-name "${ep_config}" \
      --region "${AWS_REGION}" 2>&1); then
      return 0
    fi

    # Non-throttling failures are never retried.
    if ! echo "${api_output}" | grep -qi "ThrottlingException"; then
      if echo "${api_output}" | grep -qi "InsufficientInstanceCapacity"; then
        _ci_emit_error "InsufficientInstanceCapacity: Unable to provision ${INSTANCE_TYPE} in ${AWS_REGION}" "capacity" "true"
      else
        _ci_emit_error "CreateEndpoint failed: ${api_output}" "api_error" "false"
      fi
      continue
    fi

    # Throttled: give up once the attempt budget is spent.
    if [ "${try}" -ge "${max_attempts}" ]; then
      _ci_emit_error "CreateEndpoint throttled after ${max_attempts} attempts" "throttled" "true"
      continue
    fi

    if [ "${CI_ACTIVE}" = "true" ]; then
      echo "⏳ Throttled (attempt ${try}/${max_attempts}), retrying in ${delay}s..."
    else
      echo "⏳ Throttled, retrying in ${delay}s..."
    fi
    sleep "${delay}"
    delay=$(( delay * 2 ))
  done
}
143
+
144
+ # _ci_handle_existing_endpoint
145
+ # CI-mode idempotent deployment logic.
146
+ # Returns 0 if deployment should be skipped (already InService with matching config).
147
+ # Returns 1 if a fresh deploy should proceed.
148
+ # Handles bad-state cleanup (Failed/OutOfService → delete + recreate).
149
_ci_handle_existing_endpoint() {
  # Decide whether a CI deployment can be skipped because a healthy endpoint
  # already exists, cleaning up an unhealthy one if necessary.
  #
  # Globals read:    ENDPOINT_NAME, INFERENCE_COMPONENT_NAME, AWS_REGION
  # Globals written: ENDPOINT_NAME (cleared after a bad endpoint is deleted)
  # Returns:
  #   0 - endpoint AND inference component both report InService; skip deploy
  #   1 - caller should proceed with a deployment
  # NOTE(review): only the two status values are inspected; no endpoint-config
  # contents are compared here, despite the "matching config" wording in the
  # success message.
  local ep_name="${ENDPOINT_NAME:-}"
  if [ -z "${ep_name}" ]; then
    return 1  # No existing endpoint, proceed with fresh deploy
  fi

  # An empty status means the lookup failed or returned nothing.
  local ep_status
  ep_status=$(_get_endpoint_status "${ep_name}" 2>/dev/null || echo "")

  case "${ep_status}" in
    InService)
      if [ -n "${INFERENCE_COMPONENT_NAME:-}" ]; then
        local ic_status
        ic_status=$(_get_ic_status "${INFERENCE_COMPONENT_NAME}" 2>/dev/null || echo "")
        if [ "${ic_status}" = "InService" ]; then
          echo "✅ [CI] Endpoint InService with matching config — skipping deployment"
          echo " Endpoint: ${ep_name}"
          echo " Inference Component: ${INFERENCE_COMPONENT_NAME}"
          return 0
        fi
      fi
      return 1
      ;;
    Failed | OutOfService)
      echo "⚠️ [CI] Endpoint in bad state (${ep_status}): ${ep_name}"
      echo " Deleting endpoint for fresh deployment..."

      # Best effort: a failure here is deliberately ignored; the polling loop
      # below decides whether the endpoint actually went away.
      aws sagemaker delete-endpoint \
        --endpoint-name "${ep_name}" \
        --region "${AWS_REGION}" 2>/dev/null || true

      # Wait for deletion to complete
      local delete_start
      delete_start=$(date +%s)
      local delete_timeout=300  # 5 minutes
      local check_status
      local del_elapsed

      while true; do
        _ci_check_timeout
        check_status=$(_get_endpoint_status "${ep_name}" 2>/dev/null || echo "")
        if [ -z "${check_status}" ]; then
          echo " ✅ Endpoint deleted: ${ep_name}"
          break
        fi
        del_elapsed=$(( $(date +%s) - delete_start ))
        if [ "${del_elapsed}" -ge "${delete_timeout}" ]; then
          # Report the state observed just now, not the state that triggered
          # the deletion, so the error reflects where the endpoint is stuck.
          _ci_emit_error "Endpoint deletion timed out for ${ep_name} (state: ${check_status})" "endpoint_failed" "true"
        fi
        sleep 10
      done

      # Clear endpoint name so fresh deploy proceeds
      ENDPOINT_NAME=""
      return 1
      ;;
    *)
      return 1  # Unknown/absent state, proceed with fresh deploy
      ;;
  esac
}
210
+
54
211
  # Source configuration
55
212
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
56
213
  source "${SCRIPT_DIR}/config"
214
+ source "${SCRIPT_DIR}/lib/profile.sh"
215
+
216
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
217
+ ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
218
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
219
+ export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
57
220
 
58
221
  echo "🚀 Deploying to AWS"
59
222
  echo " Project: ${PROJECT_NAME}"
@@ -150,14 +313,14 @@ source "${SCRIPT_DIR}/lib/inference-component.sh"
150
313
 
151
314
  # Validate execution role ARN
152
315
  if [ -z "${ROLE_ARN:-}" ]; then
153
- echo "❌ Execution role ARN not provided"
316
+ echo "❌ ROLE_ARN is not set."
317
+ echo " Run 'ml-container-creator bootstrap' to configure your profile,"
318
+ echo " or set ROLE_ARN as an environment variable."
154
319
  echo ""
155
320
  echo "Usage:"
156
321
  echo " export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
157
322
  echo " ./do/deploy"
158
323
  echo ""
159
- echo "Or set ROLE_ARN in do/config"
160
- echo ""
161
324
  echo "The execution role must have permissions for:"
162
325
  echo " • SageMaker endpoint and inference component management"
163
326
  echo " • ECR image access"
@@ -193,6 +356,31 @@ fi
193
356
  # Resolve container secrets (HF_TOKEN, NGC_API_KEY)
194
357
  resolve_secrets
195
358
 
359
+ <% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
360
+ # ============================================================
361
+ # Inject server environment variables into container Environment
362
+ # ============================================================
363
+ <% Object.keys(serverEnvVars).forEach(function(key) { %>
364
+ if [ -n "${<%= key %>:-}" ]; then
365
+ if [ -n "${CONTAINER_ENV_JSON}" ]; then
366
+ CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"<%= key %>\":\"${<%= key %>}\""
367
+ else
368
+ CONTAINER_ENV_JSON="\"<%= key %>\":\"${<%= key %>}\""
369
+ fi
370
+ fi
371
+ <% }); %>
372
+ <% } %>
373
+
374
+ # ============================================================
375
+ # CI-Mode: Idempotent deployment check (runs before normal idempotency)
376
+ # ============================================================
377
+ if [ "${CI_ACTIVE}" = "true" ] && [ "${FORCE_NEW}" != "true" ]; then
378
+ if _ci_handle_existing_endpoint; then
379
+ # Endpoint already InService with matching config — exit successfully
380
+ exit 0
381
+ fi
382
+ fi
383
+
196
384
  # ============================================================
197
385
  # Idempotency: check for existing deployment from a previous run
198
386
  # ============================================================
@@ -380,16 +568,20 @@ if [ -z "${SKIP_TO}" ]; then
380
568
 
381
569
  # Step 2: Create endpoint
382
570
  echo "🚀 Creating endpoint: ${ENDPOINT_NAME}"
383
- if ! aws sagemaker create-endpoint \
384
- --endpoint-name "${ENDPOINT_NAME}" \
385
- --endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
386
- --region "${AWS_REGION}"; then
387
-
388
- echo " Failed to create endpoint"
389
- echo " Check that:"
390
- echo " • Your IAM credentials have sagemaker:CreateEndpoint permission"
391
- echo " You have sufficient service quota in region: ${AWS_REGION}"
392
- exit 4
571
+ if [ "${CI_ACTIVE}" = "true" ]; then
572
+ _ci_create_endpoint_with_retry "${ENDPOINT_NAME}" "${ENDPOINT_CONFIG_NAME}"
573
+ else
574
+ if ! aws sagemaker create-endpoint \
575
+ --endpoint-name "${ENDPOINT_NAME}" \
576
+ --endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
577
+ --region "${AWS_REGION}"; then
578
+
579
+ echo " Failed to create endpoint"
580
+ echo " Check that:"
581
+ echo " • Your IAM credentials have sagemaker:CreateEndpoint permission"
582
+ echo " • You have sufficient service quota in region: ${AWS_REGION}"
583
+ exit 4
584
+ fi
393
585
  fi
394
586
 
395
587
  echo "✅ Endpoint creation initiated: ${ENDPOINT_NAME}"
@@ -413,8 +605,18 @@ if [ -z "${SKIP_TO}" ] || [ "${SKIP_TO}" = "wait_endpoint" ]; then
413
605
  echo " This may take a few minutes..."
414
606
  echo " If this times out, re-run ./do/deploy to resume."
415
607
 
608
+ # CI-mode: check timeout during wait
609
+ if [ "${CI_ACTIVE}" = "true" ]; then
610
+ _ci_check_timeout
611
+ fi
612
+
416
613
  wait_endpoint "${ENDPOINT_NAME}"
417
614
 
615
+ # CI-mode: check timeout after wait completes
616
+ if [ "${CI_ACTIVE}" = "true" ]; then
617
+ _ci_check_timeout
618
+ fi
619
+
418
620
  echo "✅ Endpoint is InService: ${ENDPOINT_NAME}"
419
621
  fi
420
622
 
@@ -165,7 +165,7 @@ create_endpoint_config() {
165
165
  # Optional: capacity reservation
166
166
  if [ -n "${CAPACITY_RESERVATION_ARN:-}" ]; then
167
167
  variant_json="${variant_json},\"CapacityReservationConfig\":{\"CapacityReservationPreference\":\"capacity-reservations-only\",\"MlReservationArn\":\"${CAPACITY_RESERVATION_ARN}\"}"
168
- echo " ⚠️ Capacity reservation (experimental): ${CAPACITY_RESERVATION_ARN}"
168
+ echo " Capacity reservation: ${CAPACITY_RESERVATION_ARN}"
169
169
  fi
170
170
 
171
171
  variant_json="${variant_json}}]"
@@ -0,0 +1,44 @@
1
#!/usr/bin/env bash
# Profile loader — reads active bootstrap profile into _PROFILE[] associative array.
# Source this file after do/config. Values provide defaults; explicit env vars take precedence.
#
# Requires bash 4+ for associative array support.
# macOS ships with bash 3.2 — install bash 4+ via Homebrew: brew install bash
# On older shells the profile is NOT loaded (a warning is printed to stderr) and
# callers fall back to environment variables and built-in defaults.
#
# Expected keys in _PROFILE:
#   awsRegion, accountId, awsProfile, roleArn, ecrRepositoryName,
#   benchmarkS3Bucket, ciBenchmarkResultsBucket, asyncS3Bucket, batchS3Bucket,
#   ciTableName, ciInfraProvisioned
#
# NOTE(review): values are printed with Python's str(), so JSON booleans arrive
# as "True"/"False" — confirm that consumers do not compare against "true".

# Temporarily disable unbound variable checking for profile loading
# (keys may not exist in the profile config, and declare -A behavior
# varies across bash versions with set -u)
set +u 2>/dev/null || true

# Populate _PROFILE only when it really is an associative array. If declare -A
# is unsupported, assigning _PROFILE[name]=... would create an indexed array in
# which every name evaluates to index 0, so all keys would overwrite each other
# and lookups would return the wrong value.
if declare -A _PROFILE 2>/dev/null; then
  if command -v python3 &>/dev/null; then
    # Best effort: a missing or malformed config file yields no output.
    _PROFILE_RAW=$(python3 -c "
import json, os
try:
    with open(os.path.expanduser('~/.ml-container-creator/config.json')) as f:
        c = json.load(f)
    p = c['profiles'][c['activeProfile']]
    # Output as KEY=VALUE lines (simple, no JSON parsing in bash)
    for k, v in p.items():
        if isinstance(v, (str, int, float, bool)):
            print(f'{k}={v}')
except Exception:
    pass
" 2>/dev/null) || _PROFILE_RAW=""

    if [ -n "${_PROFILE_RAW}" ]; then
      # Split on the first '=' only; the rest of the line (including any
      # further '=' characters) is the value. Prefixed names avoid clobbering
      # variables of the sourcing script.
      while IFS='=' read -r _profile_key _profile_val; do
        if [ -n "${_profile_key}" ]; then
          _PROFILE["${_profile_key}"]="${_profile_val}"
        fi
      done <<< "${_PROFILE_RAW}"
      unset _profile_key _profile_val
    fi
  fi
else
  echo "⚠️  profile.sh: bash 4+ is required to load the bootstrap profile; using environment variables and defaults only." >&2
fi

# NOTE: set -u is NOT re-enabled here. The caller is responsible for managing
# their own shell options. Re-enabling set -u would cause "unbound variable"
# errors when accessing _PROFILE keys on bash versions where empty associative
# arrays are treated as unset (bash 5.x on some platforms).
@@ -106,6 +106,30 @@ elif [ -n "${INSTANCE_POOLS:-}" ]; then
106
106
  elif [ -n "${INSTANCE_TYPE:-}" ]; then
107
107
  INSTANCE_TYPES="${INSTANCE_TYPE}"
108
108
  echo " Instances (from INSTANCE_TYPE): ${INSTANCE_TYPES}"
109
+ elif [ "${ENDPOINT_EXTERNAL:-}" = "true" ] && [ -n "${ENDPOINT_NAME:-}" ]; then
110
+ # External endpoint — query the live endpoint config for instance type
111
+ echo " Discovering instance type from external endpoint: ${ENDPOINT_NAME}"
112
+ ENDPOINT_CONFIG_NAME=$(aws sagemaker describe-endpoint \
113
+ --endpoint-name "${ENDPOINT_NAME}" \
114
+ --region "${AWS_REGION}" \
115
+ --query 'EndpointConfigName' \
116
+ --output text 2>/dev/null) || ENDPOINT_CONFIG_NAME=""
117
+
118
+ if [ -n "${ENDPOINT_CONFIG_NAME}" ]; then
119
+ INSTANCE_TYPES=$(aws sagemaker describe-endpoint-config \
120
+ --endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
121
+ --region "${AWS_REGION}" \
122
+ --query 'ProductionVariants[0].InstanceType' \
123
+ --output text 2>/dev/null) || INSTANCE_TYPES=""
124
+ fi
125
+
126
+ if [ -n "${INSTANCE_TYPES}" ] && [ "${INSTANCE_TYPES}" != "None" ]; then
127
+ echo " Instances (from endpoint): ${INSTANCE_TYPES}"
128
+ else
129
+ echo "❌ Could not discover instance type from endpoint: ${ENDPOINT_NAME}"
130
+ echo " Provide --instances flag, or set INSTANCE_TYPE in do/config."
131
+ exit 1
132
+ fi
109
133
  else
110
134
  echo "❌ No instance types available."
111
135
  echo " Provide --instances flag, or set INSTANCE_POOLS or INSTANCE_TYPE in do/config."
@@ -132,9 +156,9 @@ RESUME_EXISTING=false
132
156
 
133
157
  if [ "${FORCE}" = false ] && [ -n "${OPTIMIZE_JOB_NAME:-}" ]; then
134
158
  EXISTING_STATUS=$(aws sagemaker describe-ai-recommendation-job \
135
- --job-name "${OPTIMIZE_JOB_NAME}" \
159
+ --ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
136
160
  --region "${AWS_REGION}" \
137
- --query 'Status' \
161
+ --query 'AIRecommendationJobStatus' \
138
162
  --output text 2>/dev/null) || EXISTING_STATUS=""
139
163
 
140
164
  case "${EXISTING_STATUS}" in
@@ -154,7 +178,7 @@ if [ "${FORCE}" = false ] && [ -n "${OPTIMIZE_JOB_NAME:-}" ]; then
154
178
  ;;
155
179
  FAILED|STOPPED)
156
180
  FAILURE_REASON=$(aws sagemaker describe-ai-recommendation-job \
157
- --job-name "${OPTIMIZE_JOB_NAME}" \
181
+ --ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
158
182
  --region "${AWS_REGION}" \
159
183
  --query 'FailureReason' \
160
184
  --output text 2>/dev/null) || FAILURE_REASON="unknown"
@@ -174,44 +198,86 @@ fi
174
198
  # ── Create recommendation job ─────────────────────────────────────────────────
175
199
  if [ "${RESUME_EXISTING}" = false ]; then
176
200
  OPTIMIZE_JOB_NAME="${PROJECT_NAME}-optimize-$(date +%Y%m%d-%H%M%S)"
201
+ WORKLOAD_CONFIG_NAME="${OPTIMIZE_JOB_NAME}-workload"
177
202
 
178
203
  echo "🚀 Creating AI Recommendation Job: ${OPTIMIZE_JOB_NAME}"
179
204
 
180
- # Build instance type list as JSON array
181
- INSTANCE_TYPES_JSON="["
182
- FIRST=true
205
+ # Step 1: Create workload config
206
+ echo " Creating workload config: ${WORKLOAD_CONFIG_NAME}"
207
+
208
+ WORKLOAD_SPEC_INNER="{\"benchmark\": {\"type\": \"aiperf\"}, \"parameters\": {\"prompt_input_tokens_mean\": ${INPUT_TOKENS}, \"prompt_input_tokens_stddev\": 150, \"output_tokens_mean\": ${OUTPUT_TOKENS}, \"output_tokens_stddev\": 50, \"concurrency\": ${CONCURRENCY}, \"streaming\": true}}"
209
+ WORKLOAD_SPEC_OUTER="{\"WorkloadSpec\": {\"Inline\": $(python3 -c "import json; print(json.dumps('${WORKLOAD_SPEC_INNER}'))")}}"
210
+
211
+ if ! aws sagemaker create-ai-workload-config \
212
+ --ai-workload-config-name "${WORKLOAD_CONFIG_NAME}" \
213
+ --ai-workload-configs "${WORKLOAD_SPEC_OUTER}" \
214
+ --region "${AWS_REGION}" 2>&1 | grep -q "AIWorkloadConfigArn"; then
215
+ echo "❌ Failed to create workload config: ${WORKLOAD_CONFIG_NAME}"
216
+ echo " Check that the execution role has sagemaker:CreateAIWorkloadConfig permission."
217
+ # Show actual error
218
+ aws sagemaker create-ai-workload-config \
219
+ --ai-workload-config-name "${WORKLOAD_CONFIG_NAME}" \
220
+ --ai-workload-configs "${WORKLOAD_SPEC_OUTER}" \
221
+ --region "${AWS_REGION}" 2>&1 || true
222
+ exit 1
223
+ fi
224
+ echo " ✅ Workload config created"
225
+
226
+ # Step 2: Build compute spec (instance types, max 3)
227
+ COMPUTE_SPEC_JSON=""
183
228
  IFS=',' read -ra TYPES <<< "${INSTANCE_TYPES}"
229
+ INSTANCE_LIST=""
230
+ COUNT=0
184
231
  for itype in "${TYPES[@]}"; do
185
232
  itype=$(echo "${itype}" | xargs) # trim whitespace
186
- if [ "${FIRST}" = true ]; then
187
- INSTANCE_TYPES_JSON="${INSTANCE_TYPES_JSON}\"${itype}\""
188
- FIRST=false
233
+ if [ ${COUNT} -ge 3 ]; then
234
+ echo " ⚠️ Max 3 instance types supported — truncating"
235
+ break
236
+ fi
237
+ if [ -n "${INSTANCE_LIST}" ]; then
238
+ INSTANCE_LIST="${INSTANCE_LIST},\"${itype}\""
189
239
  else
190
- INSTANCE_TYPES_JSON="${INSTANCE_TYPES_JSON},\"${itype}\""
240
+ INSTANCE_LIST="\"${itype}\""
191
241
  fi
242
+ COUNT=$((COUNT + 1))
192
243
  done
193
- INSTANCE_TYPES_JSON="${INSTANCE_TYPES_JSON}]"
244
+ COMPUTE_SPEC_JSON="InstanceTypes=[${INSTANCE_LIST}]"
245
+
246
+ # Step 3: Map goal to performance target metric
247
+ case "${GOAL}" in
248
+ latency) PERF_METRIC="ttft-ms" ;;
249
+ throughput) PERF_METRIC="throughput" ;;
250
+ cost) PERF_METRIC="cost" ;;
251
+ esac
194
252
 
195
- # Build job input config
196
- # The model is specified as either a HuggingFace model ID or S3 path
197
- MODEL_SOURCE_JSON=""
253
+ # Step 4: Determine model source
254
+ # The recommendation API requires model artifacts as s3:// or https:// URI.
255
+ MODEL_SOURCE_ARG=""
198
256
  if [[ "${MODEL_NAME}" == s3://* ]]; then
199
- MODEL_SOURCE_JSON="{\"S3DataSource\":{\"S3Uri\":\"${MODEL_NAME}\"}}"
257
+ MODEL_SOURCE_ARG="S3={S3Uri=${MODEL_NAME}}"
200
258
  else
201
- MODEL_SOURCE_JSON="{\"ModelName\":\"${MODEL_NAME}\"}"
259
+ # HuggingFace model — use the HTTPS URL for the model on HuggingFace Hub
260
+ MODEL_SOURCE_ARG="S3={S3Uri=https://huggingface.co/${MODEL_NAME}}"
202
261
  fi
203
262
 
204
- # Build workload config
205
- WORKLOAD_JSON="{\"Concurrency\":${CONCURRENCY},\"InputTokens\":${INPUT_TOKENS},\"OutputTokens\":${OUTPUT_TOKENS}}"
206
-
207
- # Build the full job specification
208
- JOB_INPUT="{\"ModelSource\":${MODEL_SOURCE_JSON},\"Workload\":${WORKLOAD_JSON},\"InstanceTypes\":${INSTANCE_TYPES_JSON},\"OptimizationGoal\":\"${GOAL}\"}"
209
-
210
- if ! aws sagemaker create-ai-recommendation-job \
211
- --job-name "${OPTIMIZE_JOB_NAME}" \
212
- --role-arn "${ROLE_ARN}" \
213
- --input-config "${JOB_INPUT}" \
214
- --region "${AWS_REGION}"; then
263
+ # Step 5: Create the recommendation job
264
+ S3_OUTPUT="s3://${TUNE_S3_BUCKET:-mlcc-tune-$(aws sts get-caller-identity --query Account --output text 2>/dev/null)-${AWS_REGION}}/${PROJECT_NAME}/optimize/"
265
+
266
+ RECOMMEND_CMD=(
267
+ aws sagemaker create-ai-recommendation-job
268
+ --ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}"
269
+ --model-source "${MODEL_SOURCE_ARG}"
270
+ --output-config "S3OutputLocation=${S3_OUTPUT}"
271
+ --ai-workload-config-identifier "${WORKLOAD_CONFIG_NAME}"
272
+ --performance-target "Constraints=[{Metric=${PERF_METRIC}}]"
273
+ --role-arn "${ROLE_ARN}"
274
+ --compute-spec "${COMPUTE_SPEC_JSON}"
275
+ --inference-specification "Framework=VLLM"
276
+ --region "${AWS_REGION}"
277
+ )
278
+
279
+ if ! "${RECOMMEND_CMD[@]}" 2>&1; then
280
+ echo ""
215
281
  echo "❌ Failed to create AI Recommendation Job"
216
282
  echo " Check that:"
217
283
  echo " • The execution role has sagemaker:CreateAIRecommendationJob permission"
@@ -232,7 +298,7 @@ fi
232
298
  POLL_INTERVAL=30
233
299
  MAX_POLL_ATTEMPTS=120 # 60 minutes max (120 * 30s)
234
300
 
235
- if [ "${JOB_STATUS:-}" != "COMPLETED" ] && [ "${JOB_STATUS:-}" != "FAILED" ] && [ "${JOB_STATUS:-}" != "STOPPED" ]; then
301
+ if [ "${JOB_STATUS:-}" != "COMPLETED" ] && [ "${JOB_STATUS:-}" != "FAILED" ] && [ "${JOB_STATUS:-}" != "STOPPED" ] && [ "${JOB_STATUS:-}" != "Completed" ] && [ "${JOB_STATUS:-}" != "Failed" ] && [ "${JOB_STATUS:-}" != "Stopped" ]; then
236
302
 
237
303
  echo "⏳ Waiting for recommendation job to complete..."
238
304
  echo " Polling every ${POLL_INTERVAL}s (max ${MAX_POLL_ATTEMPTS} attempts = 60 min)"
@@ -243,27 +309,30 @@ JOB_STATUS=""
243
309
 
244
310
  while [ ${POLL_COUNT} -lt ${MAX_POLL_ATTEMPTS} ]; do
245
311
  JOB_STATUS=$(aws sagemaker describe-ai-recommendation-job \
246
- --job-name "${OPTIMIZE_JOB_NAME}" \
312
+ --ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
247
313
  --region "${AWS_REGION}" \
248
- --query 'Status' \
314
+ --query 'AIRecommendationJobStatus' \
249
315
  --output text 2>/dev/null) || {
250
316
  echo "⚠️ Failed to describe recommendation job (credentials may have expired)"
251
317
  echo " Re-run to check status:"
252
- echo " aws sagemaker describe-ai-recommendation-job --job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"
318
+ echo " aws sagemaker describe-ai-recommendation-job --ai-recommendation-job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"
253
319
  exit 1
254
320
  }
255
321
 
256
322
  case "${JOB_STATUS}" in
257
- COMPLETED)
323
+ COMPLETED|Completed)
258
324
  echo "✅ Recommendation job completed!"
325
+ JOB_STATUS="COMPLETED"
259
326
  break
260
327
  ;;
261
- FAILED)
328
+ FAILED|Failed)
262
329
  echo "❌ Recommendation job failed"
330
+ JOB_STATUS="FAILED"
263
331
  break
264
332
  ;;
265
- STOPPED)
333
+ STOPPED|Stopped)
266
334
  echo "⚠️ Recommendation job was stopped"
335
+ JOB_STATUS="STOPPED"
267
336
  break
268
337
  ;;
269
338
  *)
@@ -292,7 +361,7 @@ if [ "${JOB_STATUS}" = "COMPLETED" ]; then
292
361
 
293
362
  # Get the full job description with results
294
363
  JOB_DESCRIPTION=$(aws sagemaker describe-ai-recommendation-job \
295
- --job-name "${OPTIMIZE_JOB_NAME}" \
364
+ --ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
296
365
  --region "${AWS_REGION}" \
297
366
  --output json 2>/dev/null) || {
298
367
  echo "❌ Failed to fetch recommendation results"
@@ -503,7 +572,7 @@ except:
503
572
 
504
573
  if [ "${FAILURE_REASON}" = "unknown" ]; then
505
574
  FAILURE_REASON=$(aws sagemaker describe-ai-recommendation-job \
506
- --job-name "${OPTIMIZE_JOB_NAME}" \
575
+ --ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
507
576
  --region "${AWS_REGION}" \
508
577
  --query 'FailureReason' \
509
578
  --output text 2>/dev/null) || FAILURE_REASON="unknown"
@@ -513,7 +582,7 @@ except:
513
582
  echo " Reason: ${FAILURE_REASON}"
514
583
  echo ""
515
584
  echo " Debug:"
516
- echo " aws sagemaker describe-ai-recommendation-job --job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"
585
+ echo " aws sagemaker describe-ai-recommendation-job --ai-recommendation-job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"
517
586
 
518
587
  elif [ "${JOB_STATUS}" = "STOPPED" ]; then
519
588
  echo "⚠️ Recommendation job was stopped before completion"
package/templates/do/push CHANGED
@@ -9,6 +9,11 @@ set -o pipefail
9
9
  # Source configuration
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  source "${SCRIPT_DIR}/config"
12
+ source "${SCRIPT_DIR}/lib/profile.sh"
13
+
14
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
15
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
16
+ export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
12
17
 
13
18
  echo "🚀 Pushing Docker image to Amazon ECR"
14
19
  echo " Project: ${PROJECT_NAME}"