@aws/ml-container-creator 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/README.md +1 -1
  2. package/bin/cli.js +1 -1
  3. package/config/tune-catalog.json +303 -1
  4. package/infra/ci-harness/lib/ci-harness-stack.ts +43 -0
  5. package/package.json +3 -2
  6. package/servers/base-image-picker/index.js +65 -18
  7. package/servers/instance-sizer/index.js +32 -0
  8. package/servers/lib/catalogs/fleet-drivers.json +38 -0
  9. package/servers/lib/catalogs/model-arch-support.json +51 -0
  10. package/servers/lib/catalogs/model-servers.json +2842 -1516
  11. package/servers/lib/schemas/image-catalog.schema.json +12 -0
  12. package/src/app.js +6 -4
  13. package/src/lib/bootstrap-command-handler.js +12 -2
  14. package/src/lib/bootstrap-profile-manager.js +16 -0
  15. package/src/lib/cross-cutting-checker.js +6 -1
  16. package/src/lib/generated/cli-options.js +1 -1
  17. package/src/lib/generated/parameter-matrix.js +1 -1
  18. package/src/lib/generated/validation-rules.js +1 -1
  19. package/src/lib/mcp-query-runner.js +110 -3
  20. package/src/lib/prompt-runner.js +66 -22
  21. package/src/lib/template-variable-resolver.js +8 -0
  22. package/src/lib/train-config-builder.js +339 -0
  23. package/templates/do/.benchmark_writer.py +3 -0
  24. package/templates/do/.eval_helper.py +409 -0
  25. package/templates/do/.register_helper.py +185 -11
  26. package/templates/do/.train_build_request.py +102 -113
  27. package/templates/do/.train_helper.py +433 -0
  28. package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
  29. package/templates/do/adapter +157 -0
  30. package/templates/do/benchmark +60 -3
  31. package/templates/do/deploy.d/managed-inference.ejs +83 -0
  32. package/templates/do/evaluate +272 -0
  33. package/templates/do/lib/resolve-instance.sh +155 -0
  34. package/templates/do/register +5 -0
  35. package/templates/do/test +1 -0
  36. package/templates/do/train +879 -126
  37. package/templates/do/training/config.yaml +83 -11
  38. package/templates/do/training/dpo/accelerate_config.yaml +24 -0
  39. package/templates/do/training/dpo/defaults.yaml +26 -0
  40. package/templates/do/training/dpo/prompts.json +8 -0
  41. package/templates/do/training/dpo/train.py +363 -0
  42. package/templates/do/training/sft/accelerate_config.yaml +22 -0
  43. package/templates/do/training/sft/defaults.yaml +18 -0
  44. package/templates/do/training/sft/prompts.json +7 -0
  45. package/templates/do/training/sft/train.py +310 -0
  46. package/templates/do/tune +11 -2
  47. package/templates/do/.train_poll_parser.py +0 -135
  48. package/templates/do/.train_status_parser.py +0 -187
  49. /package/templates/do/training/{train.py → custom/train.py} +0 -0
@@ -13,6 +13,7 @@ set -o pipefail
13
13
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
14
14
  source "${SCRIPT_DIR}/config"
15
15
  source "${SCRIPT_DIR}/lib/profile.sh"
16
+ source "${SCRIPT_DIR}/lib/resolve-instance.sh"
16
17
 
17
18
  # ── Parse flags ───────────────────────────────────────────────────────────────
18
19
  CLEAN_AFTER=false
@@ -69,8 +70,8 @@ done
69
70
  # Query the tracked benchmark job, display status, and if completed:
70
71
  # download results, display metrics, and write to Athena (if not already done).
71
72
  if [ "${ARG_STATUS}" = true ]; then
72
- # Resolve instance type: BENCHMARK_INSTANCE_TYPE (persisted by main flow) > INSTANCE_TYPE from config
73
- _STATUS_INSTANCE_TYPE="${BENCHMARK_INSTANCE_TYPE:-${INSTANCE_TYPE:-}}"
73
+ # Resolve instance type: BENCHMARK_INSTANCE_TYPE > DEPLOYED_INSTANCE_TYPE > INSTANCE_TYPE
74
+ _STATUS_INSTANCE_TYPE="${BENCHMARK_INSTANCE_TYPE:-${DEPLOYED_INSTANCE_TYPE:-${INSTANCE_TYPE:-}}}"
74
75
 
75
76
  JOB_NAME="${BENCHMARK_JOB_NAME:-}"
76
77
  if [ -z "${JOB_NAME}" ]; then
@@ -575,7 +576,7 @@ print(f'Combined {n_metrics} concurrency level results')
575
576
 
576
577
  echo ""
577
578
  echo "📋 Multi-level Summary:"
578
- echo " Levels tested: ${_NORMALIZED_LEVELS}"
579
+ echo " Levels tested: ${#_LEVELS[@]}"
579
580
  echo " Failures: ${_LEVEL_FAILURES} / ${#_LEVELS[@]}"
580
581
  echo " Results: ${_ALL_RESULTS_DIR}/"
581
582
 
@@ -845,6 +846,62 @@ except:
845
846
  [ "${RESOLVED_INSTANCE_TYPE}" = "None" ] && RESOLVED_INSTANCE_TYPE=""
846
847
  fi
847
848
  fi
849
+
850
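+ # If still empty (heterogeneous pools), query the inference component directly. NOTE: the parser below always prints an empty string, so this step is currently a placeholder that never resolves a type.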
851
+ if [ -z "${RESOLVED_INSTANCE_TYPE}" ] && [ -n "${IC_NAME}" ]; then
852
+ RESOLVED_INSTANCE_TYPE=$(aws sagemaker describe-inference-component \
853
+ --inference-component-name "${IC_NAME}" \
854
+ --region "${AWS_REGION}" \
855
+ --output json 2>/dev/null | python3 -c "
856
+ import sys, json
857
+ try:
858
+ ic = json.load(sys.stdin)
859
+ # RuntimeConfig.CurrentCopyCount tells us it's running, but instance type
860
+ # is in the ResourceRequirements or the endpoint's routing
861
+ runtime = ic.get('RuntimeConfig', {})
862
+ # Try DesiredCopyCount path for instance pool info
863
+ spec = ic.get('Specification', {}).get('ComputeResourceRequirements', {})
864
+ # For pool-based endpoints, check the IC's runtime host (if available)
865
+ # Fallback: query endpoint InstancePools
866
+ print('')
867
+ except:
868
+ print('')
869
+ " 2>/dev/null) || RESOLVED_INSTANCE_TYPE=""
870
+ [ "${RESOLVED_INSTANCE_TYPE}" = "None" ] && RESOLVED_INSTANCE_TYPE=""
871
+ fi
872
+
873
+ # If still empty (heterogeneous pools), query endpoint config InstancePools
874
+ if [ -z "${RESOLVED_INSTANCE_TYPE}" ]; then
875
+ _EC_NAME="${_EC_NAME:-$(echo "${_EP_JSON}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('EndpointConfigName',''))" 2>/dev/null)}"
876
+ if [ -n "${_EC_NAME}" ]; then
877
+ RESOLVED_INSTANCE_TYPE=$(aws sagemaker describe-endpoint-config \
878
+ --endpoint-config-name "${_EC_NAME}" \
879
+ --region "${AWS_REGION}" \
880
+ --output json 2>/dev/null | python3 -c "
881
+ import sys, json
882
+ try:
883
+ ec = json.load(sys.stdin)
884
+ variants = ec.get('ProductionVariants', [])
885
+ for v in variants:
886
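+ # Check InstancePoolConfig for pool-based endpoints (NOTE(review): lib/resolve-instance.sh reads InstancePools directly on the variant - confirm which shape the API returns)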
887
+ pools = v.get('InstancePoolConfig', {}).get('InstancePools', [])
888
+ if pools:
889
+ # Use the first pool's instance type
890
+ print(pools[0].get('InstanceType', ''))
891
+ break
892
+ # Otherwise fall back to the top-level InstanceType of the variant
893
+ it = v.get('InstanceType', '')
894
+ if it:
895
+ print(it)
896
+ break
897
+ else:
898
+ print('')
899
+ except:
900
+ print('')
901
+ " 2>/dev/null) || RESOLVED_INSTANCE_TYPE=""
902
+ [ "${RESOLVED_INSTANCE_TYPE}" = "None" ] && RESOLVED_INSTANCE_TYPE=""
903
+ fi
904
+ fi
848
905
  fi
849
906
 
850
907
  # Final fallback: use INSTANCE_TYPE from do/config
@@ -718,6 +718,38 @@ if [ -z "${SKIP_TO}" ] || [ "${SKIP_TO}" = "create_ic" ] || [ "${SKIP_TO}" = "wa
718
718
  # Run capacity guardrail before deploying ICs
719
719
  _check_gpu_capacity
720
720
 
721
+ # _check_gpu_count_mismatch
722
+ # Non-fatal warning: if any IC has IC_GPU_COUNT=1 but the endpoint has
723
+ # multiple GPUs available, warn the user that they may be underutilizing.
724
+ # The user may intentionally use fewer GPUs (e.g., sharing endpoint across
725
+ # multiple ICs), so this is advisory only — no exit, no abort.
726
+ _check_gpu_count_mismatch() {
727
+ # Skip if DEPLOYED_GPU_COUNT is not set or is ≤1
728
+ if [ "${DEPLOYED_GPU_COUNT:-0}" -le 1 ]; then
729
+ return 0
730
+ fi
731
+
732
+ for conf in "${SCRIPT_DIR}"/ic/*.conf; do
733
+ [ -f "${conf}" ] || continue
734
+ local ic_gpus
735
+ ic_gpus=$(grep "^export IC_GPU_COUNT=" "${conf}" 2>/dev/null | sed 's/^export IC_GPU_COUNT=//' | tr -d '"' || echo "1")
736
+ if [ -z "${ic_gpus}" ]; then
737
+ ic_gpus=1
738
+ fi
739
+ if [ "${ic_gpus}" = "1" ] && [ "${DEPLOYED_GPU_COUNT:-0}" -gt 1 ]; then
740
+ local conf_name
741
+ conf_name=$(basename "${conf}")
742
+ echo ""
743
+ echo "⚠️ Warning: IC_GPU_COUNT=1 but endpoint has ${DEPLOYED_GPU_COUNT} GPUs available"
744
+ echo " Consider setting IC_GPU_COUNT=${DEPLOYED_GPU_COUNT} in do/ic/${conf_name} for tensor parallelism"
745
+ echo ""
746
+ break
747
+ fi
748
+ done
749
+ }
750
+
751
+ _check_gpu_count_mismatch
752
+
721
753
  # _delete_and_wait_ic <ic_name>
722
754
  # Deletes an inference component and waits for deletion to complete.
723
755
  # Polls until the IC is no longer found (avoids name conflicts on recreate).
@@ -910,6 +942,57 @@ elif [ "${SKIP_TO}" = "wait_ic" ]; then
910
942
  echo "✅ Inference component is InService: ${IC_DEPLOYED_NAME}"
911
943
  fi
912
944
 
945
+ # ── Resolve and persist actual instance type (for heterogeneous pools) ─────────
946
+ # On pool-based endpoints, INSTANCE_TYPE in do/config may be empty or reflect
947
+ # only the generation-time default. Query the live endpoint to determine what
948
+ # SageMaker actually provisioned, and persist as DEPLOYED_INSTANCE_TYPE.
949
+ # Downstream scripts (do/benchmark, do/register) read this for Athena/MPG metadata.
950
+ if [ -z "${INSTANCE_TYPE:-}" ] || [ -n "${INSTANCE_POOLS:-}" ]; then
951
+ _RESOLVED_INSTANCE=""
952
+ _EP_DESCRIBE=$(aws sagemaker describe-endpoint \
953
+ --endpoint-name "${ENDPOINT_NAME}" \
954
+ --region "${AWS_REGION}" \
955
+ --output json 2>/dev/null) || _EP_DESCRIBE=""
956
+
957
+ if [ -n "${_EP_DESCRIBE}" ]; then
958
+ _RESOLVED_INSTANCE=$(echo "${_EP_DESCRIBE}" | python3 -c "
959
+ import sys, json
960
+ try:
961
+ ep = json.load(sys.stdin)
962
+ variant = ep.get('ProductionVariants', [{}])[0]
963
+ print(variant.get('CurrentInstanceType') or variant.get('InstanceType') or '')
964
+ except:
965
+ print('')
966
+ " 2>/dev/null) || _RESOLVED_INSTANCE=""
967
+
968
+ # Fallback: query endpoint config if variant doesn't have it
969
+ if [ -z "${_RESOLVED_INSTANCE}" ]; then
970
+ _EC_NAME=$(echo "${_EP_DESCRIBE}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('EndpointConfigName',''))" 2>/dev/null) || _EC_NAME=""
971
+ if [ -n "${_EC_NAME}" ]; then
972
+ _RESOLVED_INSTANCE=$(aws sagemaker describe-endpoint-config \
973
+ --endpoint-config-name "${_EC_NAME}" \
974
+ --region "${AWS_REGION}" \
975
+ --query 'ProductionVariants[0].InstanceType' \
976
+ --output text 2>/dev/null) || _RESOLVED_INSTANCE=""
977
+ [ "${_RESOLVED_INSTANCE}" = "None" ] && _RESOLVED_INSTANCE=""
978
+ fi
979
+ fi
980
+ fi
981
+
982
+ if [ -n "${_RESOLVED_INSTANCE}" ]; then
983
+ # Persist to do/config for downstream scripts
984
+ _config_file="${SCRIPT_DIR}/config"
985
+ if grep -q "^export DEPLOYED_INSTANCE_TYPE=" "${_config_file}" 2>/dev/null; then
986
+ sed -i.bak "s|^export DEPLOYED_INSTANCE_TYPE=.*|export DEPLOYED_INSTANCE_TYPE=\"${_RESOLVED_INSTANCE}\"|" "${_config_file}"
987
+ rm -f "${_config_file}.bak"
988
+ else
989
+ echo "export DEPLOYED_INSTANCE_TYPE=\"${_RESOLVED_INSTANCE}\"" >> "${_config_file}"
990
+ fi
991
+ INSTANCE_TYPE="${_RESOLVED_INSTANCE}"
992
+ echo " Resolved instance type: ${_RESOLVED_INSTANCE} (persisted to do/config)"
993
+ fi
994
+ fi
995
+
913
996
  echo "✅ Deployment complete!"
914
997
  echo ""
915
998
  echo "📋 Deployment Details:"
@@ -0,0 +1,272 @@
1
+ #!/bin/bash
2
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ # do/evaluate — Model Quality Evaluation
6
+ # Evaluates a deployed model/adapter via inference requests against the endpoint.
7
+ # Computes technique-specific quality metrics (perplexity, reward accuracy, etc.).
8
+ #
9
+ # Project: <%= projectName %>
10
+
11
+ set -e
12
+ set -u
13
+ set -o pipefail
14
+
15
+ # ── Source project configuration ──────────────────────────────────────────────
16
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
17
+ source "${SCRIPT_DIR}/config"
18
+
19
+ # ── CLI Variables ─────────────────────────────────────────────────────────────
20
+ ARG_ADAPTER=""
21
+ ARG_EVAL_DATASET=""
22
+ ARG_SAMPLES=""
23
+ ARG_METRICS=""
24
+ ARG_DRY_RUN=false
25
+ ARG_HELP=false
26
+
27
+ # ── Parse arguments ───────────────────────────────────────────────────────────
28
+ while [ $# -gt 0 ]; do
29
+ case "$1" in
30
+ --adapter)
31
+ if [ -z "${2:-}" ]; then echo "❌ --adapter requires a name"; exit 1; fi
32
+ ARG_ADAPTER="$2"; shift 2 ;;
33
+ --eval-dataset)
34
+ if [ -z "${2:-}" ]; then echo "❌ --eval-dataset requires a value"; exit 1; fi
35
+ ARG_EVAL_DATASET="$2"; shift 2 ;;
36
+ --samples)
37
+ if [ -z "${2:-}" ]; then echo "❌ --samples requires a number"; exit 1; fi
38
+ ARG_SAMPLES="$2"; shift 2 ;;
39
+ --metrics)
40
+ if [ -z "${2:-}" ]; then echo "❌ --metrics requires a value"; exit 1; fi
41
+ ARG_METRICS="$2"; shift 2 ;;
42
+ --dry-run) ARG_DRY_RUN=true; shift ;;
43
+ --help|-h) ARG_HELP=true; shift ;;
44
+ *)
45
+ echo "❌ Unknown option: $1"
46
+ echo " Run ./do/evaluate --help for usage."
47
+ exit 1
48
+ ;;
49
+ esac
50
+ done
51
+
52
+ # ── Help ──────────────────────────────────────────────────────────────────────
53
+ if [ "${ARG_HELP}" = true ]; then
54
+ echo "Usage: ./do/evaluate [OPTIONS]"
55
+ echo " ./do/evaluate --adapter <name> --eval-dataset <source>"
56
+ echo ""
57
+ echo "Evaluate model/adapter quality via inference requests against the deployed endpoint."
58
+ echo "Computes technique-specific metrics: perplexity (SFT), reward accuracy (DPO)."
59
+ echo ""
60
+ echo "Options:"
61
+ echo " --adapter <name> Evaluate specific adapter (reads IC name from conf)"
62
+ echo " --eval-dataset <src> Evaluation dataset: s3://..., hf://..., or registry name"
63
+ echo " --samples <n> Limit evaluation to N samples (default: all)"
64
+ echo " --metrics <list> Comma-separated metrics to compute (default: all for technique)"
65
+ echo " --dry-run Show what would be evaluated without making requests"
66
+ echo " --help, -h Show this help message"
67
+ echo ""
68
+ echo "Examples:"
69
+ echo " ./do/evaluate # Evaluate default IC"
70
+ echo " ./do/evaluate --adapter sft-custom-a3f2 # Evaluate specific adapter"
71
+ echo " ./do/evaluate --eval-dataset \"hf://tatsu-lab/alpaca --take 50\""
72
+ echo " ./do/evaluate --adapter dpo-custom --eval-dataset s3://bucket/dpo-eval.jsonl"
73
+ echo ""
74
+ echo "Results saved to: .mlcc/eval-results/<adapter-or-ic>.json"
75
+ echo "Results are automatically included in do/register metadata."
76
+ exit 0
77
+ fi
78
+
79
+ # ── Resolve endpoint ──────────────────────────────────────────────────────────
80
+ ENDPOINT_NAME="${ENDPOINT_NAME:-}"
81
+ if [ -z "${ENDPOINT_NAME}" ]; then
82
+ echo "❌ No endpoint configured."
83
+ echo " Deploy first: ./do/deploy"
84
+ echo " Then run: ./do/evaluate"
85
+ exit 1
86
+ fi
87
+
88
+ # ── Resolve IC name ───────────────────────────────────────────────────────────
89
+ IC_NAME=""
90
+ ADAPTER_TECHNIQUE=""
91
+ EVAL_TARGET_NAME=""
92
+
93
+ if [ -n "${ARG_ADAPTER}" ]; then
94
+ # Adapter specified — look up IC from adapter conf
95
+ ADAPTER_CONF="${SCRIPT_DIR}/adapters/${ARG_ADAPTER}.conf"
96
+ if [ ! -f "${ADAPTER_CONF}" ]; then
97
+ echo "❌ Adapter config not found: do/adapters/${ARG_ADAPTER}.conf"
98
+ echo " Available adapters:"
99
+ if [ -d "${SCRIPT_DIR}/adapters" ]; then
100
+ for conf in "${SCRIPT_DIR}"/adapters/*.conf; do
101
+ [ -f "${conf}" ] || continue
102
+ echo " • $(basename "${conf}" .conf)"
103
+ done
104
+ fi
105
+ exit 1
106
+ fi
107
+ source "${ADAPTER_CONF}"
108
+ IC_NAME="${ADAPTER_IC_NAME:-}"
109
+ ADAPTER_TECHNIQUE="${ADAPTER_TECHNIQUE:-${ADAPTER_TUNE_TECHNIQUE:-}}"
110
+ EVAL_TARGET_NAME="${ARG_ADAPTER}"
111
+ else
112
+ # No adapter — use default IC
113
+ if [ -d "${SCRIPT_DIR}/ic" ]; then
114
+ for conf in "${SCRIPT_DIR}"/ic/*.conf; do
115
+ [ -f "${conf}" ] || continue
116
+ IC_DEPLOYED_NAME=""
117
+ source "${conf}"
118
+ if [ -n "${IC_DEPLOYED_NAME}" ]; then
119
+ IC_NAME="${IC_DEPLOYED_NAME}"
120
+ break
121
+ fi
122
+ done
123
+ fi
124
+ # Fallback to legacy config
125
+ IC_NAME="${IC_NAME:-${INFERENCE_COMPONENT_NAME:-}}"
126
+ EVAL_TARGET_NAME="${IC_NAME:-default}"
127
+ fi
128
+
129
+ if [ -z "${IC_NAME}" ]; then
130
+ echo "❌ No inference component found to evaluate."
131
+ echo " Deploy first: ./do/deploy"
132
+ echo " Or specify an adapter: ./do/evaluate --adapter <name>"
133
+ exit 1
134
+ fi
135
+
136
+ # ── Resolve technique (for metric selection) ──────────────────────────────────
137
+ # Priority: adapter conf ADAPTER_TECHNIQUE > TRAIN_TECHNIQUE from config > empty (no --technique is passed to the helper)
138
+ TECHNIQUE="${ADAPTER_TECHNIQUE:-${TRAIN_TECHNIQUE:-}}"
139
+
140
+ # ── Resolve eval dataset ──────────────────────────────────────────────────────
141
+ EVAL_DATASET="${ARG_EVAL_DATASET:-${EVAL_DATASET:-}}"
142
+
143
+ # ── Display configuration ─────────────────────────────────────────────────────
144
+ echo "🔬 Model Evaluation"
145
+ echo ""
146
+ echo " Endpoint: ${ENDPOINT_NAME}"
147
+ echo " IC: ${IC_NAME}"
148
+ if [ -n "${ARG_ADAPTER}" ]; then
149
+ echo " Adapter: ${ARG_ADAPTER}"
150
+ fi
151
+ if [ -n "${TECHNIQUE}" ]; then
152
+ echo " Technique: ${TECHNIQUE}"
153
+ fi
154
+ if [ -n "${EVAL_DATASET}" ]; then
155
+ echo " Dataset: ${EVAL_DATASET}"
156
+ fi
157
+ if [ -n "${ARG_SAMPLES}" ]; then
158
+ echo " Samples: ${ARG_SAMPLES}"
159
+ fi
160
+ echo ""
161
+
162
+ # ── Dry run ───────────────────────────────────────────────────────────────────
163
+ if [ "${ARG_DRY_RUN}" = true ]; then
164
+ echo "🔍 Dry run — would evaluate with above configuration."
165
+ echo " No inference requests will be made."
166
+ exit 0
167
+ fi
168
+
169
+ # ── Check endpoint is InService ───────────────────────────────────────────────
170
+ echo "🔍 Checking endpoint status..."
171
+ EP_STATUS=$(aws sagemaker describe-endpoint \
172
+ --endpoint-name "${ENDPOINT_NAME}" \
173
+ --region "${AWS_REGION}" \
174
+ --query 'EndpointStatus' \
175
+ --output text 2>/dev/null) || EP_STATUS=""
176
+
177
+ if [ "${EP_STATUS}" != "InService" ]; then
178
+ echo "❌ Endpoint is not InService (status: ${EP_STATUS:-unknown})"
179
+ echo ""
180
+ echo " Deploy first: ./do/deploy"
181
+ echo " Then run: ./do/evaluate"
182
+ exit 1
183
+ fi
184
+ echo " ✅ Endpoint is InService"
185
+ echo ""
186
+
187
+ # ── Run evaluation via Python helper ─────────────────────────────────────────
188
+ echo "🧪 Running evaluation..."
189
+ echo ""
190
+
191
+ EVAL_ARGS=(
192
+ --endpoint-name "${ENDPOINT_NAME}"
193
+ --ic-name "${IC_NAME}"
194
+ --region "${AWS_REGION}"
195
+ )
196
+
197
+ if [ -n "${TECHNIQUE}" ]; then
198
+ EVAL_ARGS+=(--technique "${TECHNIQUE}")
199
+ fi
200
+ if [ -n "${EVAL_DATASET}" ]; then
201
+ EVAL_ARGS+=(--eval-dataset "${EVAL_DATASET}")
202
+ fi
203
+ if [ -n "${ARG_SAMPLES}" ]; then
204
+ EVAL_ARGS+=(--samples "${ARG_SAMPLES}")
205
+ fi
206
+ if [ -n "${ARG_METRICS}" ]; then
207
+ EVAL_ARGS+=(--metrics "${ARG_METRICS}")
208
+ fi
209
+
210
+ EVAL_OUTPUT=$(python3 "${SCRIPT_DIR}/.eval_helper.py" evaluate "${EVAL_ARGS[@]}" 2>/dev/null | grep -E '^\{' | tail -1) || EVAL_OUTPUT=""
211
+
212
+ if [ -z "${EVAL_OUTPUT}" ]; then
213
+ echo "❌ Evaluation failed (no output from helper)"
214
+ echo " Check: endpoint accessibility, eval dataset format, Python deps (requests)"
215
+ exit 1
216
+ fi
217
+
218
+ # Check for error
219
+ HAS_ERROR=$(echo "${EVAL_OUTPUT}" | python3 -c "import sys,json; d=json.load(sys.stdin); print('yes' if d.get('error') else 'no')" 2>/dev/null) || HAS_ERROR="yes"
220
+
221
+ if [ "${HAS_ERROR}" = "yes" ]; then
222
+ ERROR_MSG=$(echo "${EVAL_OUTPUT}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('message','Unknown error'))" 2>/dev/null) || ERROR_MSG="Unknown error"
223
+ echo "❌ Evaluation failed: ${ERROR_MSG}"
224
+ exit 1
225
+ fi
226
+
227
+ # ── Save results ──────────────────────────────────────────────────────────────
228
+ RESULTS_DIR="${SCRIPT_DIR}/../.mlcc/eval-results"
229
+ mkdir -p "${RESULTS_DIR}"
230
+ RESULTS_FILE="${RESULTS_DIR}/${EVAL_TARGET_NAME}.json"
231
+ echo "${EVAL_OUTPUT}" | python3 -c "import sys,json; json.dump(json.load(sys.stdin), open(sys.argv[1],'w'), indent=2)" "${RESULTS_FILE}" # path is passed via argv so quotes or backslashes in the target name cannot break the Python source
232
+
233
+ echo "📁 Results saved to: .mlcc/eval-results/${EVAL_TARGET_NAME}.json"
234
+ echo ""
235
+
236
+ # ── Display summary ───────────────────────────────────────────────────────────
237
+ echo "╔══════════════════════════════════════════════════════════════════╗"
238
+ echo "║ Evaluation Results ║"
239
+ echo "╠══════════════════════════════════════════════════════════════════╣"
240
+ echo "${EVAL_OUTPUT}" | python3 -c "
241
+ import sys, json
242
+ data = json.load(sys.stdin)
243
+ print(f\"║ Target: {data.get('adapter_name', data.get('ic_name', 'unknown'))}\")
244
+ print(f\"║ Technique: {data.get('technique', 'unknown')}\")
245
+ print(f\"║ Samples: {data.get('samples_evaluated', 0)}\")
246
+ print(f\"║\")
247
+ metrics = data.get('metrics', {})
248
+ for name, value in metrics.items():
249
+ if isinstance(value, float):
250
+ print(f'║ {name}: {value:.4f}')
251
+ else:
252
+ print(f'║ {name}: {value}')
253
+ " 2>/dev/null
254
+ echo "╚══════════════════════════════════════════════════════════════════╝"
255
+ echo ""
256
+
257
+ # ── Optionally write to Athena ────────────────────────────────────────────────
258
+ if [ -n "${CI_BENCHMARK_RESULTS_BUCKET:-}" ]; then
259
+ echo "☁️ Persisting evaluation results to Athena..."
260
+ python3 "${SCRIPT_DIR}/.eval_helper.py" eval-write \
261
+ --results-file "${RESULTS_FILE}" \
262
+ --bucket "${CI_BENCHMARK_RESULTS_BUCKET}" \
263
+ --region "${AWS_REGION}" 2>/dev/null | grep -E '^\{' | tail -1 > /dev/null || {
264
+ echo " ⚠️ Failed to persist to Athena (non-fatal)"
265
+ }
266
+ fi
267
+
268
+ echo "✅ Evaluation complete."
269
+ echo ""
270
+ echo " Next steps:"
271
+ echo " • Run ./do/register to include eval metrics in model package metadata"
272
+ echo " • Compare adapters: ./do/evaluate --adapter <other-adapter>"
@@ -0,0 +1,155 @@
1
+ #!/usr/bin/env bash
2
+ # Lazy instance type resolution for heterogeneous pool endpoints.
3
+ # Source this file after do/config + lib/profile.sh in any script that needs INSTANCE_TYPE.
4
+ #
5
+ # When INSTANCE_TYPE is empty (pool endpoints) and DEPLOYED_INSTANCE_TYPE hasn't been
6
+ # persisted yet (no do/deploy run), queries the live endpoint once and persists the result.
7
+ # Subsequent calls read from do/config without any AWS API calls.
8
+ #
9
+ # After sourcing, INSTANCE_TYPE is set if it could be resolved (empty if resolution failed).
10
+ # On the live-resolution path only, DEPLOYED_GPU_COUNT is also derived from a static lookup
11
+ # table (instances.json catalog) and persisted to do/config alongside the instance type.
12
+ #
13
+ # Usage:
14
+ # source "${SCRIPT_DIR}/config"
15
+ # source "${SCRIPT_DIR}/lib/profile.sh"
16
+ # source "${SCRIPT_DIR}/lib/resolve-instance.sh"
17
+ # # INSTANCE_TYPE and DEPLOYED_GPU_COUNT are now resolved
18
+
19
+ # Resolve SCRIPT_DIR if not already set (defensive — normally inherited from caller)
20
+ if [ -z "${SCRIPT_DIR:-}" ]; then
21
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
22
+ fi
23
+
24
+ # Skip if INSTANCE_TYPE is already set (single-instance endpoints)
25
+ if [ -n "${INSTANCE_TYPE:-}" ]; then
26
+ return 0 2>/dev/null || true
27
+ fi
28
+
29
+ # Check if DEPLOYED_INSTANCE_TYPE was previously persisted
30
+ if [ -n "${DEPLOYED_INSTANCE_TYPE:-}" ]; then
31
+ INSTANCE_TYPE="${DEPLOYED_INSTANCE_TYPE}"
32
+ export INSTANCE_TYPE
33
+ # Ensure DEPLOYED_GPU_COUNT is also exported (may already be in do/config)
34
+ if [ -n "${DEPLOYED_GPU_COUNT:-}" ]; then
35
+ export DEPLOYED_GPU_COUNT
36
+ fi
37
+ return 0 2>/dev/null || true
38
+ fi
39
+
40
+ # Check if BENCHMARK_INSTANCE_TYPE was previously persisted (by do/benchmark)
41
+ if [ -n "${BENCHMARK_INSTANCE_TYPE:-}" ]; then
42
+ INSTANCE_TYPE="${BENCHMARK_INSTANCE_TYPE}"
43
+ export INSTANCE_TYPE
44
+ return 0 2>/dev/null || true
45
+ fi
46
+
47
+ # ── Live resolution from endpoint (one-time, persisted) ──────────────────────
48
+ # Only attempt if ENDPOINT_NAME is configured; if the AWS CLI call fails (e.g. no credentials), INSTANCE_TYPE stays empty.
49
+ if [ -z "${ENDPOINT_NAME:-}" ]; then
50
+ return 0 2>/dev/null || true
51
+ fi
52
+
53
+ _RESOLVED_INSTANCE=""
54
+ _EP_DESCRIBE=$(aws sagemaker describe-endpoint \
55
+ --endpoint-name "${ENDPOINT_NAME}" \
56
+ --region "${AWS_REGION:-us-east-1}" \
57
+ --output json 2>/dev/null) || _EP_DESCRIBE=""
58
+
59
+ if [ -n "${_EP_DESCRIBE}" ]; then
60
+ _RESOLVED_INSTANCE=$(echo "${_EP_DESCRIBE}" | python3 -c "
61
+ import sys, json
62
+ try:
63
+ ep = json.load(sys.stdin)
64
+ variant = ep.get('ProductionVariants', [{}])[0]
65
+ print(variant.get('CurrentInstanceType') or variant.get('InstanceType') or '')
66
+ except:
67
+ print('')
68
+ " 2>/dev/null) || _RESOLVED_INSTANCE=""
69
+
70
+ # Fallback: query endpoint config for InstanceType, then the lowest-Priority pool entry
71
+ if [ -z "${_RESOLVED_INSTANCE}" ]; then
72
+ _EC_NAME=$(echo "${_EP_DESCRIBE}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('EndpointConfigName',''))" 2>/dev/null) || _EC_NAME=""
73
+ if [ -n "${_EC_NAME}" ]; then
74
+ _RESOLVED_INSTANCE=$(aws sagemaker describe-endpoint-config \
75
+ --endpoint-config-name "${_EC_NAME}" \
76
+ --region "${AWS_REGION:-us-east-1}" \
77
+ --query 'ProductionVariants[0].InstanceType' \
78
+ --output text 2>/dev/null) || _RESOLVED_INSTANCE=""
79
+ [ "${_RESOLVED_INSTANCE}" = "None" ] && _RESOLVED_INSTANCE=""
80
+
81
+ # Final fallback: the InstancePools entry with the lowest Priority value
82
+ if [ -z "${_RESOLVED_INSTANCE}" ]; then
83
+ _RESOLVED_INSTANCE=$(aws sagemaker describe-endpoint-config \
84
+ --endpoint-config-name "${_EC_NAME}" \
85
+ --region "${AWS_REGION:-us-east-1}" \
86
+ --output json 2>/dev/null | python3 -c "
87
+ import sys, json
88
+ try:
89
+ ec = json.load(sys.stdin)
90
+ pools = ec.get('ProductionVariants', [{}])[0].get('InstancePools', [])
91
+ if pools:
92
+ best = min(pools, key=lambda p: p.get('Priority', 999))
93
+ print(best.get('InstanceType', ''))
94
+ else:
95
+ print('')
96
+ except:
97
+ print('')
98
+ " 2>/dev/null) || _RESOLVED_INSTANCE=""
99
+ fi
100
+ fi
101
+ fi
102
+ fi
103
+
104
+ # Persist to do/config (one-time write — subsequent sources read it directly)
105
+ if [ -n "${_RESOLVED_INSTANCE}" ]; then
106
+ _config_file="${SCRIPT_DIR}/config"
107
+ if grep -q "^export DEPLOYED_INSTANCE_TYPE=" "${_config_file}" 2>/dev/null; then
108
+ sed -i.bak "s|^export DEPLOYED_INSTANCE_TYPE=.*|export DEPLOYED_INSTANCE_TYPE=\"${_RESOLVED_INSTANCE}\"|" "${_config_file}"
109
+ rm -f "${_config_file}.bak"
110
+ else
111
+ echo "export DEPLOYED_INSTANCE_TYPE=\"${_RESOLVED_INSTANCE}\"" >> "${_config_file}"
112
+ fi
113
+ INSTANCE_TYPE="${_RESOLVED_INSTANCE}"
114
+ DEPLOYED_INSTANCE_TYPE="${_RESOLVED_INSTANCE}"
115
+ export INSTANCE_TYPE DEPLOYED_INSTANCE_TYPE
116
+
117
+ # ── Resolve GPU count from instance type ─────────────────────────────────
118
+ # Static lookup table derived from servers/lib/catalogs/instances.json.
119
+ # Maps known SageMaker instance types to their GPU count.
120
+ _resolve_gpu_count() {
121
+ case "$1" in
122
+ ml.g5.xlarge|ml.g5.2xlarge|ml.g5.4xlarge|ml.g5.8xlarge|ml.g5.16xlarge) echo 1 ;;
123
+ ml.g5.12xlarge|ml.g5.24xlarge) echo 4 ;;
124
+ ml.g5.48xlarge) echo 8 ;;
125
+ ml.g4dn.xlarge|ml.g4dn.2xlarge|ml.g4dn.4xlarge|ml.g4dn.8xlarge|ml.g4dn.16xlarge) echo 1 ;;
126
+ ml.g4dn.12xlarge) echo 4 ;;
127
+ ml.g6.xlarge|ml.g6.2xlarge|ml.g6.4xlarge|ml.g6.8xlarge|ml.g6.16xlarge) echo 1 ;;
128
+ ml.g6.12xlarge|ml.g6.24xlarge) echo 4 ;;
129
+ ml.g6.48xlarge) echo 8 ;;
130
+ ml.g6e.xlarge|ml.g6e.2xlarge|ml.g6e.4xlarge|ml.g6e.8xlarge|ml.g6e.16xlarge) echo 1 ;;
131
+ ml.g6e.12xlarge|ml.g6e.24xlarge) echo 4 ;;
132
+ ml.g6e.48xlarge) echo 8 ;;
133
+ ml.p4d.24xlarge|ml.p4de.24xlarge) echo 8 ;;
134
+ ml.p5.48xlarge|ml.p5e.48xlarge) echo 8 ;;
135
+ *) echo "" ;;
136
+ esac
137
+ }
138
+
139
+ _GPU_COUNT=$(_resolve_gpu_count "${_RESOLVED_INSTANCE}")
140
+ if [ -n "${_GPU_COUNT}" ]; then
141
+ if grep -q "^export DEPLOYED_GPU_COUNT=" "${_config_file}" 2>/dev/null; then
142
+ sed -i.bak "s|^export DEPLOYED_GPU_COUNT=.*|export DEPLOYED_GPU_COUNT=\"${_GPU_COUNT}\"|" "${_config_file}"
143
+ rm -f "${_config_file}.bak"
144
+ else
145
+ echo "export DEPLOYED_GPU_COUNT=\"${_GPU_COUNT}\"" >> "${_config_file}"
146
+ fi
147
+ DEPLOYED_GPU_COUNT="${_GPU_COUNT}"
148
+ export DEPLOYED_GPU_COUNT
149
+ fi
150
+ unset _GPU_COUNT
151
+ unset -f _resolve_gpu_count
152
+ fi
153
+
154
+ # Clean up internal vars
155
+ unset _RESOLVED_INSTANCE _EP_DESCRIBE _EC_NAME
@@ -10,11 +10,16 @@ set -o pipefail
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  source "${SCRIPT_DIR}/config"
12
12
  source "${SCRIPT_DIR}/lib/profile.sh"
13
+ source "${SCRIPT_DIR}/lib/resolve-instance.sh"
13
14
 
14
15
  # ── Profile-resolved variables (env var > profile > default) ──────────────────
15
16
  ROLE_ARN="${ROLE_ARN:-${_PROFILE_roleArn:-}}"
16
17
  ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE_ecrRepositoryName:-ml-container-creator}}"
17
18
 
19
+ # Resolve INSTANCE_TYPE for heterogeneous pool endpoints (may be empty from config,
20
+ # filled by do/deploy after InService resolution)
21
+ INSTANCE_TYPE="${INSTANCE_TYPE:-${DEPLOYED_INSTANCE_TYPE:-${BENCHMARK_INSTANCE_TYPE:-}}}"
22
+
18
23
  # ============================================================
19
24
  # Register deployment to the deployment registry
20
25
  # ============================================================
package/templates/do/test CHANGED
@@ -10,6 +10,7 @@ set -o pipefail
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  source "${SCRIPT_DIR}/config"
12
12
  source "${SCRIPT_DIR}/lib/profile.sh"
13
+ source "${SCRIPT_DIR}/lib/resolve-instance.sh"
13
14
 
14
15
  <% if (deploymentTarget === 'realtime-inference') { %>
15
16
  # ============================================================