@aws/ml-container-creator 1.0.3 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -1
- package/bin/cli.js +57 -0
- package/config/agent.json +16 -0
- package/infra/ci-harness/lib/ci-harness-stack.ts +43 -0
- package/package.json +5 -2
- package/pyproject.toml +3 -0
- package/servers/agent-knowledge/index.js +592 -0
- package/servers/agent-knowledge/package.json +15 -0
- package/servers/base-image-picker/index.js +65 -18
- package/servers/instance-sizer/index.js +32 -0
- package/servers/lib/catalogs/fleet-drivers.json +38 -0
- package/servers/lib/catalogs/model-arch-support.json +51 -0
- package/servers/lib/catalogs/model-servers.json +2842 -1730
- package/servers/lib/schemas/image-catalog.schema.json +12 -0
- package/src/agent/__init__.py +2 -0
- package/src/agent/__pycache__/__init__.cpython-312.pyc +0 -0
- package/src/agent/__pycache__/config_loader.cpython-312.pyc +0 -0
- package/src/agent/__pycache__/context.cpython-312.pyc +0 -0
- package/src/agent/__pycache__/health_check.cpython-312.pyc +0 -0
- package/src/agent/agent.py +513 -0
- package/src/agent/config_loader.py +215 -0
- package/src/agent/context.py +380 -0
- package/src/agent/data/capability-matrix.json +106 -0
- package/src/agent/health_check.py +341 -0
- package/src/agent/prompts/system.md +173 -0
- package/src/agent/requirements-agent.txt +3 -0
- package/src/app.js +6 -4
- package/src/lib/generated/cli-options.js +1 -1
- package/src/lib/generated/parameter-matrix.js +1 -1
- package/src/lib/generated/validation-rules.js +1 -1
- package/src/lib/mcp-query-runner.js +110 -3
- package/src/lib/prompt-runner.js +66 -22
- package/src/lib/template-variable-resolver.js +8 -0
- package/src/lib/train-config-builder.js +339 -0
- package/src/lib/tune-config-state.js +89 -68
- package/templates/do/.benchmark_writer.py +3 -0
- package/templates/do/.eval_helper.py +409 -0
- package/templates/do/.register_helper.py +185 -11
- package/templates/do/.train_build_request.py +102 -113
- package/templates/do/.train_helper.py +433 -0
- package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
- package/templates/do/adapter +157 -0
- package/templates/do/benchmark +60 -3
- package/templates/do/config +6 -1
- package/templates/do/deploy.d/managed-inference.ejs +83 -0
- package/templates/do/evaluate +272 -0
- package/templates/do/lib/resolve-instance.sh +155 -0
- package/templates/do/register +5 -0
- package/templates/do/test +1 -0
- package/templates/do/train +879 -126
- package/templates/do/training/config.yaml +83 -11
- package/templates/do/training/dpo/accelerate_config.yaml +24 -0
- package/templates/do/training/dpo/defaults.yaml +26 -0
- package/templates/do/training/dpo/prompts.json +8 -0
- package/templates/do/training/dpo/train.py +363 -0
- package/templates/do/training/sft/accelerate_config.yaml +22 -0
- package/templates/do/training/sft/defaults.yaml +18 -0
- package/templates/do/training/sft/prompts.json +7 -0
- package/templates/do/training/sft/train.py +310 -0
- package/templates/do/tune +11 -2
- package/src/lib/auto-prompt-builder.js +0 -172
- package/src/lib/cli-handler.js +0 -529
- package/src/lib/community-reports-validator.js +0 -91
- package/src/lib/configuration-exporter.js +0 -204
- package/src/lib/dataset-slug.js +0 -152
- package/src/lib/docker-introspection-validator.js +0 -51
- package/src/lib/known-flags-validator.js +0 -200
- package/src/lib/schema-validator.js +0 -157
- package/src/lib/train-config-parser.js +0 -136
- package/src/lib/train-config-persistence.js +0 -143
- package/src/lib/train-config-validator.js +0 -112
- package/src/lib/train-feedback.js +0 -46
- package/src/lib/train-idempotency.js +0 -97
- package/src/lib/train-request-builder.js +0 -120
- package/src/lib/tune-dataset-validator.js +0 -279
- package/src/lib/tune-output-resolver.js +0 -66
- package/templates/do/.train_poll_parser.py +0 -135
- package/templates/do/.train_status_parser.py +0 -187
- /package/templates/do/training/{train.py → custom/train.py} +0 -0
package/templates/do/benchmark
CHANGED
|
@@ -13,6 +13,7 @@ set -o pipefail
|
|
|
13
13
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
14
14
|
source "${SCRIPT_DIR}/config"
|
|
15
15
|
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
16
|
+
source "${SCRIPT_DIR}/lib/resolve-instance.sh"
|
|
16
17
|
|
|
17
18
|
# ── Parse flags ───────────────────────────────────────────────────────────────
|
|
18
19
|
CLEAN_AFTER=false
|
|
@@ -69,8 +70,8 @@ done
|
|
|
69
70
|
# Query the tracked benchmark job, display status, and if completed:
|
|
70
71
|
# download results, display metrics, and write to Athena (if not already done).
|
|
71
72
|
if [ "${ARG_STATUS}" = true ]; then
|
|
72
|
-
# Resolve instance type: BENCHMARK_INSTANCE_TYPE
|
|
73
|
-
_STATUS_INSTANCE_TYPE="${BENCHMARK_INSTANCE_TYPE:-${INSTANCE_TYPE:-}}"
|
|
73
|
+
# Resolve instance type: BENCHMARK_INSTANCE_TYPE > DEPLOYED_INSTANCE_TYPE > INSTANCE_TYPE
|
|
74
|
+
_STATUS_INSTANCE_TYPE="${BENCHMARK_INSTANCE_TYPE:-${DEPLOYED_INSTANCE_TYPE:-${INSTANCE_TYPE:-}}}"
|
|
74
75
|
|
|
75
76
|
JOB_NAME="${BENCHMARK_JOB_NAME:-}"
|
|
76
77
|
if [ -z "${JOB_NAME}" ]; then
|
|
@@ -575,7 +576,7 @@ print(f'Combined {n_metrics} concurrency level results')
|
|
|
575
576
|
|
|
576
577
|
echo ""
|
|
577
578
|
echo "📋 Multi-level Summary:"
|
|
578
|
-
echo " Levels tested: ${
|
|
579
|
+
echo " Levels tested: ${#_LEVELS[@]}"
|
|
579
580
|
echo " Failures: ${_LEVEL_FAILURES} / ${#_LEVELS[@]}"
|
|
580
581
|
echo " Results: ${_ALL_RESULTS_DIR}/"
|
|
581
582
|
|
|
@@ -845,6 +846,62 @@ except:
|
|
|
845
846
|
[ "${RESOLVED_INSTANCE_TYPE}" = "None" ] && RESOLVED_INSTANCE_TYPE=""
|
|
846
847
|
fi
|
|
847
848
|
fi
|
|
849
|
+
|
|
850
|
+
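# If still empty (heterogeneous pools), query the inference component directly (NOTE: the parser below currently prints '' on every path, so this step is a placeholder and never sets RESOLVED_INSTANCE_TYPE)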
|
|
851
|
+
if [ -z "${RESOLVED_INSTANCE_TYPE}" ] && [ -n "${IC_NAME}" ]; then
|
|
852
|
+
RESOLVED_INSTANCE_TYPE=$(aws sagemaker describe-inference-component \
|
|
853
|
+
--inference-component-name "${IC_NAME}" \
|
|
854
|
+
--region "${AWS_REGION}" \
|
|
855
|
+
--output json 2>/dev/null | python3 -c "
|
|
856
|
+
import sys, json
|
|
857
|
+
try:
|
|
858
|
+
ic = json.load(sys.stdin)
|
|
859
|
+
# RuntimeConfig.CurrentCopyCount tells us it's running, but instance type
|
|
860
|
+
# is in the ResourceRequirements or the endpoint's routing
|
|
861
|
+
runtime = ic.get('RuntimeConfig', {})
|
|
862
|
+
# Try DesiredCopyCount path for instance pool info
|
|
863
|
+
spec = ic.get('Specification', {}).get('ComputeResourceRequirements', {})
|
|
864
|
+
# For pool-based endpoints, check the IC's runtime host (if available)
|
|
865
|
+
# Fallback: query endpoint InstancePools
|
|
866
|
+
print('')
|
|
867
|
+
except:
|
|
868
|
+
print('')
|
|
869
|
+
" 2>/dev/null) || RESOLVED_INSTANCE_TYPE=""
|
|
870
|
+
[ "${RESOLVED_INSTANCE_TYPE}" = "None" ] && RESOLVED_INSTANCE_TYPE=""
|
|
871
|
+
fi
|
|
872
|
+
|
|
873
|
+
# If still empty (heterogeneous pools), query endpoint config InstancePools
|
|
874
|
+
if [ -z "${RESOLVED_INSTANCE_TYPE}" ]; then
|
|
875
|
+
_EC_NAME="${_EC_NAME:-$(echo "${_EP_JSON}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('EndpointConfigName',''))" 2>/dev/null)}" || _EC_NAME=""  # a failed JSON parse must not abort the script under set -e; fall back to empty like the other _EC_NAME lookups
|
|
876
|
+
if [ -n "${_EC_NAME}" ]; then
|
|
877
|
+
RESOLVED_INSTANCE_TYPE=$(aws sagemaker describe-endpoint-config \
|
|
878
|
+
--endpoint-config-name "${_EC_NAME}" \
|
|
879
|
+
--region "${AWS_REGION}" \
|
|
880
|
+
--output json 2>/dev/null | python3 -c "
|
|
881
|
+
import sys, json
|
|
882
|
+
try:
|
|
883
|
+
ec = json.load(sys.stdin)
|
|
884
|
+
variants = ec.get('ProductionVariants', [])
|
|
885
|
+
for v in variants:
|
|
886
|
+
# Check InstancePoolConfig for pool-based endpoints
|
|
887
|
+
pools = v.get('InstancePoolConfig', {}).get('InstancePools', [])
|
|
888
|
+
if pools:
|
|
889
|
+
# Use the first pool's instance type
|
|
890
|
+
print(pools[0].get('InstanceType', ''))
|
|
891
|
+
break
|
|
892
|
+
# Otherwise fall back to the variant's own InstanceType
|
|
893
|
+
it = v.get('InstanceType', '')
|
|
894
|
+
if it:
|
|
895
|
+
print(it)
|
|
896
|
+
break
|
|
897
|
+
else:
|
|
898
|
+
print('')
|
|
899
|
+
except:
|
|
900
|
+
print('')
|
|
901
|
+
" 2>/dev/null) || RESOLVED_INSTANCE_TYPE=""
|
|
902
|
+
[ "${RESOLVED_INSTANCE_TYPE}" = "None" ] && RESOLVED_INSTANCE_TYPE=""
|
|
903
|
+
fi
|
|
904
|
+
fi
|
|
848
905
|
fi
|
|
849
906
|
|
|
850
907
|
# Final fallback: use INSTANCE_TYPE from do/config
|
package/templates/do/config
CHANGED
|
@@ -220,6 +220,9 @@ export <%= key %>=${<%= key %>:-<%= value %>}
|
|
|
220
220
|
<% Object.entries(icEnvVars).forEach(([key, value]) => { %>
|
|
221
221
|
export IC_ENV_<%= key %>=${IC_ENV_<%= key %>:-<%= value %>}
|
|
222
222
|
<% }); %>
|
|
223
|
+
<% if ((modelServer === 'vllm' || modelServer === 'sglang') && !icEnvVars['VLLM_MAX_MODEL_LEN'] && !icEnvVars['SGLANG_MAX_MODEL_LEN']) { %>
|
|
224
|
+
export IC_ENV_VLLM_MAX_MODEL_LEN=${IC_ENV_VLLM_MAX_MODEL_LEN:-4096}
|
|
225
|
+
<% } %>
|
|
223
226
|
<% } else if (deploymentTarget === 'realtime-inference') { %>
|
|
224
227
|
# ─── Deploy-time IC environment variables (uncomment to configure) ─────────────
|
|
225
228
|
# These are passed as the Environment field in InferenceComponent.create() at deploy time.
|
|
@@ -227,7 +230,9 @@ export IC_ENV_<%= key %>=${IC_ENV_<%= key %>:-<%= value %>}
|
|
|
227
230
|
# Max 16 vars, max 1024 chars per key/value.
|
|
228
231
|
# WARNING: Do not store raw secrets here. Use Secrets Manager ARN pattern instead:
|
|
229
232
|
# export IC_ENV_HF_TOKEN_ARN=arn:aws:secretsmanager:REGION:ACCOUNT:secret:NAME
|
|
230
|
-
|
|
233
|
+
<% if (modelServer === 'vllm' || modelServer === 'sglang') { %>
|
|
234
|
+
export IC_ENV_VLLM_MAX_MODEL_LEN=${IC_ENV_VLLM_MAX_MODEL_LEN:-4096}
|
|
235
|
+
<% } %>
|
|
231
236
|
# export IC_ENV_VLLM_GPU_MEMORY_UTILIZATION=0.85
|
|
232
237
|
<% } %>
|
|
233
238
|
|
|
@@ -718,6 +718,38 @@ if [ -z "${SKIP_TO}" ] || [ "${SKIP_TO}" = "create_ic" ] || [ "${SKIP_TO}" = "wa
|
|
|
718
718
|
# Run capacity guardrail before deploying ICs
|
|
719
719
|
_check_gpu_capacity
|
|
720
720
|
|
|
721
|
+
# _check_gpu_count_mismatch
|
|
722
|
+
# Non-fatal warning: if any IC has IC_GPU_COUNT=1 but the endpoint has
|
|
723
|
+
# multiple GPUs available, warn the user that they may be underutilizing.
|
|
724
|
+
# The user may intentionally use fewer GPUs (e.g., sharing endpoint across
|
|
725
|
+
# multiple ICs), so this is advisory only — no exit, no abort.
|
|
726
|
+
_check_gpu_count_mismatch() {
|
|
727
|
+
# Skip if DEPLOYED_GPU_COUNT is not set or is ≤1
|
|
728
|
+
if [ "${DEPLOYED_GPU_COUNT:-0}" -le 1 ]; then
|
|
729
|
+
return 0
|
|
730
|
+
fi
|
|
731
|
+
|
|
732
|
+
for conf in "${SCRIPT_DIR}"/ic/*.conf; do
|
|
733
|
+
[ -f "${conf}" ] || continue
|
|
734
|
+
local ic_gpus
|
|
735
|
+
ic_gpus=$(grep "^export IC_GPU_COUNT=" "${conf}" 2>/dev/null | sed 's/^export IC_GPU_COUNT=//' | tr -d '"' || echo "1")
|
|
736
|
+
if [ -z "${ic_gpus}" ]; then
|
|
737
|
+
ic_gpus=1
|
|
738
|
+
fi
|
|
739
|
+
if [ "${ic_gpus}" = "1" ] && [ "${DEPLOYED_GPU_COUNT:-0}" -gt 1 ]; then
|
|
740
|
+
local conf_name
|
|
741
|
+
conf_name=$(basename "${conf}")
|
|
742
|
+
echo ""
|
|
743
|
+
echo "⚠️ Warning: IC_GPU_COUNT=1 but endpoint has ${DEPLOYED_GPU_COUNT} GPUs available"
|
|
744
|
+
echo " Consider setting IC_GPU_COUNT=${DEPLOYED_GPU_COUNT} in do/ic/${conf_name} for tensor parallelism"
|
|
745
|
+
echo ""
|
|
746
|
+
break
|
|
747
|
+
fi
|
|
748
|
+
done
|
|
749
|
+
}
|
|
750
|
+
|
|
751
|
+
_check_gpu_count_mismatch
|
|
752
|
+
|
|
721
753
|
# _delete_and_wait_ic <ic_name>
|
|
722
754
|
# Deletes an inference component and waits for deletion to complete.
|
|
723
755
|
# Polls until the IC is no longer found (avoids name conflicts on recreate).
|
|
@@ -910,6 +942,57 @@ elif [ "${SKIP_TO}" = "wait_ic" ]; then
|
|
|
910
942
|
echo "✅ Inference component is InService: ${IC_DEPLOYED_NAME}"
|
|
911
943
|
fi
|
|
912
944
|
|
|
945
|
+
# ── Resolve and persist actual instance type (for heterogeneous pools) ─────────
|
|
946
|
+
# On pool-based endpoints, INSTANCE_TYPE in do/config may be empty or reflect
|
|
947
|
+
# only the generation-time default. Query the live endpoint to determine what
|
|
948
|
+
# SageMaker actually provisioned, and persist as DEPLOYED_INSTANCE_TYPE.
|
|
949
|
+
# Downstream scripts (do/benchmark, do/register) read this for Athena/MPG metadata.
|
|
950
|
+
if [ -z "${INSTANCE_TYPE:-}" ] || [ -n "${INSTANCE_POOLS:-}" ]; then
|
|
951
|
+
_RESOLVED_INSTANCE=""
|
|
952
|
+
_EP_DESCRIBE=$(aws sagemaker describe-endpoint \
|
|
953
|
+
--endpoint-name "${ENDPOINT_NAME}" \
|
|
954
|
+
--region "${AWS_REGION}" \
|
|
955
|
+
--output json 2>/dev/null) || _EP_DESCRIBE=""
|
|
956
|
+
|
|
957
|
+
if [ -n "${_EP_DESCRIBE}" ]; then
|
|
958
|
+
_RESOLVED_INSTANCE=$(echo "${_EP_DESCRIBE}" | python3 -c "
|
|
959
|
+
import sys, json
|
|
960
|
+
try:
|
|
961
|
+
ep = json.load(sys.stdin)
|
|
962
|
+
variant = ep.get('ProductionVariants', [{}])[0]
|
|
963
|
+
print(variant.get('CurrentInstanceType') or variant.get('InstanceType') or '')
|
|
964
|
+
except:
|
|
965
|
+
print('')
|
|
966
|
+
" 2>/dev/null) || _RESOLVED_INSTANCE=""
|
|
967
|
+
|
|
968
|
+
# Fallback: query endpoint config if variant doesn't have it
|
|
969
|
+
if [ -z "${_RESOLVED_INSTANCE}" ]; then
|
|
970
|
+
_EC_NAME=$(echo "${_EP_DESCRIBE}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('EndpointConfigName',''))" 2>/dev/null) || _EC_NAME=""
|
|
971
|
+
if [ -n "${_EC_NAME}" ]; then
|
|
972
|
+
_RESOLVED_INSTANCE=$(aws sagemaker describe-endpoint-config \
|
|
973
|
+
--endpoint-config-name "${_EC_NAME}" \
|
|
974
|
+
--region "${AWS_REGION}" \
|
|
975
|
+
--query 'ProductionVariants[0].InstanceType' \
|
|
976
|
+
--output text 2>/dev/null) || _RESOLVED_INSTANCE=""
|
|
977
|
+
[ "${_RESOLVED_INSTANCE}" = "None" ] && _RESOLVED_INSTANCE=""
|
|
978
|
+
fi
|
|
979
|
+
fi
|
|
980
|
+
fi
|
|
981
|
+
|
|
982
|
+
if [ -n "${_RESOLVED_INSTANCE}" ]; then
|
|
983
|
+
# Persist to do/config for downstream scripts
|
|
984
|
+
_config_file="${SCRIPT_DIR}/config"
|
|
985
|
+
if grep -q "^export DEPLOYED_INSTANCE_TYPE=" "${_config_file}" 2>/dev/null; then
|
|
986
|
+
sed -i.bak "s|^export DEPLOYED_INSTANCE_TYPE=.*|export DEPLOYED_INSTANCE_TYPE=\"${_RESOLVED_INSTANCE}\"|" "${_config_file}"
|
|
987
|
+
rm -f "${_config_file}.bak"
|
|
988
|
+
else
|
|
989
|
+
echo "export DEPLOYED_INSTANCE_TYPE=\"${_RESOLVED_INSTANCE}\"" >> "${_config_file}"
|
|
990
|
+
fi
|
|
991
|
+
INSTANCE_TYPE="${_RESOLVED_INSTANCE}"
|
|
992
|
+
echo " Resolved instance type: ${_RESOLVED_INSTANCE} (persisted to do/config)"
|
|
993
|
+
fi
|
|
994
|
+
fi
|
|
995
|
+
|
|
913
996
|
echo "✅ Deployment complete!"
|
|
914
997
|
echo ""
|
|
915
998
|
echo "📋 Deployment Details:"
|
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
# do/evaluate — Model Quality Evaluation
|
|
6
|
+
# Evaluates a deployed model/adapter via inference requests against the endpoint.
|
|
7
|
+
# Computes technique-specific quality metrics (perplexity, reward accuracy, etc.).
|
|
8
|
+
#
|
|
9
|
+
# Project: <%= projectName %>
|
|
10
|
+
|
|
11
|
+
set -e
|
|
12
|
+
set -u
|
|
13
|
+
set -o pipefail
|
|
14
|
+
|
|
15
|
+
# ── Source project configuration ──────────────────────────────────────────────
|
|
16
|
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
17
|
+
source "${SCRIPT_DIR}/config"
|
|
18
|
+
|
|
19
|
+
# ── CLI Variables ─────────────────────────────────────────────────────────────
|
|
20
|
+
ARG_ADAPTER=""
|
|
21
|
+
ARG_EVAL_DATASET=""
|
|
22
|
+
ARG_SAMPLES=""
|
|
23
|
+
ARG_METRICS=""
|
|
24
|
+
ARG_DRY_RUN=false
|
|
25
|
+
ARG_HELP=false
|
|
26
|
+
|
|
27
|
+
# ── Parse arguments ───────────────────────────────────────────────────────────
|
|
28
|
+
while [ $# -gt 0 ]; do
|
|
29
|
+
case "$1" in
|
|
30
|
+
--adapter)
|
|
31
|
+
if [ -z "${2:-}" ]; then echo "❌ --adapter requires a name"; exit 1; fi
|
|
32
|
+
ARG_ADAPTER="$2"; shift 2 ;;
|
|
33
|
+
--eval-dataset)
|
|
34
|
+
if [ -z "${2:-}" ]; then echo "❌ --eval-dataset requires a value"; exit 1; fi
|
|
35
|
+
ARG_EVAL_DATASET="$2"; shift 2 ;;
|
|
36
|
+
--samples)
|
|
37
|
+
if [ -z "${2:-}" ]; then echo "❌ --samples requires a number"; exit 1; fi
|
|
38
|
+
ARG_SAMPLES="$2"; shift 2 ;;
|
|
39
|
+
--metrics)
|
|
40
|
+
if [ -z "${2:-}" ]; then echo "❌ --metrics requires a value"; exit 1; fi
|
|
41
|
+
ARG_METRICS="$2"; shift 2 ;;
|
|
42
|
+
--dry-run) ARG_DRY_RUN=true; shift ;;
|
|
43
|
+
--help|-h) ARG_HELP=true; shift ;;
|
|
44
|
+
*)
|
|
45
|
+
echo "❌ Unknown option: $1"
|
|
46
|
+
echo " Run ./do/evaluate --help for usage."
|
|
47
|
+
exit 1
|
|
48
|
+
;;
|
|
49
|
+
esac
|
|
50
|
+
done
|
|
51
|
+
|
|
52
|
+
# ── Help ──────────────────────────────────────────────────────────────────────
|
|
53
|
+
if [ "${ARG_HELP}" = true ]; then
|
|
54
|
+
echo "Usage: ./do/evaluate [OPTIONS]"
|
|
55
|
+
echo " ./do/evaluate --adapter <name> --eval-dataset <source>"
|
|
56
|
+
echo ""
|
|
57
|
+
echo "Evaluate model/adapter quality via inference requests against the deployed endpoint."
|
|
58
|
+
echo "Computes technique-specific metrics: perplexity (SFT), reward accuracy (DPO)."
|
|
59
|
+
echo ""
|
|
60
|
+
echo "Options:"
|
|
61
|
+
echo " --adapter <name> Evaluate specific adapter (reads IC name from conf)"
|
|
62
|
+
echo " --eval-dataset <src> Evaluation dataset: s3://..., hf://..., or registry name"
|
|
63
|
+
echo " --samples <n> Limit evaluation to N samples (default: all)"
|
|
64
|
+
echo " --metrics <list> Comma-separated metrics to compute (default: all for technique)"
|
|
65
|
+
echo " --dry-run Show what would be evaluated without making requests"
|
|
66
|
+
echo " --help, -h Show this help message"
|
|
67
|
+
echo ""
|
|
68
|
+
echo "Examples:"
|
|
69
|
+
echo " ./do/evaluate # Evaluate default IC"
|
|
70
|
+
echo " ./do/evaluate --adapter sft-custom-a3f2 # Evaluate specific adapter"
|
|
71
|
+
echo " ./do/evaluate --eval-dataset \"hf://tatsu-lab/alpaca --take 50\""
|
|
72
|
+
echo " ./do/evaluate --adapter dpo-custom --eval-dataset s3://bucket/dpo-eval.jsonl"
|
|
73
|
+
echo ""
|
|
74
|
+
echo "Results saved to: .mlcc/eval-results/<adapter-or-ic>.json"
|
|
75
|
+
echo "Results are automatically included in do/register metadata."
|
|
76
|
+
exit 0
|
|
77
|
+
fi
|
|
78
|
+
|
|
79
|
+
# ── Resolve endpoint ──────────────────────────────────────────────────────────
|
|
80
|
+
ENDPOINT_NAME="${ENDPOINT_NAME:-}"
|
|
81
|
+
if [ -z "${ENDPOINT_NAME}" ]; then
|
|
82
|
+
echo "❌ No endpoint configured."
|
|
83
|
+
echo " Deploy first: ./do/deploy"
|
|
84
|
+
echo " Then run: ./do/evaluate"
|
|
85
|
+
exit 1
|
|
86
|
+
fi
|
|
87
|
+
|
|
88
|
+
# ── Resolve IC name ───────────────────────────────────────────────────────────
|
|
89
|
+
IC_NAME=""
|
|
90
|
+
ADAPTER_TECHNIQUE=""
|
|
91
|
+
EVAL_TARGET_NAME=""
|
|
92
|
+
|
|
93
|
+
if [ -n "${ARG_ADAPTER}" ]; then
|
|
94
|
+
# Adapter specified — look up IC from adapter conf
|
|
95
|
+
ADAPTER_CONF="${SCRIPT_DIR}/adapters/${ARG_ADAPTER}.conf"
|
|
96
|
+
if [ ! -f "${ADAPTER_CONF}" ]; then
|
|
97
|
+
echo "❌ Adapter config not found: do/adapters/${ARG_ADAPTER}.conf"
|
|
98
|
+
echo " Available adapters:"
|
|
99
|
+
if [ -d "${SCRIPT_DIR}/adapters" ]; then
|
|
100
|
+
for conf in "${SCRIPT_DIR}"/adapters/*.conf; do
|
|
101
|
+
[ -f "${conf}" ] || continue
|
|
102
|
+
echo " • $(basename "${conf}" .conf)"
|
|
103
|
+
done
|
|
104
|
+
fi
|
|
105
|
+
exit 1
|
|
106
|
+
fi
|
|
107
|
+
source "${ADAPTER_CONF}"
|
|
108
|
+
IC_NAME="${ADAPTER_IC_NAME:-}"
|
|
109
|
+
ADAPTER_TECHNIQUE="${ADAPTER_TECHNIQUE:-${ADAPTER_TUNE_TECHNIQUE:-}}"
|
|
110
|
+
EVAL_TARGET_NAME="${ARG_ADAPTER}"
|
|
111
|
+
else
|
|
112
|
+
# No adapter — use default IC
|
|
113
|
+
if [ -d "${SCRIPT_DIR}/ic" ]; then
|
|
114
|
+
for conf in "${SCRIPT_DIR}"/ic/*.conf; do
|
|
115
|
+
[ -f "${conf}" ] || continue
|
|
116
|
+
IC_DEPLOYED_NAME=""
|
|
117
|
+
source "${conf}"
|
|
118
|
+
if [ -n "${IC_DEPLOYED_NAME}" ]; then
|
|
119
|
+
IC_NAME="${IC_DEPLOYED_NAME}"
|
|
120
|
+
break
|
|
121
|
+
fi
|
|
122
|
+
done
|
|
123
|
+
fi
|
|
124
|
+
# Fallback to legacy config
|
|
125
|
+
IC_NAME="${IC_NAME:-${INFERENCE_COMPONENT_NAME:-}}"
|
|
126
|
+
EVAL_TARGET_NAME="${IC_NAME:-default}"
|
|
127
|
+
fi
|
|
128
|
+
|
|
129
|
+
if [ -z "${IC_NAME}" ]; then
|
|
130
|
+
echo "❌ No inference component found to evaluate."
|
|
131
|
+
echo " Deploy first: ./do/deploy"
|
|
132
|
+
echo " Or specify an adapter: ./do/evaluate --adapter <name>"
|
|
133
|
+
exit 1
|
|
134
|
+
fi
|
|
135
|
+
|
|
136
|
+
# ── Resolve technique (for metric selection) ──────────────────────────────────
|
|
137
|
+
# Priority: adapter conf ADAPTER_TECHNIQUE > TRAIN_TECHNIQUE from config > empty (no --technique is passed to the helper)
|
|
138
|
+
TECHNIQUE="${ADAPTER_TECHNIQUE:-${TRAIN_TECHNIQUE:-}}"
|
|
139
|
+
|
|
140
|
+
# ── Resolve eval dataset ──────────────────────────────────────────────────────
|
|
141
|
+
EVAL_DATASET="${ARG_EVAL_DATASET:-${EVAL_DATASET:-}}"
|
|
142
|
+
|
|
143
|
+
# ── Display configuration ─────────────────────────────────────────────────────
|
|
144
|
+
echo "🔬 Model Evaluation"
|
|
145
|
+
echo ""
|
|
146
|
+
echo " Endpoint: ${ENDPOINT_NAME}"
|
|
147
|
+
echo " IC: ${IC_NAME}"
|
|
148
|
+
if [ -n "${ARG_ADAPTER}" ]; then
|
|
149
|
+
echo " Adapter: ${ARG_ADAPTER}"
|
|
150
|
+
fi
|
|
151
|
+
if [ -n "${TECHNIQUE}" ]; then
|
|
152
|
+
echo " Technique: ${TECHNIQUE}"
|
|
153
|
+
fi
|
|
154
|
+
if [ -n "${EVAL_DATASET}" ]; then
|
|
155
|
+
echo " Dataset: ${EVAL_DATASET}"
|
|
156
|
+
fi
|
|
157
|
+
if [ -n "${ARG_SAMPLES}" ]; then
|
|
158
|
+
echo " Samples: ${ARG_SAMPLES}"
|
|
159
|
+
fi
|
|
160
|
+
echo ""
|
|
161
|
+
|
|
162
|
+
# ── Dry run ───────────────────────────────────────────────────────────────────
|
|
163
|
+
if [ "${ARG_DRY_RUN}" = true ]; then
|
|
164
|
+
echo "🔍 Dry run — would evaluate with above configuration."
|
|
165
|
+
echo " No inference requests will be made."
|
|
166
|
+
exit 0
|
|
167
|
+
fi
|
|
168
|
+
|
|
169
|
+
# ── Check endpoint is InService ───────────────────────────────────────────────
|
|
170
|
+
echo "🔍 Checking endpoint status..."
|
|
171
|
+
EP_STATUS=$(aws sagemaker describe-endpoint \
|
|
172
|
+
--endpoint-name "${ENDPOINT_NAME}" \
|
|
173
|
+
--region "${AWS_REGION}" \
|
|
174
|
+
--query 'EndpointStatus' \
|
|
175
|
+
--output text 2>/dev/null) || EP_STATUS=""
|
|
176
|
+
|
|
177
|
+
if [ "${EP_STATUS}" != "InService" ]; then
|
|
178
|
+
echo "❌ Endpoint is not InService (status: ${EP_STATUS:-unknown})"
|
|
179
|
+
echo ""
|
|
180
|
+
echo " Deploy first: ./do/deploy"
|
|
181
|
+
echo " Then run: ./do/evaluate"
|
|
182
|
+
exit 1
|
|
183
|
+
fi
|
|
184
|
+
echo " ✅ Endpoint is InService"
|
|
185
|
+
echo ""
|
|
186
|
+
|
|
187
|
+
# ── Run evaluation via Python helper ─────────────────────────────────────────
|
|
188
|
+
echo "🧪 Running evaluation..."
|
|
189
|
+
echo ""
|
|
190
|
+
|
|
191
|
+
EVAL_ARGS=(
|
|
192
|
+
--endpoint-name "${ENDPOINT_NAME}"
|
|
193
|
+
--ic-name "${IC_NAME}"
|
|
194
|
+
--region "${AWS_REGION}"
|
|
195
|
+
)
|
|
196
|
+
|
|
197
|
+
if [ -n "${TECHNIQUE}" ]; then
|
|
198
|
+
EVAL_ARGS+=(--technique "${TECHNIQUE}")
|
|
199
|
+
fi
|
|
200
|
+
if [ -n "${EVAL_DATASET}" ]; then
|
|
201
|
+
EVAL_ARGS+=(--eval-dataset "${EVAL_DATASET}")
|
|
202
|
+
fi
|
|
203
|
+
if [ -n "${ARG_SAMPLES}" ]; then
|
|
204
|
+
EVAL_ARGS+=(--samples "${ARG_SAMPLES}")
|
|
205
|
+
fi
|
|
206
|
+
if [ -n "${ARG_METRICS}" ]; then
|
|
207
|
+
EVAL_ARGS+=(--metrics "${ARG_METRICS}")
|
|
208
|
+
fi
|
|
209
|
+
|
|
210
|
+
EVAL_OUTPUT=$(python3 "${SCRIPT_DIR}/.eval_helper.py" evaluate "${EVAL_ARGS[@]}" 2>/dev/null | grep -E '^\{' | tail -1) || EVAL_OUTPUT=""
|
|
211
|
+
|
|
212
|
+
if [ -z "${EVAL_OUTPUT}" ]; then
|
|
213
|
+
echo "❌ Evaluation failed (no output from helper)"
|
|
214
|
+
echo " Check: endpoint accessibility, eval dataset format, Python deps (requests)"
|
|
215
|
+
exit 1
|
|
216
|
+
fi
|
|
217
|
+
|
|
218
|
+
# Check for error
|
|
219
|
+
HAS_ERROR=$(echo "${EVAL_OUTPUT}" | python3 -c "import sys,json; d=json.load(sys.stdin); print('yes' if d.get('error') else 'no')" 2>/dev/null) || HAS_ERROR="yes"
|
|
220
|
+
|
|
221
|
+
if [ "${HAS_ERROR}" = "yes" ]; then
|
|
222
|
+
ERROR_MSG=$(echo "${EVAL_OUTPUT}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('message','Unknown error'))" 2>/dev/null) || ERROR_MSG="Unknown error"
|
|
223
|
+
echo "❌ Evaluation failed: ${ERROR_MSG}"
|
|
224
|
+
exit 1
|
|
225
|
+
fi
|
|
226
|
+
|
|
227
|
+
# ── Save results ──────────────────────────────────────────────────────────────
|
|
228
|
+
RESULTS_DIR="${SCRIPT_DIR}/../.mlcc/eval-results"
|
|
229
|
+
mkdir -p "${RESULTS_DIR}"
|
|
230
|
+
RESULTS_FILE="${RESULTS_DIR}/${EVAL_TARGET_NAME}.json"
|
|
231
|
+
echo "${EVAL_OUTPUT}" | python3 -c "import sys,json; json.dump(json.load(sys.stdin), open(sys.argv[1],'w'), indent=2)" "${RESULTS_FILE}"  # path is passed via argv so quotes in it cannot break or inject into the Python source
|
|
232
|
+
|
|
233
|
+
echo "📁 Results saved to: .mlcc/eval-results/${EVAL_TARGET_NAME}.json"
|
|
234
|
+
echo ""
|
|
235
|
+
|
|
236
|
+
# ── Display summary ───────────────────────────────────────────────────────────
|
|
237
|
+
echo "╔══════════════════════════════════════════════════════════════════╗"
|
|
238
|
+
echo "║ Evaluation Results ║"
|
|
239
|
+
echo "╠══════════════════════════════════════════════════════════════════╣"
|
|
240
|
+
echo "${EVAL_OUTPUT}" | python3 -c "
|
|
241
|
+
import sys, json
|
|
242
|
+
data = json.load(sys.stdin)
|
|
243
|
+
print(f\"║ Target: {data.get('adapter_name', data.get('ic_name', 'unknown'))}\")
|
|
244
|
+
print(f\"║ Technique: {data.get('technique', 'unknown')}\")
|
|
245
|
+
print(f\"║ Samples: {data.get('samples_evaluated', 0)}\")
|
|
246
|
+
print(f\"║\")
|
|
247
|
+
metrics = data.get('metrics', {})
|
|
248
|
+
for name, value in metrics.items():
|
|
249
|
+
if isinstance(value, float):
|
|
250
|
+
print(f'║ {name}: {value:.4f}')
|
|
251
|
+
else:
|
|
252
|
+
print(f'║ {name}: {value}')
|
|
253
|
+
" 2>/dev/null
|
|
254
|
+
echo "╚══════════════════════════════════════════════════════════════════╝"
|
|
255
|
+
echo ""
|
|
256
|
+
|
|
257
|
+
# ── Optionally write to Athena ────────────────────────────────────────────────
|
|
258
|
+
if [ -n "${CI_BENCHMARK_RESULTS_BUCKET:-}" ]; then
|
|
259
|
+
echo "☁️ Persisting evaluation results to Athena..."
|
|
260
|
+
python3 "${SCRIPT_DIR}/.eval_helper.py" eval-write \
|
|
261
|
+
--results-file "${RESULTS_FILE}" \
|
|
262
|
+
--bucket "${CI_BENCHMARK_RESULTS_BUCKET}" \
|
|
263
|
+
--region "${AWS_REGION}" 2>/dev/null | grep -E '^\{' | tail -1 > /dev/null || {
|
|
264
|
+
echo " ⚠️ Failed to persist to Athena (non-fatal)"
|
|
265
|
+
}
|
|
266
|
+
fi
|
|
267
|
+
|
|
268
|
+
echo "✅ Evaluation complete."
|
|
269
|
+
echo ""
|
|
270
|
+
echo " Next steps:"
|
|
271
|
+
echo " • Run ./do/register to include eval metrics in model package metadata"
|
|
272
|
+
echo " • Compare adapters: ./do/evaluate --adapter <other-adapter>"
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Lazy instance type resolution for heterogeneous pool endpoints.
|
|
3
|
+
# Source this file after do/config + lib/profile.sh in any script that needs INSTANCE_TYPE.
|
|
4
|
+
#
|
|
5
|
+
# When INSTANCE_TYPE is empty (pool endpoints) and DEPLOYED_INSTANCE_TYPE hasn't been
|
|
6
|
+
# persisted yet (no do/deploy run), queries the live endpoint once and persists the result.
|
|
7
|
+
# Subsequent calls read from do/config without any AWS API calls.
|
|
8
|
+
#
|
|
9
|
+
# After sourcing, INSTANCE_TYPE is guaranteed to be set (or empty if resolution failed).
|
|
10
|
+
# DEPLOYED_GPU_COUNT is also resolved from a static lookup table (instances.json catalog)
|
|
11
|
+
# and persisted to do/config alongside the instance type.
|
|
12
|
+
#
|
|
13
|
+
# Usage:
|
|
14
|
+
# source "${SCRIPT_DIR}/config"
|
|
15
|
+
# source "${SCRIPT_DIR}/lib/profile.sh"
|
|
16
|
+
# source "${SCRIPT_DIR}/lib/resolve-instance.sh"
|
|
17
|
+
# # INSTANCE_TYPE and DEPLOYED_GPU_COUNT are now resolved
|
|
18
|
+
|
|
19
|
+
# Resolve SCRIPT_DIR if not already set (defensive — normally inherited from caller)
|
|
20
|
+
if [ -z "${SCRIPT_DIR:-}" ]; then
|
|
21
|
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
|
22
|
+
fi
|
|
23
|
+
|
|
24
|
+
# Skip if INSTANCE_TYPE is already set (single-instance endpoints)
|
|
25
|
+
if [ -n "${INSTANCE_TYPE:-}" ]; then
|
|
26
|
+
return 0 2>/dev/null || true
|
|
27
|
+
fi
|
|
28
|
+
|
|
29
|
+
# Check if DEPLOYED_INSTANCE_TYPE was previously persisted
|
|
30
|
+
if [ -n "${DEPLOYED_INSTANCE_TYPE:-}" ]; then
|
|
31
|
+
INSTANCE_TYPE="${DEPLOYED_INSTANCE_TYPE}"
|
|
32
|
+
export INSTANCE_TYPE
|
|
33
|
+
# Ensure DEPLOYED_GPU_COUNT is also exported (may already be in do/config)
|
|
34
|
+
if [ -n "${DEPLOYED_GPU_COUNT:-}" ]; then
|
|
35
|
+
export DEPLOYED_GPU_COUNT
|
|
36
|
+
fi
|
|
37
|
+
return 0 2>/dev/null || true
|
|
38
|
+
fi
|
|
39
|
+
|
|
40
|
+
# Check if BENCHMARK_INSTANCE_TYPE was previously persisted (by do/benchmark)
|
|
41
|
+
if [ -n "${BENCHMARK_INSTANCE_TYPE:-}" ]; then
|
|
42
|
+
INSTANCE_TYPE="${BENCHMARK_INSTANCE_TYPE}"
|
|
43
|
+
export INSTANCE_TYPE
|
|
44
|
+
return 0 2>/dev/null || true
|
|
45
|
+
fi
|
|
46
|
+
|
|
47
|
+
# ── Live resolution from endpoint (one-time, persisted) ──────────────────────
|
|
48
|
+
# Only attempt if ENDPOINT_NAME is configured and AWS credentials are available.
|
|
49
|
+
if [ -z "${ENDPOINT_NAME:-}" ]; then
|
|
50
|
+
return 0 2>/dev/null || true
|
|
51
|
+
fi
|
|
52
|
+
|
|
53
|
+
_RESOLVED_INSTANCE=""
|
|
54
|
+
_EP_DESCRIBE=$(aws sagemaker describe-endpoint \
|
|
55
|
+
--endpoint-name "${ENDPOINT_NAME}" \
|
|
56
|
+
--region "${AWS_REGION:-us-east-1}" \
|
|
57
|
+
--output json 2>/dev/null) || _EP_DESCRIBE=""
|
|
58
|
+
|
|
59
|
+
if [ -n "${_EP_DESCRIBE}" ]; then
|
|
60
|
+
_RESOLVED_INSTANCE=$(echo "${_EP_DESCRIBE}" | python3 -c "
|
|
61
|
+
import sys, json
|
|
62
|
+
try:
|
|
63
|
+
ep = json.load(sys.stdin)
|
|
64
|
+
variant = ep.get('ProductionVariants', [{}])[0]
|
|
65
|
+
print(variant.get('CurrentInstanceType') or variant.get('InstanceType') or '')
|
|
66
|
+
except:
|
|
67
|
+
print('')
|
|
68
|
+
" 2>/dev/null) || _RESOLVED_INSTANCE=""
|
|
69
|
+
|
|
70
|
+
# Fallback: query endpoint config for InstanceType, then the pool entry with the lowest Priority value
|
|
71
|
+
if [ -z "${_RESOLVED_INSTANCE}" ]; then
|
|
72
|
+
_EC_NAME=$(echo "${_EP_DESCRIBE}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('EndpointConfigName',''))" 2>/dev/null) || _EC_NAME=""
|
|
73
|
+
if [ -n "${_EC_NAME}" ]; then
|
|
74
|
+
_RESOLVED_INSTANCE=$(aws sagemaker describe-endpoint-config \
|
|
75
|
+
--endpoint-config-name "${_EC_NAME}" \
|
|
76
|
+
--region "${AWS_REGION:-us-east-1}" \
|
|
77
|
+
--query 'ProductionVariants[0].InstanceType' \
|
|
78
|
+
--output text 2>/dev/null) || _RESOLVED_INSTANCE=""
|
|
79
|
+
[ "${_RESOLVED_INSTANCE}" = "None" ] && _RESOLVED_INSTANCE=""
|
|
80
|
+
|
|
81
|
+
# Final fallback: the InstancePools entry with the lowest Priority value
|
|
82
|
+
if [ -z "${_RESOLVED_INSTANCE}" ]; then
|
|
83
|
+
_RESOLVED_INSTANCE=$(aws sagemaker describe-endpoint-config \
|
|
84
|
+
--endpoint-config-name "${_EC_NAME}" \
|
|
85
|
+
--region "${AWS_REGION:-us-east-1}" \
|
|
86
|
+
--output json 2>/dev/null | python3 -c "
|
|
87
|
+
import sys, json
|
|
88
|
+
try:
|
|
89
|
+
ec = json.load(sys.stdin)
|
|
90
|
+
pools = ec.get('ProductionVariants', [{}])[0].get('InstancePools', [])
|
|
91
|
+
if pools:
|
|
92
|
+
best = min(pools, key=lambda p: p.get('Priority', 999))
|
|
93
|
+
print(best.get('InstanceType', ''))
|
|
94
|
+
else:
|
|
95
|
+
print('')
|
|
96
|
+
except:
|
|
97
|
+
print('')
|
|
98
|
+
" 2>/dev/null) || _RESOLVED_INSTANCE=""
|
|
99
|
+
fi
|
|
100
|
+
fi
|
|
101
|
+
fi
|
|
102
|
+
fi
|
|
103
|
+
|
|
104
|
+
# Persist to do/config (one-time write — subsequent sources read it directly)
|
|
105
|
+
if [ -n "${_RESOLVED_INSTANCE}" ]; then
|
|
106
|
+
# Record the resolved instance type in ${SCRIPT_DIR}/config and export it for
# the current shell. An existing `export DEPLOYED_INSTANCE_TYPE=` line is
# rewritten in place; otherwise a new line is appended.
_config_file="${SCRIPT_DIR}/config"
if grep -q "^export DEPLOYED_INSTANCE_TYPE=" "${_config_file}" 2>/dev/null; then
  # `-i.bak` followed by removing the backup is accepted by both GNU and BSD sed.
  sed -i.bak "s|^export DEPLOYED_INSTANCE_TYPE=.*|export DEPLOYED_INSTANCE_TYPE=\"${_RESOLVED_INSTANCE}\"|" "${_config_file}"
  rm -f "${_config_file}.bak"
else
  # If the file's last byte is not a newline, a plain append would be glued
  # onto the previous line and corrupt it. $(...) strips a trailing newline,
  # so a non-empty result means the final byte is something else.
  if [ -s "${_config_file}" ] && [ -n "$(tail -c 1 "${_config_file}" 2>/dev/null)" ]; then
    printf '\n' >> "${_config_file}"
  fi
  printf 'export DEPLOYED_INSTANCE_TYPE="%s"\n' "${_RESOLVED_INSTANCE}" >> "${_config_file}"
fi
INSTANCE_TYPE="${_RESOLVED_INSTANCE}"
DEPLOYED_INSTANCE_TYPE="${_RESOLVED_INSTANCE}"
export INSTANCE_TYPE DEPLOYED_INSTANCE_TYPE
|
|
116
|
+
|
|
117
|
+
# ── Resolve GPU count from instance type ─────────────────────────────────
# Static lookup table derived from servers/lib/catalogs/instances.json.
# Maps known SageMaker instance types to their GPU count.
#
# Arguments: $1 - instance type, e.g. ml.g5.12xlarge (may be omitted)
# Outputs:   the GPU count on stdout, or an empty line when the type is not
#            in the table
# Returns:   0 in every case
_resolve_gpu_count() {
  # ${1:-} rather than $1: a missing argument is treated as "unknown type"
  # instead of aborting a caller that runs under `set -u`.
  case "${1:-}" in
    ml.g5.xlarge|ml.g5.2xlarge|ml.g5.4xlarge|ml.g5.8xlarge|ml.g5.16xlarge) echo 1 ;;
    ml.g5.12xlarge|ml.g5.24xlarge) echo 4 ;;
    ml.g5.48xlarge) echo 8 ;;
    ml.g4dn.xlarge|ml.g4dn.2xlarge|ml.g4dn.4xlarge|ml.g4dn.8xlarge|ml.g4dn.16xlarge) echo 1 ;;
    ml.g4dn.12xlarge) echo 4 ;;
    ml.g6.xlarge|ml.g6.2xlarge|ml.g6.4xlarge|ml.g6.8xlarge|ml.g6.16xlarge) echo 1 ;;
    ml.g6.12xlarge|ml.g6.24xlarge) echo 4 ;;
    ml.g6.48xlarge) echo 8 ;;
    ml.g6e.xlarge|ml.g6e.2xlarge|ml.g6e.4xlarge|ml.g6e.8xlarge|ml.g6e.16xlarge) echo 1 ;;
    ml.g6e.12xlarge|ml.g6e.24xlarge) echo 4 ;;
    ml.g6e.48xlarge) echo 8 ;;
    ml.p4d.24xlarge|ml.p4de.24xlarge) echo 8 ;;
    ml.p5.48xlarge|ml.p5e.48xlarge) echo 8 ;;
    # Unknown type: emit an empty line so callers can test the result with -n.
    *) echo "" ;;
  esac
}
|
|
138
|
+
|
|
139
|
+
# Look up the GPU count for the resolved instance type. When the type is in
# the table, record the count in ${_config_file} (rewriting an existing
# `export DEPLOYED_GPU_COUNT=` line, else appending one) and export it.
# Unknown types leave both the file and the environment untouched.
_GPU_COUNT=$(_resolve_gpu_count "${_RESOLVED_INSTANCE}")
if [ -n "${_GPU_COUNT}" ]; then
  if grep -q "^export DEPLOYED_GPU_COUNT=" "${_config_file}" 2>/dev/null; then
    # `-i.bak` followed by removing the backup is accepted by both GNU and BSD sed.
    sed -i.bak "s|^export DEPLOYED_GPU_COUNT=.*|export DEPLOYED_GPU_COUNT=\"${_GPU_COUNT}\"|" "${_config_file}"
    rm -f "${_config_file}.bak"
  else
    # If the file's last byte is not a newline, a plain append would be glued
    # onto the previous line and corrupt it. $(...) strips a trailing newline,
    # so a non-empty result means the final byte is something else.
    if [ -s "${_config_file}" ] && [ -n "$(tail -c 1 "${_config_file}" 2>/dev/null)" ]; then
      printf '\n' >> "${_config_file}"
    fi
    printf 'export DEPLOYED_GPU_COUNT="%s"\n' "${_GPU_COUNT}" >> "${_config_file}"
  fi
  DEPLOYED_GPU_COUNT="${_GPU_COUNT}"
  export DEPLOYED_GPU_COUNT
fi
# Drop the scratch variable and the lookup helper so neither leaks into the
# shell that sourced this file.
unset _GPU_COUNT
unset -f _resolve_gpu_count
|
|
152
|
+
fi
|
|
153
|
+
|
|
154
|
+
# Clean up internal vars so nothing leaks into the shell that sourced this
# file. _config_file is included because the persist step assigns it and never
# removes it; unsetting a name that was never set is harmless.
unset _RESOLVED_INSTANCE _EP_DESCRIBE _EC_NAME _config_file
|
package/templates/do/register
CHANGED
|
@@ -10,11 +10,16 @@ set -o pipefail
|
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
source "${SCRIPT_DIR}/config"
|
|
12
12
|
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
|
+
source "${SCRIPT_DIR}/lib/resolve-instance.sh"
|
|
13
14
|
|
|
14
15
|
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
15
16
|
ROLE_ARN="${ROLE_ARN:-${_PROFILE_roleArn:-}}"
|
|
16
17
|
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE_ecrRepositoryName:-ml-container-creator}}"
|
|
17
18
|
|
|
19
|
+
# Resolve INSTANCE_TYPE for heterogeneous pool endpoints (may be empty from config,
|
|
20
|
+
# filled by do/deploy after InService resolution)
|
|
21
|
+
INSTANCE_TYPE="${INSTANCE_TYPE:-${DEPLOYED_INSTANCE_TYPE:-${BENCHMARK_INSTANCE_TYPE:-}}}"
|
|
22
|
+
|
|
18
23
|
# ============================================================
|
|
19
24
|
# Register deployment to the deployment registry
|
|
20
25
|
# ============================================================
|
package/templates/do/test
CHANGED
|
@@ -10,6 +10,7 @@ set -o pipefail
|
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
source "${SCRIPT_DIR}/config"
|
|
12
12
|
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
|
+
source "${SCRIPT_DIR}/lib/resolve-instance.sh"
|
|
13
14
|
|
|
14
15
|
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
15
16
|
# ============================================================
|