@aws/ml-container-creator 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/servers/endpoint-picker/index.js +24 -4
- package/src/lib/bootstrap-command-handler.js +8 -0
- package/src/lib/bootstrap-profile-manager.js +17 -0
- package/src/lib/bootstrap-provisioners.js +48 -0
- package/src/lib/path-prover-brain.js +57 -0
- package/src/lib/prove-pipeline-executor.js +35 -0
- package/templates/do/.benchmark_writer.py +114 -4
- package/templates/do/.register_helper.py +643 -67
- package/templates/do/.stage_helper.py +1 -0
- package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.tune_helper.cpython-312.pyc +0 -0
- package/templates/do/adapter +267 -171
- package/templates/do/benchmark +60 -5
- package/templates/do/config +1 -1
- package/templates/do/lib/inference-component.sh +6 -25
- package/templates/do/register +29 -2
- package/templates/do/tune +94 -12
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/templates/do/adapter
CHANGED
|
@@ -44,6 +44,7 @@ _usage() {
|
|
|
44
44
|
echo ""
|
|
45
45
|
echo "Options:"
|
|
46
46
|
echo " --help, -h Show this help message"
|
|
47
|
+
echo " --force Bypass adapter-model compatibility check (add command)"
|
|
47
48
|
echo " --local Use local aws s3 cp instead of Processing Job (--from-tune)"
|
|
48
49
|
echo " --no-wait Submit Processing Job and return immediately (--from-tune)"
|
|
49
50
|
echo ""
|
|
@@ -378,6 +379,7 @@ _adapter_add() {
|
|
|
378
379
|
local registry_arn=""
|
|
379
380
|
local use_local=""
|
|
380
381
|
local no_wait=""
|
|
382
|
+
local force=""
|
|
381
383
|
|
|
382
384
|
# Parse add arguments
|
|
383
385
|
shift # remove 'add' from args
|
|
@@ -429,6 +431,10 @@ _adapter_add() {
|
|
|
429
431
|
no_wait="true"
|
|
430
432
|
shift
|
|
431
433
|
;;
|
|
434
|
+
--force)
|
|
435
|
+
force="true"
|
|
436
|
+
shift
|
|
437
|
+
;;
|
|
432
438
|
--help|-h)
|
|
433
439
|
echo "Usage: ./do/adapter add <name> --weights <s3-uri>"
|
|
434
440
|
echo " ./do/adapter add <name> --from-hub <hf-repo-id>"
|
|
@@ -449,6 +455,7 @@ _adapter_add() {
|
|
|
449
455
|
echo " With ARN: adds directly using specified version ARN"
|
|
450
456
|
echo " --local Use local aws s3 cp instead of Processing Job (--from-tune only)"
|
|
451
457
|
echo " --no-wait Submit Processing Job and return immediately (--from-tune only)"
|
|
458
|
+
echo " --force Bypass adapter-model compatibility check"
|
|
452
459
|
echo ""
|
|
453
460
|
echo "Note: --weights, --from-hub, --from-tune, and --from-registry are mutually exclusive."
|
|
454
461
|
echo ""
|
|
@@ -1030,14 +1037,30 @@ _adapter_add() {
|
|
|
1030
1037
|
fi
|
|
1031
1038
|
fi
|
|
1032
1039
|
|
|
1033
|
-
# ── Validate adapter name uniqueness
|
|
1040
|
+
# ── Validate adapter name uniqueness (AC-1.7: overwrite when --from-tune) ──
|
|
1034
1041
|
if [ -f "${SCRIPT_DIR}/adapters/${adapter_name}.conf" ]; then
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1042
|
+
if [ -n "${from_tune}" ]; then
|
|
1043
|
+
# AC-1.7: Overwrite existing adapter when re-running auto-register
|
|
1044
|
+
echo "ℹ️ Adapter '${adapter_name}' already exists — overwriting (re-tune)"
|
|
1045
|
+
# Delete existing IC if deployed, then remove conf
|
|
1046
|
+
local _existing_ic_name=""
|
|
1047
|
+
_existing_ic_name=$(grep "^export ADAPTER_IC_NAME=" "${SCRIPT_DIR}/adapters/${adapter_name}.conf" 2>/dev/null | sed 's/^export ADAPTER_IC_NAME="//' | sed 's/"$//' || echo "")
|
|
1048
|
+
if [ -n "${_existing_ic_name}" ]; then
|
|
1049
|
+
aws sagemaker delete-inference-component \
|
|
1050
|
+
--inference-component-name "${_existing_ic_name}" \
|
|
1051
|
+
--region "${AWS_REGION}" 2>/dev/null || true
|
|
1052
|
+
# Brief wait for deletion to propagate
|
|
1053
|
+
sleep 5
|
|
1054
|
+
fi
|
|
1055
|
+
rm -f "${SCRIPT_DIR}/adapters/${adapter_name}.conf"
|
|
1056
|
+
else
|
|
1057
|
+
echo "❌ Adapter already exists: ${adapter_name}"
|
|
1058
|
+
echo ""
|
|
1059
|
+
echo " An adapter with this name is already registered."
|
|
1060
|
+
echo " To update its weights, use: ./do/adapter update ${adapter_name} --weights <new-uri>"
|
|
1061
|
+
echo " To remove it first: ./do/adapter remove ${adapter_name}"
|
|
1062
|
+
exit 1
|
|
1063
|
+
fi
|
|
1041
1064
|
fi
|
|
1042
1065
|
|
|
1043
1066
|
echo "🔌 Adding adapter: ${adapter_name}"
|
|
@@ -1098,6 +1121,80 @@ _adapter_add() {
|
|
|
1098
1121
|
_validate_adapter_config "${weights_uri}" || true
|
|
1099
1122
|
fi
|
|
1100
1123
|
|
|
1124
|
+
# ── Compatibility check: adapter parent model vs deployed model ───────
|
|
1125
|
+
# Derive parent metadata early (same logic used later for conf file)
|
|
1126
|
+
local _compat_parent_arn=""
|
|
1127
|
+
local _compat_parent_slug=""
|
|
1128
|
+
|
|
1129
|
+
if [ -n "${from_tune}" ]; then
|
|
1130
|
+
_compat_parent_slug="${MODEL_NAME:-}"
|
|
1131
|
+
if [ -n "${MODEL_PKG_ARN:-}" ]; then
|
|
1132
|
+
_compat_parent_arn="${MODEL_PKG_ARN}"
|
|
1133
|
+
fi
|
|
1134
|
+
elif [ -n "${from_registry}" ] && [ -n "${version_line:-}" ]; then
|
|
1135
|
+
_compat_parent_arn=$(echo "${version_line}" | python3 -c "
|
|
1136
|
+
import sys, json
|
|
1137
|
+
data = json.loads(sys.stdin.read())
|
|
1138
|
+
metadata = data.get('metadata', {})
|
|
1139
|
+
print(metadata.get('parentModelVersionArn', ''))
|
|
1140
|
+
" 2>/dev/null || echo "")
|
|
1141
|
+
_compat_parent_slug=$(echo "${version_line}" | python3 -c "
|
|
1142
|
+
import sys, json
|
|
1143
|
+
data = json.loads(sys.stdin.read())
|
|
1144
|
+
metadata = data.get('metadata', {})
|
|
1145
|
+
print(metadata.get('modelName', ''))
|
|
1146
|
+
" 2>/dev/null || echo "")
|
|
1147
|
+
fi
|
|
1148
|
+
|
|
1149
|
+
if [ -z "${_compat_parent_arn}" ] && [ -z "${_compat_parent_slug}" ]; then
|
|
1150
|
+
echo "ℹ️ No parent model metadata — skipping compatibility check."
|
|
1151
|
+
elif [ "${force}" = "true" ]; then
|
|
1152
|
+
echo "ℹ️ --force: skipping compatibility check."
|
|
1153
|
+
else
|
|
1154
|
+
# Resolve base IC name for compat check (already validated InService above)
|
|
1155
|
+
local _compat_base_ic_name
|
|
1156
|
+
_compat_base_ic_name="${base_ic_name}"
|
|
1157
|
+
|
|
1158
|
+
# Get deployed model identity from base IC
|
|
1159
|
+
local _compat_deployed_model=""
|
|
1160
|
+
_compat_deployed_model=$(aws sagemaker describe-inference-component \
|
|
1161
|
+
--inference-component-name "${_compat_base_ic_name}" \
|
|
1162
|
+
--query 'Specification.Container.ArtifactUrl' --output text \
|
|
1163
|
+
--region "${AWS_REGION}" 2>/dev/null) || _compat_deployed_model=""
|
|
1164
|
+
|
|
1165
|
+
if [ -z "${_compat_deployed_model}" ] || [ "${_compat_deployed_model}" = "None" ]; then
|
|
1166
|
+
echo "ℹ️ Could not verify compatibility (DescribeInferenceComponent returned no artifact URL). Proceeding."
|
|
1167
|
+
else
|
|
1168
|
+
# Primary check: compare adapter parent MPG ARN against deployed MPG ARN
|
|
1169
|
+
local _compat_deployed_mpg="${MODEL_PKG_ARN:-}"
|
|
1170
|
+
local _compat_expected_slug="${_compat_parent_slug}"
|
|
1171
|
+
|
|
1172
|
+
local _compat_mismatch="false"
|
|
1173
|
+
if [ -n "${_compat_parent_arn}" ] && [ -n "${_compat_deployed_mpg}" ] && \
|
|
1174
|
+
[ "${_compat_parent_arn}" != "${_compat_deployed_mpg}" ]; then
|
|
1175
|
+
_compat_mismatch="true"
|
|
1176
|
+
fi
|
|
1177
|
+
|
|
1178
|
+
if [ "${_compat_mismatch}" = "true" ]; then
|
|
1179
|
+
# Fallback: check if artifact URL contains the expected model slug
|
|
1180
|
+
if [ -n "${_compat_expected_slug}" ] && \
|
|
1181
|
+
[[ "${_compat_deployed_model}" == *"${_compat_expected_slug}"* ]]; then
|
|
1182
|
+
: # Slug match — compatible despite ARN mismatch
|
|
1183
|
+
else
|
|
1184
|
+
echo "⚠️ Adapter was trained on: ${_compat_parent_arn}"
|
|
1185
|
+
echo " Deployed model: ${_compat_deployed_mpg:-${_compat_deployed_model:-unknown}}"
|
|
1186
|
+
if [ -t 0 ]; then
|
|
1187
|
+
read -p " Continue anyway? [y/N] " confirm
|
|
1188
|
+
[[ "${confirm}" =~ ^[Yy] ]] || exit 1
|
|
1189
|
+
else
|
|
1190
|
+
echo " Aborting (non-interactive). Use --force to override."
|
|
1191
|
+
exit 1
|
|
1192
|
+
fi
|
|
1193
|
+
fi
|
|
1194
|
+
fi
|
|
1195
|
+
fi
|
|
1196
|
+
fi
|
|
1197
|
+
|
|
1101
1198
|
# ── Build adapter IC name ─────────────────────────────────────────────
|
|
1102
1199
|
local adapter_ic_name="${PROJECT_NAME}-adapter-${adapter_name}"
|
|
1103
1200
|
|
|
@@ -1160,6 +1257,41 @@ export ADAPTER_SOURCE="tune"
|
|
|
1160
1257
|
export ADAPTER_TUNE_TECHNIQUE="${tune_technique_meta}"
|
|
1161
1258
|
export ADAPTER_TUNE_DATASET="${tune_dataset_meta}"
|
|
1162
1259
|
EOF
|
|
1260
|
+
|
|
1261
|
+
# Store parent model metadata for compat check (US-3 prerequisite)
|
|
1262
|
+
# ADAPTER_PARENT_MODEL_SLUG from MODEL_NAME in do/config
|
|
1263
|
+
local parent_model_slug="${MODEL_NAME:-}"
|
|
1264
|
+
# ADAPTER_PARENT_MODEL_ARN: resolve base model version ARN from deployment MPG
|
|
1265
|
+
local parent_model_arn=""
|
|
1266
|
+
if [ -n "${MODEL_PKG_ARN:-}" ]; then
|
|
1267
|
+
parent_model_arn="${MODEL_PKG_ARN}"
|
|
1268
|
+
else
|
|
1269
|
+
# Query the deployment MPG for the latest base model version
|
|
1270
|
+
local models_json
|
|
1271
|
+
models_json=$(python3 "${SCRIPT_DIR}/.register_helper.py" list-models \
|
|
1272
|
+
--project-name "${PROJECT_NAME}" \
|
|
1273
|
+
--region "${AWS_REGION}" 2>/dev/null || echo "")
|
|
1274
|
+
local models_line
|
|
1275
|
+
models_line=$(echo "${models_json}" | grep -E '^\{' | tail -1)
|
|
1276
|
+
if [ -n "${models_line}" ]; then
|
|
1277
|
+
parent_model_arn=$(echo "${models_line}" | python3 -c "
|
|
1278
|
+
import sys, json
|
|
1279
|
+
data = json.loads(sys.stdin.read())
|
|
1280
|
+
models = data.get('models', [])
|
|
1281
|
+
if models:
|
|
1282
|
+
print(models[0].get('arn', ''))
|
|
1283
|
+
else:
|
|
1284
|
+
print('')
|
|
1285
|
+
" 2>/dev/null || echo "")
|
|
1286
|
+
fi
|
|
1287
|
+
fi
|
|
1288
|
+
|
|
1289
|
+
if [ -n "${parent_model_arn}" ] || [ -n "${parent_model_slug}" ]; then
|
|
1290
|
+
cat >> "${SCRIPT_DIR}/adapters/${adapter_name}.conf" <<EOF
|
|
1291
|
+
export ADAPTER_PARENT_MODEL_ARN="${parent_model_arn}"
|
|
1292
|
+
export ADAPTER_PARENT_MODEL_SLUG="${parent_model_slug}"
|
|
1293
|
+
EOF
|
|
1294
|
+
fi
|
|
1163
1295
|
fi
|
|
1164
1296
|
|
|
1165
1297
|
# Add registry-specific metadata if --from-registry was used
|
|
@@ -1167,6 +1299,39 @@ EOF
|
|
|
1167
1299
|
cat >> "${SCRIPT_DIR}/adapters/${adapter_name}.conf" <<EOF
|
|
1168
1300
|
export ADAPTER_SOURCE="registry"
|
|
1169
1301
|
export ADAPTER_REGISTRY_ARN="${registry_arn}"
|
|
1302
|
+
EOF
|
|
1303
|
+
|
|
1304
|
+
# Store parent model metadata for compat check (US-3 prerequisite)
|
|
1305
|
+
# Extract parentModelVersionArn and modelName from registry version metadata
|
|
1306
|
+
local parent_model_arn=""
|
|
1307
|
+
local parent_model_slug=""
|
|
1308
|
+
if [ -n "${version_line:-}" ]; then
|
|
1309
|
+
parent_model_arn=$(echo "${version_line}" | python3 -c "
|
|
1310
|
+
import sys, json
|
|
1311
|
+
data = json.loads(sys.stdin.read())
|
|
1312
|
+
metadata = data.get('metadata', {})
|
|
1313
|
+
print(metadata.get('parentModelVersionArn', ''))
|
|
1314
|
+
" 2>/dev/null || echo "")
|
|
1315
|
+
parent_model_slug=$(echo "${version_line}" | python3 -c "
|
|
1316
|
+
import sys, json
|
|
1317
|
+
data = json.loads(sys.stdin.read())
|
|
1318
|
+
metadata = data.get('metadata', {})
|
|
1319
|
+
print(metadata.get('modelName', ''))
|
|
1320
|
+
" 2>/dev/null || echo "")
|
|
1321
|
+
fi
|
|
1322
|
+
|
|
1323
|
+
if [ -n "${parent_model_arn}" ] || [ -n "${parent_model_slug}" ]; then
|
|
1324
|
+
cat >> "${SCRIPT_DIR}/adapters/${adapter_name}.conf" <<EOF
|
|
1325
|
+
export ADAPTER_PARENT_MODEL_ARN="${parent_model_arn}"
|
|
1326
|
+
export ADAPTER_PARENT_MODEL_SLUG="${parent_model_slug}"
|
|
1327
|
+
EOF
|
|
1328
|
+
fi
|
|
1329
|
+
fi
|
|
1330
|
+
|
|
1331
|
+
# Default source: bare S3 URI (no --from-* flag)
|
|
1332
|
+
if [ -z "${from_hub}" ] && [ -z "${from_tune}" ] && [ -z "${from_registry}" ]; then
|
|
1333
|
+
cat >> "${SCRIPT_DIR}/adapters/${adapter_name}.conf" <<EOF
|
|
1334
|
+
export ADAPTER_SOURCE="s3"
|
|
1170
1335
|
EOF
|
|
1171
1336
|
fi
|
|
1172
1337
|
|
|
@@ -1194,171 +1359,102 @@ EOF
|
|
|
1194
1359
|
}
|
|
1195
1360
|
|
|
1196
1361
|
_adapter_list() {
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
echo "❌ Failed to list inference components on endpoint: ${ENDPOINT_NAME}"
|
|
1211
|
-
echo " Check that the endpoint exists and you have sagemaker:ListInferenceComponents permission."
|
|
1212
|
-
exit 1
|
|
1213
|
-
}
|
|
1214
|
-
|
|
1215
|
-
# Extract IC names from the list response
|
|
1216
|
-
local ic_names
|
|
1217
|
-
ic_names=$(echo "${ic_list}" | jq -r '.InferenceComponents[].InferenceComponentName' 2>/dev/null)
|
|
1218
|
-
|
|
1219
|
-
if [ -z "${ic_names}" ]; then
|
|
1220
|
-
echo "No adapters found on this endpoint."
|
|
1221
|
-
echo ""
|
|
1222
|
-
echo "Add one with: ./do/adapter add <name> --weights <s3-uri>"
|
|
1223
|
-
return 0
|
|
1224
|
-
fi
|
|
1225
|
-
|
|
1226
|
-
# ── Collect local adapter names for ownership check ───────────────────
|
|
1227
|
-
local local_adapters=""
|
|
1228
|
-
if [ -d "${SCRIPT_DIR}/adapters" ]; then
|
|
1229
|
-
for conf_file in "${SCRIPT_DIR}"/adapters/*.conf; do
|
|
1230
|
-
[ -f "${conf_file}" ] || continue
|
|
1231
|
-
local conf_adapter_name
|
|
1232
|
-
conf_adapter_name=$(grep "^export ADAPTER_IC_NAME=" "${conf_file}" 2>/dev/null | sed 's/^export ADAPTER_IC_NAME="//' | sed 's/"$//' || echo "")
|
|
1233
|
-
if [ -n "${conf_adapter_name}" ]; then
|
|
1234
|
-
local_adapters="${local_adapters} ${conf_adapter_name}"
|
|
1235
|
-
fi
|
|
1236
|
-
done
|
|
1237
|
-
fi
|
|
1238
|
-
|
|
1239
|
-
# ── Filter to adapter ICs and collect details ─────────────────────────
|
|
1240
|
-
local found_adapters=0
|
|
1241
|
-
local output_lines=""
|
|
1242
|
-
|
|
1243
|
-
for ic_name in ${ic_names}; do
|
|
1244
|
-
# Describe each IC to check if it's an adapter (has BaseInferenceComponentName)
|
|
1245
|
-
local ic_detail
|
|
1246
|
-
ic_detail=$(aws sagemaker describe-inference-component \
|
|
1247
|
-
--inference-component-name "${ic_name}" \
|
|
1248
|
-
--region "${AWS_REGION}" 2>/dev/null) || continue
|
|
1249
|
-
|
|
1250
|
-
# Check if this IC has a BaseInferenceComponentName (adapter IC)
|
|
1251
|
-
local base_ic
|
|
1252
|
-
base_ic=$(echo "${ic_detail}" | jq -r '.Specification.BaseInferenceComponentName // empty' 2>/dev/null)
|
|
1253
|
-
|
|
1254
|
-
if [ -z "${base_ic}" ]; then
|
|
1255
|
-
# Not an adapter IC — skip
|
|
1362
|
+
# Delegate to Python for Bash 3.2 compatibility (no associative arrays needed).
|
|
1363
|
+
# Merges 3 data sources: local confs, deployed adapter ICs, and registry.
|
|
1364
|
+
python3 - "${SCRIPT_DIR}" "${PROJECT_NAME}" "${ENDPOINT_NAME:-}" "${AWS_REGION}" <<'ADAPTER_LIST_PY'
|
|
1365
|
+
import sys, os, json, subprocess, glob
|
|
1366
|
+
|
|
1367
|
+
script_dir, project_name, endpoint_name, region = sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]
|
|
1368
|
+
|
|
1369
|
+
# ── Data source 1: Local adapter confs ──
|
|
1370
|
+
adapters = {} # name → {source, ic_name, technique, dataset}
|
|
1371
|
+
adapters_dir = os.path.join(script_dir, "adapters")
|
|
1372
|
+
if os.path.isdir(adapters_dir):
|
|
1373
|
+
for conf_path in sorted(glob.glob(os.path.join(adapters_dir, "*.conf"))):
|
|
1374
|
+
if os.path.basename(conf_path) == ".gitkeep":
|
|
1256
1375
|
continue
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
for
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
registry_count=$(echo "${registry_line}" | python3 -c "import sys,json; data=json.loads(sys.stdin.read()); print(len(data.get('adapters',[])))" 2>/dev/null || echo "0")
|
|
1340
|
-
|
|
1341
|
-
if [ "${registry_count}" -eq 0 ]; then
|
|
1342
|
-
echo " (none found)"
|
|
1343
|
-
return 0
|
|
1344
|
-
fi
|
|
1345
|
-
|
|
1346
|
-
printf ' %-10s%-12s%-14s%s\n' "VERSION" "TECHNIQUE" "CREATED" "PARENT MODEL"
|
|
1347
|
-
|
|
1348
|
-
local ri=0
|
|
1349
|
-
while [ "${ri}" -lt "${registry_count}" ]; do
|
|
1350
|
-
local rv rt rc rp
|
|
1351
|
-
rv=$(echo "${registry_line}" | python3 -c "import sys,json; data=json.loads(sys.stdin.read()); print(data['adapters'][${ri}].get('version','?'))" 2>/dev/null)
|
|
1352
|
-
rt=$(echo "${registry_line}" | python3 -c "import sys,json; data=json.loads(sys.stdin.read()); print(data['adapters'][${ri}].get('tuneTechnique','?'))" 2>/dev/null)
|
|
1353
|
-
rc=$(echo "${registry_line}" | python3 -c "import sys,json; data=json.loads(sys.stdin.read()); t=data['adapters'][${ri}].get('createdAt',''); print(t[:10] if t else '?')" 2>/dev/null)
|
|
1354
|
-
rp=$(echo "${registry_line}" | python3 -c "import sys,json; data=json.loads(sys.stdin.read()); a=data['adapters'][${ri}].get('parentModelVersionArn',''); print(a.split('/')[-2]+'/'+a.split('/')[-1] if '/' in a else a[:40])" 2>/dev/null)
|
|
1355
|
-
|
|
1356
|
-
printf ' %-10s%-12s%-14s%s\n' "v${rv}" "${rt}" "${rc}" "${rp}"
|
|
1357
|
-
ri=$((ri + 1))
|
|
1358
|
-
done
|
|
1359
|
-
|
|
1360
|
-
echo ""
|
|
1361
|
-
echo "Add from registry: ./do/adapter add <name> --from-registry [version-arn]"
|
|
1376
|
+
props = {}
|
|
1377
|
+
with open(conf_path) as f:
|
|
1378
|
+
for line in f:
|
|
1379
|
+
if line.startswith("export "):
|
|
1380
|
+
line = line[7:].strip()
|
|
1381
|
+
if "=" in line:
|
|
1382
|
+
k, v = line.split("=", 1)
|
|
1383
|
+
props[k] = v.strip('"').strip("'")
|
|
1384
|
+
name = props.get("ADAPTER_NAME", os.path.basename(conf_path).replace(".conf", ""))
|
|
1385
|
+
adapters[name] = {
|
|
1386
|
+
"source": props.get("ADAPTER_SOURCE", "s3"),
|
|
1387
|
+
"ic_name": props.get("ADAPTER_IC_NAME", ""),
|
|
1388
|
+
"technique": props.get("ADAPTER_TUNE_TECHNIQUE", props.get("ADAPTER_TECHNIQUE", "")),
|
|
1389
|
+
"dataset": props.get("ADAPTER_TUNE_DATASET", ""),
|
|
1390
|
+
"status": "not deployed",
|
|
1391
|
+
}
|
|
1392
|
+
|
|
1393
|
+
# ── Data source 2: Deployed adapter ICs ──
|
|
1394
|
+
if endpoint_name:
|
|
1395
|
+
try:
|
|
1396
|
+
result = subprocess.run(
|
|
1397
|
+
["aws", "sagemaker", "list-inference-components",
|
|
1398
|
+
"--endpoint-name-equals", endpoint_name, "--region", region],
|
|
1399
|
+
capture_output=True, text=True, timeout=15)
|
|
1400
|
+
if result.returncode == 0:
|
|
1401
|
+
ic_data = json.loads(result.stdout)
|
|
1402
|
+
for ic in ic_data.get("InferenceComponents", []):
|
|
1403
|
+
ic_name = ic["InferenceComponentName"]
|
|
1404
|
+
ic_status = ic.get("InferenceComponentStatus", "Unknown")
|
|
1405
|
+
# Check if adapter (has BaseInferenceComponentName) via describe
|
|
1406
|
+
desc = subprocess.run(
|
|
1407
|
+
["aws", "sagemaker", "describe-inference-component",
|
|
1408
|
+
"--inference-component-name", ic_name, "--region", region],
|
|
1409
|
+
capture_output=True, text=True, timeout=10)
|
|
1410
|
+
if desc.returncode == 0:
|
|
1411
|
+
detail = json.loads(desc.stdout)
|
|
1412
|
+
base_ic = detail.get("Specification", {}).get("BaseInferenceComponentName", "")
|
|
1413
|
+
if not base_ic:
|
|
1414
|
+
continue # Base IC, not adapter
|
|
1415
|
+
ic_status = detail.get("InferenceComponentStatus", ic_status)
|
|
1416
|
+
display = ic_name
|
|
1417
|
+
prefix = f"{project_name}-adapter-"
|
|
1418
|
+
if ic_name.startswith(prefix):
|
|
1419
|
+
display = ic_name[len(prefix):]
|
|
1420
|
+
if display in adapters:
|
|
1421
|
+
adapters[display]["status"] = ic_status
|
|
1422
|
+
else:
|
|
1423
|
+
adapters[display] = {"source": "external", "ic_name": ic_name,
|
|
1424
|
+
"technique": "", "dataset": "", "status": ic_status}
|
|
1425
|
+
except Exception:
|
|
1426
|
+
print("⚠️ Could not query endpoint — showing local confs only.", file=sys.stderr)
|
|
1427
|
+
|
|
1428
|
+
# ── Output ──
|
|
1429
|
+
if not adapters:
|
|
1430
|
+
print("No adapters found.")
|
|
1431
|
+
print("")
|
|
1432
|
+
print("Add one with: ./do/adapter add <name> --weights <s3-uri>")
|
|
1433
|
+
print(" ./do/adapter add <name> --from-hub <hf-repo-id>")
|
|
1434
|
+
print(" ./do/adapter add <name> --from-registry")
|
|
1435
|
+
sys.exit(0)
|
|
1436
|
+
|
|
1437
|
+
print(f"Adapters on endpoint: {endpoint_name or '<not deployed>'}")
|
|
1438
|
+
print("")
|
|
1439
|
+
|
|
1440
|
+
# Column widths
|
|
1441
|
+
max_n = max(len("NAME"), max(len(n) for n in adapters))
|
|
1442
|
+
max_s = max(len("SOURCE"), max(len(a["source"]) for a in adapters.values()))
|
|
1443
|
+
fmt = f" {{:<{max_n + 3}}}{{:<{max_s + 3}}}{{}} {{}}"
|
|
1444
|
+
|
|
1445
|
+
print(fmt.format("NAME", "SOURCE", "STATUS", ""))
|
|
1446
|
+
print(fmt.format("----", "------", "------", ""))
|
|
1447
|
+
for name in sorted(adapters):
|
|
1448
|
+
a = adapters[name]
|
|
1449
|
+
tune_info = ""
|
|
1450
|
+
if a["technique"]:
|
|
1451
|
+
tune_info = f"(tune: {a['technique']}" + (f" / {a['dataset']}" if a["dataset"] else "") + ")"
|
|
1452
|
+
print(fmt.format(name, a["source"], a["status"], tune_info))
|
|
1453
|
+
|
|
1454
|
+
print("")
|
|
1455
|
+
print("Add adapter: ./do/adapter add <name> --weights <s3-uri>")
|
|
1456
|
+
print("From registry: ./do/adapter add <name> --from-registry [version-arn]")
|
|
1457
|
+
ADAPTER_LIST_PY
|
|
1362
1458
|
}
|
|
1363
1459
|
|
|
1364
1460
|
_adapter_remove() {
|
package/templates/do/benchmark
CHANGED
|
@@ -69,6 +69,9 @@ done
|
|
|
69
69
|
# Query the tracked benchmark job, display status, and if completed:
|
|
70
70
|
# download results, display metrics, and write to Athena (if not already done).
|
|
71
71
|
if [ "${ARG_STATUS}" = true ]; then
|
|
72
|
+
# Resolve instance type: BENCHMARK_INSTANCE_TYPE (persisted by main flow) > INSTANCE_TYPE from config
|
|
73
|
+
_STATUS_INSTANCE_TYPE="${BENCHMARK_INSTANCE_TYPE:-${INSTANCE_TYPE:-}}"
|
|
74
|
+
|
|
72
75
|
JOB_NAME="${BENCHMARK_JOB_NAME:-}"
|
|
73
76
|
if [ -z "${JOB_NAME}" ]; then
|
|
74
77
|
echo "❌ No benchmark job tracked"
|
|
@@ -98,7 +101,7 @@ if [ "${ARG_STATUS}" = true ]; then
|
|
|
98
101
|
# Check if results already exist locally
|
|
99
102
|
PROJECT_ROOT="${SCRIPT_DIR}/.."
|
|
100
103
|
LOCAL_RESULTS_DIR="${PROJECT_ROOT}/benchmarks/${JOB_NAME}"
|
|
101
|
-
RESULTS_JSONL=$(find "${LOCAL_RESULTS_DIR}" -name "profile_export.jsonl" -type f 2>/dev/null | head -1)
|
|
104
|
+
RESULTS_JSONL=$(find "${LOCAL_RESULTS_DIR}" -name "profile_export.jsonl" -type f 2>/dev/null | head -1 || true)
|
|
102
105
|
|
|
103
106
|
if [ -z "${RESULTS_JSONL}" ]; then
|
|
104
107
|
echo ""
|
|
@@ -115,7 +118,7 @@ if [ "${ARG_STATUS}" = true ]; then
|
|
|
115
118
|
--region "${AWS_REGION}" --quiet
|
|
116
119
|
# Untar if output.tar.gz exists
|
|
117
120
|
tar_file=""
|
|
118
|
-
tar_file=$(find "${LOCAL_RESULTS_DIR}" -name "output.tar.gz" -type f 2>/dev/null | head -1)
|
|
121
|
+
tar_file=$(find "${LOCAL_RESULTS_DIR}" -name "output.tar.gz" -type f 2>/dev/null | head -1 || true)
|
|
119
122
|
if [ -n "${tar_file}" ]; then
|
|
120
123
|
# Detect whether ALL entries share a common leading directory prefix
|
|
121
124
|
_tar_prefix_count=""
|
|
@@ -129,7 +132,7 @@ if [ "${ARG_STATUS}" = true ]; then
|
|
|
129
132
|
fi
|
|
130
133
|
fi
|
|
131
134
|
# Re-search after extraction
|
|
132
|
-
RESULTS_JSONL=$(find "${LOCAL_RESULTS_DIR}" -name "profile_export.jsonl" -type f 2>/dev/null | head -1)
|
|
135
|
+
RESULTS_JSONL=$(find "${LOCAL_RESULTS_DIR}" -name "profile_export.jsonl" -type f 2>/dev/null | head -1 || true)
|
|
133
136
|
echo " ✅ Results downloaded to: benchmarks/${JOB_NAME}/"
|
|
134
137
|
fi
|
|
135
138
|
else
|
|
@@ -142,7 +145,7 @@ if [ "${ARG_STATUS}" = true ]; then
|
|
|
142
145
|
if [ -n "${RESULTS_JSONL}" ] && [ -f "${RESULTS_JSONL}" ]; then
|
|
143
146
|
_WRITER_INPUT="${RESULTS_JSONL}"
|
|
144
147
|
else
|
|
145
|
-
_WRITER_INPUT=$(find "${LOCAL_RESULTS_DIR}" -name "profile_export_aiperf.json" -type f 2>/dev/null | head -1)
|
|
148
|
+
_WRITER_INPUT=$(find "${LOCAL_RESULTS_DIR}" -name "profile_export_aiperf.json" -type f 2>/dev/null | head -1 || true)
|
|
146
149
|
fi
|
|
147
150
|
|
|
148
151
|
if [ -n "${_WRITER_INPUT}" ]; then
|
|
@@ -155,7 +158,8 @@ if [ "${ARG_STATUS}" = true ]; then
|
|
|
155
158
|
--workload "${BENCHMARK_WORKLOAD:-manual}" \
|
|
156
159
|
--concurrency "${BENCHMARK_CONCURRENCY:-2}" \
|
|
157
160
|
--bucket "${CI_BENCHMARK_RESULTS_BUCKET}" \
|
|
158
|
-
--region "${AWS_REGION:-${REGION}}" \
|
|
161
|
+
--region "${AWS_REGION:-${REGION:-us-east-1}}" \
|
|
162
|
+
${_STATUS_INSTANCE_TYPE:+--instance-type "${_STATUS_INSTANCE_TYPE}"} \
|
|
159
163
|
${ADAPTER_ARG:+--adapter-name "${ADAPTER_ARG}"}; then
|
|
160
164
|
echo " ✅ Results persisted to Athena"
|
|
161
165
|
else
|
|
@@ -561,6 +565,7 @@ print(f'Combined {n_metrics} concurrency level results')
|
|
|
561
565
|
--workload "${BENCHMARK_WORKLOAD:-manual}" \
|
|
562
566
|
--bucket "${CI_BENCHMARK_RESULTS_BUCKET}" \
|
|
563
567
|
--region "${AWS_REGION:-${REGION}}" \
|
|
568
|
+
${RESOLVED_INSTANCE_TYPE:+--instance-type "${RESOLVED_INSTANCE_TYPE}"} \
|
|
564
569
|
${ADAPTER_ARG:+--adapter-name "${ADAPTER_ARG}"}; then
|
|
565
570
|
echo "✅ Multi-level benchmark results persisted to S3"
|
|
566
571
|
else
|
|
@@ -801,6 +806,55 @@ fi
|
|
|
801
806
|
|
|
802
807
|
echo "✅ Endpoint is InService: ${ENDPOINT_NAME}"
|
|
803
808
|
|
|
809
|
+
# ── Resolve actual instance type from endpoint ────────────────────────────────
|
|
810
|
+
# For heterogeneous instance pools, INSTANCE_TYPE in do/config may not reflect
|
|
811
|
+
# the actual provisioned instance. Query the endpoint to determine the real type.
|
|
812
|
+
RESOLVED_INSTANCE_TYPE=""
|
|
813
|
+
_EP_JSON=$(aws sagemaker describe-endpoint \
|
|
814
|
+
--endpoint-name "${ENDPOINT_NAME}" \
|
|
815
|
+
--region "${AWS_REGION}" \
|
|
816
|
+
--output json 2>/dev/null) || _EP_JSON=""
|
|
817
|
+
|
|
818
|
+
if [ -n "${_EP_JSON}" ]; then
|
|
819
|
+
# Try InstanceType from the primary variant runtime response
|
|
820
|
+
RESOLVED_INSTANCE_TYPE=$(echo "${_EP_JSON}" | python3 -c "
|
|
821
|
+
import sys, json
|
|
822
|
+
try:
|
|
823
|
+
ep = json.load(sys.stdin)
|
|
824
|
+
variant = ep.get('ProductionVariants', [{}])[0]
|
|
825
|
+
it = variant.get('CurrentInstanceType') or variant.get('InstanceType') or ''
|
|
826
|
+
if it:
|
|
827
|
+
print(it)
|
|
828
|
+
else:
|
|
829
|
+
# Fall back to endpoint config for pool-based endpoints
|
|
830
|
+
print('')
|
|
831
|
+
except:
|
|
832
|
+
print('')
|
|
833
|
+
" 2>/dev/null) || RESOLVED_INSTANCE_TYPE=""
|
|
834
|
+
|
|
835
|
+
# If still empty, query endpoint config for InstancePools
|
|
836
|
+
if [ -z "${RESOLVED_INSTANCE_TYPE}" ]; then
|
|
837
|
+
_EC_NAME=$(echo "${_EP_JSON}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('EndpointConfigName',''))" 2>/dev/null) || _EC_NAME=""
|
|
838
|
+
if [ -n "${_EC_NAME}" ]; then
|
|
839
|
+
RESOLVED_INSTANCE_TYPE=$(aws sagemaker describe-endpoint-config \
|
|
840
|
+
--endpoint-config-name "${_EC_NAME}" \
|
|
841
|
+
--region "${AWS_REGION}" \
|
|
842
|
+
--query 'ProductionVariants[0].InstanceType' \
|
|
843
|
+
--output text 2>/dev/null) || RESOLVED_INSTANCE_TYPE=""
|
|
844
|
+
# 'None' is returned as literal text when the field is null
|
|
845
|
+
[ "${RESOLVED_INSTANCE_TYPE}" = "None" ] && RESOLVED_INSTANCE_TYPE=""
|
|
846
|
+
fi
|
|
847
|
+
fi
|
|
848
|
+
fi
|
|
849
|
+
|
|
850
|
+
# Final fallback: use INSTANCE_TYPE from do/config
|
|
851
|
+
RESOLVED_INSTANCE_TYPE="${RESOLVED_INSTANCE_TYPE:-${INSTANCE_TYPE:-}}"
|
|
852
|
+
|
|
853
|
+
# Persist to do/config for --status (endpoint may be gone by then)
|
|
854
|
+
if [ -n "${RESOLVED_INSTANCE_TYPE}" ]; then
|
|
855
|
+
_update_benchmark_var "BENCHMARK_INSTANCE_TYPE" "${RESOLVED_INSTANCE_TYPE}"
|
|
856
|
+
fi
|
|
857
|
+
|
|
804
858
|
# ── Pre-flight check: Ensure S3 output bucket exists ──────────────────────────
|
|
805
859
|
echo "🔍 Pre-flight: Checking S3 output bucket..."
|
|
806
860
|
|
|
@@ -1371,6 +1425,7 @@ except Exception as e:
|
|
|
1371
1425
|
--concurrency "${BENCHMARK_CONCURRENCY}" \
|
|
1372
1426
|
--bucket "${CI_BENCHMARK_RESULTS_BUCKET}" \
|
|
1373
1427
|
--region "${AWS_REGION:-${REGION}}" \
|
|
1428
|
+
${RESOLVED_INSTANCE_TYPE:+--instance-type "${RESOLVED_INSTANCE_TYPE}"} \
|
|
1374
1429
|
${ADAPTER_ARG:+--adapter-name "${ADAPTER_ARG}"}; then
|
|
1375
1430
|
echo "✅ Benchmark results persisted to S3"
|
|
1376
1431
|
else
|