@aws/ml-container-creator 0.13.5 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/parameter-schema-v2.json +32 -4
- package/infra/ci-harness/lib/ci-harness-stack.ts +13 -5
- package/infra/ci-harness/package-lock.json +121 -111
- package/infra/ci-harness/package.json +1 -1
- package/package.json +2 -2
- package/servers/instance-sizer/index.js +72 -4
- package/servers/instance-sizer/lib/model-resolver.js +28 -2
- package/src/app.js +15 -0
- package/src/lib/config-loader.js +18 -0
- package/src/lib/config-manager.js +6 -1
- package/src/lib/dataset-slug.js +152 -0
- package/src/lib/generated/cli-options.js +9 -3
- package/src/lib/generated/parameter-matrix.js +14 -3
- package/src/lib/generated/validation-rules.js +1 -1
- package/src/lib/mcp-query-runner.js +6 -0
- package/src/lib/prompt-runner.js +5 -0
- package/src/lib/prompts/feature-prompts.js +1 -1
- package/src/lib/template-manager.js +0 -7
- package/src/lib/template-variable-resolver.js +51 -1
- package/src/lib/tune-config-state.js +14 -1
- package/templates/do/.benchmark_writer.py +9 -0
- package/templates/do/.register_helper.py +1163 -0
- package/templates/do/.tune_helper.py +168 -2
- package/templates/do/__pycache__/.adapter_helper.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.tune_helper.cpython-312.pyc +0 -0
- package/templates/do/adapter +319 -27
- package/templates/do/add-ic +85 -3
- package/templates/do/benchmark +28 -8
- package/templates/do/config +20 -0
- package/templates/do/lib/inference-component.sh +56 -3
- package/templates/do/register +552 -6
- package/templates/do/test +12 -2
- package/templates/do/tune +201 -6
package/templates/do/tune
CHANGED
|
@@ -46,10 +46,13 @@ ARG_DRY_RUN=false
|
|
|
46
46
|
ARG_LIST_MODELS=false
|
|
47
47
|
ARG_NO_STALE_WARNING=false
|
|
48
48
|
ARG_DISCOVER=false
|
|
49
|
+
ARG_LIST_DATASETS=false
|
|
49
50
|
ARG_DISCOVER_FILTER=""
|
|
50
51
|
ARG_COLUMN_MAP=""
|
|
51
52
|
ARG_TAKE=""
|
|
52
53
|
ARG_ACCEPT_EULA=false
|
|
54
|
+
ARG_DATASET_NAME=""
|
|
55
|
+
ARG_EVALUATOR_NAME=""
|
|
53
56
|
|
|
54
57
|
|
|
55
58
|
# ── _parse_args() ─────────────────────────────────────────────────────────────
|
|
@@ -148,6 +151,7 @@ _parse_args() {
|
|
|
148
151
|
--help|-h) ARG_HELP=true; shift ;;
|
|
149
152
|
--dry-run) ARG_DRY_RUN=true; shift ;;
|
|
150
153
|
--list-models) ARG_LIST_MODELS=true; shift ;;
|
|
154
|
+
--list-datasets) ARG_LIST_DATASETS=true; shift ;;
|
|
151
155
|
--no-stale-warning) ARG_NO_STALE_WARNING=true; shift ;;
|
|
152
156
|
--column-map)
|
|
153
157
|
if [ -z "${2:-}" ]; then
|
|
@@ -169,6 +173,18 @@ _parse_args() {
|
|
|
169
173
|
exit 1
|
|
170
174
|
fi
|
|
171
175
|
ARG_TAKE="$2"; shift 2 ;;
|
|
176
|
+
--dataset-name)
|
|
177
|
+
if [ -z "${2:-}" ]; then
|
|
178
|
+
echo "❌ --dataset-name requires a registered dataset name"
|
|
179
|
+
exit 1
|
|
180
|
+
fi
|
|
181
|
+
ARG_DATASET_NAME="$2"; shift 2 ;;
|
|
182
|
+
--evaluator-name)
|
|
183
|
+
if [ -z "${2:-}" ]; then
|
|
184
|
+
echo "❌ --evaluator-name requires a registered evaluator name"
|
|
185
|
+
exit 1
|
|
186
|
+
fi
|
|
187
|
+
ARG_EVALUATOR_NAME="$2"; shift 2 ;;
|
|
172
188
|
*)
|
|
173
189
|
echo "❌ Unknown option: $1"
|
|
174
190
|
echo " Run ./do/tune --help for usage."
|
|
@@ -775,10 +791,51 @@ else:
|
|
|
775
791
|
_validate_dataset() {
|
|
776
792
|
local dataset="${ARG_DATASET}"
|
|
777
793
|
|
|
794
|
+
# If --dataset-name is set, resolve from registry (AC-2b.4)
|
|
795
|
+
# --dataset-name takes precedence over --dataset for named registry lookup
|
|
796
|
+
if [ -n "${ARG_DATASET_NAME}" ]; then
|
|
797
|
+
echo "🔍 Resolving dataset '${ARG_DATASET_NAME}' from registry..."
|
|
798
|
+
local resolve_result
|
|
799
|
+
resolve_result=$(python3 "${SCRIPT_DIR}/.register_helper.py" resolve-dataset \
|
|
800
|
+
--name "${ARG_DATASET_NAME}" 2>/dev/null) || resolve_result=""
|
|
801
|
+
|
|
802
|
+
if [ -n "${resolve_result}" ]; then
|
|
803
|
+
local resolved_uri
|
|
804
|
+
resolved_uri=$(echo "${resolve_result}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('s3_uri',''))" 2>/dev/null) || resolved_uri=""
|
|
805
|
+
if [ -n "${resolved_uri}" ]; then
|
|
806
|
+
echo " Resolved to: ${resolved_uri}"
|
|
807
|
+
dataset="${resolved_uri}"
|
|
808
|
+
ARG_DATASET="${resolved_uri}"
|
|
809
|
+
else
|
|
810
|
+
echo "❌ Dataset '${ARG_DATASET_NAME}' not found in registry"
|
|
811
|
+
echo " Register it first: ./do/register --dataset --dataset-name ${ARG_DATASET_NAME} --dataset-s3-uri s3://..."
|
|
812
|
+
exit 1
|
|
813
|
+
fi
|
|
814
|
+
else
|
|
815
|
+
echo "❌ Failed to resolve dataset '${ARG_DATASET_NAME}' from registry"
|
|
816
|
+
echo " Register it first: ./do/register --dataset --dataset-name ${ARG_DATASET_NAME} --dataset-s3-uri s3://..."
|
|
817
|
+
exit 1
|
|
818
|
+
fi
|
|
819
|
+
fi
|
|
820
|
+
|
|
821
|
+
# If --dataset value is not an S3 URI or HF reference, treat as a registry name
|
|
822
|
+
if [ -z "${ARG_DATASET_NAME}" ] && [ -n "${dataset}" ] && \
|
|
823
|
+
[[ "${dataset}" != s3://* ]] && [[ "${dataset}" != hf://* ]]; then
|
|
824
|
+
# Looks like a name — resolve from registry
|
|
825
|
+
ARG_DATASET_NAME="${dataset}"
|
|
826
|
+
dataset=""
|
|
827
|
+
fi
|
|
828
|
+
|
|
778
829
|
if [ -z "${dataset}" ]; then
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
830
|
+
if [ -n "${ARG_DATASET_NAME}" ]; then
|
|
831
|
+
# Name-based resolution happens below via resolve-dataset
|
|
832
|
+
:
|
|
833
|
+
else
|
|
834
|
+
echo "❌ --dataset is required"
|
|
835
|
+
echo " Provide an S3 URI (s3://bucket/path.jsonl), HF reference (hf://org/name), or registered name"
|
|
836
|
+
echo " Run ./do/tune --list-datasets to see available registered datasets."
|
|
837
|
+
exit 1
|
|
838
|
+
fi
|
|
782
839
|
fi
|
|
783
840
|
|
|
784
841
|
# Determine dataset type
|
|
@@ -908,11 +965,10 @@ _validate_dataset() {
|
|
|
908
965
|
fi
|
|
909
966
|
|
|
910
967
|
RESOLVED_DATASET_S3_URI=$(echo "${stage_result}" | python3 -c "import sys,json; print(json.load(sys.stdin)['s3_uri'])" 2>/dev/null)
|
|
911
|
-
|
|
912
|
-
num_records=$(echo "${stage_result}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('num_records',0))" 2>/dev/null) || num_records="0"
|
|
968
|
+
RESOLVED_DATASET_ROW_COUNT=$(echo "${stage_result}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('num_records',0))" 2>/dev/null) || RESOLVED_DATASET_ROW_COUNT="0"
|
|
913
969
|
|
|
914
970
|
echo " ✅ Staged to: ${RESOLVED_DATASET_S3_URI}"
|
|
915
|
-
echo " Records: ${num_records}"
|
|
971
|
+
echo " Records: ${RESOLVED_DATASET_ROW_COUNT}"
|
|
916
972
|
echo ""
|
|
917
973
|
|
|
918
974
|
else
|
|
@@ -1126,6 +1182,37 @@ print(entry.get('provider', ''))
|
|
|
1126
1182
|
if [ -n "${ARG_REWARD_PROMPT}" ]; then
|
|
1127
1183
|
submit_args+=(--reward-prompt "${ARG_REWARD_PROMPT}")
|
|
1128
1184
|
fi
|
|
1185
|
+
|
|
1186
|
+
# Resolve evaluator from registry if --evaluator-name is set (AC-2c.3, AC-2c.4)
|
|
1187
|
+
if [ -n "${ARG_EVALUATOR_NAME}" ] && [ -z "${ARG_REWARD_FUNCTION}" ] && [ -z "${ARG_REWARD_PROMPT}" ]; then
|
|
1188
|
+
echo "🔍 Resolving evaluator '${ARG_EVALUATOR_NAME}' from registry..."
|
|
1189
|
+
local ev_resolve_result
|
|
1190
|
+
ev_resolve_result=$(python3 "${SCRIPT_DIR}/.register_helper.py" resolve-evaluator \
|
|
1191
|
+
--name "${ARG_EVALUATOR_NAME}" 2>/dev/null) || ev_resolve_result=""
|
|
1192
|
+
|
|
1193
|
+
if [ -n "${ev_resolve_result}" ]; then
|
|
1194
|
+
local ev_type ev_arn_or_uri
|
|
1195
|
+
ev_type=$(echo "${ev_resolve_result}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('type',''))" 2>/dev/null) || ev_type=""
|
|
1196
|
+
ev_arn_or_uri=$(echo "${ev_resolve_result}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('arn_or_uri',''))" 2>/dev/null) || ev_arn_or_uri=""
|
|
1197
|
+
|
|
1198
|
+
if [ -n "${ev_arn_or_uri}" ]; then
|
|
1199
|
+
echo " Resolved evaluator: ${ev_type} → ${ev_arn_or_uri}"
|
|
1200
|
+
if [ "${ev_type}" = "lambda" ]; then
|
|
1201
|
+
submit_args+=(--reward-function "${ev_arn_or_uri}")
|
|
1202
|
+
else
|
|
1203
|
+
submit_args+=(--reward-prompt "${ev_arn_or_uri}")
|
|
1204
|
+
fi
|
|
1205
|
+
else
|
|
1206
|
+
echo "⚠️ Evaluator '${ARG_EVALUATOR_NAME}' not found in registry"
|
|
1207
|
+
echo " Register it first: ./do/register --evaluator --evaluator-name ${ARG_EVALUATOR_NAME} ..."
|
|
1208
|
+
exit 1
|
|
1209
|
+
fi
|
|
1210
|
+
else
|
|
1211
|
+
echo "⚠️ Failed to resolve evaluator '${ARG_EVALUATOR_NAME}' from registry"
|
|
1212
|
+
echo " Register it first: ./do/register --evaluator --evaluator-name ${ARG_EVALUATOR_NAME} ..."
|
|
1213
|
+
exit 1
|
|
1214
|
+
fi
|
|
1215
|
+
fi
|
|
1129
1216
|
if [ "${ARG_ACCEPT_EULA}" = true ]; then
|
|
1130
1217
|
submit_args+=(--accept-eula)
|
|
1131
1218
|
fi
|
|
@@ -1217,6 +1304,14 @@ print(entry.get('provider', ''))
|
|
|
1217
1304
|
_update_config_var "TUNE_TECHNIQUE" "${ARG_TECHNIQUE}"
|
|
1218
1305
|
_update_config_var "TUNE_TRAINING_TYPE" "${ARG_TRAINING_TYPE}"
|
|
1219
1306
|
_update_config_var "TUNE_DATASET_PATH" "${ARG_DATASET}"
|
|
1307
|
+
_update_config_var "TUNE_DATASET_S3_URI" "${RESOLVED_DATASET_S3_URI:-}"
|
|
1308
|
+
_update_config_var "TUNE_DATASET_ROW_COUNT" "${RESOLVED_DATASET_ROW_COUNT:-0}"
|
|
1309
|
+
_update_config_var "TUNE_DATASET_SOURCE" "${ARG_DATASET}"
|
|
1310
|
+
_update_config_var "TUNE_LAST_JOB_NAME" "${JOB_NAME}"
|
|
1311
|
+
# Technique-specific dataset vars (allows --from-tune <technique> resolution)
|
|
1312
|
+
_update_config_var "TUNE_DATASET_S3_URI_${technique_upper}" "${RESOLVED_DATASET_S3_URI:-}"
|
|
1313
|
+
_update_config_var "TUNE_DATASET_ROW_COUNT_${technique_upper}" "${RESOLVED_DATASET_ROW_COUNT:-0}"
|
|
1314
|
+
_update_config_var "TUNE_DATASET_SOURCE_${technique_upper}" "${ARG_DATASET}"
|
|
1220
1315
|
}
|
|
1221
1316
|
|
|
1222
1317
|
|
|
@@ -1292,6 +1387,56 @@ _handle_interrupt() {
|
|
|
1292
1387
|
exit 130
|
|
1293
1388
|
}
|
|
1294
1389
|
|
|
1390
|
+
# ── _derive_dataset_slug() ────────────────────────────────────────────────────
# Derive a short slug from the dataset argument for config variable naming.
#
# Arguments: $1 - dataset reference (hf://org/name[?query], s3://bucket/key,
#                 or any other path-like string); may be empty.
# Outputs:   the slug on stdout (an empty line when no slug can be derived).
#
# Steps, in order:
#   1. Pick the base name: for hf:// the last path component with the query
#      string removed; otherwise the last path component minus its final
#      ".ext" suffix.
#   2. Lowercase, drop every character outside [a-z0-9-].
#   3. Collapse runs of hyphens, strip one leading and one trailing hyphen.
#   4. Truncate to 20 chars, then strip a hyphen exposed by the truncation.
#
# Examples:
#   hf://tatsu-lab/alpaca          → alpaca
#   hf://Open-Orca/OpenOrca        → openorca
#   s3://bucket/path/train.jsonl   → train
#   s3://bucket/path/file.parquet  → file
_derive_dataset_slug() {
    local dataset="${1:-}"
    local slug=""

    if [ -z "${dataset}" ]; then
        echo ""
        return
    fi

    if [[ "${dataset}" == hf://* ]]; then
        # HuggingFace format: hf://org/name or hf://org/name?file=...
        local hf_path="${dataset#hf://}"
        # Drop the query string first so a '/' inside it cannot be mistaken
        # for a path separator.
        hf_path="${hf_path%%\?*}"
        # Keep the dataset name, not the org.
        slug="${hf_path##*/}"
    else
        # s3:// URIs and anything else: last path component without its
        # final extension.
        local filename="${dataset##*/}"
        slug="${filename%.*}"
    fi

    # printf instead of echo: echo would interpret a value such as "-n" or
    # "-e" as an option and emit nothing, silently producing an empty slug.
    # LC_ALL=C keeps the character classes and the a-z range ASCII-only, so
    # the result is safe to embed in a config variable name.
    slug=$(printf '%s\n' "${slug}" \
        | LC_ALL=C tr '[:upper:]' '[:lower:]' \
        | LC_ALL=C sed -e 's/[^a-z0-9-]//g' \
                       -e 's/-\{2,\}/-/g' \
                       -e 's/^-//' \
                       -e 's/-$//')

    # Truncate to 20 chars, then remove a hyphen left dangling by the cut.
    slug="${slug:0:20}"
    slug="${slug%-}"

    printf '%s\n' "${slug}"
}
|
|
1439
|
+
|
|
1295
1440
|
# ── _handle_completion() ──────────────────────────────────────────────────────
|
|
1296
1441
|
# Store output paths, detect output type, print next-step commands.
|
|
1297
1442
|
_handle_completion() {
|
|
@@ -1351,6 +1496,14 @@ _handle_completion() {
|
|
|
1351
1496
|
# Store output paths in config
|
|
1352
1497
|
if [ "${output_type}" = "adapter" ]; then
|
|
1353
1498
|
_update_config_var "TUNE_ADAPTER_PATH_${technique_upper}" "${artifact_path}"
|
|
1499
|
+
# Write dataset-specific adapter path if dataset slug is available
|
|
1500
|
+
local dataset_slug
|
|
1501
|
+
dataset_slug=$(_derive_dataset_slug "${ARG_DATASET:-}")
|
|
1502
|
+
if [ -n "${dataset_slug}" ]; then
|
|
1503
|
+
local slug_upper
|
|
1504
|
+
slug_upper=$(echo "${dataset_slug}" | tr '[:lower:]' '[:upper:]' | sed 's/-/_/g')
|
|
1505
|
+
_update_config_var "TUNE_ADAPTER_PATH_${technique_upper}_${slug_upper}" "${artifact_path}"
|
|
1506
|
+
fi
|
|
1354
1507
|
else
|
|
1355
1508
|
_update_config_var "TUNE_MODEL_PATH_${technique_upper}" "${artifact_path}"
|
|
1356
1509
|
fi
|
|
@@ -1361,9 +1514,14 @@ _handle_completion() {
|
|
|
1361
1514
|
echo "📋 Next steps:"
|
|
1362
1515
|
echo ""
|
|
1363
1516
|
if [ "${output_type}" = "adapter" ]; then
|
|
1517
|
+
local dataset_slug
|
|
1518
|
+
dataset_slug=$(_derive_dataset_slug "${ARG_DATASET:-}")
|
|
1364
1519
|
echo " Deploy as LoRA adapter:"
|
|
1365
1520
|
echo " ./do/adapter add tuned-${ARG_TECHNIQUE} --from-tune"
|
|
1366
1521
|
echo " ./do/adapter add tuned-${ARG_TECHNIQUE} --from-tune ${ARG_TECHNIQUE}"
|
|
1522
|
+
if [ -n "${dataset_slug}" ]; then
|
|
1523
|
+
echo " ./do/adapter add tuned-${ARG_TECHNIQUE}-${dataset_slug} --from-tune ${ARG_TECHNIQUE}-${dataset_slug}"
|
|
1524
|
+
fi
|
|
1367
1525
|
echo " ./do/adapter add tuned-${ARG_TECHNIQUE} --weights ${artifact_path}"
|
|
1368
1526
|
else
|
|
1369
1527
|
echo " Deploy as new inference component:"
|
|
@@ -1460,6 +1618,43 @@ if [ "${ARG_DISCOVER}" = true ]; then
|
|
|
1460
1618
|
exit 0
|
|
1461
1619
|
fi
|
|
1462
1620
|
|
|
1621
|
+
# Handle --list-datasets (before requiring --technique and --dataset)
|
|
1622
|
+
if [ "${ARG_LIST_DATASETS}" = true ]; then
|
|
1623
|
+
echo ""
|
|
1624
|
+
echo "📦 Registered datasets:"
|
|
1625
|
+
echo ""
|
|
1626
|
+
|
|
1627
|
+
_ds_output=$(python3 "${SCRIPT_DIR}/.register_helper.py" list-datasets 2>/dev/null) || _ds_output=""
|
|
1628
|
+
_ds_json=$(echo "${_ds_output}" | grep -E '^\{' | tail -1)
|
|
1629
|
+
|
|
1630
|
+
if [ -n "${_ds_json}" ]; then
|
|
1631
|
+
_ds_count=$(echo "${_ds_json}" | python3 -c "import sys,json; print(len(json.load(sys.stdin).get('datasets',[])))" 2>/dev/null) || _ds_count=0
|
|
1632
|
+
if [ "${_ds_count}" -gt 0 ]; then
|
|
1633
|
+
printf " %-25s %-10s %-8s %s\n" "NAME" "TECHNIQUE" "ROWS" "S3 URI"
|
|
1634
|
+
printf " %-25s %-10s %-8s %s\n" "----" "---------" "----" "------"
|
|
1635
|
+
echo "${_ds_json}" | python3 -c "
|
|
1636
|
+
import sys, json
|
|
1637
|
+
data = json.load(sys.stdin)
|
|
1638
|
+
for ds in data.get('datasets', []):
|
|
1639
|
+
name = ds.get('name','')[:25]
|
|
1640
|
+
tech = ds.get('technique','')[:10]
|
|
1641
|
+
rows = str(ds.get('row_count',''))[:8]
|
|
1642
|
+
uri = ds.get('s3_uri','')
|
|
1643
|
+
print(f' {name:<25} {tech:<10} {rows:<8} {uri}')
|
|
1644
|
+
" 2>/dev/null
|
|
1645
|
+
else
|
|
1646
|
+
echo " (none registered)"
|
|
1647
|
+
fi
|
|
1648
|
+
else
|
|
1649
|
+
echo " (none registered)"
|
|
1650
|
+
fi
|
|
1651
|
+
echo ""
|
|
1652
|
+
echo " Register: ./do/register dataset <name> --s3-uri <uri> --technique <sft|dpo>"
|
|
1653
|
+
echo " Use: ./do/tune --technique sft --dataset <name>"
|
|
1654
|
+
echo ""
|
|
1655
|
+
exit 0
|
|
1656
|
+
fi
|
|
1657
|
+
|
|
1463
1658
|
# Validate required arguments for job submission
|
|
1464
1659
|
if [ -z "${ARG_TECHNIQUE}" ]; then
|
|
1465
1660
|
echo "❌ --technique is required"
|