@aws/ml-container-creator 0.13.4 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -5
- package/config/parameter-schema-v2.json +32 -4
- package/infra/ci-harness/lib/ci-harness-stack.ts +13 -5
- package/infra/ci-harness/package-lock.json +122 -116
- package/infra/ci-harness/package.json +1 -1
- package/package.json +5 -3
- package/pyproject.toml +21 -0
- package/requirements.txt +19 -0
- package/servers/instance-sizer/index.js +72 -4
- package/servers/instance-sizer/lib/model-resolver.js +28 -2
- package/src/app.js +17 -0
- package/src/lib/bootstrap-command-handler.js +33 -23
- package/src/lib/config-loader.js +18 -0
- package/src/lib/config-manager.js +6 -1
- package/src/lib/dataset-slug.js +152 -0
- package/src/lib/generated/cli-options.js +9 -3
- package/src/lib/generated/parameter-matrix.js +14 -3
- package/src/lib/generated/validation-rules.js +1 -1
- package/src/lib/mcp-query-runner.js +6 -0
- package/src/lib/prompt-runner.js +5 -0
- package/src/lib/prompts/feature-prompts.js +1 -1
- package/src/lib/template-manager.js +0 -7
- package/src/lib/template-variable-resolver.js +51 -1
- package/src/lib/tune-config-state.js +14 -1
- package/templates/do/.adapter_helper.py +451 -0
- package/templates/do/.benchmark_writer.py +22 -0
- package/templates/do/.register_helper.py +1163 -0
- package/templates/do/.stage_helper.py +419 -0
- package/templates/do/.tune_helper.py +379 -65
- package/templates/do/__pycache__/.adapter_helper.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.tune_helper.cpython-312.pyc +0 -0
- package/templates/do/adapter +427 -27
- package/templates/do/add-ic +85 -3
- package/templates/do/benchmark +173 -15
- package/templates/do/config +24 -0
- package/templates/do/lib/inference-component.sh +56 -3
- package/templates/do/lib/profile.sh +5 -0
- package/templates/do/register +552 -6
- package/templates/do/stage +91 -272
- package/templates/do/test +12 -2
- package/templates/do/tune +264 -12
package/templates/do/register
CHANGED
|
@@ -19,7 +19,61 @@ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE_ecrRepositoryName:-ml-con
|
|
|
19
19
|
# Register deployment to the deployment registry
|
|
20
20
|
# ============================================================
|
|
21
21
|
|
|
22
|
-
#
|
|
22
|
+
# ── Subcommand routing ────────────────────────────────────────────────────────
|
|
23
|
+
# Supports: model (default), dataset, evaluator
|
|
24
|
+
# Backward-compat: --dataset and --evaluator flags still work
|
|
25
|
+
|
|
26
|
+
_show_usage() {
|
|
27
|
+
echo "Usage: ./do/register [model] [options] Register the deployed model to MPG"
|
|
28
|
+
echo " ./do/register dataset <name> [options] Register a dataset to the registry"
|
|
29
|
+
echo " ./do/register evaluator <name> [options] Register an evaluator to the registry"
|
|
30
|
+
echo " ./do/register --benchmark-status <completed|failed|in-progress> --benchmark-run-id <run-id>"
|
|
31
|
+
echo ""
|
|
32
|
+
echo "Subcommands:"
|
|
33
|
+
echo " model (default) Register the current deployment as a Model Package version"
|
|
34
|
+
echo " dataset Register a training dataset for reuse across tuning jobs"
|
|
35
|
+
echo " evaluator Register a reward function or preference model for RL tuning"
|
|
36
|
+
echo ""
|
|
37
|
+
echo "Dataset options:"
|
|
38
|
+
echo " <name> Dataset name (required, positional)"
|
|
39
|
+
echo " --s3-uri <uri> S3 URI of the dataset file (required)"
|
|
40
|
+
echo " --format <fmt> Format: jsonl, parquet, csv (default: jsonl)"
|
|
41
|
+
echo " --technique <tech> Technique: sft, dpo, rlaif, rlvr (default: sft)"
|
|
42
|
+
echo " --row-count <n> Number of records"
|
|
43
|
+
echo " --column-schema <j> Column schema as JSON string"
|
|
44
|
+
echo ""
|
|
45
|
+
echo "Evaluator options:"
|
|
46
|
+
echo " <name> Evaluator name (required, positional)"
|
|
47
|
+
echo " --type <type> Type: lambda or model (required)"
|
|
48
|
+
echo " --arn <arn> Lambda ARN or model S3 URI (required)"
|
|
49
|
+
echo " --technique <tech> Technique: rlvr or rlaif (required)"
|
|
50
|
+
echo " --description <d> Optional description"
|
|
51
|
+
echo ""
|
|
52
|
+
echo "Model options:"
|
|
53
|
+
echo " --notes \"text\" Optional deployment notes"
|
|
54
|
+
echo " --status <s> Deployment status: success|partial|failed (default: success)"
|
|
55
|
+
echo " --json Output JSON"
|
|
56
|
+
echo " --ci CI mode (writes to DynamoDB)"
|
|
57
|
+
echo ""
|
|
58
|
+
echo "Examples:"
|
|
59
|
+
echo " ./do/register # Register model (default)"
|
|
60
|
+
echo " ./do/register dataset alpaca-sft-1k --s3-uri s3://bucket/data.jsonl --technique sft --row-count 1000"
|
|
61
|
+
echo " ./do/register evaluator my-reward --type lambda --arn arn:aws:lambda:... --technique rlvr"
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
# Detect subcommand or legacy flags
|
|
65
|
+
SUBCOMMAND="model"
|
|
66
|
+
case "${1:-}" in
|
|
67
|
+
dataset) SUBCOMMAND="dataset"; shift ;;
|
|
68
|
+
evaluator) SUBCOMMAND="evaluator"; shift ;;
|
|
69
|
+
model) SUBCOMMAND="model"; shift ;;
|
|
70
|
+
--help|-h) _show_usage; exit 0 ;;
|
|
71
|
+
--dataset) SUBCOMMAND="dataset"; shift ;; # backward compat
|
|
72
|
+
--evaluator) SUBCOMMAND="evaluator"; shift ;; # backward compat
|
|
73
|
+
esac
|
|
74
|
+
|
|
75
|
+
# ── Parse remaining arguments per subcommand ──────────────────────────────────
|
|
76
|
+
|
|
23
77
|
NOTES=""
|
|
24
78
|
STATUS="success"
|
|
25
79
|
PROJECT_FLAG=""
|
|
@@ -29,9 +83,158 @@ CI_TABLE_NAME="${CI_TABLE_NAME:-mlcc-ci-table}"
|
|
|
29
83
|
CI_BUILD_STRATEGY="codebuild-submit"
|
|
30
84
|
BENCHMARK_STATUS=""
|
|
31
85
|
BENCHMARK_RUN_ID=""
|
|
86
|
+
DATASET_NAME=""
|
|
87
|
+
DATASET_S3_URI=""
|
|
88
|
+
DATASET_FORMAT="jsonl"
|
|
89
|
+
DATASET_TECHNIQUE="sft"
|
|
90
|
+
DATASET_ROW_COUNT=""
|
|
91
|
+
DATASET_COLUMN_SCHEMA=""
|
|
92
|
+
EVALUATOR_NAME=""
|
|
93
|
+
EVALUATOR_TYPE=""
|
|
94
|
+
EVALUATOR_ARN_OR_URI=""
|
|
95
|
+
EVALUATOR_TECHNIQUE=""
|
|
96
|
+
EVALUATOR_DESCRIPTION=""
|
|
97
|
+
EXCLUDE_ADAPTERS=""
|
|
98
|
+
BASE_ONLY=false
|
|
99
|
+
|
|
100
|
+
# ── Dataset subcommand arg parsing ────────────────────────────────────────────
|
|
101
|
+
if [ "${SUBCOMMAND}" = "dataset" ]; then
|
|
102
|
+
FROM_TUNE_FLAG=""
|
|
103
|
+
FROM_TUNE_TECHNIQUE=""
|
|
104
|
+
# First positional arg is the dataset name
|
|
105
|
+
if [ $# -gt 0 ] && [[ "${1}" != -* ]]; then
|
|
106
|
+
DATASET_NAME="$1"; shift
|
|
107
|
+
fi
|
|
108
|
+
while [[ $# -gt 0 ]]; do
|
|
109
|
+
case "$1" in
|
|
110
|
+
--from-tune)
|
|
111
|
+
FROM_TUNE_FLAG="true"
|
|
112
|
+
# Check if next arg is a technique (not another flag)
|
|
113
|
+
if [ -n "${2:-}" ] && [[ "${2}" != -* ]]; then
|
|
114
|
+
FROM_TUNE_TECHNIQUE="$2"; shift 2
|
|
115
|
+
else
|
|
116
|
+
shift
|
|
117
|
+
fi
|
|
118
|
+
;;
|
|
119
|
+
--s3-uri) DATASET_S3_URI="$2"; shift 2 ;;
|
|
120
|
+
--s3-uri=*) DATASET_S3_URI="${1#*=}"; shift ;;
|
|
121
|
+
--format) DATASET_FORMAT="$2"; shift 2 ;;
|
|
122
|
+
--format=*) DATASET_FORMAT="${1#*=}"; shift ;;
|
|
123
|
+
--technique) DATASET_TECHNIQUE="$2"; shift 2 ;;
|
|
124
|
+
--technique=*) DATASET_TECHNIQUE="${1#*=}"; shift ;;
|
|
125
|
+
--row-count) DATASET_ROW_COUNT="$2"; shift 2 ;;
|
|
126
|
+
--row-count=*) DATASET_ROW_COUNT="${1#*=}"; shift ;;
|
|
127
|
+
--column-schema) DATASET_COLUMN_SCHEMA="$2"; shift 2 ;;
|
|
128
|
+
--column-schema=*) DATASET_COLUMN_SCHEMA="${1#*=}"; shift ;;
|
|
129
|
+
--name) DATASET_NAME="$2"; shift 2 ;; # alternate to positional
|
|
130
|
+
--name=*) DATASET_NAME="${1#*=}"; shift ;;
|
|
131
|
+
# Legacy compat: --dataset-* prefixed args
|
|
132
|
+
--dataset-name) DATASET_NAME="$2"; shift 2 ;;
|
|
133
|
+
--dataset-s3-uri) DATASET_S3_URI="$2"; shift 2 ;;
|
|
134
|
+
--dataset-format) DATASET_FORMAT="$2"; shift 2 ;;
|
|
135
|
+
--dataset-technique) DATASET_TECHNIQUE="$2"; shift 2 ;;
|
|
136
|
+
--dataset-row-count) DATASET_ROW_COUNT="$2"; shift 2 ;;
|
|
137
|
+
--dataset-column-schema) DATASET_COLUMN_SCHEMA="$2"; shift 2 ;;
|
|
138
|
+
--help|-h) _show_usage; exit 0 ;;
|
|
139
|
+
*) echo "⚠️ Unknown dataset option: $1"; _show_usage; exit 1 ;;
|
|
140
|
+
esac
|
|
141
|
+
done
|
|
142
|
+
|
|
143
|
+
# ── Resolve --from-tune: populate vars from do/config ──────────────────────
|
|
144
|
+
if [ "${FROM_TUNE_FLAG}" = "true" ]; then
|
|
145
|
+
# Resolve technique-specific or latest vars
|
|
146
|
+
_resolve_technique="${FROM_TUNE_TECHNIQUE:-${TUNE_TECHNIQUE:-}}"
|
|
147
|
+
_resolve_technique_upper=""
|
|
148
|
+
if [ -n "${_resolve_technique}" ]; then
|
|
149
|
+
_resolve_technique_upper=$(echo "${_resolve_technique}" | tr '[:lower:]' '[:upper:]')
|
|
150
|
+
fi
|
|
151
|
+
|
|
152
|
+
# Try technique-specific vars first, then fall back to latest
|
|
153
|
+
_resolve_s3_uri=""
|
|
154
|
+
_resolve_row_count=""
|
|
155
|
+
_resolve_source=""
|
|
156
|
+
if [ -n "${_resolve_technique_upper}" ]; then
|
|
157
|
+
_var_s3="TUNE_DATASET_S3_URI_${_resolve_technique_upper}"
|
|
158
|
+
_var_rows="TUNE_DATASET_ROW_COUNT_${_resolve_technique_upper}"
|
|
159
|
+
_var_src="TUNE_DATASET_SOURCE_${_resolve_technique_upper}"
|
|
160
|
+
_resolve_s3_uri="${!_var_s3:-}"
|
|
161
|
+
_resolve_row_count="${!_var_rows:-}"
|
|
162
|
+
_resolve_source="${!_var_src:-}"
|
|
163
|
+
fi
|
|
164
|
+
# Fall back to latest (non-technique-specific)
|
|
165
|
+
[ -z "${_resolve_s3_uri}" ] && _resolve_s3_uri="${TUNE_DATASET_S3_URI:-}"
|
|
166
|
+
[ -z "${_resolve_row_count}" ] && _resolve_row_count="${TUNE_DATASET_ROW_COUNT:-}"
|
|
167
|
+
[ -z "${_resolve_source}" ] && _resolve_source="${TUNE_DATASET_SOURCE:-}"
|
|
168
|
+
|
|
169
|
+
if [ -z "${_resolve_s3_uri}" ]; then
|
|
170
|
+
echo "❌ No staged dataset found in do/config"
|
|
171
|
+
echo " Run ./do/tune --dataset <uri> first to stage a dataset."
|
|
172
|
+
exit 1
|
|
173
|
+
fi
|
|
174
|
+
|
|
175
|
+
# Fill in from persisted tune state (don't override explicit flags)
|
|
176
|
+
[ -z "${DATASET_S3_URI}" ] && DATASET_S3_URI="${_resolve_s3_uri}"
|
|
177
|
+
[ "${DATASET_TECHNIQUE}" = "sft" ] && DATASET_TECHNIQUE="${_resolve_technique:-sft}"
|
|
178
|
+
[ -z "${DATASET_ROW_COUNT}" ] && DATASET_ROW_COUNT="${_resolve_row_count}"
|
|
179
|
+
|
|
180
|
+
# Derive name from source if not provided
|
|
181
|
+
if [ -z "${DATASET_NAME}" ]; then
|
|
182
|
+
_source="${_resolve_source}"
|
|
183
|
+
_slug=""
|
|
184
|
+
if [[ "${_source}" == hf://* ]]; then
|
|
185
|
+
# Strip hf:// and take the repo name part
|
|
186
|
+
_slug="${_source#hf://}"
|
|
187
|
+
_slug="${_slug%%\?*}" # remove ?file= params
|
|
188
|
+
_slug="${_slug%%/*}" # take org (if org/name, take org)
|
|
189
|
+
# Actually take the name part after /
|
|
190
|
+
if [[ "${TUNE_DATASET_SOURCE}" == */* ]]; then
|
|
191
|
+
_slug="${TUNE_DATASET_SOURCE#hf://*/}"
|
|
192
|
+
_slug="${_slug%%\?*}"
|
|
193
|
+
_slug="${_slug%%/*}"
|
|
194
|
+
fi
|
|
195
|
+
elif [[ "${_source}" == s3://* ]]; then
|
|
196
|
+
_slug=$(basename "${_source}" .jsonl)
|
|
197
|
+
fi
|
|
198
|
+
_slug=$(echo "${_slug}" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9]/-/g' | sed 's/--*/-/g' | sed 's/^-//' | sed 's/-$//')
|
|
199
|
+
DATASET_NAME="${_slug:-dataset}-${TUNE_TECHNIQUE:-sft}-${TUNE_DATASET_ROW_COUNT:-0}"
|
|
200
|
+
fi
|
|
201
|
+
fi
|
|
202
|
+
|
|
203
|
+
# ── Evaluator subcommand arg parsing ──────────────────────────────────────────
|
|
204
|
+
elif [ "${SUBCOMMAND}" = "evaluator" ]; then
|
|
205
|
+
# First positional arg is the evaluator name
|
|
206
|
+
if [ $# -gt 0 ] && [[ "${1}" != -* ]]; then
|
|
207
|
+
EVALUATOR_NAME="$1"; shift
|
|
208
|
+
fi
|
|
209
|
+
while [[ $# -gt 0 ]]; do
|
|
210
|
+
case "$1" in
|
|
211
|
+
--type) EVALUATOR_TYPE="$2"; shift 2 ;;
|
|
212
|
+
--type=*) EVALUATOR_TYPE="${1#*=}"; shift ;;
|
|
213
|
+
--arn) EVALUATOR_ARN_OR_URI="$2"; shift 2 ;;
|
|
214
|
+
--arn=*) EVALUATOR_ARN_OR_URI="${1#*=}"; shift ;;
|
|
215
|
+
--arn-or-uri) EVALUATOR_ARN_OR_URI="$2"; shift 2 ;;
|
|
216
|
+
--arn-or-uri=*) EVALUATOR_ARN_OR_URI="${1#*=}"; shift ;;
|
|
217
|
+
--technique) EVALUATOR_TECHNIQUE="$2"; shift 2 ;;
|
|
218
|
+
--technique=*) EVALUATOR_TECHNIQUE="${1#*=}"; shift ;;
|
|
219
|
+
--description) EVALUATOR_DESCRIPTION="$2"; shift 2 ;;
|
|
220
|
+
--description=*) EVALUATOR_DESCRIPTION="${1#*=}"; shift ;;
|
|
221
|
+
--name) EVALUATOR_NAME="$2"; shift 2 ;; # alternate to positional
|
|
222
|
+
--name=*) EVALUATOR_NAME="${1#*=}"; shift ;;
|
|
223
|
+
# Legacy compat
|
|
224
|
+
--evaluator-name) EVALUATOR_NAME="$2"; shift 2 ;;
|
|
225
|
+
--evaluator-type) EVALUATOR_TYPE="$2"; shift 2 ;;
|
|
226
|
+
--evaluator-arn-or-uri) EVALUATOR_ARN_OR_URI="$2"; shift 2 ;;
|
|
227
|
+
--evaluator-technique) EVALUATOR_TECHNIQUE="$2"; shift 2 ;;
|
|
228
|
+
--evaluator-description) EVALUATOR_DESCRIPTION="$2"; shift 2 ;;
|
|
229
|
+
--help|-h) _show_usage; exit 0 ;;
|
|
230
|
+
*) echo "⚠️ Unknown evaluator option: $1"; _show_usage; exit 1 ;;
|
|
231
|
+
esac
|
|
232
|
+
done
|
|
32
233
|
|
|
33
|
-
|
|
34
|
-
|
|
234
|
+
# ── Model subcommand arg parsing (default) ────────────────────────────────────
|
|
235
|
+
else
|
|
236
|
+
while [[ $# -gt 0 ]]; do
|
|
237
|
+
case "$1" in
|
|
35
238
|
--notes)
|
|
36
239
|
NOTES="$2"
|
|
37
240
|
shift 2
|
|
@@ -93,15 +296,26 @@ while [[ $# -gt 0 ]]; do
|
|
|
93
296
|
BENCHMARK_RUN_ID="${1#*=}"
|
|
94
297
|
shift
|
|
95
298
|
;;
|
|
299
|
+
--exclude)
|
|
300
|
+
EXCLUDE_ADAPTERS="${EXCLUDE_ADAPTERS:+${EXCLUDE_ADAPTERS},}$2"
|
|
301
|
+
shift 2
|
|
302
|
+
;;
|
|
303
|
+
--exclude=*)
|
|
304
|
+
EXCLUDE_ADAPTERS="${EXCLUDE_ADAPTERS:+${EXCLUDE_ADAPTERS},}${1#*=}"
|
|
305
|
+
shift
|
|
306
|
+
;;
|
|
307
|
+
--base-only)
|
|
308
|
+
BASE_ONLY=true
|
|
309
|
+
shift
|
|
310
|
+
;;
|
|
96
311
|
*)
|
|
97
312
|
echo "⚠️ Unknown option: $1"
|
|
98
|
-
|
|
99
|
-
echo "Usage: ./do/register [--notes \"text\"] [--status success|partial|failed] [--project] [--json] [--ci] [--ci-table <name>] [--build-strategy <strategy>]"
|
|
100
|
-
echo " ./do/register --benchmark-status <completed|failed|in-progress> --benchmark-run-id <run-id>"
|
|
313
|
+
_show_usage
|
|
101
314
|
exit 1
|
|
102
315
|
;;
|
|
103
316
|
esac
|
|
104
317
|
done
|
|
318
|
+
fi
|
|
105
319
|
|
|
106
320
|
# ============================================================
|
|
107
321
|
# Handle --benchmark-status mode (Stage 2 failure isolation)
|
|
@@ -173,6 +387,128 @@ if [ -n "${BENCHMARK_STATUS}" ]; then
|
|
|
173
387
|
exit 0
|
|
174
388
|
fi
|
|
175
389
|
|
|
390
|
+
# ============================================================
|
|
391
|
+
# Handle --dataset mode (register dataset, AC-2b.1)
|
|
392
|
+
# ============================================================
|
|
393
|
+
|
|
394
|
+
if [ "${SUBCOMMAND}" = "dataset" ]; then
|
|
395
|
+
if [ -z "${DATASET_NAME}" ]; then
|
|
396
|
+
echo "❌ Dataset name is required"
|
|
397
|
+
echo " Usage: ./do/register dataset <name> --s3-uri <uri>"
|
|
398
|
+
exit 1
|
|
399
|
+
fi
|
|
400
|
+
if [ -z "${DATASET_S3_URI}" ]; then
|
|
401
|
+
echo "❌ --s3-uri is required"
|
|
402
|
+
echo " Usage: ./do/register dataset <name> --s3-uri <uri>"
|
|
403
|
+
exit 1
|
|
404
|
+
fi
|
|
405
|
+
|
|
406
|
+
echo "📋 Registering dataset: ${DATASET_NAME}"
|
|
407
|
+
echo " S3 URI: ${DATASET_S3_URI}"
|
|
408
|
+
echo " Format: ${DATASET_FORMAT}"
|
|
409
|
+
echo " Technique: ${DATASET_TECHNIQUE}"
|
|
410
|
+
echo ""
|
|
411
|
+
|
|
412
|
+
# Build register-dataset args
|
|
413
|
+
DS_ARGS=(
|
|
414
|
+
"register-dataset"
|
|
415
|
+
"--name" "${DATASET_NAME}"
|
|
416
|
+
"--s3-uri" "${DATASET_S3_URI}"
|
|
417
|
+
"--format" "${DATASET_FORMAT}"
|
|
418
|
+
"--technique" "${DATASET_TECHNIQUE}"
|
|
419
|
+
)
|
|
420
|
+
|
|
421
|
+
if [ -n "${DATASET_ROW_COUNT}" ]; then
|
|
422
|
+
DS_ARGS+=("--row-count" "${DATASET_ROW_COUNT}")
|
|
423
|
+
fi
|
|
424
|
+
|
|
425
|
+
if [ -n "${DATASET_COLUMN_SCHEMA}" ]; then
|
|
426
|
+
DS_ARGS+=("--column-schema" "${DATASET_COLUMN_SCHEMA}")
|
|
427
|
+
fi
|
|
428
|
+
|
|
429
|
+
DS_ARGS+=("--project-name" "${PROJECT_NAME}")
|
|
430
|
+
|
|
431
|
+
# Call .register_helper.py register-dataset
|
|
432
|
+
if ds_output=$(python3 "${SCRIPT_DIR}/.register_helper.py" "${DS_ARGS[@]}" 2>/dev/null); then
|
|
433
|
+
ds_json=$(echo "${ds_output}" | grep -E '^\{' | tail -1)
|
|
434
|
+
if [ -n "${ds_json}" ]; then
|
|
435
|
+
echo "✅ Dataset registered successfully"
|
|
436
|
+
echo "${ds_json}" | python3 -c "import sys,json; print(json.dumps(json.load(sys.stdin), indent=2))" 2>/dev/null || echo "${ds_json}"
|
|
437
|
+
else
|
|
438
|
+
echo "⚠️ Dataset registration returned no JSON output"
|
|
439
|
+
fi
|
|
440
|
+
else
|
|
441
|
+
echo "❌ Dataset registration failed"
|
|
442
|
+
exit 1
|
|
443
|
+
fi
|
|
444
|
+
|
|
445
|
+
exit 0
|
|
446
|
+
fi
|
|
447
|
+
|
|
448
|
+
# ============================================================
|
|
449
|
+
# Handle --evaluator mode (register evaluator, AC-2c.1)
|
|
450
|
+
# ============================================================
|
|
451
|
+
|
|
452
|
+
if [ "${SUBCOMMAND}" = "evaluator" ]; then
|
|
453
|
+
if [ -z "${EVALUATOR_NAME}" ]; then
|
|
454
|
+
echo "❌ Evaluator name is required"
|
|
455
|
+
echo " Usage: ./do/register evaluator <name> --type <lambda|model> --arn <arn> --technique <rlvr|rlaif>"
|
|
456
|
+
exit 1
|
|
457
|
+
fi
|
|
458
|
+
if [ -z "${EVALUATOR_TYPE}" ]; then
|
|
459
|
+
echo "❌ --type is required (lambda or model)"
|
|
460
|
+
exit 1
|
|
461
|
+
fi
|
|
462
|
+
if [ -z "${EVALUATOR_ARN_OR_URI}" ]; then
|
|
463
|
+
echo "❌ --arn is required (Lambda ARN or model S3 URI)"
|
|
464
|
+
exit 1
|
|
465
|
+
fi
|
|
466
|
+
if [ -z "${EVALUATOR_TECHNIQUE}" ]; then
|
|
467
|
+
echo "❌ --technique is required (rlvr or rlaif)"
|
|
468
|
+
exit 1
|
|
469
|
+
fi
|
|
470
|
+
|
|
471
|
+
echo "📋 Registering evaluator: ${EVALUATOR_NAME}"
|
|
472
|
+
echo " Type: ${EVALUATOR_TYPE}"
|
|
473
|
+
echo " ARN/URI: ${EVALUATOR_ARN_OR_URI}"
|
|
474
|
+
echo " Technique: ${EVALUATOR_TECHNIQUE}"
|
|
475
|
+
if [ -n "${EVALUATOR_DESCRIPTION}" ]; then
|
|
476
|
+
echo " Description: ${EVALUATOR_DESCRIPTION}"
|
|
477
|
+
fi
|
|
478
|
+
echo ""
|
|
479
|
+
|
|
480
|
+
# Build register-evaluator args
|
|
481
|
+
EV_ARGS=(
|
|
482
|
+
"register-evaluator"
|
|
483
|
+
"--name" "${EVALUATOR_NAME}"
|
|
484
|
+
"--type" "${EVALUATOR_TYPE}"
|
|
485
|
+
"--arn-or-uri" "${EVALUATOR_ARN_OR_URI}"
|
|
486
|
+
"--technique" "${EVALUATOR_TECHNIQUE}"
|
|
487
|
+
)
|
|
488
|
+
|
|
489
|
+
if [ -n "${EVALUATOR_DESCRIPTION}" ]; then
|
|
490
|
+
EV_ARGS+=("--description" "${EVALUATOR_DESCRIPTION}")
|
|
491
|
+
fi
|
|
492
|
+
|
|
493
|
+
EV_ARGS+=("--project-name" "${PROJECT_NAME}")
|
|
494
|
+
|
|
495
|
+
# Call .register_helper.py register-evaluator
|
|
496
|
+
if ev_output=$(python3 "${SCRIPT_DIR}/.register_helper.py" "${EV_ARGS[@]}" 2>/dev/null); then
|
|
497
|
+
ev_json=$(echo "${ev_output}" | grep -E '^\{' | tail -1)
|
|
498
|
+
if [ -n "${ev_json}" ]; then
|
|
499
|
+
echo "✅ Evaluator registered successfully"
|
|
500
|
+
echo "${ev_json}" | python3 -c "import sys,json; print(json.dumps(json.load(sys.stdin), indent=2))" 2>/dev/null || echo "${ev_json}"
|
|
501
|
+
else
|
|
502
|
+
echo "⚠️ Evaluator registration returned no JSON output"
|
|
503
|
+
fi
|
|
504
|
+
else
|
|
505
|
+
echo "❌ Evaluator registration failed"
|
|
506
|
+
exit 1
|
|
507
|
+
fi
|
|
508
|
+
|
|
509
|
+
exit 0
|
|
510
|
+
fi
|
|
511
|
+
|
|
176
512
|
# Validate status
|
|
177
513
|
case "${STATUS}" in
|
|
178
514
|
success|partial|failed) ;;
|
|
@@ -795,3 +1131,213 @@ fi
|
|
|
795
1131
|
|
|
796
1132
|
echo "📝 Writing to registry..."
|
|
797
1133
|
ml-container-creator "${CMD_ARGS[@]}"
|
|
1134
|
+
|
|
1135
|
+
# ============================================================
|
|
1136
|
+
# Register to SageMaker Model Package Group (non-fatal, AC-1.6)
|
|
1137
|
+
# ============================================================
|
|
1138
|
+
|
|
1139
|
+
# Container image URI for the deployed model
|
|
1140
|
+
CONTAINER_IMAGE_URI="${ECR_REPOSITORY_NAME}:${PROJECT_NAME}-latest"
|
|
1141
|
+
if [ -n "${BASE_IMAGE:-}" ]; then
|
|
1142
|
+
CONTAINER_IMAGE_URI="${BASE_IMAGE}"
|
|
1143
|
+
fi
|
|
1144
|
+
|
|
1145
|
+
# Model data S3 URI (from do/config if set)
|
|
1146
|
+
MODEL_S3_URI="${MODEL_DATA_URL:-${MODEL_S3_URI:-}}"
|
|
1147
|
+
|
|
1148
|
+
# Build register-model args
|
|
1149
|
+
REG_ARGS=(
|
|
1150
|
+
"register-model"
|
|
1151
|
+
"--project-name" "${PROJECT_NAME}"
|
|
1152
|
+
"--deployment-config" "${DEPLOYMENT_CONFIG}"
|
|
1153
|
+
"--container-image" "${CONTAINER_IMAGE_URI}"
|
|
1154
|
+
"--instance-type" "${INSTANCE_TYPE}"
|
|
1155
|
+
"--architecture" "${ARCHITECTURE}"
|
|
1156
|
+
"--backend" "${BACKEND}"
|
|
1157
|
+
)
|
|
1158
|
+
|
|
1159
|
+
if [ -n "${MODEL_NAME:-}" ]; then
|
|
1160
|
+
REG_ARGS+=("--model-name" "${MODEL_NAME}")
|
|
1161
|
+
fi
|
|
1162
|
+
|
|
1163
|
+
if [ -n "${MODEL_S3_URI:-}" ]; then
|
|
1164
|
+
REG_ARGS+=("--model-data-url" "${MODEL_S3_URI}")
|
|
1165
|
+
fi
|
|
1166
|
+
|
|
1167
|
+
if [ -n "${BASE_IMAGE:-}" ]; then
|
|
1168
|
+
REG_ARGS+=("--base-image" "${BASE_IMAGE}")
|
|
1169
|
+
fi
|
|
1170
|
+
|
|
1171
|
+
if [ -n "${MODEL_FORMAT:-}" ]; then
|
|
1172
|
+
REG_ARGS+=("--model-format" "${MODEL_FORMAT}")
|
|
1173
|
+
fi
|
|
1174
|
+
|
|
1175
|
+
if [ -n "${GENERATOR_VERSION}" ]; then
|
|
1176
|
+
REG_ARGS+=("--generator-version" "${GENERATOR_VERSION}")
|
|
1177
|
+
fi
|
|
1178
|
+
|
|
1179
|
+
if [ -n "${AWS_REGION:-}" ]; then
|
|
1180
|
+
REG_ARGS+=("--region" "${AWS_REGION}")
|
|
1181
|
+
fi
|
|
1182
|
+
|
|
1183
|
+
if [ -n "${ROLE_ARN:-}" ]; then
|
|
1184
|
+
REG_ARGS+=("--role-arn" "${ROLE_ARN}")
|
|
1185
|
+
fi
|
|
1186
|
+
|
|
1187
|
+
echo ""
|
|
1188
|
+
echo "📦 Registering to SageMaker Model Package Group..."
|
|
1189
|
+
|
|
1190
|
+
# Call .register_helper.py — non-fatal on failure (AC-1.6)
|
|
1191
|
+
# Use robust JSON extraction pattern (design lesson #4)
|
|
1192
|
+
if helper_output=$(python3 "${SCRIPT_DIR}/.register_helper.py" "${REG_ARGS[@]}" 2>/dev/null); then
|
|
1193
|
+
json_line=$(echo "${helper_output}" | grep -E '^\{' | tail -1)
|
|
1194
|
+
if [ -n "${json_line}" ]; then
|
|
1195
|
+
# Extract fields from JSON response
|
|
1196
|
+
MPG_ARN=$(echo "${json_line}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('mpg_arn',''))" 2>/dev/null || echo "")
|
|
1197
|
+
MODEL_PKG_ARN=$(echo "${json_line}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('model_package_arn',''))" 2>/dev/null || echo "")
|
|
1198
|
+
MPG_VERSION=$(echo "${json_line}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('version',0))" 2>/dev/null || echo "0")
|
|
1199
|
+
|
|
1200
|
+
if [ -n "${MODEL_PKG_ARN}" ] && [ "${MODEL_PKG_ARN}" != "" ]; then
|
|
1201
|
+
echo "✅ Model registered: version ${MPG_VERSION}"
|
|
1202
|
+
echo " MPG ARN: ${MPG_ARN}"
|
|
1203
|
+
echo " Package ARN: ${MODEL_PKG_ARN}"
|
|
1204
|
+
|
|
1205
|
+
# Update local deployment registry entry with MPG ARN and version (AC-1.5)
|
|
1206
|
+
# Patch the most recent entry in the registry JSON file
|
|
1207
|
+
REGISTRY_FILE="${HOME}/.ml-container-creator/registry.json"
|
|
1208
|
+
if [ -f "${REGISTRY_FILE}" ]; then
|
|
1209
|
+
python3 -c "
|
|
1210
|
+
import json, sys
|
|
1211
|
+
try:
|
|
1212
|
+
with open('${REGISTRY_FILE}', 'r') as f:
|
|
1213
|
+
data = json.load(f)
|
|
1214
|
+
entries = data.get('entries', data) if isinstance(data, dict) else data
|
|
1215
|
+
if isinstance(entries, list) and len(entries) > 0:
|
|
1216
|
+
entries[-1]['modelPackageArn'] = '${MODEL_PKG_ARN}'
|
|
1217
|
+
entries[-1]['modelPackageVersion'] = ${MPG_VERSION}
|
|
1218
|
+
if isinstance(data, dict):
|
|
1219
|
+
data['entries'] = entries
|
|
1220
|
+
with open('${REGISTRY_FILE}', 'w') as f:
|
|
1221
|
+
json.dump(data, f, indent=2)
|
|
1222
|
+
else:
|
|
1223
|
+
with open('${REGISTRY_FILE}', 'w') as f:
|
|
1224
|
+
json.dump(entries, f, indent=2)
|
|
1225
|
+
except Exception as e:
|
|
1226
|
+
print(f'Warning: Could not update registry with MPG info: {e}', file=sys.stderr)
|
|
1227
|
+
" 2>/dev/null || true
|
|
1228
|
+
fi
|
|
1229
|
+
else
|
|
1230
|
+
echo "⚠️ MPG registration returned incomplete data (non-fatal)"
|
|
1231
|
+
fi
|
|
1232
|
+
else
|
|
1233
|
+
echo "⚠️ MPG registration returned no JSON output (non-fatal)"
|
|
1234
|
+
fi
|
|
1235
|
+
else
|
|
1236
|
+
echo "⚠️ MPG registration failed (non-fatal) — local registry is the primary record"
|
|
1237
|
+
fi
|
|
1238
|
+
|
|
1239
|
+
# ============================================================
|
|
1240
|
+
# Register adapter into same MPG if detected (AC-2.1, AC-2.4)
|
|
1241
|
+
# ============================================================
|
|
1242
|
+
# Adapter presence: TUNE_OUTPUT_PATH_LATEST set (from do/tune)
|
|
1243
|
+
# OR adapter conf files exist in do/adapters/
|
|
1244
|
+
# SFTTrainer may auto-register — do/register supplements with deployment metadata.
|
|
1245
|
+
# Registration failure is non-fatal (warning only, AC-1.6 applies).
|
|
1246
|
+
|
|
1247
|
+
# ── Register ALL adapters into MPG ────────────────────────────────────────────
|
|
1248
|
+
# Loop over do/adapters/*.conf and register each one as a linked ModelPackage version.
|
|
1249
|
+
# Use --exclude to skip specific adapters (comma-separated or repeated flag).
|
|
1250
|
+
# Use --base-only to skip all adapter registration.
|
|
1251
|
+
|
|
1252
|
+
_is_excluded() {
|
|
1253
|
+
local name="$1"
|
|
1254
|
+
IFS=',' read -ra _excl_list <<< "${EXCLUDE_ADAPTERS}"
|
|
1255
|
+
for _excl in "${_excl_list[@]}"; do
|
|
1256
|
+
[ "${_excl}" = "${name}" ] && return 0
|
|
1257
|
+
done
|
|
1258
|
+
return 1
|
|
1259
|
+
}
|
|
1260
|
+
|
|
1261
|
+
if [ "${BASE_ONLY}" = true ]; then
|
|
1262
|
+
echo ""
|
|
1263
|
+
echo "ℹ️ --base-only: skipping adapter registration"
|
|
1264
|
+
elif [ -n "${MODEL_PKG_ARN:-}" ] && [ -d "${SCRIPT_DIR}/adapters" ]; then
|
|
1265
|
+
ADAPTERS_REGISTERED=0
|
|
1266
|
+
ADAPTERS_SKIPPED=0
|
|
1267
|
+
|
|
1268
|
+
for conf in "${SCRIPT_DIR}"/adapters/*.conf; do
|
|
1269
|
+
[ -f "${conf}" ] || continue
|
|
1270
|
+
[[ "$(basename "${conf}")" == ".gitkeep" ]] && continue
|
|
1271
|
+
|
|
1272
|
+
_ADAPTER_NAME=$(basename "${conf}" .conf)
|
|
1273
|
+
|
|
1274
|
+
# Check exclude list
|
|
1275
|
+
if _is_excluded "${_ADAPTER_NAME}"; then
|
|
1276
|
+
echo " ⏭️ Skipping excluded adapter: ${_ADAPTER_NAME}"
|
|
1277
|
+
ADAPTERS_SKIPPED=$((ADAPTERS_SKIPPED + 1))
|
|
1278
|
+
continue
|
|
1279
|
+
fi
|
|
1280
|
+
|
|
1281
|
+
# Read adapter config
|
|
1282
|
+
ADAPTER_WEIGHTS_URI=""
|
|
1283
|
+
ADAPTER_TECHNIQUE=""
|
|
1284
|
+
eval "$(grep '^export ADAPTER_WEIGHTS_URI=' "${conf}" 2>/dev/null)" 2>/dev/null || true
|
|
1285
|
+
eval "$(grep '^export ADAPTER_TECHNIQUE=' "${conf}" 2>/dev/null)" 2>/dev/null || true
|
|
1286
|
+
|
|
1287
|
+
_ADAPTER_DATA_URL="${ADAPTER_WEIGHTS_URI:-}"
|
|
1288
|
+
_ADAPTER_TECHNIQUE="${ADAPTER_TECHNIQUE:-${TUNE_TECHNIQUE:-}}"
|
|
1289
|
+
|
|
1290
|
+
echo ""
|
|
1291
|
+
echo "📦 Registering adapter: ${_ADAPTER_NAME}"
|
|
1292
|
+
|
|
1293
|
+
# Build register-adapter args
|
|
1294
|
+
ADAPTER_REG_ARGS=(
|
|
1295
|
+
"register-adapter"
|
|
1296
|
+
"--project-name" "${PROJECT_NAME}"
|
|
1297
|
+
"--parent-version-arn" "${MODEL_PKG_ARN}"
|
|
1298
|
+
"--deployment-config" "${DEPLOYMENT_CONFIG}"
|
|
1299
|
+
"--container-image" "${CONTAINER_IMAGE_URI}"
|
|
1300
|
+
"--instance-type" "${INSTANCE_TYPE}"
|
|
1301
|
+
"--architecture" "${ARCHITECTURE}"
|
|
1302
|
+
"--backend" "${BACKEND}"
|
|
1303
|
+
)
|
|
1304
|
+
|
|
1305
|
+
[ -n "${_ADAPTER_TECHNIQUE}" ] && ADAPTER_REG_ARGS+=("--tune-technique" "${_ADAPTER_TECHNIQUE}")
|
|
1306
|
+
[ -n "${_ADAPTER_DATA_URL}" ] && ADAPTER_REG_ARGS+=("--model-data-url" "${_ADAPTER_DATA_URL}")
|
|
1307
|
+
[ -n "${MODEL_NAME:-}" ] && ADAPTER_REG_ARGS+=("--model-name" "${MODEL_NAME}")
|
|
1308
|
+
[ -n "${BASE_IMAGE:-}" ] && ADAPTER_REG_ARGS+=("--base-image" "${BASE_IMAGE}")
|
|
1309
|
+
[ -n "${MODEL_FORMAT:-}" ] && ADAPTER_REG_ARGS+=("--model-format" "${MODEL_FORMAT}")
|
|
1310
|
+
[ -n "${GENERATOR_VERSION:-}" ] && ADAPTER_REG_ARGS+=("--generator-version" "${GENERATOR_VERSION}")
|
|
1311
|
+
[ -n "${AWS_REGION:-}" ] && ADAPTER_REG_ARGS+=("--region" "${AWS_REGION}")
|
|
1312
|
+
[ -n "${ROLE_ARN:-}" ] && ADAPTER_REG_ARGS+=("--role-arn" "${ROLE_ARN}")
|
|
1313
|
+
|
|
1314
|
+
# Call .register_helper.py register-adapter — non-fatal on failure
|
|
1315
|
+
if adapter_output=$(python3 "${SCRIPT_DIR}/.register_helper.py" "${ADAPTER_REG_ARGS[@]}" 2>/dev/null); then
|
|
1316
|
+
adapter_json=$(echo "${adapter_output}" | grep -E '^\{' | tail -1)
|
|
1317
|
+
if [ -n "${adapter_json}" ]; then
|
|
1318
|
+
_PKG_ARN=$(echo "${adapter_json}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('model_package_arn',''))" 2>/dev/null || echo "")
|
|
1319
|
+
_VERSION=$(echo "${adapter_json}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('version',0))" 2>/dev/null || echo "0")
|
|
1320
|
+
|
|
1321
|
+
if [ -n "${_PKG_ARN}" ] && [ "${_PKG_ARN}" != "" ]; then
|
|
1322
|
+
echo " ✅ Registered: version ${_VERSION} (${_PKG_ARN})"
|
|
1323
|
+
ADAPTERS_REGISTERED=$((ADAPTERS_REGISTERED + 1))
|
|
1324
|
+
else
|
|
1325
|
+
echo " ⚠️ Registration returned incomplete data (non-fatal)"
|
|
1326
|
+
fi
|
|
1327
|
+
else
|
|
1328
|
+
echo " ⚠️ Registration returned no JSON output (non-fatal)"
|
|
1329
|
+
fi
|
|
1330
|
+
else
|
|
1331
|
+
echo " ⚠️ Registration failed for ${_ADAPTER_NAME} (non-fatal)"
|
|
1332
|
+
fi
|
|
1333
|
+
|
|
1334
|
+
unset ADAPTER_WEIGHTS_URI ADAPTER_TECHNIQUE
|
|
1335
|
+
done
|
|
1336
|
+
|
|
1337
|
+
echo ""
|
|
1338
|
+
echo "📋 Adapter registration summary: ${ADAPTERS_REGISTERED} registered, ${ADAPTERS_SKIPPED} skipped"
|
|
1339
|
+
|
|
1340
|
+
elif [ -z "${MODEL_PKG_ARN:-}" ] && [ -d "${SCRIPT_DIR}/adapters" ]; then
|
|
1341
|
+
echo ""
|
|
1342
|
+
echo "⚠️ Adapters detected but no parent model version ARN — skipping adapter registration"
|
|
1343
|
+
fi
|