@aws/ml-container-creator 0.5.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +9 -0
- package/config/bootstrap-stack.json +106 -9
- package/infra/ci-harness/package-lock.json +5 -1
- package/package.json +1 -1
- package/servers/instance-sizer/index.js +4 -4
- package/servers/instance-sizer/lib/model-resolver.js +1 -1
- package/servers/lib/catalogs/model-sizes.json +135 -90
- package/servers/lib/catalogs/models.json +483 -411
- package/src/app.js +29 -1
- package/src/lib/bootstrap-command-handler.js +71 -23
- package/src/lib/cli-handler.js +1 -1
- package/src/lib/config-manager.js +1 -1
- package/src/lib/mcp-client.js +3 -3
- package/src/lib/prompt-runner.js +5 -5
- package/src/lib/prompts.js +31 -5
- package/src/lib/tune-catalog-validator.js +143 -0
- package/src/lib/tune-config-state.js +116 -0
- package/src/lib/tune-dataset-validator.js +279 -0
- package/src/lib/tune-output-resolver.js +66 -0
- package/templates/do/.tune_helper.py +768 -0
- package/templates/do/adapter +128 -17
- package/templates/do/add-ic +155 -19
- package/templates/do/config +11 -4
- package/templates/do/tune +1143 -0
package/templates/do/adapter
CHANGED
|
@@ -29,6 +29,7 @@ _usage() {
|
|
|
29
29
|
echo "Commands:"
|
|
30
30
|
echo " add <name> --weights <s3-uri> Add a new LoRA adapter from S3"
|
|
31
31
|
echo " add <name> --from-hub <hf-repo-id> Add a new LoRA adapter from HuggingFace Hub"
|
|
32
|
+
echo " add <name> --from-tune [technique] Add adapter from do/tune output"
|
|
32
33
|
echo " list List all adapters on the endpoint"
|
|
33
34
|
echo " remove <name> Remove an adapter"
|
|
34
35
|
echo " update <name> --weights <new-s3-uri> Update adapter weights from S3"
|
|
@@ -41,6 +42,8 @@ _usage() {
|
|
|
41
42
|
echo "Examples:"
|
|
42
43
|
echo " ./do/adapter add ectsum --weights s3://my-bucket/adapters/ectsum/adapter.tar.gz"
|
|
43
44
|
echo " ./do/adapter add ectsum --from-hub predibase/llama-3.1-8b-ectsum"
|
|
45
|
+
echo " ./do/adapter add tuned-sft --from-tune"
|
|
46
|
+
echo " ./do/adapter add tuned-sft --from-tune sft"
|
|
44
47
|
echo " ./do/adapter list"
|
|
45
48
|
echo " ./do/adapter remove ectsum"
|
|
46
49
|
echo " ./do/adapter update ectsum --weights s3://my-bucket/adapters/ectsum-v2/adapter.tar.gz"
|
|
@@ -48,7 +51,7 @@ _usage() {
|
|
|
48
51
|
echo ""
|
|
49
52
|
echo "Adapter metadata is stored in do/adapters/<name>.conf"
|
|
50
53
|
echo ""
|
|
51
|
-
echo "Note: --weights and --from-
|
|
54
|
+
echo "Note: --weights, --from-hub, and --from-tune are mutually exclusive."
|
|
52
55
|
}
|
|
53
56
|
|
|
54
57
|
# ── Validate LoRA is enabled ──────────────────────────────────────────────────
|
|
@@ -162,8 +165,8 @@ _download_from_hub() {
|
|
|
162
165
|
|
|
163
166
|
# ── Resolve S3 bucket ─────────────────────────────────────────────────
|
|
164
167
|
local s3_bucket=""
|
|
165
|
-
if [ -n "${
|
|
166
|
-
s3_bucket="${
|
|
168
|
+
if [ -n "${ADAPTER_S3_BUCKET:-}" ]; then
|
|
169
|
+
s3_bucket="${ADAPTER_S3_BUCKET}"
|
|
167
170
|
else
|
|
168
171
|
local account_id
|
|
169
172
|
account_id=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "")
|
|
@@ -172,14 +175,30 @@ _download_from_hub() {
|
|
|
172
175
|
echo " Ensure AWS credentials are configured."
|
|
173
176
|
exit 1
|
|
174
177
|
fi
|
|
175
|
-
s3_bucket="
|
|
178
|
+
s3_bucket="mlcc-adapters-${account_id}-${AWS_REGION}"
|
|
176
179
|
fi
|
|
177
180
|
|
|
178
181
|
# ── Create temp directory ─────────────────────────────────────────────
|
|
179
182
|
mkdir -p "${tmp_dir}/adapter_files"
|
|
180
183
|
|
|
181
184
|
# ── Download adapter files ────────────────────────────────────────────
|
|
182
|
-
if command -v
|
|
185
|
+
if command -v hf &>/dev/null; then
|
|
186
|
+
echo " Using hf CLI to download..."
|
|
187
|
+
local hf_args=("download" "${hf_repo_id}" "--local-dir" "${tmp_dir}/adapter_files")
|
|
188
|
+
if [ -n "${HF_TOKEN:-}" ]; then
|
|
189
|
+
hf_args+=("--token" "${HF_TOKEN}")
|
|
190
|
+
fi
|
|
191
|
+
if ! hf "${hf_args[@]}" 2>/dev/null; then
|
|
192
|
+
echo "❌ Failed to download adapter from HuggingFace Hub: ${hf_repo_id}"
|
|
193
|
+
echo ""
|
|
194
|
+
echo " Check that:"
|
|
195
|
+
echo " • The repository exists: https://huggingface.co/${hf_repo_id}"
|
|
196
|
+
echo " • For gated repos, set HF_TOKEN environment variable"
|
|
197
|
+
echo " • You have network connectivity to huggingface.co"
|
|
198
|
+
rm -rf "${tmp_dir}"
|
|
199
|
+
exit 1
|
|
200
|
+
fi
|
|
201
|
+
elif command -v huggingface-cli &>/dev/null; then
|
|
183
202
|
echo " Using huggingface-cli to download..."
|
|
184
203
|
local hf_args=("download" "${hf_repo_id}" "--local-dir" "${tmp_dir}/adapter_files")
|
|
185
204
|
if [ -n "${HF_TOKEN:-}" ]; then
|
|
@@ -341,6 +360,8 @@ _adapter_add() {
|
|
|
341
360
|
local adapter_name=""
|
|
342
361
|
local weights_uri=""
|
|
343
362
|
local from_hub=""
|
|
363
|
+
local from_tune=""
|
|
364
|
+
local from_tune_technique=""
|
|
344
365
|
|
|
345
366
|
# Parse add arguments
|
|
346
367
|
shift # remove 'add' from args
|
|
@@ -364,28 +385,45 @@ _adapter_add() {
|
|
|
364
385
|
from_hub="$2"
|
|
365
386
|
shift 2
|
|
366
387
|
;;
|
|
388
|
+
--from-tune)
|
|
389
|
+
from_tune="true"
|
|
390
|
+
# Check if next argument is a technique (not another flag and not empty)
|
|
391
|
+
if [ -n "${2:-}" ] && [[ "${2}" != -* ]]; then
|
|
392
|
+
from_tune_technique="$2"
|
|
393
|
+
shift 2
|
|
394
|
+
else
|
|
395
|
+
shift
|
|
396
|
+
fi
|
|
397
|
+
;;
|
|
367
398
|
--help|-h)
|
|
368
399
|
echo "Usage: ./do/adapter add <name> --weights <s3-uri>"
|
|
369
400
|
echo " ./do/adapter add <name> --from-hub <hf-repo-id>"
|
|
401
|
+
echo " ./do/adapter add <name> --from-tune [technique]"
|
|
370
402
|
echo ""
|
|
371
403
|
echo "Add a new LoRA adapter to the endpoint."
|
|
372
404
|
echo ""
|
|
373
405
|
echo "Arguments:"
|
|
374
|
-
echo " <name>
|
|
375
|
-
echo " --weights <s3-uri>
|
|
376
|
-
echo " --from-hub <hf-repo-id>
|
|
406
|
+
echo " <name> Adapter name (lowercase alphanumeric + hyphens, 1-50 chars)"
|
|
407
|
+
echo " --weights <s3-uri> S3 URI to adapter weights (.tar.gz)"
|
|
408
|
+
echo " --from-hub <hf-repo-id> Download adapter from HuggingFace Hub"
|
|
409
|
+
echo " --from-tune [technique] Use adapter output from do/tune"
|
|
410
|
+
echo " Without technique: uses latest tune output"
|
|
411
|
+
echo " With technique (e.g., sft, dpo): uses technique-specific output"
|
|
377
412
|
echo ""
|
|
378
|
-
echo "Note: --weights and --from-
|
|
413
|
+
echo "Note: --weights, --from-hub, and --from-tune are mutually exclusive."
|
|
379
414
|
echo ""
|
|
380
415
|
echo "Examples:"
|
|
381
416
|
echo " ./do/adapter add ectsum --weights s3://bucket/adapters/ectsum/adapter.tar.gz"
|
|
382
417
|
echo " ./do/adapter add ectsum --from-hub predibase/llama-3.1-8b-ectsum"
|
|
418
|
+
echo " ./do/adapter add tuned-sft --from-tune"
|
|
419
|
+
echo " ./do/adapter add tuned-sft --from-tune sft"
|
|
383
420
|
exit 0
|
|
384
421
|
;;
|
|
385
422
|
-*)
|
|
386
423
|
echo "❌ Unknown option: $1"
|
|
387
424
|
echo " Usage: ./do/adapter add <name> --weights <s3-uri>"
|
|
388
425
|
echo " ./do/adapter add <name> --from-hub <hf-repo-id>"
|
|
426
|
+
echo " ./do/adapter add <name> --from-tune [technique]"
|
|
389
427
|
exit 1
|
|
390
428
|
;;
|
|
391
429
|
*)
|
|
@@ -395,6 +433,7 @@ _adapter_add() {
|
|
|
395
433
|
echo "❌ Unexpected argument: $1"
|
|
396
434
|
echo " Usage: ./do/adapter add <name> --weights <s3-uri>"
|
|
397
435
|
echo " ./do/adapter add <name> --from-hub <hf-repo-id>"
|
|
436
|
+
echo " ./do/adapter add <name> --from-tune [technique]"
|
|
398
437
|
exit 1
|
|
399
438
|
fi
|
|
400
439
|
shift
|
|
@@ -407,26 +446,85 @@ _adapter_add() {
|
|
|
407
446
|
echo "❌ Adapter name is required"
|
|
408
447
|
echo " Usage: ./do/adapter add <name> --weights <s3-uri>"
|
|
409
448
|
echo " ./do/adapter add <name> --from-hub <hf-repo-id>"
|
|
449
|
+
echo " ./do/adapter add <name> --from-tune [technique]"
|
|
410
450
|
exit 1
|
|
411
451
|
fi
|
|
412
452
|
|
|
413
453
|
# ── Mutual exclusivity check ─────────────────────────────────────────
|
|
414
|
-
|
|
415
|
-
|
|
454
|
+
local source_count=0
|
|
455
|
+
[ -n "${weights_uri}" ] && source_count=$((source_count + 1))
|
|
456
|
+
[ -n "${from_hub}" ] && source_count=$((source_count + 1))
|
|
457
|
+
[ -n "${from_tune}" ] && source_count=$((source_count + 1))
|
|
458
|
+
|
|
459
|
+
if [ "${source_count}" -gt 1 ]; then
|
|
460
|
+
echo "❌ --weights, --from-hub, and --from-tune are mutually exclusive"
|
|
416
461
|
echo ""
|
|
417
|
-
echo " Use one
|
|
462
|
+
echo " Use one of:"
|
|
418
463
|
echo " ./do/adapter add ${adapter_name} --weights <s3-uri>"
|
|
419
464
|
echo " ./do/adapter add ${adapter_name} --from-hub <hf-repo-id>"
|
|
465
|
+
echo " ./do/adapter add ${adapter_name} --from-tune [technique]"
|
|
420
466
|
exit 1
|
|
421
467
|
fi
|
|
422
468
|
|
|
423
|
-
if [
|
|
424
|
-
echo "❌
|
|
469
|
+
if [ "${source_count}" -eq 0 ]; then
|
|
470
|
+
echo "❌ One of --weights, --from-hub, or --from-tune is required"
|
|
425
471
|
echo " Usage: ./do/adapter add <name> --weights <s3-uri>"
|
|
426
472
|
echo " ./do/adapter add <name> --from-hub <hf-repo-id>"
|
|
473
|
+
echo " ./do/adapter add <name> --from-tune [technique]"
|
|
427
474
|
exit 1
|
|
428
475
|
fi
|
|
429
476
|
|
|
477
|
+
# ── Resolve --from-tune to weights_uri ────────────────────────────────
|
|
478
|
+
if [ -n "${from_tune}" ]; then
|
|
479
|
+
if [ -n "${from_tune_technique}" ]; then
|
|
480
|
+
# Technique-specific: read TUNE_ADAPTER_PATH_<TECHNIQUE>
|
|
481
|
+
local technique_upper
|
|
482
|
+
technique_upper=$(echo "${from_tune_technique}" | tr '[:lower:]' '[:upper:]')
|
|
483
|
+
local tune_var="TUNE_ADAPTER_PATH_${technique_upper}"
|
|
484
|
+
local tune_path="${!tune_var:-}"
|
|
485
|
+
|
|
486
|
+
if [ -z "${tune_path}" ]; then
|
|
487
|
+
echo "❌ No adapter output found for technique: ${from_tune_technique}"
|
|
488
|
+
echo ""
|
|
489
|
+
echo " ${tune_var} is not set in do/config."
|
|
490
|
+
echo ""
|
|
491
|
+
echo " Run a tune job first:"
|
|
492
|
+
echo " ./do/tune --technique ${from_tune_technique} --dataset <source>"
|
|
493
|
+
exit 1
|
|
494
|
+
fi
|
|
495
|
+
|
|
496
|
+
weights_uri="${tune_path}"
|
|
497
|
+
echo "📦 Using tune adapter output for technique '${from_tune_technique}': ${weights_uri}"
|
|
498
|
+
else
|
|
499
|
+
# No technique: read TUNE_OUTPUT_PATH_LATEST and verify type
|
|
500
|
+
if [ -z "${TUNE_OUTPUT_PATH_LATEST:-}" ]; then
|
|
501
|
+
echo "❌ No tune output found."
|
|
502
|
+
echo ""
|
|
503
|
+
echo " TUNE_OUTPUT_PATH_LATEST is not set in do/config."
|
|
504
|
+
echo ""
|
|
505
|
+
echo " Run a tune job first:"
|
|
506
|
+
echo " ./do/tune --technique <technique> --dataset <source>"
|
|
507
|
+
exit 1
|
|
508
|
+
fi
|
|
509
|
+
|
|
510
|
+
# Verify output type is adapter (not full-model)
|
|
511
|
+
if [ "${TUNE_OUTPUT_TYPE_LATEST:-}" = "full-model" ]; then
|
|
512
|
+
echo "❌ Latest tune output is a full model, not an adapter."
|
|
513
|
+
echo ""
|
|
514
|
+
echo " TUNE_OUTPUT_TYPE_LATEST=full-model"
|
|
515
|
+
echo ""
|
|
516
|
+
echo " Full model outputs cannot be added as adapters."
|
|
517
|
+
echo " Use do/add-ic instead:"
|
|
518
|
+
echo " ./do/add-ic ${adapter_name} --from-tune"
|
|
519
|
+
exit 1
|
|
520
|
+
fi
|
|
521
|
+
|
|
522
|
+
weights_uri="${TUNE_OUTPUT_PATH_LATEST}"
|
|
523
|
+
echo "📦 Using latest tune adapter output: ${weights_uri}"
|
|
524
|
+
fi
|
|
525
|
+
echo ""
|
|
526
|
+
fi
|
|
527
|
+
|
|
430
528
|
# ── Validate HF repo ID format (if --from-hub) ───────────────────────
|
|
431
529
|
if [ -n "${from_hub}" ]; then
|
|
432
530
|
# Valid formats: "org/name" or "name" (alphanumeric, hyphens, underscores, dots)
|
|
@@ -452,8 +550,8 @@ _adapter_add() {
|
|
|
452
550
|
exit 1
|
|
453
551
|
fi
|
|
454
552
|
|
|
455
|
-
# ── Validate S3 URI format (only when --weights is used)
|
|
456
|
-
if [ -n "${weights_uri}" ]; then
|
|
553
|
+
# ── Validate S3 URI format (only when --weights is explicitly used) ──
|
|
554
|
+
if [ -n "${weights_uri}" ] && [ -z "${from_hub}" ] && [ -z "${from_tune}" ]; then
|
|
457
555
|
if ! echo "${weights_uri}" | grep -qE '^s3://.*\.tar\.gz$'; then
|
|
458
556
|
echo "❌ Invalid S3 URI: ${weights_uri}"
|
|
459
557
|
echo ""
|
|
@@ -479,6 +577,9 @@ _adapter_add() {
|
|
|
479
577
|
echo "🔌 Adding adapter: ${adapter_name}"
|
|
480
578
|
if [ -n "${from_hub}" ]; then
|
|
481
579
|
echo " Source: HuggingFace Hub (${from_hub})"
|
|
580
|
+
elif [ -n "${from_tune}" ]; then
|
|
581
|
+
echo " Source: do/tune output"
|
|
582
|
+
echo " Weights: ${weights_uri}"
|
|
482
583
|
else
|
|
483
584
|
echo " Weights: ${weights_uri}"
|
|
484
585
|
fi
|
|
@@ -577,6 +678,14 @@ export ADAPTER_HF_REPO="${from_hub}"
|
|
|
577
678
|
EOF
|
|
578
679
|
fi
|
|
579
680
|
|
|
681
|
+
# Add tune-specific metadata if --from-tune was used
|
|
682
|
+
if [ -n "${from_tune}" ]; then
|
|
683
|
+
cat >> "${SCRIPT_DIR}/adapters/${adapter_name}.conf" <<EOF
|
|
684
|
+
export ADAPTER_SOURCE="tune"
|
|
685
|
+
export ADAPTER_TUNE_TECHNIQUE="${from_tune_technique:-latest}"
|
|
686
|
+
EOF
|
|
687
|
+
fi
|
|
688
|
+
|
|
580
689
|
echo ""
|
|
581
690
|
echo "✅ Adapter added successfully!"
|
|
582
691
|
echo ""
|
|
@@ -586,6 +695,8 @@ EOF
|
|
|
586
695
|
echo " Weights: ${weights_uri}"
|
|
587
696
|
if [ -n "${from_hub}" ]; then
|
|
588
697
|
echo " Source: HuggingFace Hub (${from_hub})"
|
|
698
|
+
elif [ -n "${from_tune}" ]; then
|
|
699
|
+
echo " Source: do/tune (${from_tune_technique:-latest})"
|
|
589
700
|
fi
|
|
590
701
|
echo " Created: ${created_at}"
|
|
591
702
|
echo ""
|
|
@@ -693,7 +804,7 @@ _adapter_list() {
|
|
|
693
804
|
|
|
694
805
|
# ── Print table ───────────────────────────────────────────────────────
|
|
695
806
|
printf '%-14s%-12s%s\n' "NAME" "STATUS" "WEIGHTS"
|
|
696
|
-
echo -e "${output_lines}" |
|
|
807
|
+
echo -e "${output_lines}" | sed '$ { /^$/d; }'
|
|
697
808
|
}
|
|
698
809
|
|
|
699
810
|
_adapter_remove() {
|
package/templates/do/add-ic
CHANGED
|
@@ -4,6 +4,10 @@
|
|
|
4
4
|
#
|
|
5
5
|
# Add a new inference component to this project.
|
|
6
6
|
# Creates a new IC config file in do/ic/ and deploys it immediately.
|
|
7
|
+
#
|
|
8
|
+
# Usage:
|
|
9
|
+
# ./do/add-ic [name] [--from-tune] [--model-data <s3-uri>]
|
|
10
|
+
# ./do/add-ic --help
|
|
7
11
|
|
|
8
12
|
set -e
|
|
9
13
|
set -u
|
|
@@ -12,38 +16,157 @@ set -o pipefail
|
|
|
12
16
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
13
17
|
source "${SCRIPT_DIR}/config"
|
|
14
18
|
|
|
19
|
+
# ============================================================
|
|
20
|
+
# Usage
|
|
21
|
+
# ============================================================
|
|
22
|
+
_usage() {
|
|
23
|
+
echo "Usage: ./do/add-ic [name] [options]"
|
|
24
|
+
echo ""
|
|
25
|
+
echo "Add a new inference component to this project."
|
|
26
|
+
echo ""
|
|
27
|
+
echo "Arguments:"
|
|
28
|
+
echo " [name] IC name (optional, prompted if not provided)"
|
|
29
|
+
echo ""
|
|
30
|
+
echo "Options:"
|
|
31
|
+
echo " --from-tune Use model output from do/tune (reads TUNE_OUTPUT_PATH_LATEST)"
|
|
32
|
+
echo " --model-data <s3-uri> S3 URI to model data (tar.gz or model directory)"
|
|
33
|
+
echo " --help, -h Show this help message"
|
|
34
|
+
echo ""
|
|
35
|
+
echo "Note: --from-tune and --model-data are mutually exclusive."
|
|
36
|
+
echo ""
|
|
37
|
+
echo "Examples:"
|
|
38
|
+
echo " ./do/add-ic # Interactive mode"
|
|
39
|
+
echo " ./do/add-ic tuned-v1 --from-tune # Use latest tune output"
|
|
40
|
+
echo " ./do/add-ic tuned-v1 --model-data s3://bucket/model.tar.gz"
|
|
41
|
+
echo ""
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
# ============================================================
|
|
45
|
+
# Parse arguments
|
|
46
|
+
# ============================================================
|
|
47
|
+
IC_NAME=""
|
|
48
|
+
FROM_TUNE=""
|
|
49
|
+
MODEL_DATA=""
|
|
50
|
+
|
|
51
|
+
while [ $# -gt 0 ]; do
|
|
52
|
+
case "$1" in
|
|
53
|
+
--from-tune)
|
|
54
|
+
FROM_TUNE="true"
|
|
55
|
+
shift
|
|
56
|
+
;;
|
|
57
|
+
--model-data)
|
|
58
|
+
if [ -z "${2:-}" ]; then
|
|
59
|
+
echo "❌ --model-data requires an S3 URI argument"
|
|
60
|
+
echo " Usage: ./do/add-ic <name> --model-data <s3-uri>"
|
|
61
|
+
exit 1
|
|
62
|
+
fi
|
|
63
|
+
MODEL_DATA="$2"
|
|
64
|
+
shift 2
|
|
65
|
+
;;
|
|
66
|
+
--help|-h)
|
|
67
|
+
_usage
|
|
68
|
+
exit 0
|
|
69
|
+
;;
|
|
70
|
+
-*)
|
|
71
|
+
echo "❌ Unknown option: $1"
|
|
72
|
+
_usage
|
|
73
|
+
exit 1
|
|
74
|
+
;;
|
|
75
|
+
*)
|
|
76
|
+
if [ -z "${IC_NAME}" ]; then
|
|
77
|
+
IC_NAME="$1"
|
|
78
|
+
else
|
|
79
|
+
echo "❌ Unexpected argument: $1"
|
|
80
|
+
_usage
|
|
81
|
+
exit 1
|
|
82
|
+
fi
|
|
83
|
+
shift
|
|
84
|
+
;;
|
|
85
|
+
esac
|
|
86
|
+
done
|
|
87
|
+
|
|
88
|
+
# ============================================================
|
|
89
|
+
# Mutual exclusivity check
|
|
90
|
+
# ============================================================
|
|
91
|
+
if [ -n "${FROM_TUNE}" ] && [ -n "${MODEL_DATA}" ]; then
|
|
92
|
+
echo "❌ --from-tune and --model-data are mutually exclusive"
|
|
93
|
+
echo ""
|
|
94
|
+
echo " Use one of:"
|
|
95
|
+
echo " ./do/add-ic <name> --from-tune"
|
|
96
|
+
echo " ./do/add-ic <name> --model-data <s3-uri>"
|
|
97
|
+
exit 1
|
|
98
|
+
fi
|
|
99
|
+
|
|
100
|
+
# ============================================================
|
|
101
|
+
# Resolve --from-tune to MODEL_DATA
|
|
102
|
+
# ============================================================
|
|
103
|
+
if [ -n "${FROM_TUNE}" ]; then
|
|
104
|
+
if [ -z "${TUNE_OUTPUT_PATH_LATEST:-}" ]; then
|
|
105
|
+
echo "❌ No tune output found."
|
|
106
|
+
echo ""
|
|
107
|
+
echo " TUNE_OUTPUT_PATH_LATEST is not set in do/config."
|
|
108
|
+
echo ""
|
|
109
|
+
echo " Run a tune job first:"
|
|
110
|
+
echo " ./do/tune --technique <technique> --dataset <source>"
|
|
111
|
+
exit 1
|
|
112
|
+
fi
|
|
113
|
+
|
|
114
|
+
MODEL_DATA="${TUNE_OUTPUT_PATH_LATEST}"
|
|
115
|
+
echo "📦 Using tune output: ${MODEL_DATA}"
|
|
116
|
+
echo ""
|
|
117
|
+
fi
|
|
118
|
+
|
|
15
119
|
echo "➕ Add New Inference Component"
|
|
16
120
|
echo " Project: ${PROJECT_NAME}"
|
|
121
|
+
if [ -n "${MODEL_DATA}" ]; then
|
|
122
|
+
echo " Model data: ${MODEL_DATA}"
|
|
123
|
+
fi
|
|
17
124
|
echo ""
|
|
18
125
|
|
|
19
126
|
# ============================================================
|
|
20
|
-
# Prompt for IC name
|
|
127
|
+
# Prompt for IC name (if not provided as argument)
|
|
21
128
|
# ============================================================
|
|
22
|
-
|
|
23
|
-
|
|
129
|
+
if [ -z "${IC_NAME}" ]; then
|
|
130
|
+
while true; do
|
|
131
|
+
read -p "IC name (lowercase alphanumeric + hyphens): " IC_NAME
|
|
24
132
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
133
|
+
# Validate: non-empty
|
|
134
|
+
if [ -z "${IC_NAME}" ]; then
|
|
135
|
+
echo " ❌ IC name cannot be empty."
|
|
136
|
+
continue
|
|
137
|
+
fi
|
|
138
|
+
|
|
139
|
+
# Validate: lowercase alphanumeric + hyphens only
|
|
140
|
+
if ! echo "${IC_NAME}" | grep -qE '^[a-z0-9]([a-z0-9-]*[a-z0-9])?$'; then
|
|
141
|
+
echo " ❌ IC name must be lowercase alphanumeric with hyphens (e.g., 'llama-70b')."
|
|
142
|
+
echo " Must start and end with a letter or number."
|
|
143
|
+
continue
|
|
144
|
+
fi
|
|
30
145
|
|
|
31
|
-
|
|
146
|
+
# Validate: no collision with existing config
|
|
147
|
+
if [ -f "${SCRIPT_DIR}/ic/${IC_NAME}.conf" ]; then
|
|
148
|
+
echo " ❌ IC config already exists: do/ic/${IC_NAME}.conf"
|
|
149
|
+
echo " Choose a different name or edit the existing config."
|
|
150
|
+
continue
|
|
151
|
+
fi
|
|
152
|
+
|
|
153
|
+
break
|
|
154
|
+
done
|
|
155
|
+
else
|
|
156
|
+
# Validate provided IC name
|
|
32
157
|
if ! echo "${IC_NAME}" | grep -qE '^[a-z0-9]([a-z0-9-]*[a-z0-9])?$'; then
|
|
33
|
-
echo "
|
|
34
|
-
echo "
|
|
35
|
-
|
|
158
|
+
echo "❌ Invalid IC name: ${IC_NAME}"
|
|
159
|
+
echo " IC name must be lowercase alphanumeric with hyphens (e.g., 'llama-70b')."
|
|
160
|
+
echo " Must start and end with a letter or number."
|
|
161
|
+
exit 1
|
|
36
162
|
fi
|
|
37
163
|
|
|
38
|
-
# Validate: no collision with existing config
|
|
39
164
|
if [ -f "${SCRIPT_DIR}/ic/${IC_NAME}.conf" ]; then
|
|
40
|
-
echo "
|
|
41
|
-
echo "
|
|
42
|
-
|
|
165
|
+
echo "❌ IC config already exists: do/ic/${IC_NAME}.conf"
|
|
166
|
+
echo " Choose a different name or edit the existing config."
|
|
167
|
+
exit 1
|
|
43
168
|
fi
|
|
44
|
-
|
|
45
|
-
break
|
|
46
|
-
done
|
|
169
|
+
fi
|
|
47
170
|
|
|
48
171
|
# ============================================================
|
|
49
172
|
# Prompt for image tag
|
|
@@ -107,6 +230,16 @@ export IC_GPU_COUNT=${IC_GPU_COUNT}
|
|
|
107
230
|
export IC_COPY_COUNT=${IC_COPY_COUNT}
|
|
108
231
|
export IC_MIN_MEMORY_MB=${IC_MIN_MEMORY_MB}
|
|
109
232
|
export IC_STARTUP_TIMEOUT=900
|
|
233
|
+
EOF
|
|
234
|
+
|
|
235
|
+
# Add model data if provided (from --from-tune or --model-data)
|
|
236
|
+
if [ -n "${MODEL_DATA}" ]; then
|
|
237
|
+
cat >> "${IC_CONF_PATH}" <<EOF
|
|
238
|
+
export IC_MODEL_DATA="${MODEL_DATA}"
|
|
239
|
+
EOF
|
|
240
|
+
fi
|
|
241
|
+
|
|
242
|
+
cat >> "${IC_CONF_PATH}" <<EOF
|
|
110
243
|
|
|
111
244
|
# Optional overrides:
|
|
112
245
|
# export IC_MODEL_NAME="my-model-v2"
|
|
@@ -120,6 +253,9 @@ echo " Image tag: ${IC_IMAGE_TAG}"
|
|
|
120
253
|
echo " GPU count: ${IC_GPU_COUNT}"
|
|
121
254
|
echo " Copy count: ${IC_COPY_COUNT}"
|
|
122
255
|
echo " Memory MB: ${IC_MIN_MEMORY_MB}"
|
|
256
|
+
if [ -n "${MODEL_DATA}" ]; then
|
|
257
|
+
echo " Model data: ${MODEL_DATA}"
|
|
258
|
+
fi
|
|
123
259
|
echo ""
|
|
124
260
|
|
|
125
261
|
# ============================================================
|
package/templates/do/config
CHANGED
|
@@ -13,6 +13,7 @@ export MODEL_SERVER="<%= modelServer %>"
|
|
|
13
13
|
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
14
14
|
# LoRA adapter serving
|
|
15
15
|
export ENABLE_LORA=true
|
|
16
|
+
export ADAPTER_S3_BUCKET="mlcc-adapters-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
|
|
16
17
|
<% } %>
|
|
17
18
|
|
|
18
19
|
# AWS configuration
|
|
@@ -67,7 +68,7 @@ ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/nu
|
|
|
67
68
|
<% if (asyncS3OutputPath) { %>
|
|
68
69
|
export ASYNC_S3_OUTPUT_PATH="<%= asyncS3OutputPath %>"
|
|
69
70
|
<% } else { %>
|
|
70
|
-
export ASYNC_S3_OUTPUT_PATH="s3://
|
|
71
|
+
export ASYNC_S3_OUTPUT_PATH="s3://mlcc-async-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
|
|
71
72
|
<% } %>
|
|
72
73
|
|
|
73
74
|
<% if (asyncSnsSuccessTopic) { %>
|
|
@@ -107,12 +108,12 @@ ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/nu
|
|
|
107
108
|
<% if (batchInputPath) { %>
|
|
108
109
|
export BATCH_INPUT_PATH="<%= batchInputPath %>"
|
|
109
110
|
<% } else { %>
|
|
110
|
-
export BATCH_INPUT_PATH="s3://
|
|
111
|
+
export BATCH_INPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/input/"
|
|
111
112
|
<% } %>
|
|
112
113
|
<% if (batchOutputPath) { %>
|
|
113
114
|
export BATCH_OUTPUT_PATH="<%= batchOutputPath %>"
|
|
114
115
|
<% } else { %>
|
|
115
|
-
export BATCH_OUTPUT_PATH="s3://
|
|
116
|
+
export BATCH_OUTPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
|
|
116
117
|
<% } %>
|
|
117
118
|
export BATCH_INSTANCE_COUNT="<%= batchInstanceCount %>"
|
|
118
119
|
export BATCH_SPLIT_TYPE="<%= batchSplitType %>"
|
|
@@ -187,6 +188,12 @@ export NGC_API_KEY_ARN="<%= ngcTokenArn %>"
|
|
|
187
188
|
<% } else if (ngcApiKey) { %>
|
|
188
189
|
export NGC_API_KEY="<%= ngcApiKey %>"
|
|
189
190
|
<% } %>
|
|
191
|
+
|
|
192
|
+
<% if (deploymentTarget !== 'batch-transform') { %>
|
|
193
|
+
# Managed Model Customization (do/tune)
|
|
194
|
+
export TUNE_SUPPORTED=<%= (typeof tuneSupported !== 'undefined' && tuneSupported) ? 'true' : 'false' %>
|
|
195
|
+
export TUNE_S3_BUCKET="mlcc-tune-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
|
|
196
|
+
<% } %>
|
|
190
197
|
<% } %>
|
|
191
198
|
|
|
192
199
|
<% if (framework === 'diffusors') { %>
|
|
@@ -223,7 +230,7 @@ export BENCHMARK_REQUEST_COUNT=""
|
|
|
223
230
|
<% if (benchmarkS3OutputPath) { %>
|
|
224
231
|
export BENCHMARK_S3_OUTPUT_PATH="<%= benchmarkS3OutputPath %>"
|
|
225
232
|
<% } else { %>
|
|
226
|
-
export BENCHMARK_S3_OUTPUT_PATH="s3://
|
|
233
|
+
export BENCHMARK_S3_OUTPUT_PATH="s3://mlcc-benchmark-$(aws sts get-caller-identity --query Account --output text)-${AWS_REGION}/${PROJECT_NAME}/"
|
|
227
234
|
<% } %>
|
|
228
235
|
export BENCHMARK_JOB_NAME=""
|
|
229
236
|
export BENCHMARK_WORKLOAD_CONFIG_NAME=""
|