@aws/ml-container-creator 0.6.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +9 -0
- package/config/bootstrap-stack.json +69 -3
- package/package.json +1 -1
- package/src/app.js +29 -1
- package/src/lib/bootstrap-command-handler.js +68 -26
- package/src/lib/tune-catalog-validator.js +143 -0
- package/src/lib/tune-config-state.js +116 -0
- package/src/lib/tune-dataset-validator.js +279 -0
- package/src/lib/tune-output-resolver.js +66 -0
- package/templates/Dockerfile +2 -0
- package/templates/code/cw_log_forwarder.py +64 -0
- package/templates/code/serve +12 -1
- package/templates/deploy_notebook_generator.py +897 -0
- package/templates/do/.tune_helper.py +768 -0
- package/templates/do/adapter +107 -12
- package/templates/do/add-ic +155 -19
- package/templates/do/config +6 -0
- package/templates/do/export +19 -2
- package/templates/do/lib/endpoint-config.sh +3 -1
- package/templates/do/lib/inference-component.sh +5 -1
- package/templates/do/tune +1143 -0
package/templates/do/adapter
CHANGED
|
@@ -29,6 +29,7 @@ _usage() {
|
|
|
29
29
|
echo "Commands:"
|
|
30
30
|
echo " add <name> --weights <s3-uri> Add a new LoRA adapter from S3"
|
|
31
31
|
echo " add <name> --from-hub <hf-repo-id> Add a new LoRA adapter from HuggingFace Hub"
|
|
32
|
+
echo " add <name> --from-tune [technique] Add adapter from do/tune output"
|
|
32
33
|
echo " list List all adapters on the endpoint"
|
|
33
34
|
echo " remove <name> Remove an adapter"
|
|
34
35
|
echo " update <name> --weights <new-s3-uri> Update adapter weights from S3"
|
|
@@ -41,6 +42,8 @@ _usage() {
|
|
|
41
42
|
echo "Examples:"
|
|
42
43
|
echo " ./do/adapter add ectsum --weights s3://my-bucket/adapters/ectsum/adapter.tar.gz"
|
|
43
44
|
echo " ./do/adapter add ectsum --from-hub predibase/llama-3.1-8b-ectsum"
|
|
45
|
+
echo " ./do/adapter add tuned-sft --from-tune"
|
|
46
|
+
echo " ./do/adapter add tuned-sft --from-tune sft"
|
|
44
47
|
echo " ./do/adapter list"
|
|
45
48
|
echo " ./do/adapter remove ectsum"
|
|
46
49
|
echo " ./do/adapter update ectsum --weights s3://my-bucket/adapters/ectsum-v2/adapter.tar.gz"
|
|
@@ -48,7 +51,7 @@ _usage() {
|
|
|
48
51
|
echo ""
|
|
49
52
|
echo "Adapter metadata is stored in do/adapters/<name>.conf"
|
|
50
53
|
echo ""
|
|
51
|
-
echo "Note: --weights and --from-hub are mutually exclusive."
|
|
54
|
+
echo "Note: --weights, --from-hub, and --from-tune are mutually exclusive."
|
|
52
55
|
}
|
|
53
56
|
|
|
54
57
|
# ── Validate LoRA is enabled ──────────────────────────────────────────────────
|
|
@@ -357,6 +360,8 @@ _adapter_add() {
|
|
|
357
360
|
local adapter_name=""
|
|
358
361
|
local weights_uri=""
|
|
359
362
|
local from_hub=""
|
|
363
|
+
local from_tune=""
|
|
364
|
+
local from_tune_technique=""
|
|
360
365
|
|
|
361
366
|
# Parse add arguments
|
|
362
367
|
shift # remove 'add' from args
|
|
@@ -380,28 +385,45 @@ _adapter_add() {
|
|
|
380
385
|
from_hub="$2"
|
|
381
386
|
shift 2
|
|
382
387
|
;;
|
|
388
|
+
--from-tune)
|
|
389
|
+
from_tune="true"
|
|
390
|
+
# Check if next argument is a technique (not another flag and not empty)
|
|
391
|
+
if [ -n "${2:-}" ] && [[ "${2}" != -* ]]; then
|
|
392
|
+
from_tune_technique="$2"
|
|
393
|
+
shift 2
|
|
394
|
+
else
|
|
395
|
+
shift
|
|
396
|
+
fi
|
|
397
|
+
;;
|
|
383
398
|
--help|-h)
|
|
384
399
|
echo "Usage: ./do/adapter add <name> --weights <s3-uri>"
|
|
385
400
|
echo " ./do/adapter add <name> --from-hub <hf-repo-id>"
|
|
401
|
+
echo " ./do/adapter add <name> --from-tune [technique]"
|
|
386
402
|
echo ""
|
|
387
403
|
echo "Add a new LoRA adapter to the endpoint."
|
|
388
404
|
echo ""
|
|
389
405
|
echo "Arguments:"
|
|
390
|
-
echo " <name>
|
|
391
|
-
echo " --weights <s3-uri>
|
|
392
|
-
echo " --from-hub <hf-repo-id>
|
|
406
|
+
echo " <name> Adapter name (lowercase alphanumeric + hyphens, 1-50 chars)"
|
|
407
|
+
echo " --weights <s3-uri> S3 URI to adapter weights (.tar.gz)"
|
|
408
|
+
echo " --from-hub <hf-repo-id> Download adapter from HuggingFace Hub"
|
|
409
|
+
echo " --from-tune [technique] Use adapter output from do/tune"
|
|
410
|
+
echo " Without technique: uses latest tune output"
|
|
411
|
+
echo " With technique (e.g., sft, dpo): uses technique-specific output"
|
|
393
412
|
echo ""
|
|
394
|
-
echo "Note: --weights and --from-hub are mutually exclusive."
|
|
413
|
+
echo "Note: --weights, --from-hub, and --from-tune are mutually exclusive."
|
|
395
414
|
echo ""
|
|
396
415
|
echo "Examples:"
|
|
397
416
|
echo " ./do/adapter add ectsum --weights s3://bucket/adapters/ectsum/adapter.tar.gz"
|
|
398
417
|
echo " ./do/adapter add ectsum --from-hub predibase/llama-3.1-8b-ectsum"
|
|
418
|
+
echo " ./do/adapter add tuned-sft --from-tune"
|
|
419
|
+
echo " ./do/adapter add tuned-sft --from-tune sft"
|
|
399
420
|
exit 0
|
|
400
421
|
;;
|
|
401
422
|
-*)
|
|
402
423
|
echo "❌ Unknown option: $1"
|
|
403
424
|
echo " Usage: ./do/adapter add <name> --weights <s3-uri>"
|
|
404
425
|
echo " ./do/adapter add <name> --from-hub <hf-repo-id>"
|
|
426
|
+
echo " ./do/adapter add <name> --from-tune [technique]"
|
|
405
427
|
exit 1
|
|
406
428
|
;;
|
|
407
429
|
*)
|
|
@@ -411,6 +433,7 @@ _adapter_add() {
|
|
|
411
433
|
echo "❌ Unexpected argument: $1"
|
|
412
434
|
echo " Usage: ./do/adapter add <name> --weights <s3-uri>"
|
|
413
435
|
echo " ./do/adapter add <name> --from-hub <hf-repo-id>"
|
|
436
|
+
echo " ./do/adapter add <name> --from-tune [technique]"
|
|
414
437
|
exit 1
|
|
415
438
|
fi
|
|
416
439
|
shift
|
|
@@ -423,26 +446,85 @@ _adapter_add() {
|
|
|
423
446
|
echo "❌ Adapter name is required"
|
|
424
447
|
echo " Usage: ./do/adapter add <name> --weights <s3-uri>"
|
|
425
448
|
echo " ./do/adapter add <name> --from-hub <hf-repo-id>"
|
|
449
|
+
echo " ./do/adapter add <name> --from-tune [technique]"
|
|
426
450
|
exit 1
|
|
427
451
|
fi
|
|
428
452
|
|
|
429
453
|
# ── Mutual exclusivity check ─────────────────────────────────────────
|
|
430
|
-
|
|
431
|
-
|
|
454
|
+
local source_count=0
|
|
455
|
+
[ -n "${weights_uri}" ] && source_count=$((source_count + 1))
|
|
456
|
+
[ -n "${from_hub}" ] && source_count=$((source_count + 1))
|
|
457
|
+
[ -n "${from_tune}" ] && source_count=$((source_count + 1))
|
|
458
|
+
|
|
459
|
+
if [ "${source_count}" -gt 1 ]; then
|
|
460
|
+
echo "❌ --weights, --from-hub, and --from-tune are mutually exclusive"
|
|
432
461
|
echo ""
|
|
433
|
-
echo " Use one
|
|
462
|
+
echo " Use one of:"
|
|
434
463
|
echo " ./do/adapter add ${adapter_name} --weights <s3-uri>"
|
|
435
464
|
echo " ./do/adapter add ${adapter_name} --from-hub <hf-repo-id>"
|
|
465
|
+
echo " ./do/adapter add ${adapter_name} --from-tune [technique]"
|
|
436
466
|
exit 1
|
|
437
467
|
fi
|
|
438
468
|
|
|
439
|
-
if [
|
|
440
|
-
echo "❌
|
|
469
|
+
if [ "${source_count}" -eq 0 ]; then
|
|
470
|
+
echo "❌ One of --weights, --from-hub, or --from-tune is required"
|
|
441
471
|
echo " Usage: ./do/adapter add <name> --weights <s3-uri>"
|
|
442
472
|
echo " ./do/adapter add <name> --from-hub <hf-repo-id>"
|
|
473
|
+
echo " ./do/adapter add <name> --from-tune [technique]"
|
|
443
474
|
exit 1
|
|
444
475
|
fi
|
|
445
476
|
|
|
477
|
+
# ── Resolve --from-tune to weights_uri ────────────────────────────────
|
|
478
|
+
if [ -n "${from_tune}" ]; then
|
|
479
|
+
if [ -n "${from_tune_technique}" ]; then
|
|
480
|
+
# Technique-specific: read TUNE_ADAPTER_PATH_<TECHNIQUE>
|
|
481
|
+
local technique_upper
|
|
482
|
+
technique_upper=$(echo "${from_tune_technique}" | tr '[:lower:]' '[:upper:]')
|
|
483
|
+
local tune_var="TUNE_ADAPTER_PATH_${technique_upper}"
|
|
484
|
+
local tune_path="${!tune_var:-}"
|
|
485
|
+
|
|
486
|
+
if [ -z "${tune_path}" ]; then
|
|
487
|
+
echo "❌ No adapter output found for technique: ${from_tune_technique}"
|
|
488
|
+
echo ""
|
|
489
|
+
echo " ${tune_var} is not set in do/config."
|
|
490
|
+
echo ""
|
|
491
|
+
echo " Run a tune job first:"
|
|
492
|
+
echo " ./do/tune --technique ${from_tune_technique} --dataset <source>"
|
|
493
|
+
exit 1
|
|
494
|
+
fi
|
|
495
|
+
|
|
496
|
+
weights_uri="${tune_path}"
|
|
497
|
+
echo "📦 Using tune adapter output for technique '${from_tune_technique}': ${weights_uri}"
|
|
498
|
+
else
|
|
499
|
+
# No technique: read TUNE_OUTPUT_PATH_LATEST and verify type
|
|
500
|
+
if [ -z "${TUNE_OUTPUT_PATH_LATEST:-}" ]; then
|
|
501
|
+
echo "❌ No tune output found."
|
|
502
|
+
echo ""
|
|
503
|
+
echo " TUNE_OUTPUT_PATH_LATEST is not set in do/config."
|
|
504
|
+
echo ""
|
|
505
|
+
echo " Run a tune job first:"
|
|
506
|
+
echo " ./do/tune --technique <technique> --dataset <source>"
|
|
507
|
+
exit 1
|
|
508
|
+
fi
|
|
509
|
+
|
|
510
|
+
# Verify output type is adapter (not full-model)
|
|
511
|
+
if [ "${TUNE_OUTPUT_TYPE_LATEST:-}" = "full-model" ]; then
|
|
512
|
+
echo "❌ Latest tune output is a full model, not an adapter."
|
|
513
|
+
echo ""
|
|
514
|
+
echo " TUNE_OUTPUT_TYPE_LATEST=full-model"
|
|
515
|
+
echo ""
|
|
516
|
+
echo " Full model outputs cannot be added as adapters."
|
|
517
|
+
echo " Use do/add-ic instead:"
|
|
518
|
+
echo " ./do/add-ic ${adapter_name} --from-tune"
|
|
519
|
+
exit 1
|
|
520
|
+
fi
|
|
521
|
+
|
|
522
|
+
weights_uri="${TUNE_OUTPUT_PATH_LATEST}"
|
|
523
|
+
echo "📦 Using latest tune adapter output: ${weights_uri}"
|
|
524
|
+
fi
|
|
525
|
+
echo ""
|
|
526
|
+
fi
|
|
527
|
+
|
|
446
528
|
# ── Validate HF repo ID format (if --from-hub) ───────────────────────
|
|
447
529
|
if [ -n "${from_hub}" ]; then
|
|
448
530
|
# Valid formats: "org/name" or "name" (alphanumeric, hyphens, underscores, dots)
|
|
@@ -468,8 +550,8 @@ _adapter_add() {
|
|
|
468
550
|
exit 1
|
|
469
551
|
fi
|
|
470
552
|
|
|
471
|
-
# ── Validate S3 URI format (only when --weights is used)
|
|
472
|
-
if [ -n "${weights_uri}" ]; then
|
|
553
|
+
# ── Validate S3 URI format (only when --weights is explicitly used) ──
|
|
554
|
+
if [ -n "${weights_uri}" ] && [ -z "${from_hub}" ] && [ -z "${from_tune}" ]; then
|
|
473
555
|
if ! echo "${weights_uri}" | grep -qE '^s3://.*\.tar\.gz$'; then
|
|
474
556
|
echo "❌ Invalid S3 URI: ${weights_uri}"
|
|
475
557
|
echo ""
|
|
@@ -495,6 +577,9 @@ _adapter_add() {
|
|
|
495
577
|
echo "🔌 Adding adapter: ${adapter_name}"
|
|
496
578
|
if [ -n "${from_hub}" ]; then
|
|
497
579
|
echo " Source: HuggingFace Hub (${from_hub})"
|
|
580
|
+
elif [ -n "${from_tune}" ]; then
|
|
581
|
+
echo " Source: do/tune output"
|
|
582
|
+
echo " Weights: ${weights_uri}"
|
|
498
583
|
else
|
|
499
584
|
echo " Weights: ${weights_uri}"
|
|
500
585
|
fi
|
|
@@ -593,6 +678,14 @@ export ADAPTER_HF_REPO="${from_hub}"
|
|
|
593
678
|
EOF
|
|
594
679
|
fi
|
|
595
680
|
|
|
681
|
+
# Add tune-specific metadata if --from-tune was used
|
|
682
|
+
if [ -n "${from_tune}" ]; then
|
|
683
|
+
cat >> "${SCRIPT_DIR}/adapters/${adapter_name}.conf" <<EOF
|
|
684
|
+
export ADAPTER_SOURCE="tune"
|
|
685
|
+
export ADAPTER_TUNE_TECHNIQUE="${from_tune_technique:-latest}"
|
|
686
|
+
EOF
|
|
687
|
+
fi
|
|
688
|
+
|
|
596
689
|
echo ""
|
|
597
690
|
echo "✅ Adapter added successfully!"
|
|
598
691
|
echo ""
|
|
@@ -602,6 +695,8 @@ EOF
|
|
|
602
695
|
echo " Weights: ${weights_uri}"
|
|
603
696
|
if [ -n "${from_hub}" ]; then
|
|
604
697
|
echo " Source: HuggingFace Hub (${from_hub})"
|
|
698
|
+
elif [ -n "${from_tune}" ]; then
|
|
699
|
+
echo " Source: do/tune (${from_tune_technique:-latest})"
|
|
605
700
|
fi
|
|
606
701
|
echo " Created: ${created_at}"
|
|
607
702
|
echo ""
|
package/templates/do/add-ic
CHANGED
|
@@ -4,6 +4,10 @@
|
|
|
4
4
|
#
|
|
5
5
|
# Add a new inference component to this project.
|
|
6
6
|
# Creates a new IC config file in do/ic/ and deploys it immediately.
|
|
7
|
+
#
|
|
8
|
+
# Usage:
|
|
9
|
+
# ./do/add-ic [name] [--from-tune] [--model-data <s3-uri>]
|
|
10
|
+
# ./do/add-ic --help
|
|
7
11
|
|
|
8
12
|
set -e
|
|
9
13
|
set -u
|
|
@@ -12,38 +16,157 @@ set -o pipefail
|
|
|
12
16
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
13
17
|
source "${SCRIPT_DIR}/config"
|
|
14
18
|
|
|
19
|
+
# ============================================================
|
|
20
|
+
# Usage
|
|
21
|
+
# ============================================================
|
|
22
|
+
_usage() {
|
|
23
|
+
echo "Usage: ./do/add-ic [name] [options]"
|
|
24
|
+
echo ""
|
|
25
|
+
echo "Add a new inference component to this project."
|
|
26
|
+
echo ""
|
|
27
|
+
echo "Arguments:"
|
|
28
|
+
echo " [name] IC name (optional, prompted if not provided)"
|
|
29
|
+
echo ""
|
|
30
|
+
echo "Options:"
|
|
31
|
+
echo " --from-tune Use model output from do/tune (reads TUNE_OUTPUT_PATH_LATEST)"
|
|
32
|
+
echo " --model-data <s3-uri> S3 URI to model data (tar.gz or model directory)"
|
|
33
|
+
echo " --help, -h Show this help message"
|
|
34
|
+
echo ""
|
|
35
|
+
echo "Note: --from-tune and --model-data are mutually exclusive."
|
|
36
|
+
echo ""
|
|
37
|
+
echo "Examples:"
|
|
38
|
+
echo " ./do/add-ic # Interactive mode"
|
|
39
|
+
echo " ./do/add-ic tuned-v1 --from-tune # Use latest tune output"
|
|
40
|
+
echo " ./do/add-ic tuned-v1 --model-data s3://bucket/model.tar.gz"
|
|
41
|
+
echo ""
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
# ============================================================
|
|
45
|
+
# Parse arguments
|
|
46
|
+
# ============================================================
|
|
47
|
+
IC_NAME=""
|
|
48
|
+
FROM_TUNE=""
|
|
49
|
+
MODEL_DATA=""
|
|
50
|
+
|
|
51
|
+
while [ $# -gt 0 ]; do
|
|
52
|
+
case "$1" in
|
|
53
|
+
--from-tune)
|
|
54
|
+
FROM_TUNE="true"
|
|
55
|
+
shift
|
|
56
|
+
;;
|
|
57
|
+
--model-data)
|
|
58
|
+
if [ -z "${2:-}" ]; then
|
|
59
|
+
echo "❌ --model-data requires an S3 URI argument"
|
|
60
|
+
echo " Usage: ./do/add-ic <name> --model-data <s3-uri>"
|
|
61
|
+
exit 1
|
|
62
|
+
fi
|
|
63
|
+
MODEL_DATA="$2"
|
|
64
|
+
shift 2
|
|
65
|
+
;;
|
|
66
|
+
--help|-h)
|
|
67
|
+
_usage
|
|
68
|
+
exit 0
|
|
69
|
+
;;
|
|
70
|
+
-*)
|
|
71
|
+
echo "❌ Unknown option: $1"
|
|
72
|
+
_usage
|
|
73
|
+
exit 1
|
|
74
|
+
;;
|
|
75
|
+
*)
|
|
76
|
+
if [ -z "${IC_NAME}" ]; then
|
|
77
|
+
IC_NAME="$1"
|
|
78
|
+
else
|
|
79
|
+
echo "❌ Unexpected argument: $1"
|
|
80
|
+
_usage
|
|
81
|
+
exit 1
|
|
82
|
+
fi
|
|
83
|
+
shift
|
|
84
|
+
;;
|
|
85
|
+
esac
|
|
86
|
+
done
|
|
87
|
+
|
|
88
|
+
# ============================================================
|
|
89
|
+
# Mutual exclusivity check
|
|
90
|
+
# ============================================================
|
|
91
|
+
if [ -n "${FROM_TUNE}" ] && [ -n "${MODEL_DATA}" ]; then
|
|
92
|
+
echo "❌ --from-tune and --model-data are mutually exclusive"
|
|
93
|
+
echo ""
|
|
94
|
+
echo " Use one of:"
|
|
95
|
+
echo " ./do/add-ic <name> --from-tune"
|
|
96
|
+
echo " ./do/add-ic <name> --model-data <s3-uri>"
|
|
97
|
+
exit 1
|
|
98
|
+
fi
|
|
99
|
+
|
|
100
|
+
# ============================================================
|
|
101
|
+
# Resolve --from-tune to MODEL_DATA
|
|
102
|
+
# ============================================================
|
|
103
|
+
if [ -n "${FROM_TUNE}" ]; then
|
|
104
|
+
if [ -z "${TUNE_OUTPUT_PATH_LATEST:-}" ]; then
|
|
105
|
+
echo "❌ No tune output found."
|
|
106
|
+
echo ""
|
|
107
|
+
echo " TUNE_OUTPUT_PATH_LATEST is not set in do/config."
|
|
108
|
+
echo ""
|
|
109
|
+
echo " Run a tune job first:"
|
|
110
|
+
echo " ./do/tune --technique <technique> --dataset <source>"
|
|
111
|
+
exit 1
|
|
112
|
+
fi
|
|
113
|
+
|
|
114
|
+
MODEL_DATA="${TUNE_OUTPUT_PATH_LATEST}"
|
|
115
|
+
echo "📦 Using tune output: ${MODEL_DATA}"
|
|
116
|
+
echo ""
|
|
117
|
+
fi
|
|
118
|
+
|
|
15
119
|
echo "➕ Add New Inference Component"
|
|
16
120
|
echo " Project: ${PROJECT_NAME}"
|
|
121
|
+
if [ -n "${MODEL_DATA}" ]; then
|
|
122
|
+
echo " Model data: ${MODEL_DATA}"
|
|
123
|
+
fi
|
|
17
124
|
echo ""
|
|
18
125
|
|
|
19
126
|
# ============================================================
|
|
20
|
-
# Prompt for IC name
|
|
127
|
+
# Prompt for IC name (if not provided as argument)
|
|
21
128
|
# ============================================================
|
|
22
|
-
|
|
23
|
-
|
|
129
|
+
if [ -z "${IC_NAME}" ]; then
|
|
130
|
+
while true; do
|
|
131
|
+
read -p "IC name (lowercase alphanumeric + hyphens): " IC_NAME
|
|
24
132
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
133
|
+
# Validate: non-empty
|
|
134
|
+
if [ -z "${IC_NAME}" ]; then
|
|
135
|
+
echo " ❌ IC name cannot be empty."
|
|
136
|
+
continue
|
|
137
|
+
fi
|
|
138
|
+
|
|
139
|
+
# Validate: lowercase alphanumeric + hyphens only
|
|
140
|
+
if ! echo "${IC_NAME}" | grep -qE '^[a-z0-9]([a-z0-9-]*[a-z0-9])?$'; then
|
|
141
|
+
echo " ❌ IC name must be lowercase alphanumeric with hyphens (e.g., 'llama-70b')."
|
|
142
|
+
echo " Must start and end with a letter or number."
|
|
143
|
+
continue
|
|
144
|
+
fi
|
|
30
145
|
|
|
31
|
-
|
|
146
|
+
# Validate: no collision with existing config
|
|
147
|
+
if [ -f "${SCRIPT_DIR}/ic/${IC_NAME}.conf" ]; then
|
|
148
|
+
echo " ❌ IC config already exists: do/ic/${IC_NAME}.conf"
|
|
149
|
+
echo " Choose a different name or edit the existing config."
|
|
150
|
+
continue
|
|
151
|
+
fi
|
|
152
|
+
|
|
153
|
+
break
|
|
154
|
+
done
|
|
155
|
+
else
|
|
156
|
+
# Validate provided IC name
|
|
32
157
|
if ! echo "${IC_NAME}" | grep -qE '^[a-z0-9]([a-z0-9-]*[a-z0-9])?$'; then
|
|
33
|
-
echo "
|
|
34
|
-
echo "
|
|
35
|
-
|
|
158
|
+
echo "❌ Invalid IC name: ${IC_NAME}"
|
|
159
|
+
echo " IC name must be lowercase alphanumeric with hyphens (e.g., 'llama-70b')."
|
|
160
|
+
echo " Must start and end with a letter or number."
|
|
161
|
+
exit 1
|
|
36
162
|
fi
|
|
37
163
|
|
|
38
|
-
# Validate: no collision with existing config
|
|
39
164
|
if [ -f "${SCRIPT_DIR}/ic/${IC_NAME}.conf" ]; then
|
|
40
|
-
echo "
|
|
41
|
-
echo "
|
|
42
|
-
|
|
165
|
+
echo "❌ IC config already exists: do/ic/${IC_NAME}.conf"
|
|
166
|
+
echo " Choose a different name or edit the existing config."
|
|
167
|
+
exit 1
|
|
43
168
|
fi
|
|
44
|
-
|
|
45
|
-
break
|
|
46
|
-
done
|
|
169
|
+
fi
|
|
47
170
|
|
|
48
171
|
# ============================================================
|
|
49
172
|
# Prompt for image tag
|
|
@@ -107,6 +230,16 @@ export IC_GPU_COUNT=${IC_GPU_COUNT}
|
|
|
107
230
|
export IC_COPY_COUNT=${IC_COPY_COUNT}
|
|
108
231
|
export IC_MIN_MEMORY_MB=${IC_MIN_MEMORY_MB}
|
|
109
232
|
export IC_STARTUP_TIMEOUT=900
|
|
233
|
+
EOF
|
|
234
|
+
|
|
235
|
+
# Add model data if provided (from --from-tune or --model-data)
|
|
236
|
+
if [ -n "${MODEL_DATA}" ]; then
|
|
237
|
+
cat >> "${IC_CONF_PATH}" <<EOF
|
|
238
|
+
export IC_MODEL_DATA="${MODEL_DATA}"
|
|
239
|
+
EOF
|
|
240
|
+
fi
|
|
241
|
+
|
|
242
|
+
cat >> "${IC_CONF_PATH}" <<EOF
|
|
110
243
|
|
|
111
244
|
# Optional overrides:
|
|
112
245
|
# export IC_MODEL_NAME="my-model-v2"
|
|
@@ -120,6 +253,9 @@ echo " Image tag: ${IC_IMAGE_TAG}"
|
|
|
120
253
|
echo " GPU count: ${IC_GPU_COUNT}"
|
|
121
254
|
echo " Copy count: ${IC_COPY_COUNT}"
|
|
122
255
|
echo " Memory MB: ${IC_MIN_MEMORY_MB}"
|
|
256
|
+
if [ -n "${MODEL_DATA}" ]; then
|
|
257
|
+
echo " Model data: ${MODEL_DATA}"
|
|
258
|
+
fi
|
|
123
259
|
echo ""
|
|
124
260
|
|
|
125
261
|
# ============================================================
|
package/templates/do/config
CHANGED
|
@@ -188,6 +188,12 @@ export NGC_API_KEY_ARN="<%= ngcTokenArn %>"
|
|
|
188
188
|
<% } else if (ngcApiKey) { %>
|
|
189
189
|
export NGC_API_KEY="<%= ngcApiKey %>"
|
|
190
190
|
<% } %>
|
|
191
|
+
|
|
192
|
+
<% if (deploymentTarget !== 'batch-transform') { %>
|
|
193
|
+
# Managed Model Customization (do/tune)
|
|
194
|
+
export TUNE_SUPPORTED=<%= (typeof tuneSupported !== 'undefined' && tuneSupported) ? 'true' : 'false' %>
|
|
195
|
+
export TUNE_S3_BUCKET="mlcc-tune-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
|
|
196
|
+
<% } %>
|
|
191
197
|
<% } %>
|
|
192
198
|
|
|
193
199
|
<% if (framework === 'diffusors') { %>
|
package/templates/do/export
CHANGED
|
@@ -2,16 +2,33 @@
|
|
|
2
2
|
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
|
-
# Export current configuration as a CLI command or
|
|
6
|
-
# Usage: ./do/export [--json]
|
|
5
|
+
# Export current configuration as a CLI command, JSON object, or Jupyter notebook
|
|
6
|
+
# Usage: ./do/export [--json | --notebook]
|
|
7
7
|
|
|
8
8
|
# Source configuration (suppress the summary output)
|
|
9
9
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
10
10
|
source "${SCRIPT_DIR}/config" > /dev/null 2>&1
|
|
11
11
|
|
|
12
|
+
# ── Notebook output mode ──────────────────────────────────────────────────────
|
|
13
|
+
|
|
14
|
+
if [ "${1:-}" = "--notebook" ]; then
|
|
15
|
+
# Ensure not combined with --json
|
|
16
|
+
if [ "${2:-}" = "--json" ]; then
|
|
17
|
+
echo "Error: --notebook and --json are mutually exclusive" >&2
|
|
18
|
+
exit 1
|
|
19
|
+
fi
|
|
20
|
+
python3 "${SCRIPT_DIR}/../deploy_notebook_generator.py"
|
|
21
|
+
exit 0
|
|
22
|
+
fi
|
|
23
|
+
|
|
12
24
|
# ── JSON output mode ─────────────────────────────────────────────────────────
|
|
13
25
|
|
|
14
26
|
if [ "${1:-}" = "--json" ]; then
|
|
27
|
+
# Ensure not combined with --notebook
|
|
28
|
+
if [ "${2:-}" = "--notebook" ]; then
|
|
29
|
+
echo "Error: --notebook and --json are mutually exclusive" >&2
|
|
30
|
+
exit 1
|
|
31
|
+
fi
|
|
15
32
|
# Build a JSON object with all configuration parameters.
|
|
16
33
|
# Uses ConfigManager camelCase keys so the output can be fed directly
|
|
17
34
|
# back into the generator via --config=<file>.
|
|
package/templates/do/lib/endpoint-config.sh
CHANGED
|
@@ -152,7 +152,9 @@ create_endpoint_config() {
|
|
|
152
152
|
variant_json="${variant_json}}]"
|
|
153
153
|
else
|
|
154
154
|
# Standard path: single instance type
|
|
155
|
-
|
|
155
|
+
# RoutingConfig is required for IC-based endpoints — without it the IC scheduler
|
|
156
|
+
# cannot place containers and the IC stays in Creating with no logs.
|
|
157
|
+
variant_json="[{\"VariantName\":\"AllTraffic\",\"InstanceType\":\"${INSTANCE_TYPE}\",\"InitialInstanceCount\":1,\"RoutingConfig\":{\"RoutingStrategy\":\"LEAST_OUTSTANDING_REQUESTS\"}"
|
|
156
158
|
|
|
157
159
|
# Optional: AMI version
|
|
158
160
|
if [ -n "${INFERENCE_AMI_VERSION:-}" ]; then
|
|
package/templates/do/lib/inference-component.sh
CHANGED
|
@@ -46,10 +46,14 @@ create_inference_component() {
|
|
|
46
46
|
|
|
47
47
|
# Build container spec JSON
|
|
48
48
|
local container_spec="{\"Image\":\"${ECR_REPOSITORY}:${IC_IMAGE_TAG:-${PROJECT_NAME}-latest}\""
|
|
49
|
+
# Always inject IC name for CW log forwarder
|
|
50
|
+
local ic_env="\"INFERENCE_COMPONENT_NAME\":\"${ic_name}\""
|
|
49
51
|
if [ -n "${CONTAINER_ENV_JSON}${IC_CONTAINER_ENV_EXTRA:-}" ]; then
|
|
50
52
|
local env_json="${CONTAINER_ENV_JSON}"
|
|
51
53
|
[ -n "${IC_CONTAINER_ENV_EXTRA:-}" ] && env_json="${env_json:+${env_json},}${IC_CONTAINER_ENV_EXTRA}"
|
|
52
|
-
container_spec="${container_spec},\"Environment\":{${env_json}}"
|
|
54
|
+
container_spec="${container_spec},\"Environment\":{${ic_env},${env_json}}"
|
|
55
|
+
else
|
|
56
|
+
container_spec="${container_spec},\"Environment\":{${ic_env}}"
|
|
53
57
|
fi
|
|
54
58
|
container_spec="${container_spec}}"
|
|
55
59
|
|