@aws/ml-container-creator 0.6.1 → 0.8.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +1 -1
- package/infra/ci-harness/buildspec.yml +4 -0
- package/package.json +1 -1
- package/servers/lib/catalogs/model-servers.json +80 -0
- package/servers/model-picker/index.js +27 -16
- package/src/app.js +89 -21
- package/src/lib/cli-handler.js +1 -1
- package/src/lib/config-manager.js +39 -2
- package/src/lib/cross-cutting-checker.js +146 -33
- package/src/lib/deployment-config-resolver.js +10 -4
- package/src/lib/e2e-bootstrap.js +227 -0
- package/src/lib/e2e-catalog-validator.js +103 -0
- package/src/lib/e2e-quota-validator.js +135 -0
- package/src/lib/prompt-runner.js +290 -22
- package/src/lib/prompts.js +9 -3
- package/src/lib/template-manager.js +10 -4
- package/src/lib/tune-catalog-validator.js +5 -5
- package/templates/Dockerfile +2 -0
- package/templates/code/cw_log_forwarder.py +64 -0
- package/templates/code/serve +14 -3
- package/templates/code/serving.properties +2 -2
- package/templates/deploy_notebook_generator.py +897 -0
- package/templates/diffusors/serve +3 -3
- package/templates/do/.tune_helper.py +2 -2
- package/templates/do/export +19 -2
- package/templates/do/lib/endpoint-config.sh +3 -1
- package/templates/do/lib/inference-component.sh +5 -1
- package/templates/do/register +8 -2
- package/templates/do/test +5 -5
- package/templates/do/tune +2 -2
- package/templates/marketplace/config +118 -0
- package/templates/marketplace/deploy +890 -0
- package/templates/marketplace/test +453 -0
|
@@ -9,10 +9,10 @@ echo "Starting vLLM-Omni server (diffusion model serving)"
|
|
|
9
9
|
|
|
10
10
|
# Resolve model URI prefixes that engines cannot handle natively.
|
|
11
11
|
# The generator's model-picker may store provider-specific URIs
|
|
12
|
-
# (e.g.
|
|
13
|
-
#
|
|
12
|
+
# (e.g. registry://my-model-group/1) as the model identifier.
|
|
13
|
+
# vLLM expects a HuggingFace repo ID or local path.
|
|
14
14
|
_RAW_MODEL="${VLLM_MODEL:-}"
|
|
15
|
-
if [[ "$_RAW_MODEL" ==
|
|
15
|
+
if [[ "$_RAW_MODEL" == registry://* ]]; then
|
|
16
16
|
if [ -d /opt/ml/model ] && [ "$(ls -A /opt/ml/model 2>/dev/null)" ]; then
|
|
17
17
|
echo "Resolved VLLM_MODEL='${_RAW_MODEL}' → /opt/ml/model (local artifacts found)"
|
|
18
18
|
export VLLM_MODEL="/opt/ml/model"
|
|
@@ -176,7 +176,7 @@ def cmd_submit(args):
|
|
|
176
176
|
)
|
|
177
177
|
elif "ValidationException" in error_msg and "license" in error_msg.lower():
|
|
178
178
|
_error_exit(
|
|
179
|
-
f"Model license not accepted. Accept the license
|
|
179
|
+
f"Model license not accepted. Accept the model license before "
|
|
180
180
|
f"using this model for customization. Details: {error_msg}"
|
|
181
181
|
)
|
|
182
182
|
else:
|
|
@@ -660,7 +660,7 @@ def main():
|
|
|
660
660
|
|
|
661
661
|
# ── submit ────────────────────────────────────────────────────────────────
|
|
662
662
|
submit_parser = subparsers.add_parser("submit", help="Submit a customization job")
|
|
663
|
-
submit_parser.add_argument("--model-id", required=True, help="
|
|
663
|
+
submit_parser.add_argument("--model-id", required=True, help="Model ID")
|
|
664
664
|
submit_parser.add_argument("--technique", required=True,
|
|
665
665
|
choices=["sft", "dpo", "rlaif", "rlvr"],
|
|
666
666
|
help="Customization technique")
|
package/templates/do/export
CHANGED
|
@@ -2,16 +2,33 @@
|
|
|
2
2
|
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
|
-
# Export current configuration as a CLI command or
|
|
6
|
-
# Usage: ./do/export [--json]
|
|
5
|
+
# Export current configuration as a CLI command, JSON object, or Jupyter notebook
|
|
6
|
+
# Usage: ./do/export [--json | --notebook]
|
|
7
7
|
|
|
8
8
|
# Source configuration (suppress the summary output)
|
|
9
9
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
10
10
|
source "${SCRIPT_DIR}/config" > /dev/null 2>&1
|
|
11
11
|
|
|
12
|
+
# ── Notebook output mode ──────────────────────────────────────────────────────
|
|
13
|
+
|
|
14
|
+
if [ "${1:-}" = "--notebook" ]; then
|
|
15
|
+
# Ensure not combined with --json
|
|
16
|
+
if [ "${2:-}" = "--json" ]; then
|
|
17
|
+
echo "Error: --notebook and --json are mutually exclusive" >&2
|
|
18
|
+
exit 1
|
|
19
|
+
fi
|
|
20
|
+
python3 "${SCRIPT_DIR}/../deploy_notebook_generator.py"
|
|
21
|
+
exit 0
|
|
22
|
+
fi
|
|
23
|
+
|
|
12
24
|
# ── JSON output mode ─────────────────────────────────────────────────────────
|
|
13
25
|
|
|
14
26
|
if [ "${1:-}" = "--json" ]; then
|
|
27
|
+
# Ensure not combined with --notebook
|
|
28
|
+
if [ "${2:-}" = "--notebook" ]; then
|
|
29
|
+
echo "Error: --notebook and --json are mutually exclusive" >&2
|
|
30
|
+
exit 1
|
|
31
|
+
fi
|
|
15
32
|
# Build a JSON object with all configuration parameters.
|
|
16
33
|
# Uses ConfigManager camelCase keys so the output can be fed directly
|
|
17
34
|
# back into the generator via --config=<file>.
|
|
@@ -152,7 +152,9 @@ create_endpoint_config() {
|
|
|
152
152
|
variant_json="${variant_json}}]"
|
|
153
153
|
else
|
|
154
154
|
# Standard path: single instance type
|
|
155
|
-
|
|
155
|
+
# RoutingConfig is required for IC-based endpoints — without it the IC scheduler
|
|
156
|
+
# cannot place containers and the IC stays in Creating with no logs.
|
|
157
|
+
variant_json="[{\"VariantName\":\"AllTraffic\",\"InstanceType\":\"${INSTANCE_TYPE}\",\"InitialInstanceCount\":1,\"RoutingConfig\":{\"RoutingStrategy\":\"LEAST_OUTSTANDING_REQUESTS\"}"
|
|
156
158
|
|
|
157
159
|
# Optional: AMI version
|
|
158
160
|
if [ -n "${INFERENCE_AMI_VERSION:-}" ]; then
|
|
@@ -46,10 +46,14 @@ create_inference_component() {
|
|
|
46
46
|
|
|
47
47
|
# Build container spec JSON
|
|
48
48
|
local container_spec="{\"Image\":\"${ECR_REPOSITORY}:${IC_IMAGE_TAG:-${PROJECT_NAME}-latest}\""
|
|
49
|
+
# Always inject IC name for CW log forwarder
|
|
50
|
+
local ic_env="\"INFERENCE_COMPONENT_NAME\":\"${ic_name}\""
|
|
49
51
|
if [ -n "${CONTAINER_ENV_JSON}${IC_CONTAINER_ENV_EXTRA:-}" ]; then
|
|
50
52
|
local env_json="${CONTAINER_ENV_JSON}"
|
|
51
53
|
[ -n "${IC_CONTAINER_ENV_EXTRA:-}" ] && env_json="${env_json:+${env_json},}${IC_CONTAINER_ENV_EXTRA}"
|
|
52
|
-
container_spec="${container_spec},\"Environment\":{${env_json}}"
|
|
54
|
+
container_spec="${container_spec},\"Environment\":{${ic_env},${env_json}}"
|
|
55
|
+
else
|
|
56
|
+
container_spec="${container_spec},\"Environment\":{${ic_env}}"
|
|
53
57
|
fi
|
|
54
58
|
container_spec="${container_spec}}"
|
|
55
59
|
|
package/templates/do/register
CHANGED
|
@@ -191,8 +191,14 @@ fi
|
|
|
191
191
|
# ============================================================
|
|
192
192
|
|
|
193
193
|
# DEPLOYMENT_CONFIG format: <architecture>-<backend> (e.g., transformers-vllm, http-flask, triton-fil)
|
|
194
|
-
|
|
195
|
-
|
|
194
|
+
# Special case: marketplace has no backend
|
|
195
|
+
if [ "${DEPLOYMENT_CONFIG}" = "marketplace" ]; then
|
|
196
|
+
ARCHITECTURE="marketplace"
|
|
197
|
+
BACKEND=""
|
|
198
|
+
else
|
|
199
|
+
ARCHITECTURE="${DEPLOYMENT_CONFIG%%-*}"
|
|
200
|
+
BACKEND="${DEPLOYMENT_CONFIG#*-}"
|
|
201
|
+
fi
|
|
196
202
|
|
|
197
203
|
echo "📋 Registering deployment to registry"
|
|
198
204
|
echo " Project: ${PROJECT_NAME}"
|
package/templates/do/test
CHANGED
|
@@ -103,9 +103,9 @@ case "${FRAMEWORK}" in
|
|
|
103
103
|
case "${MODEL_SERVER}" in
|
|
104
104
|
vllm|sglang)
|
|
105
105
|
# OpenAI-compatible chat completions format
|
|
106
|
-
# For S3/
|
|
106
|
+
# For S3/registry models, vLLM registers the model under the local path
|
|
107
107
|
VLLM_MODEL_NAME="${MODEL_NAME}"
|
|
108
|
-
if [[ "${MODEL_NAME}" ==
|
|
108
|
+
if [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
|
|
109
109
|
VLLM_MODEL_NAME="/opt/ml/model"
|
|
110
110
|
fi
|
|
111
111
|
TEST_PAYLOAD='{"model": "'"${VLLM_MODEL_NAME}"'", "messages": [{"role": "user", "content": "What is machine learning?"}], "max_tokens": 50, "temperature": 0.7}'
|
|
@@ -431,7 +431,7 @@ case "${FRAMEWORK}" in
|
|
|
431
431
|
case "${MODEL_SERVER}" in
|
|
432
432
|
vllm|sglang)
|
|
433
433
|
VLLM_MODEL_NAME="${MODEL_NAME}"
|
|
434
|
-
if [[ "${MODEL_NAME}" ==
|
|
434
|
+
if [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
|
|
435
435
|
VLLM_MODEL_NAME="/opt/ml/model"
|
|
436
436
|
fi
|
|
437
437
|
TEST_PAYLOAD='{"model": "'"${VLLM_MODEL_NAME}"'", "messages": [{"role": "user", "content": "What is machine learning?"}], "max_tokens": 50, "temperature": 0.7}'
|
|
@@ -808,7 +808,7 @@ case "${FRAMEWORK}" in
|
|
|
808
808
|
vllm|sglang)
|
|
809
809
|
# OpenAI-compatible chat completions format
|
|
810
810
|
VLLM_MODEL_NAME="${MODEL_NAME}"
|
|
811
|
-
if [[ "${MODEL_NAME}" ==
|
|
811
|
+
if [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
|
|
812
812
|
VLLM_MODEL_NAME="/opt/ml/model"
|
|
813
813
|
fi
|
|
814
814
|
TEST_PAYLOAD='{"model": "'"${VLLM_MODEL_NAME}"'", "messages": [{"role": "user", "content": "What is machine learning?"}], "max_tokens": 50, "temperature": 0.7}'
|
|
@@ -1095,7 +1095,7 @@ case "${FRAMEWORK}" in
|
|
|
1095
1095
|
case "${MODEL_SERVER}" in
|
|
1096
1096
|
vllm|sglang)
|
|
1097
1097
|
VLLM_MODEL_NAME="${MODEL_NAME}"
|
|
1098
|
-
if [[ "${MODEL_NAME}" ==
|
|
1098
|
+
if [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
|
|
1099
1099
|
VLLM_MODEL_NAME="/opt/ml/model"
|
|
1100
1100
|
fi
|
|
1101
1101
|
TEST_PAYLOAD='{"model": "'"${VLLM_MODEL_NAME}"'", "messages": [{"role": "user", "content": "What is machine learning?"}], "max_tokens": 50, "temperature": 0.7}'
|
package/templates/do/tune
CHANGED
|
@@ -67,7 +67,7 @@ _parse_args() {
|
|
|
67
67
|
ARG_TRAINING_TYPE="$2"; shift 2 ;;
|
|
68
68
|
--model)
|
|
69
69
|
if [ -z "${2:-}" ]; then
|
|
70
|
-
echo "❌ --model requires a
|
|
70
|
+
echo "❌ --model requires a model ID"
|
|
71
71
|
exit 1
|
|
72
72
|
fi
|
|
73
73
|
ARG_MODEL="$2"; shift 2 ;;
|
|
@@ -287,7 +287,7 @@ for family in sorted(families.keys()):
|
|
|
287
287
|
for entry in entries:
|
|
288
288
|
techniques = list(entry.get('techniques', {}).keys())
|
|
289
289
|
print(f' • {entry[\"displayName\"]}')
|
|
290
|
-
print(f' ID: {entry[\"
|
|
290
|
+
print(f' ID: {entry[\"modelId\"]}')
|
|
291
291
|
for t in techniques:
|
|
292
292
|
tc = entry['techniques'][t]
|
|
293
293
|
types = ', '.join(tc.get('trainingTypes', []))
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# do-framework configuration (marketplace)
|
|
3
|
+
# This file is sourced by all do scripts
|
|
4
|
+
|
|
5
|
+
# Project identification
|
|
6
|
+
export PROJECT_NAME="<%= projectName %>"
|
|
7
|
+
export DEPLOYMENT_CONFIG="marketplace"
|
|
8
|
+
|
|
9
|
+
# Marketplace model package
|
|
10
|
+
export MODEL_PACKAGE_ARN="<%= modelPackageArn %>"
|
|
11
|
+
|
|
12
|
+
# AWS configuration
|
|
13
|
+
export AWS_REGION="<%= awsRegion %>"
|
|
14
|
+
|
|
15
|
+
# Deployment configuration
|
|
16
|
+
export DEPLOYMENT_TARGET="<%= deploymentTarget %>"
|
|
17
|
+
export INSTANCE_TYPE="<%= instanceType %>"
|
|
18
|
+
|
|
19
|
+
<% if (roleArn) { %>
|
|
20
|
+
export ROLE_ARN="<%= roleArn %>"
|
|
21
|
+
<% } %>
|
|
22
|
+
|
|
23
|
+
<% if (deploymentTarget === 'async-inference') { %>
|
|
24
|
+
# Async-specific configuration
|
|
25
|
+
ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "UNKNOWN")
|
|
26
|
+
|
|
27
|
+
<% if (asyncS3OutputPath) { %>
|
|
28
|
+
export ASYNC_S3_OUTPUT_PATH="<%= asyncS3OutputPath %>"
|
|
29
|
+
<% } else { %>
|
|
30
|
+
export ASYNC_S3_OUTPUT_PATH="s3://mlcc-async-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
|
|
31
|
+
<% } %>
|
|
32
|
+
|
|
33
|
+
<% if (asyncSnsSuccessTopic) { %>
|
|
34
|
+
export ASYNC_SNS_SUCCESS_TOPIC="<%= asyncSnsSuccessTopic %>"
|
|
35
|
+
<% } else { %>
|
|
36
|
+
export ASYNC_SNS_SUCCESS_TOPIC="arn:aws:sns:${AWS_REGION}:${ACCOUNT_ID}:ml-container-creator-${PROJECT_NAME}-async-success"
|
|
37
|
+
<% } %>
|
|
38
|
+
|
|
39
|
+
<% if (asyncSnsErrorTopic) { %>
|
|
40
|
+
export ASYNC_SNS_ERROR_TOPIC="<%= asyncSnsErrorTopic %>"
|
|
41
|
+
<% } else { %>
|
|
42
|
+
export ASYNC_SNS_ERROR_TOPIC="arn:aws:sns:${AWS_REGION}:${ACCOUNT_ID}:ml-container-creator-${PROJECT_NAME}-async-error"
|
|
43
|
+
<% } %>
|
|
44
|
+
|
|
45
|
+
<% if (asyncMaxConcurrentInvocations) { %>
|
|
46
|
+
export ASYNC_MAX_CONCURRENT_INVOCATIONS="<%= asyncMaxConcurrentInvocations %>"
|
|
47
|
+
<% } %>
|
|
48
|
+
<% } %>
|
|
49
|
+
|
|
50
|
+
<% if (deploymentTarget === 'batch-transform') { %>
|
|
51
|
+
# Batch Transform configuration
|
|
52
|
+
ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "UNKNOWN")
|
|
53
|
+
|
|
54
|
+
<% if (batchInputPath) { %>
|
|
55
|
+
export BATCH_INPUT_PATH="<%= batchInputPath %>"
|
|
56
|
+
<% } else { %>
|
|
57
|
+
export BATCH_INPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/input/"
|
|
58
|
+
<% } %>
|
|
59
|
+
<% if (batchOutputPath) { %>
|
|
60
|
+
export BATCH_OUTPUT_PATH="<%= batchOutputPath %>"
|
|
61
|
+
<% } else { %>
|
|
62
|
+
export BATCH_OUTPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
|
|
63
|
+
<% } %>
|
|
64
|
+
export BATCH_INSTANCE_COUNT="<%= batchInstanceCount %>"
|
|
65
|
+
export BATCH_SPLIT_TYPE="<%= batchSplitType %>"
|
|
66
|
+
export BATCH_STRATEGY="<%= batchStrategy %>"
|
|
67
|
+
export BATCH_JOIN_SOURCE="<%= batchJoinSource || 'None' %>"
|
|
68
|
+
<% if (batchMaxConcurrentTransforms) { %>
|
|
69
|
+
export BATCH_MAX_CONCURRENT_TRANSFORMS="<%= batchMaxConcurrentTransforms %>"
|
|
70
|
+
<% } %>
|
|
71
|
+
<% if (batchMaxPayloadInMB) { %>
|
|
72
|
+
export BATCH_MAX_PAYLOAD_IN_MB="<%= batchMaxPayloadInMB %>"
|
|
73
|
+
<% } %>
|
|
74
|
+
<% } %>
|
|
75
|
+
|
|
76
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
77
|
+
# SageMaker AI Benchmarking configuration
|
|
78
|
+
export BENCHMARK_CONCURRENCY="<%= benchmarkConcurrency %>"
|
|
79
|
+
export BENCHMARK_INPUT_TOKENS_MEAN="<%= benchmarkInputTokensMean %>"
|
|
80
|
+
export BENCHMARK_OUTPUT_TOKENS_MEAN="<%= benchmarkOutputTokensMean %>"
|
|
81
|
+
export BENCHMARK_STREAMING="<%= benchmarkStreaming %>"
|
|
82
|
+
<% if (benchmarkRequestCount) { %>
|
|
83
|
+
export BENCHMARK_REQUEST_COUNT="<%= benchmarkRequestCount %>"
|
|
84
|
+
<% } else { %>
|
|
85
|
+
export BENCHMARK_REQUEST_COUNT=""
|
|
86
|
+
<% } %>
|
|
87
|
+
<% if (benchmarkS3OutputPath) { %>
|
|
88
|
+
export BENCHMARK_S3_OUTPUT_PATH="<%= benchmarkS3OutputPath %>"
|
|
89
|
+
<% } else { %>
|
|
90
|
+
export BENCHMARK_S3_OUTPUT_PATH="s3://mlcc-benchmark-$(aws sts get-caller-identity --query Account --output text)-${AWS_REGION}/${PROJECT_NAME}/"
|
|
91
|
+
<% } %>
|
|
92
|
+
export BENCHMARK_JOB_NAME=""
|
|
93
|
+
export BENCHMARK_WORKLOAD_CONFIG_NAME=""
|
|
94
|
+
<% } %>
|
|
95
|
+
|
|
96
|
+
# Allow environment variable overrides
|
|
97
|
+
export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
|
|
98
|
+
export INSTANCE_TYPE=${INSTANCE_TYPE:-<%= instanceType %>}
|
|
99
|
+
|
|
100
|
+
# Print configuration summary
|
|
101
|
+
echo "⚙️ Configuration loaded"
|
|
102
|
+
echo " Project: ${PROJECT_NAME}"
|
|
103
|
+
echo " Config: ${DEPLOYMENT_CONFIG}"
|
|
104
|
+
echo " Region: ${AWS_REGION}"
|
|
105
|
+
echo " Model package: ${MODEL_PACKAGE_ARN}"
|
|
106
|
+
echo " Deployment target: ${DEPLOYMENT_TARGET}"
|
|
107
|
+
echo " Instance: ${INSTANCE_TYPE}"
|
|
108
|
+
<% if (deploymentTarget === 'async-inference') { %>
|
|
109
|
+
echo " S3 output: ${ASYNC_S3_OUTPUT_PATH}"
|
|
110
|
+
echo " SNS success: ${ASYNC_SNS_SUCCESS_TOPIC}"
|
|
111
|
+
echo " SNS error: ${ASYNC_SNS_ERROR_TOPIC}"
|
|
112
|
+
<% } else if (deploymentTarget === 'batch-transform') { %>
|
|
113
|
+
echo " Instance count: ${BATCH_INSTANCE_COUNT}"
|
|
114
|
+
echo " S3 input: ${BATCH_INPUT_PATH}"
|
|
115
|
+
echo " S3 output: ${BATCH_OUTPUT_PATH}"
|
|
116
|
+
echo " Split type: ${BATCH_SPLIT_TYPE}"
|
|
117
|
+
echo " Strategy: ${BATCH_STRATEGY}"
|
|
118
|
+
<% } %>
|