@aws/ml-container-creator 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -0
- package/LICENSE-THIRD-PARTY +68620 -0
- package/NOTICE +2 -0
- package/README.md +106 -0
- package/bin/cli.js +365 -0
- package/config/defaults.json +32 -0
- package/config/presets/transformers-djl.json +26 -0
- package/config/presets/transformers-gpu.json +24 -0
- package/config/presets/transformers-lmi.json +27 -0
- package/package.json +129 -0
- package/servers/README.md +419 -0
- package/servers/base-image-picker/catalogs/model-servers.json +1191 -0
- package/servers/base-image-picker/catalogs/python-slim.json +38 -0
- package/servers/base-image-picker/catalogs/triton-backends.json +51 -0
- package/servers/base-image-picker/catalogs/triton.json +38 -0
- package/servers/base-image-picker/index.js +495 -0
- package/servers/base-image-picker/manifest.json +17 -0
- package/servers/base-image-picker/package.json +15 -0
- package/servers/hyperpod-cluster-picker/LICENSE +202 -0
- package/servers/hyperpod-cluster-picker/index.js +424 -0
- package/servers/hyperpod-cluster-picker/manifest.json +14 -0
- package/servers/hyperpod-cluster-picker/package.json +17 -0
- package/servers/instance-recommender/LICENSE +202 -0
- package/servers/instance-recommender/catalogs/instances.json +852 -0
- package/servers/instance-recommender/index.js +284 -0
- package/servers/instance-recommender/manifest.json +16 -0
- package/servers/instance-recommender/package.json +15 -0
- package/servers/lib/LICENSE +202 -0
- package/servers/lib/bedrock-client.js +160 -0
- package/servers/lib/custom-validators.js +46 -0
- package/servers/lib/dynamic-resolver.js +36 -0
- package/servers/lib/package.json +11 -0
- package/servers/lib/schemas/image-catalog.schema.json +185 -0
- package/servers/lib/schemas/instances.schema.json +124 -0
- package/servers/lib/schemas/manifest.schema.json +64 -0
- package/servers/lib/schemas/model-catalog.schema.json +91 -0
- package/servers/lib/schemas/regions.schema.json +26 -0
- package/servers/lib/schemas/triton-backends.schema.json +51 -0
- package/servers/model-picker/catalogs/jumpstart-public.json +66 -0
- package/servers/model-picker/catalogs/popular-diffusors.json +88 -0
- package/servers/model-picker/catalogs/popular-transformers.json +226 -0
- package/servers/model-picker/index.js +1693 -0
- package/servers/model-picker/manifest.json +18 -0
- package/servers/model-picker/package.json +20 -0
- package/servers/region-picker/LICENSE +202 -0
- package/servers/region-picker/catalogs/regions.json +263 -0
- package/servers/region-picker/index.js +230 -0
- package/servers/region-picker/manifest.json +16 -0
- package/servers/region-picker/package.json +15 -0
- package/src/app.js +1007 -0
- package/src/copy-tpl.js +77 -0
- package/src/lib/accelerator-validator.js +39 -0
- package/src/lib/asset-manager.js +385 -0
- package/src/lib/aws-profile-parser.js +181 -0
- package/src/lib/bootstrap-command-handler.js +1647 -0
- package/src/lib/bootstrap-config.js +238 -0
- package/src/lib/ci-register-helpers.js +124 -0
- package/src/lib/ci-report-helpers.js +158 -0
- package/src/lib/ci-stage-helpers.js +268 -0
- package/src/lib/cli-handler.js +529 -0
- package/src/lib/comment-generator.js +544 -0
- package/src/lib/community-reports-validator.js +91 -0
- package/src/lib/config-manager.js +2106 -0
- package/src/lib/configuration-exporter.js +204 -0
- package/src/lib/configuration-manager.js +695 -0
- package/src/lib/configuration-matcher.js +221 -0
- package/src/lib/cpu-validator.js +36 -0
- package/src/lib/cuda-validator.js +57 -0
- package/src/lib/deployment-config-resolver.js +103 -0
- package/src/lib/deployment-entry-schema.js +125 -0
- package/src/lib/deployment-registry.js +598 -0
- package/src/lib/docker-introspection-validator.js +51 -0
- package/src/lib/engine-prefix-resolver.js +60 -0
- package/src/lib/huggingface-client.js +172 -0
- package/src/lib/key-value-parser.js +37 -0
- package/src/lib/known-flags-validator.js +200 -0
- package/src/lib/manifest-cli.js +280 -0
- package/src/lib/mcp-client.js +303 -0
- package/src/lib/mcp-command-handler.js +532 -0
- package/src/lib/neuron-validator.js +80 -0
- package/src/lib/parameter-schema-validator.js +284 -0
- package/src/lib/prompt-runner.js +1349 -0
- package/src/lib/prompts.js +1138 -0
- package/src/lib/registry-command-handler.js +519 -0
- package/src/lib/registry-loader.js +198 -0
- package/src/lib/rocm-validator.js +80 -0
- package/src/lib/schema-validator.js +157 -0
- package/src/lib/sensitive-redactor.js +59 -0
- package/src/lib/template-engine.js +156 -0
- package/src/lib/template-manager.js +341 -0
- package/src/lib/validation-engine.js +314 -0
- package/src/prompt-adapter.js +63 -0
- package/templates/Dockerfile +300 -0
- package/templates/IAM_PERMISSIONS.md +84 -0
- package/templates/MIGRATION.md +488 -0
- package/templates/PROJECT_README.md +439 -0
- package/templates/TEMPLATE_SYSTEM.md +243 -0
- package/templates/buildspec.yml +64 -0
- package/templates/code/chat_template.jinja +1 -0
- package/templates/code/flask/gunicorn_config.py +35 -0
- package/templates/code/flask/wsgi.py +10 -0
- package/templates/code/model_handler.py +387 -0
- package/templates/code/serve +300 -0
- package/templates/code/serve.py +175 -0
- package/templates/code/serving.properties +105 -0
- package/templates/code/start_server.py +39 -0
- package/templates/code/start_server.sh +39 -0
- package/templates/diffusors/Dockerfile +72 -0
- package/templates/diffusors/patch_image_api.py +35 -0
- package/templates/diffusors/serve +115 -0
- package/templates/diffusors/start_server.sh +114 -0
- package/templates/do/.gitkeep +1 -0
- package/templates/do/README.md +541 -0
- package/templates/do/build +83 -0
- package/templates/do/ci +681 -0
- package/templates/do/clean +811 -0
- package/templates/do/config +260 -0
- package/templates/do/deploy +1560 -0
- package/templates/do/export +306 -0
- package/templates/do/logs +319 -0
- package/templates/do/manifest +12 -0
- package/templates/do/push +119 -0
- package/templates/do/register +580 -0
- package/templates/do/run +113 -0
- package/templates/do/submit +417 -0
- package/templates/do/test +1147 -0
- package/templates/hyperpod/configmap.yaml +24 -0
- package/templates/hyperpod/deployment.yaml +71 -0
- package/templates/hyperpod/pvc.yaml +42 -0
- package/templates/hyperpod/service.yaml +17 -0
- package/templates/nginx-diffusors.conf +74 -0
- package/templates/nginx-predictors.conf +47 -0
- package/templates/nginx-tensorrt.conf +74 -0
- package/templates/requirements.txt +61 -0
- package/templates/sample_model/test_inference.py +123 -0
- package/templates/sample_model/train_abalone.py +252 -0
- package/templates/test/test_endpoint.sh +79 -0
- package/templates/test/test_local_image.sh +80 -0
- package/templates/test/test_model_handler.py +180 -0
- package/templates/triton/Dockerfile +128 -0
- package/templates/triton/config.pbtxt +163 -0
- package/templates/triton/model.py +130 -0
- package/templates/triton/requirements.txt +11 -0
package/templates/diffusors/Dockerfile
@@ -0,0 +1,72 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+<% if (comments && comments.acceleratorInfo) { %>
+<%= comments.acceleratorInfo %>
+<% } %>
+
+<% if (comments && comments.validationInfo) { %>
+<%= comments.validationInfo %>
+<% } %>
+
+# vLLM-Omni: Diffusion model serving with OpenAI-compatible image generation API
+# Image source: https://hub.docker.com/r/vllm/vllm-omni (public, no NGC auth required)
+FROM vllm/vllm-omni:v0.16.0
+
+# Set a docker label naming this project, suffixed with the build time
+LABEL project.name="<%= projectName %>-<%= buildTimestamp %>" \
+      project.base-name="<%= projectName %>" \
+      project.build-time="<%= buildTimestamp %>"
+
+# Set a docker label so the container honors the SAGEMAKER_BIND_TO_PORT environment variable if present
+LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
+
+# Set the model name for the diffusion model
+ENV VLLM_MODEL="<%= modelName %>"
+
+<% if (hfToken) { %>
+# Set HuggingFace authentication token
+ENV HF_TOKEN="<%= hfToken %>"
+<% } %>
+
+<% if (comments && comments.envVarExplanations && Object.keys(comments.envVarExplanations).length > 0) { %>
+# Environment Variables Configuration
+<% for (const [category, comment] of Object.entries(comments.envVarExplanations)) { %>
+<%= comment %>
+<% } %>
+<% } %>
+
+<% if (orderedEnvVars && orderedEnvVars.length > 0) { %>
+# Additional environment variables from configuration
+<% orderedEnvVars.forEach(({ key, value }) => { %>
+ENV <%= key %>=<%= value %>
+<% }); %>
+<% } %>
+
+# Patch vLLM-Omni encode_image_base64 to handle numpy.ndarray outputs
+# Some diffusion models return numpy arrays instead of PIL Images from their
+# pipeline, which causes an AttributeError on .save() during response encoding.
+# This patch adds a safe isinstance guard to convert ndarray → PIL.Image.
+COPY code/patch_image_api.py /tmp/patch_image_api.py
+RUN python3 /tmp/patch_image_api.py && rm /tmp/patch_image_api.py
+
+# Install nginx for SageMaker endpoint routing
+# Maps /invocations -> /v1/images/generations and /ping -> /health
+RUN apt-get update && apt-get install -y --no-install-recommends nginx \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy nginx configuration
+COPY nginx-diffusors.conf /etc/nginx/nginx.conf
+
+# Copy serve entrypoint and startup scripts
+COPY code/serve /usr/bin/serve
+RUN chmod 777 /usr/bin/serve
+
+COPY code/start_server.sh /usr/bin/start_server.sh
+RUN chmod +x /usr/bin/start_server.sh
+
+<% if (comments && comments.troubleshooting) { %>
+<%= comments.troubleshooting %>
+<% } %>
+
+ENTRYPOINT [ "/usr/bin/serve" ]
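
The comments above describe the SageMaker contract this image targets: nginx listens on 8080 and maps /ping and /invocations onto vLLM-Omni's endpoints. A rough local smoke test of a built image could look like the following sketch; the image tag, GPU flags, and request body are illustrative assumptions, and the EJS template must first be rendered by the generator before it will build.

# Hypothetical local smoke test of the rendered Dockerfile
docker build -t diffusion-smoke-test .
docker run -d --rm --gpus all -p 8080:8080 diffusion-smoke-test

# nginx should answer the SageMaker health check...
curl -s http://localhost:8080/ping

# ...and proxy inference requests to the OpenAI-compatible image API
curl -s http://localhost:8080/invocations \
  -H 'Content-Type: application/json' \
  -d '{"prompt": "a watercolor fox", "n": 1, "size": "512x512"}'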
package/templates/diffusors/patch_image_api.py
@@ -0,0 +1,35 @@
+"""
+Patch vLLM-Omni image_api_utils.encode_image_base64 to handle numpy arrays.
+
+Some diffusion models (especially video models like Wan2.1) return numpy
+arrays with extra batch/frame dimensions and float32 dtype. The upstream
+encode_image_base64() expects a PIL Image. This patch normalises the array
+to (H, W, 3) uint8 before converting to PIL.
+"""
+
+import vllm_omni.entrypoints.openai.image_api_utils as mod
+
+path = mod.__file__
+source = open(path).read()
+
+# Match the save() call site — resilient to function signature changes
+old = "    image.save(buffer, format=\"PNG\")"
+new = """\
+    import numpy as np
+    from PIL import Image as _PILImage
+    if isinstance(image, np.ndarray):
+        # Squeeze batch / frame dims: (1,1,H,W,3) or (1,H,W,3) -> (H,W,3)
+        while image.ndim > 3:
+            image = image[0]
+        # float [0,1] -> uint8 [0,255]
+        if image.dtype != np.uint8:
+            image = np.clip(image * 255.0, 0, 255).astype(np.uint8)
+        image = _PILImage.fromarray(image)
+    image.save(buffer, format="PNG")"""
+
+if old not in source:
+    print("WARN: patch target not found — already patched or API changed")
+else:
+    source = source.replace(old, new, 1)
+    open(path, "w").write(source)
+    print(f"Patched {path}")
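
Because the patch is a textual replace, it is worth verifying that it actually landed in a built image. One hedged way to check (the image tag is a placeholder, and the module path assumes the vllm_omni layout used above):

# Print whether the injected ndarray guard is present in the patched module
docker run --rm --entrypoint python3 <your-image-tag> -c '
import vllm_omni.entrypoints.openai.image_api_utils as mod
src = open(mod.__file__).read()
print("patched" if "isinstance(image, np.ndarray)" in src else "NOT patched")
'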
package/templates/diffusors/serve
@@ -0,0 +1,115 @@
+#!/bin/bash
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+echo "Starting vLLM-Omni server (diffusion model serving)"
+
+# Resolve model URI prefixes that engines cannot handle natively.
+# The generator's model-picker may store provider-specific URIs
+# (e.g. jumpstart://model-txt2img-stabilityai-stable-diffusion-v2-1-base)
+# as the model identifier. vLLM expects a HuggingFace repo ID or local path.
+_RAW_MODEL="${VLLM_MODEL:-}"
+if [[ "$_RAW_MODEL" == jumpstart://* ]] || [[ "$_RAW_MODEL" == jumpstart-hub://* ]] || [[ "$_RAW_MODEL" == registry://* ]]; then
+    if [ -d /opt/ml/model ] && [ "$(ls -A /opt/ml/model 2>/dev/null)" ]; then
+        echo "Resolved VLLM_MODEL='${_RAW_MODEL}' → /opt/ml/model (local artifacts found)"
+        export VLLM_MODEL="/opt/ml/model"
+    else
+        _BARE_ID="${_RAW_MODEL#*://}"
+        echo "Warning: VLLM_MODEL='${_RAW_MODEL}' has a provider prefix but /opt/ml/model is empty."
+        echo "Stripping prefix → '${_BARE_ID}' (engine will attempt to fetch from model hub)"
+        export VLLM_MODEL="${_BARE_ID}"
+    fi
+fi
+unset _RAW_MODEL _BARE_ID
+
+# Validate that the model name is set
+if [ -z "$VLLM_MODEL" ]; then
+    echo "Error: VLLM_MODEL environment variable is not set"
+    exit 1
+fi
+
+# Initialize server arguments with --omni flag
+# --omni activates vLLM-Omni diffusion/multi-stage support
+# port 8081 is the internal port; nginx on 8080 handles SageMaker routing
+#   /invocations -> /v1/images/generations
+#   /ping -> /health
+SERVER_ARGS=(--omni --host 0.0.0.0 --port 8081)
+
+# Define the prefix for environment variables to look for
+# Uses VLLM_OMNI_ prefix to avoid conflicts with base vLLM env vars
+PREFIX="VLLM_OMNI_"
+ARG_PREFIX="--"
+
+# Define environment variables to exclude from CLI flag conversion
+# VLLM_MODEL is used as the positional model argument, not a --flag
+EXCLUDE_VARS=("VLLM_MODEL")
+
+# Declare and populate array of matching environment variables
+mapfile -t env_vars < <(env | grep "^${PREFIX}")
+
+# Loop through the array and convert to command-line arguments
+for var in "${env_vars[@]}"; do
+    IFS='=' read -r key value <<< "$var"
+
+    # Skip excluded variables
+    skip=false
+    for exclude in "${EXCLUDE_VARS[@]}"; do
+        if [ "$key" = "$exclude" ]; then
+            skip=true
+            break
+        fi
+    done
+
+    if [ "$skip" = true ]; then
+        continue
+    fi
+
+    # Remove prefix, convert to lowercase, and replace underscores with dashes
+    arg_name=$(echo "${key#"${PREFIX}"}" | tr '[:upper:]' '[:lower:]' | tr '_' '-')
+    SERVER_ARGS+=("${ARG_PREFIX}${arg_name}")
+    if [ -n "$value" ]; then
+        SERVER_ARGS+=("$value")
+    fi
+done
+
+echo "-------------------------------------------------------------------"
+echo "vLLM-Omni engine args: vllm serve $VLLM_MODEL [${SERVER_ARGS[@]}]"
+echo "-------------------------------------------------------------------"
+
+# Launch vLLM-Omni on internal port (8081), then nginx on SageMaker port (8080)
+vllm serve "$VLLM_MODEL" "${SERVER_ARGS[@]}" &
+VLLM_PID=$!
+
+# Wait for vLLM-Omni to be ready before starting nginx
+echo "Waiting for vLLM-Omni server to start..."
+for i in {1..300}; do
+    if curl -s http://localhost:8081/health > /dev/null 2>&1; then
+        echo "vLLM-Omni server is ready!"
+        break
+    fi
+    if ! kill -0 $VLLM_PID 2>/dev/null; then
+        echo "Error: vLLM-Omni process exited unexpectedly"
+        exit 1
+    fi
+    if [ $i -eq 300 ]; then
+        echo "Error: vLLM-Omni server failed to start within 300 seconds"
+        exit 1
+    fi
+    sleep 1
+done
+
+echo "Starting nginx reverse proxy on port 8080..."
+nginx -c /etc/nginx/nginx.conf &
+NGINX_PID=$!
+
+# Wait for either process to exit (this keeps the container running)
+wait -n $VLLM_PID $NGINX_PID
+
+# If we get here, one process exited - this is an error condition
+EXIT_CODE=$?
+echo "Error: Process exited with code $EXIT_CODE"
+
+# Kill any remaining processes
+kill $VLLM_PID $NGINX_PID 2>/dev/null || true
+
+exit $EXIT_CODE
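
The VLLM_OMNI_ convention lets extra engine flags be injected at deploy time without editing the script: the prefix is stripped, the name is lowercased, and underscores become dashes; an empty value yields a bare flag. For example (flag names are illustrative, and whether a given flag exists depends on the vLLM-Omni version):

# VLLM_OMNI_CACHE_BACKEND=tea_cache -> --cache-backend tea_cache
# VLLM_OMNI_VAE_USE_TILING=         -> --vae-use-tiling (empty value, flag only)
docker run --rm --gpus all -p 8080:8080 \
  -e VLLM_MODEL=stabilityai/stable-diffusion-3.5-medium \
  -e VLLM_OMNI_CACHE_BACKEND=tea_cache \
  -e VLLM_OMNI_VAE_USE_TILING= \
  <your-image-tag>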
package/templates/diffusors/start_server.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# start_server.sh — Alternative startup script for vLLM-Omni diffusion model serving.
+# This script builds the vllm serve command with explicit diffusion-specific CLI flags.
+# The primary entrypoint is the `serve` script; use this for manual customization.
+
+set -e
+
+echo "Starting vLLM-Omni server (diffusion model serving)"
+
+# ---------------------------------------------------------------
+# Validate required environment variables
+# ---------------------------------------------------------------
+if [ -z "$VLLM_MODEL" ]; then
+    echo "Error: VLLM_MODEL environment variable is not set."
+    echo "Set it to a HuggingFace diffusion model ID, e.g.:"
+    echo "  export VLLM_MODEL=stabilityai/stable-diffusion-3.5-medium"
+    exit 1
+fi
+
+# ---------------------------------------------------------------
+# Build the base vllm serve command
+# --omni: activates vLLM-Omni diffusion/multi-stage support
+# --port 8081: internal port; nginx on 8080 satisfies SageMaker's port-8080 requirement
+# https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html
+# ---------------------------------------------------------------
+CMD="vllm serve $VLLM_MODEL --omni --host 0.0.0.0 --port 8081"
+
+# ---------------------------------------------------------------
+# Diffusion-specific CLI flags (configurable via environment variables)
+# ---------------------------------------------------------------
+
+# --num-gpus: Number of GPUs for diffusion inference (tensor parallelism)
+# Example: export VLLM_NUM_GPUS=4
+if [ -n "$VLLM_NUM_GPUS" ]; then
+    CMD="$CMD --num-gpus $VLLM_NUM_GPUS"
+fi
+
+# --cache-backend: Diffusion acceleration backend
+# Options: tea_cache (hook-based adaptive caching), cache_dit (library-based), none
+# Example: export VLLM_CACHE_BACKEND=tea_cache
+if [ -n "$VLLM_CACHE_BACKEND" ]; then
+    CMD="$CMD --cache-backend $VLLM_CACHE_BACKEND"
+fi
+
+# --vae-use-tiling: Process VAE in tiles to reduce VRAM usage
+# Set to any non-empty value to enable
+# Example: export VLLM_VAE_USE_TILING=1
+if [ -n "$VLLM_VAE_USE_TILING" ]; then
+    CMD="$CMD --vae-use-tiling"
+fi
+
+# --ulysses-degree: Ulysses sequence parallelism degree for large models
+# Example: export VLLM_ULYSSES_DEGREE=2
+if [ -n "$VLLM_ULYSSES_DEGREE" ]; then
+    CMD="$CMD --ulysses-degree $VLLM_ULYSSES_DEGREE"
+fi
+
+# --ring-degree: Ring sequence parallelism degree
+# Example: export VLLM_RING_DEGREE=2
+if [ -n "$VLLM_RING_DEGREE" ]; then
+    CMD="$CMD --ring-degree $VLLM_RING_DEGREE"
+fi
+
+# --enable-cpu-offload: Offload model weights to CPU to save GPU memory
+# Set to any non-empty value to enable
+# Example: export VLLM_ENABLE_CPU_OFFLOAD=1
+if [ -n "$VLLM_ENABLE_CPU_OFFLOAD" ]; then
+    CMD="$CMD --enable-cpu-offload"
+fi
+
+echo "-------------------------------------------------------------------"
+echo "vLLM-Omni command: $CMD"
+echo "-------------------------------------------------------------------"
+
+# Launch vLLM-Omni on internal port (8081), then nginx on SageMaker port (8080)
+$CMD &
+VLLM_PID=$!
+
+# Wait for vLLM-Omni to be ready before starting nginx
+echo "Waiting for vLLM-Omni server to start..."
+for i in {1..300}; do
+    if curl -s http://localhost:8081/health > /dev/null 2>&1; then
+        echo "vLLM-Omni server is ready!"
+        break
+    fi
+    if ! kill -0 $VLLM_PID 2>/dev/null; then
+        echo "Error: vLLM-Omni process exited unexpectedly"
+        exit 1
+    fi
+    if [ $i -eq 300 ]; then
+        echo "Error: vLLM-Omni server failed to start within 300 seconds"
+        exit 1
+    fi
+    sleep 1
+done
+
+echo "Starting nginx reverse proxy on port 8080..."
+nginx -c /etc/nginx/nginx.conf &
+NGINX_PID=$!
+
+# Wait for either process to exit (this keeps the container running).
+# The || guard stops set -e from exiting before the status is captured.
+EXIT_CODE=0; wait -n $VLLM_PID $NGINX_PID || EXIT_CODE=$?
+
+# If we get here, one process exited - this is an error condition
+echo "Error: Process exited with code $EXIT_CODE"
+
+# Kill any remaining processes
+kill $VLLM_PID $NGINX_PID 2>/dev/null || true
+
+exit $EXIT_CODE
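
For manual runs inside the container, each flag above maps one-to-one onto a plain environment variable. A minimal invocation might look like this sketch (the model ID and option values are taken from the examples in the script's comments, not defaults):

export VLLM_MODEL=stabilityai/stable-diffusion-3.5-medium
export VLLM_CACHE_BACKEND=tea_cache   # optional: diffusion acceleration backend
export VLLM_VAE_USE_TILING=1          # optional: tile the VAE to reduce VRAM
/usr/bin/start_server.sh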
package/templates/do/.gitkeep
@@ -0,0 +1 @@
+# This file ensures the do/ directory is tracked by git