aws-cdk-neuronx-patterns 0.0.13 → 0.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.jsii +33 -33
- package/README.md +13 -0
- package/lib/base/aws-batch/neuronx-batch-compute-environment.js +1 -1
- package/lib/base/aws-batch/neuronx-batch-ecs-job-definition.js +1 -1
- package/lib/base/aws-batch/neuronx-batch.js +1 -1
- package/lib/base/aws-ecs-patterns/application-load-balanced-neuronx-service.js +58 -12
- package/lib/base/neuronx/deep-learning-containers.js +1 -1
- package/lib/base/neuronx/model.js +2 -2
- package/lib/base/neuronx/neuron-optimized-machine-image.js +3 -3
- package/lib/base/neuronx/neuronx-instance-type.js +3 -3
- package/lib/base/neuronx-compiler/neuronx-compiler.js +1 -1
- package/lib/base/server-engine/vllm-engine/vllm-engine-argments.js +4 -3
- package/lib/vllm-nxd-inference/vllm-nxd-inference-compiler.js +2 -2
- package/lib/vllm-nxd-inference/vllm-nxd-inference-ecs-patterns.js +6 -5
- package/package.json +4 -4
- package/scripts/compile/vllm-nxd-inference/Dockerfile +8 -2
- package/scripts/compile/vllm-nxd-inference/entrypoint.sh +9 -5
- package/scripts/inference/vllm-nxd-inference/Dockerfile +2 -5
- package/scripts/inference/vllm-nxd-inference/entrypoint.sh +2 -3
|
@@ -19,6 +19,11 @@ RUN pip install -U pip && \
|
|
|
19
19
|
|
|
20
20
|
WORKDIR /usr/src
|
|
21
21
|
|
|
22
|
+
# install AWS CLI v2
|
|
23
|
+
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
|
|
24
|
+
unzip awscliv2.zip && \
|
|
25
|
+
./aws/install
|
|
26
|
+
|
|
22
27
|
RUN git clone -b ${VLLM_GIT_BRANCH} https://github.com/aws-neuron/upstreaming-to-vllm.git && \
|
|
23
28
|
pip install -r ./upstreaming-to-vllm/requirements/neuron.txt && \
|
|
24
29
|
SETUPTOOLS_SCM_PRETEND_VERSION="0.9.0" VLLM_TARGET_DEVICE="neuron" pip install -e ./upstreaming-to-vllm
|
|
@@ -26,10 +31,11 @@ RUN git clone -b ${VLLM_GIT_BRANCH} https://github.com/aws-neuron/upstreaming-to
|
|
|
26
31
|
WORKDIR /home/$USERNAME/vllm
|
|
27
32
|
|
|
28
33
|
RUN chown -R $USERNAME:$USERNAME /home/$USERNAME && chsh -s /bin/bash $USERNAME
|
|
29
|
-
COPY --chown=$USERNAME:$USERNAME entrypoint.sh /home/$USERNAME/vllm/
|
|
30
|
-
RUN chmod +x /home/$USERNAME/vllm/entrypoint.sh
|
|
34
|
+
COPY --chown=$USERNAME:$USERNAME --chmod=755 entrypoint.sh /home/$USERNAME/vllm/
|
|
31
35
|
USER $USERNAME
|
|
32
36
|
|
|
37
|
+
RUN aws configure set default.s3.preferred_transfer_client crt
|
|
38
|
+
|
|
33
39
|
RUN pip install -U "huggingface_hub[hf_xet]"
|
|
34
40
|
|
|
35
41
|
ENTRYPOINT [ "bash", "entrypoint.sh" ]
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#!/bin/bash
|
|
2
2
|
|
|
3
|
-
LOG_FILE
|
|
3
|
+
LOG_FILE=~/vllm.log
|
|
4
|
+
touch $LOG_FILE
|
|
4
5
|
wait_for_log_to_be_detected() {
|
|
5
6
|
local SEARCH_TEXT="$1"
|
|
6
7
|
echo "wait for \"$SEARCH_TEXT\" to be detected in \`$LOG_FILE\`..."
|
|
@@ -14,16 +15,19 @@ wait_for_log_to_be_detected() {
|
|
|
14
15
|
fi
|
|
15
16
|
}
|
|
16
17
|
|
|
18
|
+
mkdir compile
|
|
19
|
+
cd compile
|
|
20
|
+
|
|
17
21
|
if [[ $MODEL_ID == "s3://"* ]]; then
|
|
18
22
|
aws s3 sync $MODEL_ID $MODEL_NAME
|
|
19
23
|
else
|
|
20
24
|
huggingface-cli download $MODEL_ID --local-dir $MODEL_NAME
|
|
21
25
|
fi
|
|
22
26
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
vllm serve ./$MODEL_NAME "$@" 2>&1 | tee $LOG_FILE &
|
|
27
|
+
vllm serve "$@" 2>&1 | tee $LOG_FILE &
|
|
26
28
|
|
|
27
29
|
wait_for_log_to_be_detected "Application startup complete" || exit 1
|
|
28
30
|
|
|
29
|
-
aws s3 cp --no-progress --recursive ./ $COMPILED_ARTIFACTS_S3_URI
|
|
31
|
+
aws s3 cp --no-progress --recursive ./ $COMPILED_ARTIFACTS_S3_URI \
|
|
32
|
+
--exclude "**/.cache/*" \
|
|
33
|
+
--exclude global_metric_store.json
|
|
@@ -26,10 +26,7 @@ RUN git clone -b ${VLLM_GIT_BRANCH} https://github.com/aws-neuron/upstreaming-to
|
|
|
26
26
|
WORKDIR /home/$USERNAME/vllm
|
|
27
27
|
|
|
28
28
|
RUN chown -R $USERNAME:$USERNAME /home/$USERNAME && chsh -s /bin/bash $USERNAME
|
|
29
|
-
COPY --chown=$USERNAME:$USERNAME entrypoint.sh /
|
|
30
|
-
RUN chmod +x /home/$USERNAME/vllm/entrypoint.sh
|
|
29
|
+
COPY --chown=$USERNAME:$USERNAME --chmod=755 entrypoint.sh /entrypoint.sh
|
|
31
30
|
USER $USERNAME
|
|
32
31
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
ENTRYPOINT [ "bash", "entrypoint.sh" ]
|
|
32
|
+
ENTRYPOINT [ "bash", "/entrypoint.sh" ]
|