npm - aws-cdk-neuronx-patterns - Versions diffs - 0.0.13 → 0.0.15 - Mend

aws-cdk-neuronx-patterns 0.0.13 → 0.0.15

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

package/scripts/compile/vllm-nxd-inference/Dockerfile CHANGED Viewed

@@ -19,6 +19,11 @@ RUN pip install -U pip && \
 WORKDIR /usr/src
+# install AWS CLI v2
+RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
+    unzip awscliv2.zip && \
+    ./aws/install
 RUN git clone -b ${VLLM_GIT_BRANCH} https://github.com/aws-neuron/upstreaming-to-vllm.git && \
     pip install -r ./upstreaming-to-vllm/requirements/neuron.txt && \
     SETUPTOOLS_SCM_PRETEND_VERSION="0.9.0" VLLM_TARGET_DEVICE="neuron" pip install -e ./upstreaming-to-vllm
@@ -26,10 +31,11 @@ RUN git clone -b ${VLLM_GIT_BRANCH} https://github.com/aws-neuron/upstreaming-to
 WORKDIR /home/$USERNAME/vllm
 RUN chown -R $USERNAME:$USERNAME /home/$USERNAME && chsh -s /bin/bash $USERNAME
-COPY --chown=$USERNAME:$USERNAME entrypoint.sh /home/$USERNAME/vllm/
-RUN chmod +x /home/$USERNAME/vllm/entrypoint.sh
+COPY --chown=$USERNAME:$USERNAME --chmod=755 entrypoint.sh /home/$USERNAME/vllm/
 USER $USERNAME
+RUN aws configure set default.s3.preferred_transfer_client crt
 RUN pip install -U "huggingface_hub[hf_xet]"
 ENTRYPOINT [ "bash", "entrypoint.sh" ]

package/scripts/compile/vllm-nxd-inference/entrypoint.sh CHANGED Viewed

@@ -1,6 +1,7 @@
 #!/bin/bash
-LOG_FILE=vllm.log
+LOG_FILE=~/vllm.log
+touch $LOG_FILE
 wait_for_log_to_be_detected() {
     local SEARCH_TEXT="$1"
     echo "wait for \"$SEARCH_TEXT\" to be detected in \`$LOG_FILE\`..."
@@ -14,16 +15,19 @@ wait_for_log_to_be_detected() {
     fi
 }
+mkdir compile
+cd compile
 if [[ $MODEL_ID == "s3://"* ]]; then
     aws s3 sync $MODEL_ID $MODEL_NAME
 else
     huggingface-cli download $MODEL_ID --local-dir $MODEL_NAME
 fi
-aws s3 cp --no-progress $CONFIG_S3_URI vllm-config.yaml
-vllm serve ./$MODEL_NAME "$@" 2>&1 | tee $LOG_FILE &
+vllm serve "$@" 2>&1 | tee $LOG_FILE &
 wait_for_log_to_be_detected "Application startup complete" || exit 1
-aws s3 cp --no-progress --recursive ./ $COMPILED_ARTIFACTS_S3_URI --exclude ".cache/*"
+aws s3 cp --no-progress --recursive ./ $COMPILED_ARTIFACTS_S3_URI \
+    --exclude "**/.cache/*" \
+    --exclude global_metric_store.json

package/scripts/inference/vllm-nxd-inference/Dockerfile CHANGED Viewed

@@ -26,10 +26,7 @@ RUN git clone -b ${VLLM_GIT_BRANCH} https://github.com/aws-neuron/upstreaming-to
 WORKDIR /home/$USERNAME/vllm
 RUN chown -R $USERNAME:$USERNAME /home/$USERNAME && chsh -s /bin/bash $USERNAME
-COPY --chown=$USERNAME:$USERNAME entrypoint.sh /home/$USERNAME/vllm/
-RUN chmod +x /home/$USERNAME/vllm/entrypoint.sh
+COPY --chown=$USERNAME:$USERNAME --chmod=755 entrypoint.sh /entrypoint.sh
 USER $USERNAME
-RUN pip install -U "huggingface_hub[hf_xet]"
-ENTRYPOINT [ "bash", "entrypoint.sh" ]
+ENTRYPOINT [ "bash", "/entrypoint.sh" ]

package/scripts/inference/vllm-nxd-inference/entrypoint.sh CHANGED Viewed

@@ -1,5 +1,4 @@
 #!/bin/bash
-aws s3 sync --no-progress $COMPILED_ARTIFACTS_S3_URI ./
-exec vllm serve ./$MODEL_NAME "$@"
+cd /opt/ml/model/$COMPILED_ARTIFACTS_S3_PREFIX
+exec vllm serve "$@"