aws-cdk-neuronx-patterns 0.0.13 → 0.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,6 +19,11 @@ RUN pip install -U pip && \
19
19
 
20
20
  WORKDIR /usr/src
21
21
 
22
+ # install AWS CLI v2
23
+ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
24
+ unzip awscliv2.zip && \
25
+ ./aws/install
26
+
22
27
  RUN git clone -b ${VLLM_GIT_BRANCH} https://github.com/aws-neuron/upstreaming-to-vllm.git && \
23
28
  pip install -r ./upstreaming-to-vllm/requirements/neuron.txt && \
24
29
  SETUPTOOLS_SCM_PRETEND_VERSION="0.9.0" VLLM_TARGET_DEVICE="neuron" pip install -e ./upstreaming-to-vllm
@@ -26,10 +31,11 @@ RUN git clone -b ${VLLM_GIT_BRANCH} https://github.com/aws-neuron/upstreaming-to
26
31
  WORKDIR /home/$USERNAME/vllm
27
32
 
28
33
  RUN chown -R $USERNAME:$USERNAME /home/$USERNAME && chsh -s /bin/bash $USERNAME
29
- COPY --chown=$USERNAME:$USERNAME entrypoint.sh /home/$USERNAME/vllm/
30
- RUN chmod +x /home/$USERNAME/vllm/entrypoint.sh
34
+ COPY --chown=$USERNAME:$USERNAME --chmod=755 entrypoint.sh /home/$USERNAME/vllm/
31
35
  USER $USERNAME
32
36
 
37
+ RUN aws configure set default.s3.preferred_transfer_client crt
38
+
33
39
  RUN pip install -U "huggingface_hub[hf_xet]"
34
40
 
35
41
  ENTRYPOINT [ "bash", "entrypoint.sh" ]
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
 
3
- LOG_FILE=vllm.log
3
+ LOG_FILE=~/vllm.log
4
+ touch $LOG_FILE
4
5
  wait_for_log_to_be_detected() {
5
6
  local SEARCH_TEXT="$1"
6
7
  echo "wait for \"$SEARCH_TEXT\" to be detected in \`$LOG_FILE\`..."
@@ -14,16 +15,19 @@ wait_for_log_to_be_detected() {
14
15
  fi
15
16
  }
16
17
 
18
+ mkdir compile
19
+ cd compile
20
+
17
21
  if [[ $MODEL_ID == "s3://"* ]]; then
18
22
  aws s3 sync $MODEL_ID $MODEL_NAME
19
23
  else
20
24
  huggingface-cli download $MODEL_ID --local-dir $MODEL_NAME
21
25
  fi
22
26
 
23
- aws s3 cp --no-progress $CONFIG_S3_URI vllm-config.yaml
24
-
25
- vllm serve ./$MODEL_NAME "$@" 2>&1 | tee $LOG_FILE &
27
+ vllm serve "$@" 2>&1 | tee $LOG_FILE &
26
28
 
27
29
  wait_for_log_to_be_detected "Application startup complete" || exit 1
28
30
 
29
- aws s3 cp --no-progress --recursive ./ $COMPILED_ARTIFACTS_S3_URI --exclude ".cache/*"
31
+ aws s3 cp --no-progress --recursive ./ $COMPILED_ARTIFACTS_S3_URI \
32
+ --exclude "**/.cache/*" \
33
+ --exclude global_metric_store.json
@@ -26,10 +26,7 @@ RUN git clone -b ${VLLM_GIT_BRANCH} https://github.com/aws-neuron/upstreaming-to
26
26
  WORKDIR /home/$USERNAME/vllm
27
27
 
28
28
  RUN chown -R $USERNAME:$USERNAME /home/$USERNAME && chsh -s /bin/bash $USERNAME
29
- COPY --chown=$USERNAME:$USERNAME entrypoint.sh /home/$USERNAME/vllm/
30
- RUN chmod +x /home/$USERNAME/vllm/entrypoint.sh
29
+ COPY --chown=$USERNAME:$USERNAME --chmod=755 entrypoint.sh /entrypoint.sh
31
30
  USER $USERNAME
32
31
 
33
- RUN pip install -U "huggingface_hub[hf_xet]"
34
-
35
- ENTRYPOINT [ "bash", "entrypoint.sh" ]
32
+ ENTRYPOINT [ "bash", "/entrypoint.sh" ]
@@ -1,5 +1,4 @@
1
1
  #!/bin/bash
2
2
 
3
- aws s3 sync --no-progress $COMPILED_ARTIFACTS_S3_URI ./
4
-
5
- exec vllm serve ./$MODEL_NAME "$@"
3
+ cd /opt/ml/model/$COMPILED_ARTIFACTS_S3_PREFIX
4
+ exec vllm serve "$@"