PyPI - llama-cpp-python - Versions diffs - 0.2.34__tar.gz → 0.2.36__tar.gz - Mend

llama-cpp-python 0.2.34 (tar.gz) → 0.2.36 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (670) hide show

llama_cpp_python-0.2.36/.git/FETCH_HEAD ADDED Viewed

	@@ -0,0 +1 @@
1	+ 464af5b39fea3cf1ba16e755a9df85f09bbb25ac '464af5b39fea3cf1ba16e755a9df85f09bbb25ac' of https://github.com/abetlen/llama-cpp-python

llama_cpp_python-0.2.36/.git/HEAD ADDED Viewed

	@@ -0,0 +1 @@
1	+ 464af5b39fea3cf1ba16e755a9df85f09bbb25ac

{llama_cpp_python-0.2.34 → llama_cpp_python-0.2.36}/.git/config RENAMED Viewed

@@ -9,7 +9,7 @@
 [gc]
 	auto = 0
 [http "https://github.com/"]
-	extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzX1V4SmxBM3dPRzR4cG8ydGdrVlJweXlRVjRMQVFsbjF3QnBDdg==
+	extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzX1Y2cUFITVRmZ0hoM1VTanhaR2VKRTRhUGxXUHJjRzQwRWhTSg==
 [submodule "vendor/llama.cpp"]
 	active = true
 	url = https://github.com/ggerganov/llama.cpp.git

llama_cpp_python-0.2.36/.git/index ADDED Viewed

Binary file

llama_cpp_python-0.2.36/.git/logs/HEAD ADDED Viewed

	@@ -0,0 +1 @@
1	+ 0000000000000000000000000000000000000000 464af5b39fea3cf1ba16e755a9df85f09bbb25ac runner <runner@fv-az695-903.gmayhch2cacunn5b1cp1zlasod.dx.internal.cloudapp.net> 1706543240 +0000 checkout: moving from master to refs/tags/v0.2.36

llama_cpp_python-0.2.36/.git/modules/vendor/llama.cpp/HEAD ADDED Viewed

	@@ -0,0 +1 @@
1	+ 2aed77eb06a329f0d82bb1c467f4244904d4073f

{llama_cpp_python-0.2.34 → llama_cpp_python-0.2.36}/.git/modules/vendor/llama.cpp/config RENAMED Viewed

@@ -13,7 +13,7 @@
 [gc]
 	auto = 0
 [http "https://github.com/"]
-	extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzX1V4SmxBM3dPRzR4cG8ydGdrVlJweXlRVjRMQVFsbjF3QnBDdg==
+	extraheader = AUTHORIZATION: basic eC1hY2Nlc3MtdG9rZW46Z2hzX1Y2cUFITVRmZ0hoM1VTanhaR2VKRTRhUGxXUHJjRzQwRWhTSg==
 [url "https://github.com/"]
 	insteadOf = git@github.com:
 	insteadOf = org-6826477@github.com:

llama_cpp_python-0.2.36/.git/modules/vendor/llama.cpp/index ADDED Viewed

Binary file

llama_cpp_python-0.2.36/.git/modules/vendor/llama.cpp/logs/HEAD ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ 0000000000000000000000000000000000000000 2aed77eb06a329f0d82bb1c467f4244904d4073f runner <runner@fv-az695-903.gmayhch2cacunn5b1cp1zlasod.dx.internal.cloudapp.net> 1706543241 +0000 clone: from https://github.com/ggerganov/llama.cpp.git
2	+ 2aed77eb06a329f0d82bb1c467f4244904d4073f 2aed77eb06a329f0d82bb1c467f4244904d4073f runner <runner@fv-az695-903.gmayhch2cacunn5b1cp1zlasod.dx.internal.cloudapp.net> 1706543241 +0000 checkout: moving from master to 2aed77eb06a329f0d82bb1c467f4244904d4073f

llama_cpp_python-0.2.36/.git/modules/vendor/llama.cpp/logs/refs/heads/master ADDED Viewed

	@@ -0,0 +1 @@
1	+ 0000000000000000000000000000000000000000 2aed77eb06a329f0d82bb1c467f4244904d4073f runner <runner@fv-az695-903.gmayhch2cacunn5b1cp1zlasod.dx.internal.cloudapp.net> 1706543241 +0000 clone: from https://github.com/ggerganov/llama.cpp.git

llama_cpp_python-0.2.36/.git/modules/vendor/llama.cpp/logs/refs/remotes/origin/HEAD ADDED Viewed

	@@ -0,0 +1 @@
1	+ 0000000000000000000000000000000000000000 2aed77eb06a329f0d82bb1c467f4244904d4073f runner <runner@fv-az695-903.gmayhch2cacunn5b1cp1zlasod.dx.internal.cloudapp.net> 1706543241 +0000 clone: from https://github.com/ggerganov/llama.cpp.git

llama_cpp_python-0.2.36/.git/modules/vendor/llama.cpp/objects/pack/pack-45c5d7da4d130e32bb1f98f3b58ea9cd2784fad3.idx ADDED Viewed

Binary file

llama_cpp_python-0.2.34/.git/modules/vendor/llama.cpp/objects/pack/pack-1ad8805aff0ba166c9097b7c2bd8caee41b57147.pack → llama_cpp_python-0.2.36/.git/modules/vendor/llama.cpp/objects/pack/pack-45c5d7da4d130e32bb1f98f3b58ea9cd2784fad3.pack RENAMED Viewed

Binary file

llama_cpp_python-0.2.36/.git/modules/vendor/llama.cpp/objects/pack/pack-45c5d7da4d130e32bb1f98f3b58ea9cd2784fad3.rev ADDED Viewed

Binary file

llama_cpp_python-0.2.36/.git/modules/vendor/llama.cpp/packed-refs ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ # pack-refs with: peeled fully-peeled sorted
2	+ 2aed77eb06a329f0d82bb1c467f4244904d4073f refs/remotes/origin/master

llama_cpp_python-0.2.36/.git/modules/vendor/llama.cpp/refs/heads/master ADDED Viewed

	@@ -0,0 +1 @@
1	+ 2aed77eb06a329f0d82bb1c467f4244904d4073f

llama_cpp_python-0.2.36/.git/modules/vendor/llama.cpp/shallow ADDED Viewed

	@@ -0,0 +1 @@
1	+ 2aed77eb06a329f0d82bb1c467f4244904d4073f

llama_cpp_python-0.2.34/.git/objects/6c/7c6db7f0dc20814bf39cba40760e9c468ee61d → llama_cpp_python-0.2.36/.git/objects/03/667ba9b14656308c89e62f61377b295604a99d RENAMED Viewed

Binary file

llama_cpp_python-0.2.36/.git/objects/46/4af5b39fea3cf1ba16e755a9df85f09bbb25ac ADDED Viewed

@@ -0,0 +1,3 @@
+x��]
+�0�}�)�J�6M@D{�M��B���z~�G�m���i-e��p��s�$�F"g��AYJ�hU�|�5��QPӔ)3{b)�)Ԟ�b<,���Z<jj<��}�
+W��p��˴��A:�F9g�R��;�5�^6�p{�k_c�H+

llama_cpp_python-0.2.36/.git/objects/54/66de3a4e33002429b33db9668367bb32af67eb ADDED Viewed

Binary file

llama_cpp_python-0.2.36/.git/objects/7a/bb04aa9c0f718d18a085923cdb87a670cb5437 ADDED Viewed

Binary file

llama_cpp_python-0.2.36/.git/objects/80/6b120c5e62a7be5fecc631e341d1856dac79a5 ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ x�TMo1��r��E��6�F��HU[!�ڀ�wmٻT(��ػҲ��<ϛT�>\|��]e8+E�t��݃,\ɔ"��,�Ui�y��4�T�SjEVY'w��z+ډ�k�B�z�S��,ʌ��+�z�R
2	+ ��kY@�\),!\�ߗ]\|��T��e\x��%��u I+��w¦� ��egx�fߍ��\|Х�$��k��Bk��$�E��D/�x�j��/��dߪ�*٦9�M��(l�8��?~a}r��h'�gW�-+Z4��xr��ҕ�y��Ű7�Vha��W��Ҍ��]��

llama_cpp_python-0.2.36/.git/objects/c1/0aee42e0da547428df7cac9845e246badf1803 ADDED Viewed

Binary file

llama_cpp_python-0.2.36/.git/objects/e9/9dd1767bb3a30508e8a0de0e41bd426c13c387 ADDED Viewed

Binary file

llama_cpp_python-0.2.36/.git/objects/f6/66a05325305ef8813f914ad5b909541bc8f786 ADDED Viewed

Binary file

llama_cpp_python-0.2.36/.git/objects/f7/3f3d42f48aa84ceb664f490290d1840f6873e6 ADDED Viewed

Binary file

llama_cpp_python-0.2.36/.git/objects/f7/9baa89ba3d84bc4b49e8ed314c018b9e4d4bbc ADDED Viewed

Binary file

llama_cpp_python-0.2.36/.git/objects/fa/f4a87a2e4515dbf0dbea0cc64f91fc7b81b8bb ADDED Viewed

Binary file

llama_cpp_python-0.2.36/.git/refs/tags/v0.2.36 ADDED Viewed

	@@ -0,0 +1 @@
1	+ 464af5b39fea3cf1ba16e755a9df85f09bbb25ac

llama_cpp_python-0.2.36/.git/shallow ADDED Viewed

	@@ -0,0 +1 @@
1	+ 464af5b39fea3cf1ba16e755a9df85f09bbb25ac

{llama_cpp_python-0.2.34 → llama_cpp_python-0.2.36}/CHANGELOG.md RENAMED Viewed

@@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+## [0.2.36]
+- feat: Update llama.cpp to ggerganov/llama.cpp@2aed77eb06a329f0d82bb1c467f4244904d4073f
+- feat: Add mistral instruct chat format as "mistral-instruct" by @Rafaelblsilva in #799
+## [0.2.35]
+- feat: Update llama.cpp to ggerganov/llama.cpp@d2f650cb5b04ee2726663e79b47da5efe196ce00
 ## [0.2.34]
 - feat: Update llama.cpp to ggerganov/llama.cpp@6db2b41a76ee78d5efdd5c3cddd5d7ad3f646855

{llama_cpp_python-0.2.34 → llama_cpp_python-0.2.36}/Makefile RENAMED Viewed

@@ -27,6 +27,9 @@ build.blis:
 build.metal:
 	CMAKE_ARGS="-DLLAMA_METAL=on" python3 -m pip install --verbose -e .
+build.vulkan:
+	CMAKE_ARGS="-DLLAMA_VULKAN=on" python3 -m pip install --verbose -e .
 build.sdist:
 	python3 -m build --sdist

{llama_cpp_python-0.2.34 → llama_cpp_python-0.2.36}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: llama_cpp_python
-Version: 0.2.34
+Version: 0.2.36
 Summary: Python bindings for the llama.cpp library
 Author-Email: Andrei Betlen <abetlen@gmail.com>
 License: MIT
@@ -259,6 +259,59 @@ Note that `chat_format` option must be set for the particular model you are usin
 Chat completion is available through the [`create_chat_completion`](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.create_chat_completion) method of the [`Llama`](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama) class.
+### JSON and JSON Schema Mode
+If you want to constrain chat responses to only valid JSON or a specific JSON Schema you can use the `response_format` argument to the `create_chat_completion` method.
+#### JSON Mode
+The following example will constrain the response to be valid JSON.
+```python
+>>> from llama_cpp import Llama
+>>> llm = Llama(model_path="path/to/model.gguf", chat_format="chatml")
+>>> llm.create_chat_completion(
+    messages=[
+        {
+            "role": "system",
+            "content": "You are a helpful assistant that outputs in JSON.",
+        },
+        {"role": "user", "content": "Who won the world series in 2020"},
+    ],
+    response_format={
+        "type": "json_object",
+    },
+    temperature=0.7,
+)
+```
+#### JSON Schema Mode
+To constrain the response to a specific JSON Schema, you can use the `schema` property of the `response_format` argument.
+```python
+>>> from llama_cpp import Llama
+>>> llm = Llama(model_path="path/to/model.gguf", chat_format="chatml")
+>>> llm.create_chat_completion(
+    messages=[
+        {
+            "role": "system",
+            "content": "You are a helpful assistant that outputs in JSON.",
+        },
+        {"role": "user", "content": "Who won the world series in 2020"},
+    ],
+    response_format={
+        "type": "json_object",
+        "schema": {
+            "type": "object",
+            "properties": {"team_name": {"type": "string"}},
+            "required": ["team_name"],
+        },
+    },
+    temperature=0.7,
+)
+```
 ### Function Calling
 The high-level API also provides a simple interface for function calling.

{llama_cpp_python-0.2.34 → llama_cpp_python-0.2.36}/README.md RENAMED Viewed

@@ -216,6 +216,59 @@ Note that `chat_format` option must be set for the particular model you are usin
 Chat completion is available through the [`create_chat_completion`](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.create_chat_completion) method of the [`Llama`](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama) class.
+### JSON and JSON Schema Mode
+If you want to constrain chat responses to only valid JSON or a specific JSON Schema you can use the `response_format` argument to the `create_chat_completion` method.
+#### JSON Mode
+The following example will constrain the response to be valid JSON.
+```python
+>>> from llama_cpp import Llama
+>>> llm = Llama(model_path="path/to/model.gguf", chat_format="chatml")
+>>> llm.create_chat_completion(
+    messages=[
+        {
+            "role": "system",
+            "content": "You are a helpful assistant that outputs in JSON.",
+        },
+        {"role": "user", "content": "Who won the world series in 2020"},
+    ],
+    response_format={
+        "type": "json_object",
+    },
+    temperature=0.7,
+)
+```
+#### JSON Schema Mode
+To constrain the response to a specific JSON Schema, you can use the `schema` property of the `response_format` argument.
+```python
+>>> from llama_cpp import Llama
+>>> llm = Llama(model_path="path/to/model.gguf", chat_format="chatml")
+>>> llm.create_chat_completion(
+    messages=[
+        {
+            "role": "system",
+            "content": "You are a helpful assistant that outputs in JSON.",
+        },
+        {"role": "user", "content": "Who won the world series in 2020"},
+    ],
+    response_format={
+        "type": "json_object",
+        "schema": {
+            "type": "object",
+            "properties": {"team_name": {"type": "string"}},
+            "required": ["team_name"],
+        },
+    },
+    temperature=0.7,
+)
+```
 ### Function Calling
 The high-level API also provides a simple interface for function calling.

{llama_cpp_python-0.2.34 → llama_cpp_python-0.2.36}/llama_cpp/__init__.py RENAMED Viewed

@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *
-__version__ = "0.2.34"
+__version__ = "0.2.36"

{llama_cpp_python-0.2.34 → llama_cpp_python-0.2.36}/llama_cpp/llama_chat_format.py RENAMED Viewed

@@ -877,6 +877,24 @@ def format_chatml(
     return ChatFormatterResponse(prompt=_prompt, stop=_sep)
+@register_chat_format("mistral-instruct")
+def format_mistral_instruct(
+    messages: List[llama_types.ChatCompletionRequestMessage],
+    **kwargs: Any,
+) -> ChatFormatterResponse:
+    bos = "<s>"
+    eos = "</s>"
+    stop = eos
+    prompt = bos
+    for message in messages:
+        if message["role"] == "user" and message["content"] is not None and isinstance(message["content"], str):
+            prompt += "[INST] " + message["content"]
+        elif message["role"] == "assistant" and message["content"] is not None and isinstance(message["content"], str):
+            prompt += " [/INST]" + message["content"] + eos
+    prompt += " [/INST]"
+    return ChatFormatterResponse(prompt=prompt, stop=stop)
 @register_chat_format("chatglm3")
 def format_chatglm3(
     messages: List[llama_types.ChatCompletionRequestMessage],

{llama_cpp_python-0.2.34 → llama_cpp_python-0.2.36}/tests/test_llama_chat_format.py RENAMED Viewed

@@ -1,10 +1,33 @@
 import json
+import jinja2
 from llama_cpp import (
     ChatCompletionRequestUserMessage,
 )
+import llama_cpp.llama_types as llama_types
+import llama_cpp.llama_chat_format as llama_chat_format
 from llama_cpp.llama_chat_format import hf_tokenizer_config_to_chat_formatter
+def test_mistral_instruct():
+    chat_template = "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}"
+    chat_formatter = jinja2.Template(chat_template)
+    messages = [
+        llama_types.ChatCompletionRequestUserMessage(role="user", content="Instruction"),
+        llama_types.ChatCompletionRequestAssistantMessage(role="assistant", content="Model answer"),
+        llama_types.ChatCompletionRequestUserMessage(role="user", content="Follow-up instruction"),
+    ]
+    response = llama_chat_format.format_mistral_instruct(
+        messages=messages,
+    )
+    reference = chat_formatter.render(
+        messages=messages,
+        bos_token="<s>",
+        eos_token="</s>",
+    )
+    assert response.prompt == reference
 mistral_7b_tokenizer_config = """{
   "add_bos_token": true,

llama_cpp_python-0.2.36/vendor/llama.cpp/.devops/server-cuda.Dockerfile ADDED Viewed

@@ -0,0 +1,32 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG CUDA_VERSION=11.7.1
+# Target the CUDA build image
+ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+# Target the CUDA runtime image
+ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+FROM ${BASE_CUDA_DEV_CONTAINER} as build
+# Unless otherwise specified, we make a fat build.
+ARG CUDA_DOCKER_ARCH=all
+RUN apt-get update && \
+    apt-get install -y build-essential git
+WORKDIR /app
+COPY . .
+# Set nvcc architecture
+ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
+# Enable cuBLAS
+ENV LLAMA_CUBLAS=1
+RUN make
+FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
+COPY --from=build /app/server /server
+ENTRYPOINT [ "/server" ]

llama_cpp_python-0.2.36/vendor/llama.cpp/.devops/server-intel.Dockerfile ADDED Viewed

@@ -0,0 +1,25 @@
+ARG ONEAPI_VERSION=2024.0.1-devel-ubuntu22.04
+ARG UBUNTU_VERSION=22.04
+FROM intel/hpckit:$ONEAPI_VERSION as build
+RUN apt-get update && \
+    apt-get install -y git
+WORKDIR /app
+COPY . .
+# for some reasons, "-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DLLAMA_NATIVE=ON" give worse performance
+RUN mkdir build && \
+    cd build && \
+    cmake .. -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx && \
+    cmake --build . --config Release --target main server
+FROM ubuntu:$UBUNTU_VERSION as runtime
+COPY --from=build /app/build/bin/server /server
+ENV LC_ALL=C.utf8
+ENTRYPOINT [ "/server" ]

llama_cpp_python-0.2.36/vendor/llama.cpp/.devops/server-rocm.Dockerfile ADDED Viewed

@@ -0,0 +1,45 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG ROCM_VERSION=5.6
+# Target the CUDA build image
+ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
+FROM ${BASE_ROCM_DEV_CONTAINER} as build
+# Unless otherwise specified, we make a fat build.
+# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
+# This is mostly tied to rocBLAS supported archs.
+ARG ROCM_DOCKER_ARCH=\
+    gfx803 \
+    gfx900 \
+    gfx906 \
+    gfx908 \
+    gfx90a \
+    gfx1010 \
+    gfx1030 \
+    gfx1100 \
+    gfx1101 \
+    gfx1102
+COPY requirements.txt   requirements.txt
+COPY requirements       requirements
+RUN pip install --upgrade pip setuptools wheel \
+    && pip install -r requirements.txt
+WORKDIR /app
+COPY . .
+# Set nvcc architecture
+ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
+# Enable ROCm
+ENV LLAMA_HIPBLAS=1
+ENV CC=/opt/rocm/llvm/bin/clang
+ENV CXX=/opt/rocm/llvm/bin/clang++
+RUN make
+ENTRYPOINT [ "/app/server" ]

llama_cpp_python-0.2.36/vendor/llama.cpp/.devops/server.Dockerfile ADDED Viewed

@@ -0,0 +1,20 @@
+ARG UBUNTU_VERSION=22.04
+FROM ubuntu:$UBUNTU_VERSION as build
+RUN apt-get update && \
+    apt-get install -y build-essential git
+WORKDIR /app
+COPY . .
+RUN make
+FROM ubuntu:$UBUNTU_VERSION as runtime
+COPY --from=build /app/server /server
+ENV LC_ALL=C.utf8
+ENTRYPOINT [ "/server" ]

{llama_cpp_python-0.2.34 → llama_cpp_python-0.2.36}/vendor/llama.cpp/.github/workflows/build.yml RENAMED Viewed

@@ -143,6 +143,47 @@ jobs:
           cd build
           ctest -L main --verbose
+  ubuntu-22-cmake-sycl:
+    runs-on: ubuntu-22.04
+    continue-on-error: true
+    steps:
+      - uses: actions/checkout@v2
+      - name: add oneAPI to apt
+        shell: bash
+        run: |
+          cd /tmp
+          wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+          sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+          rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+          sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
+      - name: install oneAPI dpcpp compiler
+        shell: bash
+        run: |
+          sudo apt update
+          sudo apt install intel-oneapi-compiler-dpcpp-cpp
+      - name: install oneAPI MKL library
+        shell: bash
+        run: |
+          sudo apt install intel-oneapi-mkl-devel
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+      - name: Build
+        id: cmake_build
+        run: |
+          source /opt/intel/oneapi/setvars.sh
+          mkdir build
+          cd build
+          cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
+          cmake --build . --config Release -j $(nproc)
   # TODO: build with LLAMA_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know
   #       how to debug it.
   #       ref: https://github.com/ggerganov/llama.cpp/actions/runs/7131777249/job/19420981052#step:5:1124

{llama_cpp_python-0.2.34 → llama_cpp_python-0.2.36}/vendor/llama.cpp/.github/workflows/docker.yml RENAMED Viewed

@@ -28,14 +28,18 @@ jobs:
         config:
           - { tag: "light", dockerfile: ".devops/main.Dockerfile", platforms: "linux/amd64,linux/arm64" }
           - { tag: "full", dockerfile: ".devops/full.Dockerfile", platforms: "linux/amd64,linux/arm64" }
+          - { tag: "server", dockerfile: ".devops/server.Dockerfile", platforms: "linux/amd64,linux/arm64" }
           # NOTE(canardletter): The CUDA builds on arm64 are very slow, so I
           #                     have disabled them for now until the reason why
           #                     is understood.
           - { tag: "light-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platforms: "linux/amd64" }
           - { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" }
+          - { tag: "server-cuda", dockerfile: ".devops/server-cuda.Dockerfile", platforms: "linux/amd64" }
           - { tag: "light-rocm", dockerfile: ".devops/main-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
           - { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
+          - { tag: "server-rocm", dockerfile: ".devops/server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
           - { tag: "light-intel", dockerfile: ".devops/main-intel.Dockerfile", platforms: "linux/amd64" }
+          - { tag: "server-intel", dockerfile: ".devops/server-intel.Dockerfile", platforms: "linux/amd64" }
     steps:
       - name: Check out the repo
         uses: actions/checkout@v3

llama-cpp-python 0.2.34__tar.gz → 0.2.36__tar.gz

llama-cpp-python 0.2.34 (tar.gz) → 0.2.36 (tar.gz)