PyPI - llama-cpp-python - Versions diffs - 0.1.8__tar.gz → 0.1.10__tar.gz - Mend

llama-cpp-python 0.1.8__tar.gz → 0.1.10__tar.gz

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

{llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: llama_cpp_python
-Version: 0.1.8
+Version: 0.1.10
 Summary: A Python wrapper for llama.cpp
 Author: Andrei Betlen
 Author-email: abetlen@gmail.com

llama_cpp_python-0.1.10/_skbuild/linux-x86_64-3.8/cmake-install/llama_cpp/libllama.so ADDED Viewed

Binary file

{llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/llama_cpp/llama.py RENAMED Viewed

@@ -105,6 +105,20 @@ class Llama:
             output += llama_cpp.llama_token_to_str(self.ctx, token)
         return output
+    def embed(self, text: str):
+        """Embed a string.
+        Args:
+            text: The utf-8 encoded string to embed.
+        Returns:
+            A list of embeddings.
+        """
+        tokens = self.tokenize(text.encode("utf-8"))
+        self._eval(tokens, 0)
+        embeddings = llama_cpp.llama_get_embeddings(self.ctx)
+        return embeddings[:llama_cpp.llama_n_embd(self.ctx)]
     def _eval(self, tokens: List[int], n_past):
         rc = llama_cpp.llama_eval(
             self.ctx,

{llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/llama_cpp/llama_cpp.py RENAMED Viewed

@@ -3,7 +3,6 @@ import ctypes
 from ctypes import (
     c_int,
     c_float,
-    c_double,
     c_char_p,
     c_void_p,
     c_bool,
@@ -40,7 +39,7 @@ class llama_token_data(Structure):
 llama_token_data_p = POINTER(llama_token_data)
-llama_progress_callback = ctypes.CFUNCTYPE(None, c_double, c_void_p)
+llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p)
 class llama_context_params(Structure):
@@ -48,6 +47,7 @@ class llama_context_params(Structure):
         ("n_ctx", c_int),  # text context
         ("n_parts", c_int),  # -1 for default
         ("seed", c_int),  # RNG seed, 0 for random
         ("f16_kv", c_bool),  # use fp16 for KV cache
         (
             "logits_all",
@@ -56,6 +56,7 @@ class llama_context_params(Structure):
         ("vocab_only", c_bool),  # only load the vocabulary, no weights
         ("use_mlock", c_bool),  # force system to keep model in RAM
         ("embedding", c_bool),  # embedding mode only
         # called with a progress value between 0 and 1, pass NULL to disable
         ("progress_callback", llama_progress_callback),
         # context pointer passed to the progress callback
@@ -70,8 +71,7 @@ llama_context_params_p = POINTER(llama_context_params)
 def llama_context_default_params() -> llama_context_params:
-    params = _lib.llama_context_default_params()
-    return params
+    return _lib.llama_context_default_params()
 _lib.llama_context_default_params.argtypes = []
@@ -229,9 +229,9 @@ def llama_sample_top_p_top_k(
     last_n_tokens_data: llama_token_p,
     last_n_tokens_size: c_int,
     top_k: c_int,
-    top_p: c_double,
-    temp: c_double,
-    repeat_penalty: c_double,
+    top_p: c_float,
+    temp: c_float,
+    repeat_penalty: c_float,
 ) -> llama_token:
     return _lib.llama_sample_top_p_top_k(
         ctx, last_n_tokens_data, last_n_tokens_size, top_k, top_p, temp, repeat_penalty
@@ -243,9 +243,9 @@ _lib.llama_sample_top_p_top_k.argtypes = [
     llama_token_p,
     c_int,
     c_int,
-    c_double,
-    c_double,
-    c_double,
+    c_float,
+    c_float,
+    c_float,
 ]
 _lib.llama_sample_top_p_top_k.restype = llama_token

{llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/llama_cpp_python.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: llama-cpp-python
-Version: 0.1.8
+Version: 0.1.10
 Summary: A Python wrapper for llama.cpp
 Author: Andrei Betlen
 Author-email: abetlen@gmail.com

{llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "llama_cpp"
-version = "0.1.8"
+version = "0.1.10"
 description = "Python bindings for the llama.cpp library"
 authors = ["Andrei Betlen <abetlen@gmail.com>"]
 license = "MIT"

{llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/setup.py RENAMED Viewed

@@ -3,7 +3,7 @@ from skbuild import setup
 setup(
     name="llama_cpp_python",
     description="A Python wrapper for llama.cpp",
-    version="0.1.8",
+    version="0.1.10",
     author="Andrei Betlen",
     author_email="abetlen@gmail.com",
     license="MIT",

{llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/.github/workflows/build.yml RENAMED Viewed

@@ -8,10 +8,10 @@ on:
         required: true
         type: boolean
   push:
-    paths: ['.github/workflows/**', 'CMakeLists.txt', 'Makefile', '**.h', '*.c', '**.cpp']
+    paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.c', '**/*.cpp']
   pull_request:
     types: [opened, synchronize, edited, reopened, review_requested, ready_for_review]
-    paths: ['CMakeLists.txt', 'Makefile', '**.h', '*.c', '**.cpp']
+    paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.c', '**/*.cpp']
 env:
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
@@ -62,7 +62,7 @@ jobs:
         id: cmake_test
         run: |
           cd build
-          ctest --output-on-failure
+          ctest --verbose
   ubuntu-latest-cmake-sanitizer:
     runs-on: ubuntu-latest
@@ -98,7 +98,7 @@ jobs:
         id: cmake_test
         run: |
           cd build
-          ctest --output-on-failure
+          ctest --verbose
   macOS-latest-make:
     runs-on: macos-latest
@@ -143,7 +143,7 @@ jobs:
         id: cmake_test
         run: |
           cd build
-          ctest --output-on-failure
+          ctest --verbose
   windows-latest-cmake:
     runs-on: windows-latest
@@ -176,16 +176,14 @@ jobs:
         if: ${{ matrix.build == 'avx512' }}
         continue-on-error: true
         run: |
-          cd build
-          Set-Content -Path .\avx512f.exe -Value ([Convert]::FromBase64String('TVqQAAMAAAAEAAAA//8AALgAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAyAAAAA4fug4AtAnNIbgBTM0hVGhpcyBwcm9ncmFtIGNhbm5vdCBiZSBydW4gaW4gRE9TIG1vZGUuDQ0KJAAAAAAAAAClmfXY4fibi+H4m4vh+JuL4fiai+P4m4si98aL4vibi7Xbq4vg+JuLUmljaOH4m4sAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABQRQAATAEBAGo6H2QAAAAAAAAAAOAADwELAQYAAAIAAAAAAAAAAAAADBAAAAAQAAAAIAAAAABAAAAQAAAAAgAABAAAAAAAAAAEAAAAAAAAAAAgAAAAAgAAAAAAAAMAAAAAABAAABAAAAAAEAAAEAAAAAAAABAAAAAAAAAAAAAAAFQQAAAoAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAADAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC50ZXh0AAAAsgAAAAAQAAAAAgAAAAIAAAAAAAAAAAAAAAAAACAAAGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACUEAAAiBAAAAAAAABVi+xRUVNTuAcAAAAPosHrEGaD4wGJXfxbg0X8MI1F+GoAUI1F/GoBUGr1/xUAEEAAUP8VBBBAAItF/FuDwND32BvAQMnDzMx8EAAAAAAAAAAAAACkEAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAlBAAAIgQAAAAAAAApANXcml0ZUZpbGUAuQFHZXRTdGRIYW5kbGUAAEtFUk5FTDMyLmRsbAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==')) -AsByteStream
-          .\avx512f.exe && echo " AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo " AVX512F: NO"
+          echo "TODO: check avx512f"
       - name: Test
         id: cmake_test
         if: ${{ matrix.build != 'avx512' || env.HAS_AVX512F == '1' }} # Test AVX-512 only when possible
         run: |
           cd build
-          ctest -C Release --output-on-failure
+          ctest -C Release --verbose
       - name: Get commit hash
         id: commit

{llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/.gitignore RENAMED Viewed

@@ -5,6 +5,7 @@
 .vscode/
 .DS_Store
+.build/
 build/
 build-em/
 build-debug/
@@ -20,9 +21,14 @@ models/*
 /quantize
 /result
 /perplexity
+/embedding
 arm_neon.h
 compile_commands.json
 .envrc
 .direnv/
+.venv
+__pycache__
+.swiftpm

{llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/CMakeLists.txt RENAMED Viewed

@@ -124,8 +124,9 @@ if (LLAMA_ALL_WARNINGS)
             -Wall
             -Wextra
             -Wpedantic
-            -Wshadow
             -Wcast-qual
+            -Wdouble-promotion
+            -Wshadow
             -Wstrict-prototypes
             -Wpointer-arith
             -Wno-unused-function
@@ -135,6 +136,7 @@ if (LLAMA_ALL_WARNINGS)
             -Wextra
             -Wpedantic
             -Wcast-qual
+            -Wno-unused-function
         )
     else()
         # todo : msvc

{llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/Makefile RENAMED Viewed

@@ -35,6 +35,10 @@ CFLAGS   = -I.              -O3 -DNDEBUG -std=c11   -fPIC
 CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
 LDFLAGS  =
+# warnings
+CFLAGS   += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith -Wno-unused-function
+CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
 # OS specific
 # TODO: support Windows
 ifeq ($(UNAME_S),Linux)
@@ -212,7 +216,7 @@ $(info I CC:       $(CCV))
 $(info I CXX:      $(CXXV))
 $(info )
-default: main quantize perplexity
+default: main quantize perplexity embedding
 #
 # Build library
@@ -228,7 +232,7 @@ common.o: examples/common.cpp examples/common.h
 	$(CXX) $(CXXFLAGS) -c examples/common.cpp -o common.o
 clean:
-	rm -vf *.o main quantize perplexity
+	rm -vf *.o main quantize perplexity embedding
 main: examples/main/main.cpp ggml.o llama.o common.o
 	$(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o llama.o common.o -o main $(LDFLAGS)
@@ -242,6 +246,9 @@ quantize: examples/quantize/quantize.cpp ggml.o llama.o
 perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o common.o
 	$(CXX) $(CXXFLAGS) examples/perplexity/perplexity.cpp ggml.o llama.o common.o -o perplexity $(LDFLAGS)
+embedding: examples/embedding/embedding.cpp ggml.o llama.o common.o
+	$(CXX) $(CXXFLAGS) examples/embedding/embedding.cpp ggml.o llama.o common.o -o embedding $(LDFLAGS)
 #
 # Tests
 #

{llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/convert-pth-to-ggml.py RENAMED Viewed

@@ -145,13 +145,11 @@ def main():
         print(f"Extracting only the vocab from '{fname_model}'\n")
-        model = torch.load(fname_model, map_location="cpu")
         with open(fname_out, "wb") as fout:
             write_header(fout, hparams, ftype)
             write_tokens(fout, tokenizer)
-        del model
         print(f"Done. Output file: {fname_out}\n")

llama-cpp-python 0.1.8__tar.gz → 0.1.10__tar.gz

llama-cpp-python 0.1.8__tar.gz → 0.1.10__tar.gz