llama-cpp-python 0.1.15__tar.gz → 0.1.16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/PKG-INFO +1 -1
- llama_cpp_python-0.1.16/_skbuild/linux-x86_64-3.8/cmake-install/llama_cpp/libllama.so +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/llama_cpp/llama_cpp.py +31 -1
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/llama_cpp_python.egg-info/PKG-INFO +1 -1
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/pyproject.toml +1 -1
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/setup.py +1 -1
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/Makefile +2 -89
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/README.md +4 -2
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/ggml.c +346 -345
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/ggml.h +26 -26
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/llama.cpp +31 -4
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/llama.h +17 -0
- llama_cpp_python-0.1.15/_skbuild/linux-x86_64-3.8/cmake-install/llama_cpp/libllama.so +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/.gitignore +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/.gitmodules +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/CMakeLists.txt +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/LICENSE.md +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/README.md +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/llama_cpp/__init__.py +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/llama_cpp/llama.py +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/llama_cpp/llama_types.py +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/llama_cpp_python.egg-info/SOURCES.txt +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/llama_cpp_python.egg-info/dependency_links.txt +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/llama_cpp_python.egg-info/requires.txt +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/llama_cpp_python.egg-info/top_level.txt +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/poetry.lock +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/setup.cfg +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/.devops/full.Dockerfile +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/.devops/main.Dockerfile +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/.devops/tools.sh +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/.dockerignore +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/.github/ISSUE_TEMPLATE/custom.md +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/.github/workflows/build.yml +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/.github/workflows/docker.yml +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/.gitignore +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/CMakeLists.txt +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/LICENSE +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/SHA256SUMS +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/convert-gptq-to-ggml.py +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/convert-pth-to-ggml.py +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/flake.lock +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/flake.nix +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/models/ggml-vocab.bin +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/prompts/alpaca.txt +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/prompts/chat-with-bob.txt +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/tests/CMakeLists.txt +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/tests/test-quantize.c +0 -0
- {llama_cpp_python-0.1.15 → llama_cpp_python-0.1.16}/vendor/llama.cpp/tests/test-tokenizer-0.cpp +0 -0
|
Binary file
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import ctypes
|
|
2
2
|
|
|
3
|
-
from ctypes import c_int, c_float, c_char_p, c_void_p, c_bool, POINTER, Structure, Array
|
|
3
|
+
from ctypes import c_int, c_float, c_char_p, c_void_p, c_bool, POINTER, Structure, Array, c_uint8, c_size_t
|
|
4
4
|
|
|
5
5
|
import pathlib
|
|
6
6
|
from itertools import chain
|
|
@@ -101,6 +101,36 @@ def llama_model_quantize(
|
|
|
101
101
|
_lib.llama_model_quantize.argtypes = [c_char_p, c_char_p, c_int, c_int]
|
|
102
102
|
_lib.llama_model_quantize.restype = c_int
|
|
103
103
|
|
|
104
|
+
# Returns the KV cache that will contain the context for the
|
|
105
|
+
# ongoing prediction with the model.
|
|
106
|
+
def llama_get_kv_cache(ctx: llama_context_p):
|
|
107
|
+
return _lib.llama_get_kv_cache(ctx)
|
|
108
|
+
|
|
109
|
+
_lib.llama_get_kv_cache.argtypes = [llama_context_p]
|
|
110
|
+
_lib.llama_get_kv_cache.restype = POINTER(c_uint8)
|
|
111
|
+
|
|
112
|
+
# Returns the size of the KV cache
|
|
113
|
+
def llama_get_kv_cache_size(ctx: llama_context_p) -> c_size_t:
|
|
114
|
+
return _lib.llama_get_kv_cache_size(ctx)
|
|
115
|
+
|
|
116
|
+
_lib.llama_get_kv_cache_size.argtypes = [llama_context_p]
|
|
117
|
+
_lib.llama_get_kv_cache_size.restype = c_size_t
|
|
118
|
+
|
|
119
|
+
# Returns the number of tokens in the KV cache
|
|
120
|
+
def llama_get_kv_cache_token_count(ctx: llama_context_p) -> c_int:
|
|
121
|
+
return _lib.llama_get_kv_cache_token_count(ctx)
|
|
122
|
+
|
|
123
|
+
_lib.llama_get_kv_cache_token_count.argtypes = [llama_context_p]
|
|
124
|
+
_lib.llama_get_kv_cache_token_count.restype = c_int
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# Sets the KV cache containing the current context for the model
|
|
128
|
+
def llama_set_kv_cache(ctx: llama_context_p, kv_cache, n_size: c_size_t, n_token_count: c_int):
|
|
129
|
+
return _lib.llama_set_kv_cache(ctx, kv_cache, n_size, n_token_count)
|
|
130
|
+
|
|
131
|
+
_lib.llama_set_kv_cache.argtypes = [llama_context_p, POINTER(c_uint8), c_size_t, c_int]
|
|
132
|
+
_lib.llama_set_kv_cache.restype = None
|
|
133
|
+
|
|
104
134
|
|
|
105
135
|
# Run the llama inference to obtain the logits and probabilities for the next token.
|
|
106
136
|
# tokens + n_tokens is the provided batch of new tokens to process
|
|
@@ -10,7 +10,7 @@ setup(
|
|
|
10
10
|
description="A Python wrapper for llama.cpp",
|
|
11
11
|
long_description=long_description,
|
|
12
12
|
long_description_content_type="text/markdown",
|
|
13
|
-
version="0.1.15",
|
|
13
|
+
version="0.1.16",
|
|
14
14
|
author="Andrei Betlen",
|
|
15
15
|
author_email="abetlen@gmail.com",
|
|
16
16
|
license="MIT",
|
|
@@ -70,95 +70,8 @@ endif
|
|
|
70
70
|
# TODO: probably these flags need to be tweaked on some architectures
|
|
71
71
|
# feel free to update the Makefile for your architecture and send a pull request or issue
|
|
72
72
|
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
|
|
73
|
-
ifeq ($(UNAME_S),Darwin)
|
|
74
|
-
F16C_M := $(shell sysctl machdep.cpu.features)
|
|
75
|
-
ifneq (,$(findstring F16C,$(F16C_M)))
|
|
76
|
-
CFLAGS += -mf16c
|
|
77
|
-
endif
|
|
78
|
-
AVX1_M := $(shell sysctl machdep.cpu.features)
|
|
79
|
-
ifneq (,$(findstring FMA,$(AVX1_M)))
|
|
80
|
-
CFLAGS += -mfma
|
|
81
|
-
endif
|
|
82
|
-
ifneq (,$(findstring AVX1.0,$(AVX1_M)))
|
|
83
|
-
CFLAGS += -mavx
|
|
84
|
-
endif
|
|
85
|
-
AVX2_M := $(shell sysctl machdep.cpu.leaf7_features)
|
|
86
|
-
ifneq (,$(findstring AVX2,$(AVX2_M)))
|
|
87
|
-
CFLAGS += -mavx2
|
|
88
|
-
endif
|
|
89
|
-
else ifeq ($(UNAME_S),Linux)
|
|
90
|
-
AVX1_M := $(shell grep "avx " /proc/cpuinfo)
|
|
91
|
-
ifneq (,$(findstring avx,$(AVX1_M)))
|
|
92
|
-
CFLAGS += -mavx
|
|
93
|
-
endif
|
|
94
|
-
AVX2_M := $(shell grep "avx2 " /proc/cpuinfo)
|
|
95
|
-
ifneq (,$(findstring avx2,$(AVX2_M)))
|
|
96
|
-
CFLAGS += -mavx2
|
|
97
|
-
endif
|
|
98
|
-
FMA_M := $(shell grep "fma " /proc/cpuinfo)
|
|
99
|
-
ifneq (,$(findstring fma,$(FMA_M)))
|
|
100
|
-
CFLAGS += -mfma
|
|
101
|
-
endif
|
|
102
|
-
F16C_M := $(shell grep "f16c " /proc/cpuinfo)
|
|
103
|
-
ifneq (,$(findstring f16c,$(F16C_M)))
|
|
104
|
-
CFLAGS += -mf16c
|
|
105
|
-
endif
|
|
106
|
-
SSE3_M := $(shell grep "sse3 " /proc/cpuinfo)
|
|
107
|
-
ifneq (,$(findstring sse3,$(SSE3_M)))
|
|
108
|
-
CFLAGS += -msse3
|
|
109
|
-
endif
|
|
110
|
-
AVX512F_M := $(shell grep "avx512f " /proc/cpuinfo)
|
|
111
|
-
ifneq (,$(findstring avx512f,$(AVX512F_M)))
|
|
112
|
-
CFLAGS += -mavx512f
|
|
113
|
-
endif
|
|
114
|
-
AVX512BW_M := $(shell grep "avx512bw " /proc/cpuinfo)
|
|
115
|
-
ifneq (,$(findstring avx512bw,$(AVX512BW_M)))
|
|
116
|
-
CFLAGS += -mavx512bw
|
|
117
|
-
endif
|
|
118
|
-
AVX512DQ_M := $(shell grep "avx512dq " /proc/cpuinfo)
|
|
119
|
-
ifneq (,$(findstring avx512dq,$(AVX512DQ_M)))
|
|
120
|
-
CFLAGS += -mavx512dq
|
|
121
|
-
endif
|
|
122
|
-
AVX512VL_M := $(shell grep "avx512vl " /proc/cpuinfo)
|
|
123
|
-
ifneq (,$(findstring avx512vl,$(AVX512VL_M)))
|
|
124
|
-
CFLAGS += -mavx512vl
|
|
125
|
-
endif
|
|
126
|
-
AVX512CD_M := $(shell grep "avx512cd " /proc/cpuinfo)
|
|
127
|
-
ifneq (,$(findstring avx512cd,$(AVX512CD_M)))
|
|
128
|
-
CFLAGS += -mavx512cd
|
|
129
|
-
endif
|
|
130
|
-
AVX512ER_M := $(shell grep "avx512er " /proc/cpuinfo)
|
|
131
|
-
ifneq (,$(findstring avx512er,$(AVX512ER_M)))
|
|
132
|
-
CFLAGS += -mavx512er
|
|
133
|
-
endif
|
|
134
|
-
AVX512IFMA_M := $(shell grep "avx512ifma " /proc/cpuinfo)
|
|
135
|
-
ifneq (,$(findstring avx512ifma,$(AVX512IFMA_M)))
|
|
136
|
-
CFLAGS += -mavx512ifma
|
|
137
|
-
endif
|
|
138
|
-
AVX512PF_M := $(shell grep "avx512pf " /proc/cpuinfo)
|
|
139
|
-
ifneq (,$(findstring avx512pf,$(AVX512PF_M)))
|
|
140
|
-
CFLAGS += -mavx512pf
|
|
141
|
-
endif
|
|
142
|
-
else ifeq ($(UNAME_S),Haiku)
|
|
143
|
-
AVX1_M := $(shell sysinfo -cpu | grep -w "AVX")
|
|
144
|
-
ifneq (,$(findstring AVX,$(AVX1_M)))
|
|
145
|
-
CFLAGS += -mavx
|
|
146
|
-
endif
|
|
147
|
-
AVX2_M := $(shell sysinfo -cpu | grep -w "AVX2")
|
|
148
|
-
ifneq (,$(findstring AVX2,$(AVX2_M)))
|
|
149
|
-
CFLAGS += -mavx2
|
|
150
|
-
endif
|
|
151
|
-
FMA_M := $(shell sysinfo -cpu | grep -w "FMA")
|
|
152
|
-
ifneq (,$(findstring FMA,$(FMA_M)))
|
|
153
|
-
CFLAGS += -mfma
|
|
154
|
-
endif
|
|
155
|
-
F16C_M := $(shell sysinfo -cpu | grep -w "F16C")
|
|
156
|
-
ifneq (,$(findstring F16C,$(F16C_M)))
|
|
157
|
-
CFLAGS += -mf16c
|
|
158
|
-
endif
|
|
159
|
-
else
|
|
160
|
-
CFLAGS += -mfma -mf16c -mavx -mavx2
|
|
161
|
-
endif
|
|
73
|
+
# Use all CPU extensions that are available:
|
|
74
|
+
CFLAGS += -march=native -mtune=native
|
|
162
75
|
endif
|
|
163
76
|
ifneq ($(filter ppc64%,$(UNAME_M)),)
|
|
164
77
|
POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
|
|
@@ -232,13 +232,15 @@ cadaver, cauliflower, cabbage (vegetable), catalpa (tree) and Cailleach.
|
|
|
232
232
|
|
|
233
233
|
- Obtain the `gpt4all-lora-quantized.bin` model
|
|
234
234
|
- It is distributed in the old `ggml` format which is now obsoleted
|
|
235
|
-
- You have to convert it to the new format using [./convert-gpt4all-to-ggml.py](./convert-gpt4all-to-ggml.py)
|
|
235
|
+
- You have to convert it to the new format using [./convert-gpt4all-to-ggml.py](./convert-gpt4all-to-ggml.py). You may also need to
|
|
236
|
+
convert the model from the old format to the new format with [./migrate-ggml-2023-03-30-pr613.py](./migrate-ggml-2023-03-30-pr613.py):
|
|
236
237
|
|
|
237
238
|
```bash
|
|
238
239
|
python3 convert-gpt4all-to-ggml.py models/gpt4all-7B/gpt4all-lora-quantized.bin ./models/tokenizer.model
|
|
240
|
+
python3 migrate-ggml-2023-03-30-pr613.py models/gpt4all-7B/gpt4all-lora-quantized.bin models/gpt4all-7B/gpt4all-lora-quantized-new.bin
|
|
239
241
|
```
|
|
240
242
|
|
|
241
|
-
- You can now use the newly generated `gpt4all-lora-quantized.bin` model in exactly the same way as all other models
|
|
243
|
+
- You can now use the newly generated `gpt4all-lora-quantized-new.bin` model in exactly the same way as all other models
|
|
242
244
|
- The original model is saved in the same folder with a suffix `.orig`
|
|
243
245
|
|
|
244
246
|
### Obtaining and verifying the Facebook LLaMA original model and Stanford Alpaca model data
|