llama-cpp-python 0.1.9__tar.gz → 0.1.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/PKG-INFO +1 -1
- llama_cpp_python-0.1.10/_skbuild/linux-x86_64-3.8/cmake-install/llama_cpp/libllama.so +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/llama_cpp/llama_cpp.py +10 -10
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/llama_cpp_python.egg-info/PKG-INFO +1 -1
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/pyproject.toml +1 -1
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/setup.py +1 -1
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/.github/workflows/build.yml +7 -9
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/.gitignore +6 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/CMakeLists.txt +3 -1
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/Makefile +9 -2
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/convert-pth-to-ggml.py +0 -2
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/ggml.c +388 -538
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/ggml.h +2 -2
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/llama.cpp +32 -33
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/llama.h +6 -7
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/quantize.py +4 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/tests/CMakeLists.txt +1 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/tests/test-quantize.c +2 -2
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/tests/test-tokenizer-0.cpp +2 -0
- llama_cpp_python-0.1.9/_skbuild/linux-x86_64-3.8/cmake-install/llama_cpp/libllama.so +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/.gitignore +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/.gitmodules +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/CMakeLists.txt +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/LICENSE.md +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/README.md +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/llama_cpp/__init__.py +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/llama_cpp/llama.py +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/llama_cpp_python.egg-info/SOURCES.txt +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/llama_cpp_python.egg-info/dependency_links.txt +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/llama_cpp_python.egg-info/top_level.txt +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/poetry.lock +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/setup.cfg +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/.devops/full.Dockerfile +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/.devops/main.Dockerfile +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/.devops/tools.sh +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/.dockerignore +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/.github/ISSUE_TEMPLATE/custom.md +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/.github/workflows/docker.yml +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/LICENSE +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/README.md +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/SHA256SUMS +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/convert-gptq-to-ggml.py +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/flake.lock +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/flake.nix +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/models/ggml-vocab.bin +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/prompts/alpaca.txt +0 -0
- {llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/prompts/chat-with-bob.txt +0 -0
|
Binary file
|
|
@@ -3,7 +3,6 @@ import ctypes
|
|
|
3
3
|
from ctypes import (
|
|
4
4
|
c_int,
|
|
5
5
|
c_float,
|
|
6
|
-
c_double,
|
|
7
6
|
c_char_p,
|
|
8
7
|
c_void_p,
|
|
9
8
|
c_bool,
|
|
@@ -40,7 +39,7 @@ class llama_token_data(Structure):
|
|
|
40
39
|
|
|
41
40
|
llama_token_data_p = POINTER(llama_token_data)
|
|
42
41
|
|
|
43
|
-
llama_progress_callback = ctypes.CFUNCTYPE(None, c_double, c_void_p)
|
|
42
|
+
llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p)
|
|
44
43
|
|
|
45
44
|
|
|
46
45
|
class llama_context_params(Structure):
|
|
@@ -48,6 +47,7 @@ class llama_context_params(Structure):
|
|
|
48
47
|
("n_ctx", c_int), # text context
|
|
49
48
|
("n_parts", c_int), # -1 for default
|
|
50
49
|
("seed", c_int), # RNG seed, 0 for random
|
|
50
|
+
|
|
51
51
|
("f16_kv", c_bool), # use fp16 for KV cache
|
|
52
52
|
(
|
|
53
53
|
"logits_all",
|
|
@@ -56,6 +56,7 @@ class llama_context_params(Structure):
|
|
|
56
56
|
("vocab_only", c_bool), # only load the vocabulary, no weights
|
|
57
57
|
("use_mlock", c_bool), # force system to keep model in RAM
|
|
58
58
|
("embedding", c_bool), # embedding mode only
|
|
59
|
+
|
|
59
60
|
# called with a progress value between 0 and 1, pass NULL to disable
|
|
60
61
|
("progress_callback", llama_progress_callback),
|
|
61
62
|
# context pointer passed to the progress callback
|
|
@@ -70,8 +71,7 @@ llama_context_params_p = POINTER(llama_context_params)
|
|
|
70
71
|
|
|
71
72
|
|
|
72
73
|
def llama_context_default_params() -> llama_context_params:
|
|
73
|
-
|
|
74
|
-
return params
|
|
74
|
+
return _lib.llama_context_default_params()
|
|
75
75
|
|
|
76
76
|
|
|
77
77
|
_lib.llama_context_default_params.argtypes = []
|
|
@@ -229,9 +229,9 @@ def llama_sample_top_p_top_k(
|
|
|
229
229
|
last_n_tokens_data: llama_token_p,
|
|
230
230
|
last_n_tokens_size: c_int,
|
|
231
231
|
top_k: c_int,
|
|
232
|
-
top_p: c_double,
|
|
233
|
-
temp: c_double,
|
|
234
|
-
repeat_penalty: c_double,
|
|
232
|
+
top_p: c_float,
|
|
233
|
+
temp: c_float,
|
|
234
|
+
repeat_penalty: c_float,
|
|
235
235
|
) -> llama_token:
|
|
236
236
|
return _lib.llama_sample_top_p_top_k(
|
|
237
237
|
ctx, last_n_tokens_data, last_n_tokens_size, top_k, top_p, temp, repeat_penalty
|
|
@@ -243,9 +243,9 @@ _lib.llama_sample_top_p_top_k.argtypes = [
|
|
|
243
243
|
llama_token_p,
|
|
244
244
|
c_int,
|
|
245
245
|
c_int,
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
246
|
+
c_float,
|
|
247
|
+
c_float,
|
|
248
|
+
c_float,
|
|
249
249
|
]
|
|
250
250
|
_lib.llama_sample_top_p_top_k.restype = llama_token
|
|
251
251
|
|
{llama_cpp_python-0.1.9 → llama_cpp_python-0.1.10}/vendor/llama.cpp/.github/workflows/build.yml
RENAMED
|
@@ -8,10 +8,10 @@ on:
|
|
|
8
8
|
required: true
|
|
9
9
|
type: boolean
|
|
10
10
|
push:
|
|
11
|
-
paths: ['.github/workflows/**', 'CMakeLists.txt', 'Makefile', '
|
|
11
|
+
paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.c', '**/*.cpp']
|
|
12
12
|
pull_request:
|
|
13
13
|
types: [opened, synchronize, edited, reopened, review_requested, ready_for_review]
|
|
14
|
-
paths: ['CMakeLists.txt', 'Makefile', '
|
|
14
|
+
paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.c', '**/*.cpp']
|
|
15
15
|
|
|
16
16
|
env:
|
|
17
17
|
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
|
|
@@ -62,7 +62,7 @@ jobs:
|
|
|
62
62
|
id: cmake_test
|
|
63
63
|
run: |
|
|
64
64
|
cd build
|
|
65
|
-
ctest --
|
|
65
|
+
ctest --verbose
|
|
66
66
|
|
|
67
67
|
ubuntu-latest-cmake-sanitizer:
|
|
68
68
|
runs-on: ubuntu-latest
|
|
@@ -98,7 +98,7 @@ jobs:
|
|
|
98
98
|
id: cmake_test
|
|
99
99
|
run: |
|
|
100
100
|
cd build
|
|
101
|
-
ctest --
|
|
101
|
+
ctest --verbose
|
|
102
102
|
|
|
103
103
|
macOS-latest-make:
|
|
104
104
|
runs-on: macos-latest
|
|
@@ -143,7 +143,7 @@ jobs:
|
|
|
143
143
|
id: cmake_test
|
|
144
144
|
run: |
|
|
145
145
|
cd build
|
|
146
|
-
ctest --
|
|
146
|
+
ctest --verbose
|
|
147
147
|
|
|
148
148
|
windows-latest-cmake:
|
|
149
149
|
runs-on: windows-latest
|
|
@@ -176,16 +176,14 @@ jobs:
|
|
|
176
176
|
if: ${{ matrix.build == 'avx512' }}
|
|
177
177
|
continue-on-error: true
|
|
178
178
|
run: |
|
|
179
|
-
|
|
180
|
-
Set-Content -Path .\avx512f.exe -Value ([Convert]::FromBase64String('TVqQAAMAAAAEAAAA//8AALgAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAyAAAAA4fug4AtAnNIbgBTM0hVGhpcyBwcm9ncmFtIGNhbm5vdCBiZSBydW4gaW4gRE9TIG1vZGUuDQ0KJAAAAAAAAAClmfXY4fibi+H4m4vh+JuL4fiai+P4m4si98aL4vibi7Xbq4vg+JuLUmljaOH4m4sAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABQRQAATAEBAGo6H2QAAAAAAAAAAOAADwELAQYAAAIAAAAAAAAAAAAADBAAAAAQAAAAIAAAAABAAAAQAAAAAgAABAAAAAAAAAAEAAAAAAAAAAAgAAAAAgAAAAAAAAMAAAAAABAAABAAAAAAEAAAEAAAAAAAABAAAAAAAAAAAAAAAFQQAAAoAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAADAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC50ZXh0AAAAsgAAAAAQAAAAAgAAAAIAAAAAAAAAAAAAAAAAACAAAGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACUEAAAiBAAAAAAAABVi+xRUVNTuAcAAAAPosHrEGaD4wGJXfxbg0X8MI1F+GoAUI1F/GoBUGr1/xUAEEAAUP8VBBBAAItF/FuDwND32BvAQMnDzMx8EAAAAAAAAAAAAACkEAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAlBAAAIgQAAAAAAAApANXcml0ZUZpbGUAuQFHZXRTdGRIYW5kbGUAAEtFUk5FTDMyLmRsbAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==')) -AsByteStream
|
|
181
|
-
.\avx512f.exe && echo " AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo " AVX512F: NO"
|
|
179
|
+
echo "TODO: check avx512f"
|
|
182
180
|
|
|
183
181
|
- name: Test
|
|
184
182
|
id: cmake_test
|
|
185
183
|
if: ${{ matrix.build != 'avx512' || env.HAS_AVX512F == '1' }} # Test AVX-512 only when possible
|
|
186
184
|
run: |
|
|
187
185
|
cd build
|
|
188
|
-
ctest -C Release --
|
|
186
|
+
ctest -C Release --verbose
|
|
189
187
|
|
|
190
188
|
- name: Get commit hash
|
|
191
189
|
id: commit
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
.vscode/
|
|
6
6
|
.DS_Store
|
|
7
7
|
|
|
8
|
+
.build/
|
|
8
9
|
build/
|
|
9
10
|
build-em/
|
|
10
11
|
build-debug/
|
|
@@ -20,9 +21,14 @@ models/*
|
|
|
20
21
|
/quantize
|
|
21
22
|
/result
|
|
22
23
|
/perplexity
|
|
24
|
+
/embedding
|
|
23
25
|
|
|
24
26
|
arm_neon.h
|
|
25
27
|
compile_commands.json
|
|
26
28
|
|
|
27
29
|
.envrc
|
|
28
30
|
.direnv/
|
|
31
|
+
|
|
32
|
+
.venv
|
|
33
|
+
__pycache__
|
|
34
|
+
.swiftpm
|
|
@@ -124,8 +124,9 @@ if (LLAMA_ALL_WARNINGS)
|
|
|
124
124
|
-Wall
|
|
125
125
|
-Wextra
|
|
126
126
|
-Wpedantic
|
|
127
|
-
-Wshadow
|
|
128
127
|
-Wcast-qual
|
|
128
|
+
-Wdouble-promotion
|
|
129
|
+
-Wshadow
|
|
129
130
|
-Wstrict-prototypes
|
|
130
131
|
-Wpointer-arith
|
|
131
132
|
-Wno-unused-function
|
|
@@ -135,6 +136,7 @@ if (LLAMA_ALL_WARNINGS)
|
|
|
135
136
|
-Wextra
|
|
136
137
|
-Wpedantic
|
|
137
138
|
-Wcast-qual
|
|
139
|
+
-Wno-unused-function
|
|
138
140
|
)
|
|
139
141
|
else()
|
|
140
142
|
# todo : msvc
|
|
@@ -35,6 +35,10 @@ CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC
|
|
|
35
35
|
CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
|
|
36
36
|
LDFLAGS =
|
|
37
37
|
|
|
38
|
+
# warnings
|
|
39
|
+
CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith -Wno-unused-function
|
|
40
|
+
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
|
|
41
|
+
|
|
38
42
|
# OS specific
|
|
39
43
|
# TODO: support Windows
|
|
40
44
|
ifeq ($(UNAME_S),Linux)
|
|
@@ -212,7 +216,7 @@ $(info I CC: $(CCV))
|
|
|
212
216
|
$(info I CXX: $(CXXV))
|
|
213
217
|
$(info )
|
|
214
218
|
|
|
215
|
-
default: main quantize perplexity
|
|
219
|
+
default: main quantize perplexity embedding
|
|
216
220
|
|
|
217
221
|
#
|
|
218
222
|
# Build library
|
|
@@ -228,7 +232,7 @@ common.o: examples/common.cpp examples/common.h
|
|
|
228
232
|
$(CXX) $(CXXFLAGS) -c examples/common.cpp -o common.o
|
|
229
233
|
|
|
230
234
|
clean:
|
|
231
|
-
rm -vf *.o main quantize perplexity
|
|
235
|
+
rm -vf *.o main quantize perplexity embedding
|
|
232
236
|
|
|
233
237
|
main: examples/main/main.cpp ggml.o llama.o common.o
|
|
234
238
|
$(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o llama.o common.o -o main $(LDFLAGS)
|
|
@@ -242,6 +246,9 @@ quantize: examples/quantize/quantize.cpp ggml.o llama.o
|
|
|
242
246
|
perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o common.o
|
|
243
247
|
$(CXX) $(CXXFLAGS) examples/perplexity/perplexity.cpp ggml.o llama.o common.o -o perplexity $(LDFLAGS)
|
|
244
248
|
|
|
249
|
+
embedding: examples/embedding/embedding.cpp ggml.o llama.o common.o
|
|
250
|
+
$(CXX) $(CXXFLAGS) examples/embedding/embedding.cpp ggml.o llama.o common.o -o embedding $(LDFLAGS)
|
|
251
|
+
|
|
245
252
|
#
|
|
246
253
|
# Tests
|
|
247
254
|
#
|
|
@@ -145,13 +145,11 @@ def main():
|
|
|
145
145
|
|
|
146
146
|
print(f"Extracting only the vocab from '{fname_model}'\n")
|
|
147
147
|
|
|
148
|
-
model = torch.load(fname_model, map_location="cpu")
|
|
149
148
|
|
|
150
149
|
with open(fname_out, "wb") as fout:
|
|
151
150
|
write_header(fout, hparams, ftype)
|
|
152
151
|
write_tokens(fout, tokenizer)
|
|
153
152
|
|
|
154
|
-
del model
|
|
155
153
|
|
|
156
154
|
print(f"Done. Output file: {fname_out}\n")
|
|
157
155
|
|