llama-cpp-python 0.1.8__tar.gz → 0.1.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/PKG-INFO +1 -1
  2. llama_cpp_python-0.1.10/_skbuild/linux-x86_64-3.8/cmake-install/llama_cpp/libllama.so +0 -0
  3. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/llama_cpp/llama.py +14 -0
  4. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/llama_cpp/llama_cpp.py +10 -10
  5. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/llama_cpp_python.egg-info/PKG-INFO +1 -1
  6. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/pyproject.toml +1 -1
  7. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/setup.py +1 -1
  8. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/.github/workflows/build.yml +7 -9
  9. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/.gitignore +6 -0
  10. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/CMakeLists.txt +3 -1
  11. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/Makefile +9 -2
  12. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/convert-pth-to-ggml.py +0 -2
  13. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/ggml.c +388 -538
  14. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/ggml.h +2 -2
  15. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/llama.cpp +32 -33
  16. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/llama.h +6 -7
  17. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/quantize.py +4 -0
  18. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/tests/CMakeLists.txt +1 -0
  19. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/tests/test-quantize.c +2 -2
  20. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/tests/test-tokenizer-0.cpp +2 -0
  21. llama_cpp_python-0.1.8/_skbuild/linux-x86_64-3.8/cmake-install/llama_cpp/libllama.so +0 -0
  22. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/.gitignore +0 -0
  23. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/.gitmodules +0 -0
  24. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/CMakeLists.txt +0 -0
  25. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/LICENSE.md +0 -0
  26. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/README.md +0 -0
  27. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/llama_cpp/__init__.py +0 -0
  28. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/llama_cpp_python.egg-info/SOURCES.txt +0 -0
  29. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/llama_cpp_python.egg-info/dependency_links.txt +0 -0
  30. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/llama_cpp_python.egg-info/top_level.txt +0 -0
  31. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/poetry.lock +0 -0
  32. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/setup.cfg +0 -0
  33. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/.devops/full.Dockerfile +0 -0
  34. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/.devops/main.Dockerfile +0 -0
  35. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/.devops/tools.sh +0 -0
  36. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/.dockerignore +0 -0
  37. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/.github/ISSUE_TEMPLATE/custom.md +0 -0
  38. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/.github/workflows/docker.yml +0 -0
  39. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/LICENSE +0 -0
  40. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/README.md +0 -0
  41. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/SHA256SUMS +0 -0
  42. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/convert-gptq-to-ggml.py +0 -0
  43. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/flake.lock +0 -0
  44. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/flake.nix +0 -0
  45. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/models/ggml-vocab.bin +0 -0
  46. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/prompts/alpaca.txt +0 -0
  47. {llama_cpp_python-0.1.8 → llama_cpp_python-0.1.10}/vendor/llama.cpp/prompts/chat-with-bob.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: llama_cpp_python
3
- Version: 0.1.8
3
+ Version: 0.1.10
4
4
  Summary: A Python wrapper for llama.cpp
5
5
  Author: Andrei Betlen
6
6
  Author-email: abetlen@gmail.com
@@ -105,6 +105,20 @@ class Llama:
105
105
  output += llama_cpp.llama_token_to_str(self.ctx, token)
106
106
  return output
107
107
 
108
+ def embed(self, text: str):
109
+ """Embed a string.
110
+
111
+ Args:
112
+ text: The utf-8 encoded string to embed.
113
+
114
+ Returns:
115
+ A list of embeddings.
116
+ """
117
+ tokens = self.tokenize(text.encode("utf-8"))
118
+ self._eval(tokens, 0)
119
+ embeddings = llama_cpp.llama_get_embeddings(self.ctx)
120
+ return embeddings[:llama_cpp.llama_n_embd(self.ctx)]
121
+
108
122
  def _eval(self, tokens: List[int], n_past):
109
123
  rc = llama_cpp.llama_eval(
110
124
  self.ctx,
@@ -3,7 +3,6 @@ import ctypes
3
3
  from ctypes import (
4
4
  c_int,
5
5
  c_float,
6
- c_double,
7
6
  c_char_p,
8
7
  c_void_p,
9
8
  c_bool,
@@ -40,7 +39,7 @@ class llama_token_data(Structure):
40
39
 
41
40
  llama_token_data_p = POINTER(llama_token_data)
42
41
 
43
- llama_progress_callback = ctypes.CFUNCTYPE(None, c_double, c_void_p)
42
+ llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p)
44
43
 
45
44
 
46
45
  class llama_context_params(Structure):
@@ -48,6 +47,7 @@ class llama_context_params(Structure):
48
47
  ("n_ctx", c_int), # text context
49
48
  ("n_parts", c_int), # -1 for default
50
49
  ("seed", c_int), # RNG seed, 0 for random
50
+
51
51
  ("f16_kv", c_bool), # use fp16 for KV cache
52
52
  (
53
53
  "logits_all",
@@ -56,6 +56,7 @@ class llama_context_params(Structure):
56
56
  ("vocab_only", c_bool), # only load the vocabulary, no weights
57
57
  ("use_mlock", c_bool), # force system to keep model in RAM
58
58
  ("embedding", c_bool), # embedding mode only
59
+
59
60
  # called with a progress value between 0 and 1, pass NULL to disable
60
61
  ("progress_callback", llama_progress_callback),
61
62
  # context pointer passed to the progress callback
@@ -70,8 +71,7 @@ llama_context_params_p = POINTER(llama_context_params)
70
71
 
71
72
 
72
73
  def llama_context_default_params() -> llama_context_params:
73
- params = _lib.llama_context_default_params()
74
- return params
74
+ return _lib.llama_context_default_params()
75
75
 
76
76
 
77
77
  _lib.llama_context_default_params.argtypes = []
@@ -229,9 +229,9 @@ def llama_sample_top_p_top_k(
229
229
  last_n_tokens_data: llama_token_p,
230
230
  last_n_tokens_size: c_int,
231
231
  top_k: c_int,
232
- top_p: c_double,
233
- temp: c_double,
234
- repeat_penalty: c_double,
232
+ top_p: c_float,
233
+ temp: c_float,
234
+ repeat_penalty: c_float,
235
235
  ) -> llama_token:
236
236
  return _lib.llama_sample_top_p_top_k(
237
237
  ctx, last_n_tokens_data, last_n_tokens_size, top_k, top_p, temp, repeat_penalty
@@ -243,9 +243,9 @@ _lib.llama_sample_top_p_top_k.argtypes = [
243
243
  llama_token_p,
244
244
  c_int,
245
245
  c_int,
246
- c_double,
247
- c_double,
248
- c_double,
246
+ c_float,
247
+ c_float,
248
+ c_float,
249
249
  ]
250
250
  _lib.llama_sample_top_p_top_k.restype = llama_token
251
251
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: llama-cpp-python
3
- Version: 0.1.8
3
+ Version: 0.1.10
4
4
  Summary: A Python wrapper for llama.cpp
5
5
  Author: Andrei Betlen
6
6
  Author-email: abetlen@gmail.com
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "llama_cpp"
3
- version = "0.1.8"
3
+ version = "0.1.10"
4
4
  description = "Python bindings for the llama.cpp library"
5
5
  authors = ["Andrei Betlen <abetlen@gmail.com>"]
6
6
  license = "MIT"
@@ -3,7 +3,7 @@ from skbuild import setup
3
3
  setup(
4
4
  name="llama_cpp_python",
5
5
  description="A Python wrapper for llama.cpp",
6
- version="0.1.8",
6
+ version="0.1.10",
7
7
  author="Andrei Betlen",
8
8
  author_email="abetlen@gmail.com",
9
9
  license="MIT",
@@ -8,10 +8,10 @@ on:
8
8
  required: true
9
9
  type: boolean
10
10
  push:
11
- paths: ['.github/workflows/**', 'CMakeLists.txt', 'Makefile', '**.h', '*.c', '**.cpp']
11
+ paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.c', '**/*.cpp']
12
12
  pull_request:
13
13
  types: [opened, synchronize, edited, reopened, review_requested, ready_for_review]
14
- paths: ['CMakeLists.txt', 'Makefile', '**.h', '*.c', '**.cpp']
14
+ paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.c', '**/*.cpp']
15
15
 
16
16
  env:
17
17
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
@@ -62,7 +62,7 @@ jobs:
62
62
  id: cmake_test
63
63
  run: |
64
64
  cd build
65
- ctest --output-on-failure
65
+ ctest --verbose
66
66
 
67
67
  ubuntu-latest-cmake-sanitizer:
68
68
  runs-on: ubuntu-latest
@@ -98,7 +98,7 @@ jobs:
98
98
  id: cmake_test
99
99
  run: |
100
100
  cd build
101
- ctest --output-on-failure
101
+ ctest --verbose
102
102
 
103
103
  macOS-latest-make:
104
104
  runs-on: macos-latest
@@ -143,7 +143,7 @@ jobs:
143
143
  id: cmake_test
144
144
  run: |
145
145
  cd build
146
- ctest --output-on-failure
146
+ ctest --verbose
147
147
 
148
148
  windows-latest-cmake:
149
149
  runs-on: windows-latest
@@ -176,16 +176,14 @@ jobs:
176
176
  if: ${{ matrix.build == 'avx512' }}
177
177
  continue-on-error: true
178
178
  run: |
179
- cd build
180
- Set-Content -Path .\avx512f.exe -Value ([Convert]::FromBase64String('TVqQAAMAAAAEAAAA//8AALgAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAyAAAAA4fug4AtAnNIbgBTM0hVGhpcyBwcm9ncmFtIGNhbm5vdCBiZSBydW4gaW4gRE9TIG1vZGUuDQ0KJAAAAAAAAAClmfXY4fibi+H4m4vh+JuL4fiai+P4m4si98aL4vibi7Xbq4vg+JuLUmljaOH4m4sAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABQRQAATAEBAGo6H2QAAAAAAAAAAOAADwELAQYAAAIAAAAAAAAAAAAADBAAAAAQAAAAIAAAAABAAAAQAAAAAgAABAAAAAAAAAAEAAAAAAAAAAAgAAAAAgAAAAAAAAMAAAAAABAAABAAAAAAEAAAEAAAAAAAABAAAAAAAAAAAAAAAFQQAAAoAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAADAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC50ZXh0AAAAsgAAAAAQAAAAAgAAAAIAAAAAAAAAAAAAAAAAACAAAGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACUEAAAiBAAAAAAAABVi+xRUVNTuAcAAAAPosHrEGaD4wGJXfxbg0X8MI1F+GoAUI1F/GoBUGr1/xUAEEAAUP8VBBBAAItF/FuDwND32BvAQMnDzMx8EAAAAAAAAAAAAACkEAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAlBAAAIgQAAAAAAAApANXcml0ZUZpbGUAuQFHZXRTdGRIYW5kbGUAAEtFUk5FTDMyLmRsbAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==')) -AsByteStream
181
- .\avx512f.exe && echo " AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo " AVX512F: NO"
179
+ echo "TODO: check avx512f"
182
180
 
183
181
  - name: Test
184
182
  id: cmake_test
185
183
  if: ${{ matrix.build != 'avx512' || env.HAS_AVX512F == '1' }} # Test AVX-512 only when possible
186
184
  run: |
187
185
  cd build
188
- ctest -C Release --output-on-failure
186
+ ctest -C Release --verbose
189
187
 
190
188
  - name: Get commit hash
191
189
  id: commit
@@ -5,6 +5,7 @@
5
5
  .vscode/
6
6
  .DS_Store
7
7
 
8
+ .build/
8
9
  build/
9
10
  build-em/
10
11
  build-debug/
@@ -20,9 +21,14 @@ models/*
20
21
  /quantize
21
22
  /result
22
23
  /perplexity
24
+ /embedding
23
25
 
24
26
  arm_neon.h
25
27
  compile_commands.json
26
28
 
27
29
  .envrc
28
30
  .direnv/
31
+
32
+ .venv
33
+ __pycache__
34
+ .swiftpm
@@ -124,8 +124,9 @@ if (LLAMA_ALL_WARNINGS)
124
124
  -Wall
125
125
  -Wextra
126
126
  -Wpedantic
127
- -Wshadow
128
127
  -Wcast-qual
128
+ -Wdouble-promotion
129
+ -Wshadow
129
130
  -Wstrict-prototypes
130
131
  -Wpointer-arith
131
132
  -Wno-unused-function
@@ -135,6 +136,7 @@ if (LLAMA_ALL_WARNINGS)
135
136
  -Wextra
136
137
  -Wpedantic
137
138
  -Wcast-qual
139
+ -Wno-unused-function
138
140
  )
139
141
  else()
140
142
  # todo : msvc
@@ -35,6 +35,10 @@ CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC
35
35
  CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
36
36
  LDFLAGS =
37
37
 
38
+ # warnings
39
+ CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith -Wno-unused-function
40
+ CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
41
+
38
42
  # OS specific
39
43
  # TODO: support Windows
40
44
  ifeq ($(UNAME_S),Linux)
@@ -212,7 +216,7 @@ $(info I CC: $(CCV))
212
216
  $(info I CXX: $(CXXV))
213
217
  $(info )
214
218
 
215
- default: main quantize perplexity
219
+ default: main quantize perplexity embedding
216
220
 
217
221
  #
218
222
  # Build library
@@ -228,7 +232,7 @@ common.o: examples/common.cpp examples/common.h
228
232
  $(CXX) $(CXXFLAGS) -c examples/common.cpp -o common.o
229
233
 
230
234
  clean:
231
- rm -vf *.o main quantize perplexity
235
+ rm -vf *.o main quantize perplexity embedding
232
236
 
233
237
  main: examples/main/main.cpp ggml.o llama.o common.o
234
238
  $(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o llama.o common.o -o main $(LDFLAGS)
@@ -242,6 +246,9 @@ quantize: examples/quantize/quantize.cpp ggml.o llama.o
242
246
  perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o common.o
243
247
  $(CXX) $(CXXFLAGS) examples/perplexity/perplexity.cpp ggml.o llama.o common.o -o perplexity $(LDFLAGS)
244
248
 
249
+ embedding: examples/embedding/embedding.cpp ggml.o llama.o common.o
250
+ $(CXX) $(CXXFLAGS) examples/embedding/embedding.cpp ggml.o llama.o common.o -o embedding $(LDFLAGS)
251
+
245
252
  #
246
253
  # Tests
247
254
  #
@@ -145,13 +145,11 @@ def main():
145
145
 
146
146
  print(f"Extracting only the vocab from '{fname_model}'\n")
147
147
 
148
- model = torch.load(fname_model, map_location="cpu")
149
148
 
150
149
  with open(fname_out, "wb") as fout:
151
150
  write_header(fout, hparams, ftype)
152
151
  write_tokens(fout, tokenizer)
153
152
 
154
- del model
155
153
 
156
154
  print(f"Done. Output file: {fname_out}\n")
157
155