llama-cpp-python-win 0.3.16__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. bin/convert_hf_to_gguf.py +8751 -0
  2. bin/ggml-base.dll +0 -0
  3. bin/ggml-cpu.dll +0 -0
  4. bin/ggml.dll +0 -0
  5. bin/llama-mtmd-cli.exe +0 -0
  6. bin/llama.dll +0 -0
  7. bin/mtmd.dll +0 -0
  8. include/ggml-alloc.h +76 -0
  9. include/ggml-backend.h +354 -0
  10. include/ggml-blas.h +25 -0
  11. include/ggml-cann.h +123 -0
  12. include/ggml-cpp.h +39 -0
  13. include/ggml-cpu.h +145 -0
  14. include/ggml-cuda.h +47 -0
  15. include/ggml-metal.h +66 -0
  16. include/ggml-opt.h +256 -0
  17. include/ggml-rpc.h +33 -0
  18. include/ggml-sycl.h +49 -0
  19. include/ggml-vulkan.h +29 -0
  20. include/ggml-webgpu.h +19 -0
  21. include/ggml.h +2467 -0
  22. include/gguf.h +202 -0
  23. include/llama-cpp.h +30 -0
  24. include/llama.h +1482 -0
  25. include/mtmd-helper.h +91 -0
  26. include/mtmd.h +298 -0
  27. lib/cmake/ggml/ggml-config.cmake +328 -0
  28. lib/cmake/ggml/ggml-version.cmake +65 -0
  29. lib/cmake/llama/llama-config.cmake +54 -0
  30. lib/cmake/llama/llama-version.cmake +65 -0
  31. lib/ggml-base.lib +0 -0
  32. lib/ggml-cpu.lib +0 -0
  33. lib/ggml.lib +0 -0
  34. lib/llama.lib +0 -0
  35. lib/mtmd.lib +0 -0
  36. lib/pkgconfig/llama.pc +10 -0
  37. llama_cpp/__init__.py +4 -0
  38. llama_cpp/_ctypes_extensions.py +131 -0
  39. llama_cpp/_ggml.py +12 -0
  40. llama_cpp/_internals.py +856 -0
  41. llama_cpp/_logger.py +47 -0
  42. llama_cpp/_utils.py +78 -0
  43. llama_cpp/lib/ggml-base.dll +0 -0
  44. llama_cpp/lib/ggml-base.lib +0 -0
  45. llama_cpp/lib/ggml-cpu.dll +0 -0
  46. llama_cpp/lib/ggml-cpu.lib +0 -0
  47. llama_cpp/lib/ggml.dll +0 -0
  48. llama_cpp/lib/ggml.lib +0 -0
  49. llama_cpp/lib/llama.dll +0 -0
  50. llama_cpp/lib/llama.lib +0 -0
  51. llama_cpp/lib/mtmd.dll +0 -0
  52. llama_cpp/lib/mtmd.lib +0 -0
  53. llama_cpp/llama.py +2422 -0
  54. llama_cpp/llama_cache.py +155 -0
  55. llama_cpp/llama_chat_format.py +3962 -0
  56. llama_cpp/llama_cpp.py +4374 -0
  57. llama_cpp/llama_grammar.py +953 -0
  58. llama_cpp/llama_speculative.py +64 -0
  59. llama_cpp/llama_tokenizer.py +120 -0
  60. llama_cpp/llama_types.py +316 -0
  61. llama_cpp/llava_cpp.py +158 -0
  62. llama_cpp/mtmd_cpp.py +280 -0
  63. llama_cpp/py.typed +0 -0
  64. llama_cpp/server/__init__.py +0 -0
  65. llama_cpp/server/__main__.py +100 -0
  66. llama_cpp/server/app.py +597 -0
  67. llama_cpp/server/cli.py +97 -0
  68. llama_cpp/server/errors.py +212 -0
  69. llama_cpp/server/model.py +312 -0
  70. llama_cpp/server/settings.py +240 -0
  71. llama_cpp/server/types.py +316 -0
  72. llama_cpp_python_win-0.3.16.dist-info/METADATA +856 -0
  73. llama_cpp_python_win-0.3.16.dist-info/RECORD +75 -0
  74. llama_cpp_python_win-0.3.16.dist-info/WHEEL +5 -0
  75. llama_cpp_python_win-0.3.16.dist-info/licenses/LICENSE.md +9 -0
@@ -0,0 +1,64 @@
1
+ import abc
2
+
3
+ from typing import Any
4
+
5
+ import numpy as np
6
+ import numpy.typing as npt
7
+
8
+
9
class LlamaDraftModel(abc.ABC):
    """Interface for draft-token generators used in speculative decoding."""

    @abc.abstractmethod
    def __call__(
        self, input_ids: npt.NDArray[np.intc], /, **kwargs: Any
    ) -> npt.NDArray[np.intc]:
        """Return candidate draft tokens for the given prompt token ids."""
        raise NotImplementedError()


class LlamaPromptLookupDecoding(LlamaDraftModel):
    """Based on https://github.com/apoorvumang/prompt-lookup-decoding"""

    def __init__(self, max_ngram_size: int = 2, num_pred_tokens: int = 10):
        # Longest prompt suffix (n-gram) to search for, and the maximum
        # number of continuation tokens to propose per match.
        self.max_ngram_size = max_ngram_size
        self.num_pred_tokens = num_pred_tokens

    @staticmethod
    def find_candidate_pred_tokens(
        input_ids: npt.NDArray[np.intc],
        max_ngram_size: int,
        num_pred_tokens: int,
    ):
        """Find an earlier occurrence of the prompt's trailing n-gram and
        return the tokens that followed it (at most ``num_pred_tokens``)."""
        total = input_ids.shape[0]

        # Prefer the longest suffix; shrink until a usable match is found.
        for ngram_size in range(min(max_ngram_size, total - 1), 0, -1):
            # The trailing n-gram we are looking for earlier in the prompt.
            suffix = input_ids[-ngram_size:]

            # All contiguous windows of length ngram_size over the prompt.
            windows = np.lib.stride_tricks.sliding_window_view(input_ids, (ngram_size,))

            # Indices where a window equals the trailing n-gram.
            hit_positions = np.nonzero(np.all(windows == suffix, axis=1))[0]

            for pos in hit_positions:
                begin = pos + ngram_size
                stop = min(begin + num_pred_tokens, total)

                # Skip matches with no continuation — in particular the
                # suffix always matches itself at the very end.
                if begin < stop:
                    return input_ids[begin:stop]

        # No earlier occurrence of any suffix: propose nothing.
        return np.array([], dtype=np.intc)

    def __call__(
        self, input_ids: npt.NDArray[np.intc], /, **kwargs: Any
    ) -> npt.NDArray[np.intc]:
        return self.find_candidate_pred_tokens(
            input_ids=input_ids,
            max_ngram_size=self.max_ngram_size,
            num_pred_tokens=self.num_pred_tokens,
        )
@@ -0,0 +1,120 @@
1
+ from __future__ import annotations
2
+
3
+ import abc
4
+ from typing import (
5
+ List,
6
+ Optional,
7
+ Any,
8
+ )
9
+
10
+ import llama_cpp
11
+ from llama_cpp.llama_types import List
12
+
13
+
14
class BaseLlamaTokenizer(abc.ABC):
    """Abstract tokenizer interface: bytes-to-token-ids and back."""

    @abc.abstractmethod
    def tokenize(
        self, text: bytes, add_bos: bool = True, special: bool = True
    ) -> List[int]:
        """Tokenize the text into tokens.

        Args:
            text: The utf-8 encoded string to tokenize.
            add_bos: Whether to add a beginning of sequence token.
            special: Whether to tokenize special tokens.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def detokenize(
        self,
        tokens: List[int],
        prev_tokens: Optional[List[int]] = None,
        special: bool = False,
    ) -> bytes:
        """Detokenize the tokens into text.

        Args:
            tokens: The list of tokens to detokenize.
            prev_tokens: The list of previous tokens. Offset mapping will be performed if provided.
            special: Whether to detokenize special tokens.
        """
        raise NotImplementedError


class LlamaTokenizer(BaseLlamaTokenizer):
    """Tokenizer backed by a llama.cpp model's built-in vocabulary."""

    def __init__(self, llama: llama_cpp.Llama):
        self._model = llama._model  # type: ignore

    def tokenize(
        self, text: bytes, add_bos: bool = True, special: bool = True
    ) -> List[int]:
        return self._model.tokenize(text, add_bos=add_bos, special=special)

    def detokenize(
        self,
        tokens: List[int],
        prev_tokens: Optional[List[int]] = None,
        special: bool = False,
    ) -> bytes:
        # NOTE(review): prev_tokens is accepted for interface compatibility
        # but is not consulted by the underlying llama.cpp detokenizer.
        return self._model.detokenize(tokens, special=special)

    def encode(
        self, text: str, add_bos: bool = True, special: bool = True
    ) -> List[int]:
        """Convenience wrapper: encode a str instead of utf-8 bytes."""
        raw = text.encode("utf-8", errors="ignore")
        return self.tokenize(raw, add_bos=add_bos, special=special)

    def decode(self, tokens: List[int]) -> str:
        """Convenience wrapper: decode straight to a str."""
        return self.detokenize(tokens).decode("utf-8", errors="ignore")

    @classmethod
    def from_ggml_file(cls, path: str) -> "LlamaTokenizer":
        """Load a vocab-only model from *path* and wrap its tokenizer."""
        return cls(llama_cpp.Llama(model_path=path, vocab_only=True))


class LlamaHFTokenizer(BaseLlamaTokenizer):
    """Tokenizer backed by a Hugging Face `transformers` tokenizer."""

    def __init__(self, hf_tokenizer: Any):
        self.hf_tokenizer = hf_tokenizer

    def tokenize(
        self, text: bytes, add_bos: bool = True, special: bool = True
    ) -> List[int]:
        decoded = text.decode("utf-8", errors="ignore")
        return self.hf_tokenizer.encode(decoded, add_special_tokens=special)

    def detokenize(
        self,
        tokens: List[int],
        prev_tokens: Optional[List[int]] = None,
        special: bool = False,
    ) -> bytes:
        skip_special_tokens = not special
        if prev_tokens is None:
            return self.hf_tokenizer.decode(
                tokens, skip_special_tokens=skip_special_tokens
            ).encode("utf-8", errors="ignore")
        # Decode with and without the new tokens and return only the byte
        # suffix, so merged-token boundaries produce correct offsets.
        text = self.hf_tokenizer.decode(
            prev_tokens + tokens, skip_special_tokens=skip_special_tokens
        ).encode("utf-8", errors="ignore")
        prev_text = self.hf_tokenizer.decode(
            prev_tokens, skip_special_tokens=skip_special_tokens
        ).encode("utf-8", errors="ignore")
        return text[len(prev_text) :]

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path: str) -> "LlamaHFTokenizer":
        """Create a tokenizer from a Hugging Face model name or local path."""
        try:
            from transformers import AutoTokenizer
        except ImportError:
            raise ImportError(
                "The `transformers` library is required to use the `HFTokenizer`."
                "You can install it with `pip install transformers`."
            )
        hf_tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path=pretrained_model_name_or_path
        )
        return cls(hf_tokenizer)
@@ -0,0 +1,316 @@
1
+ """Types and request signatures for OpenAI compatibility
2
+
3
+ NOTE: These types may change to match the OpenAI OpenAPI specification.
4
+
5
+ Based on the OpenAI OpenAPI specification:
6
+ https://github.com/openai/openai-openapi/blob/master/openapi.yaml
7
+
8
+ """
9
+
10
+ from typing import Any, List, Optional, Dict, Union
11
+ from typing_extensions import TypedDict, NotRequired, Literal
12
+
13
+
14
# NOTE: Defining this correctly using annotations seems to break pydantic validation.
# This is a workaround until we can figure out how to do this correctly
# JsonType = Union[None, int, str, bool, List["JsonType"], Dict[str, "JsonType"]]
# Flat (non-recursive) stand-in for an arbitrary JSON value.
JsonType = Union[None, int, str, bool, List[Any], Dict[str, Any]]
18
+
19
+
20
class EmbeddingUsage(TypedDict):
    """Token accounting for an embedding request."""

    prompt_tokens: int
    total_tokens: int


class Embedding(TypedDict):
    """A single embedding result: one vector, or one vector per token."""

    index: int
    object: str
    embedding: Union[List[float], List[List[float]]]


class CreateEmbeddingResponse(TypedDict):
    """Response body of the OpenAI-compatible embeddings endpoint."""

    object: Literal["list"]
    model: str
    data: List[Embedding]
    usage: EmbeddingUsage
36
+
37
+
38
class CompletionLogprobs(TypedDict):
    """Per-token log-probability details for a text completion."""

    text_offset: List[int]
    token_logprobs: List[Optional[float]]
    tokens: List[str]
    top_logprobs: List[Optional[Dict[str, float]]]


class CompletionChoice(TypedDict):
    """One generated completion alternative."""

    text: str
    index: int
    logprobs: Optional[CompletionLogprobs]
    finish_reason: Optional[Literal["stop", "length"]]


class CompletionUsage(TypedDict):
    """Token accounting for a completion request."""

    prompt_tokens: int
    completion_tokens: int
    total_tokens: int


class CreateCompletionResponse(TypedDict):
    """Response body of the OpenAI-compatible completions endpoint."""

    id: str
    object: Literal["text_completion"]
    created: int
    model: str
    choices: List[CompletionChoice]
    usage: NotRequired[CompletionUsage]
65
+
66
+
67
class ChatCompletionResponseFunctionCall(TypedDict):
    """DEPRECATED: function call emitted by the assistant."""

    name: str
    arguments: str


class ChatCompletionResponseMessage(TypedDict):
    """Assistant message returned in a chat completion response."""

    content: Optional[str]
    tool_calls: NotRequired["ChatCompletionMessageToolCalls"]
    role: Literal["assistant", "function"]  # NOTE: "function" may be incorrect here
    function_call: NotRequired[ChatCompletionResponseFunctionCall]  # DEPRECATED


class ChatCompletionFunction(TypedDict):
    """Declaration of a callable function exposed to the model."""

    name: str
    description: NotRequired[str]
    parameters: Dict[str, JsonType]  # TODO: make this more specific


class ChatCompletionTopLogprobToken(TypedDict):
    """One alternative token with its log probability."""

    token: str
    logprob: float
    bytes: Optional[List[int]]


class ChatCompletionLogprobToken(ChatCompletionTopLogprobToken):
    """A sampled token plus its most likely alternatives."""

    token: str
    logprob: float
    bytes: Optional[List[int]]
    top_logprobs: List[ChatCompletionTopLogprobToken]


class ChatCompletionLogprobs(TypedDict):
    """Log-probability information for a chat completion choice."""

    content: Optional[List[ChatCompletionLogprobToken]]
    refusal: Optional[List[ChatCompletionLogprobToken]]


class ChatCompletionResponseChoice(TypedDict):
    """One alternative chat completion."""

    index: int
    message: "ChatCompletionResponseMessage"
    logprobs: Optional[ChatCompletionLogprobs]
    finish_reason: Optional[str]


class CreateChatCompletionResponse(TypedDict):
    """Response body of the OpenAI-compatible chat completions endpoint."""

    id: str
    object: Literal["chat.completion"]
    created: int
    model: str
    choices: List["ChatCompletionResponseChoice"]
    usage: CompletionUsage
117
+
118
+
119
class ChatCompletionMessageToolCallChunkFunction(TypedDict):
    """Partial function-call payload inside a streamed tool call."""

    name: Optional[str]
    arguments: str


class ChatCompletionMessageToolCallChunk(TypedDict):
    """Incremental tool-call fragment delivered while streaming."""

    index: int
    id: NotRequired[str]
    type: Literal["function"]
    function: ChatCompletionMessageToolCallChunkFunction


class ChatCompletionStreamResponseDeltaEmpty(TypedDict):
    """Empty delta carried by terminal stream chunks."""

    pass


class ChatCompletionStreamResponseDeltaFunctionCall(TypedDict):
    """DEPRECATED: streamed function-call fragment."""

    name: str
    arguments: str


class ChatCompletionStreamResponseDelta(TypedDict):
    """Incremental message content delivered while streaming."""

    content: NotRequired[Optional[str]]
    function_call: NotRequired[
        Optional[ChatCompletionStreamResponseDeltaFunctionCall]
    ]  # DEPRECATED
    tool_calls: NotRequired[Optional[List[ChatCompletionMessageToolCallChunk]]]
    role: NotRequired[Optional[Literal["system", "user", "assistant", "tool"]]]


class ChatCompletionStreamResponseChoice(TypedDict):
    """One choice within a streamed chat completion chunk."""

    index: int
    delta: Union[
        ChatCompletionStreamResponseDelta, ChatCompletionStreamResponseDeltaEmpty
    ]
    finish_reason: Optional[Literal["stop", "length", "tool_calls", "function_call"]]
    logprobs: NotRequired[Optional[ChatCompletionLogprobs]]


class CreateChatCompletionStreamResponse(TypedDict):
    """One chunk of a streamed chat completion response."""

    id: str
    model: str
    object: Literal["chat.completion.chunk"]
    created: int
    choices: List[ChatCompletionStreamResponseChoice]
164
+
165
+
166
class ChatCompletionFunctions(TypedDict):
    """Declaration of a callable function (legacy `functions` field)."""

    name: str
    description: NotRequired[str]
    parameters: Dict[str, JsonType]  # TODO: make this more specific


class ChatCompletionFunctionCallOption(TypedDict):
    """Forces the model to call the named function."""

    name: str


class ChatCompletionRequestResponseFormat(TypedDict):
    """Requested output format: plain text or JSON mode."""

    type: Literal["text", "json_object"]
    schema: NotRequired[
        JsonType
    ]  # https://docs.endpoints.anyscale.com/guides/json_mode/


class ChatCompletionRequestMessageContentPartText(TypedDict):
    """Text fragment of a multimodal user message."""

    type: Literal["text"]
    text: str


class ChatCompletionRequestMessageContentPartImageImageUrl(TypedDict):
    """Image reference with optional detail level."""

    url: str
    detail: NotRequired[Literal["auto", "low", "high"]]


class ChatCompletionRequestMessageContentPartImage(TypedDict):
    """Image fragment of a multimodal user message."""

    type: Literal["image_url"]
    image_url: Union[str, ChatCompletionRequestMessageContentPartImageImageUrl]


# Any single fragment of a multimodal message's content list.
ChatCompletionRequestMessageContentPart = Union[
    ChatCompletionRequestMessageContentPartText,
    ChatCompletionRequestMessageContentPartImage,
]
202
+
203
+
204
class ChatCompletionRequestSystemMessage(TypedDict):
    """System prompt message."""

    role: Literal["system"]
    content: Optional[str]


class ChatCompletionRequestUserMessage(TypedDict):
    """User message; content may be plain text or multimodal parts."""

    role: Literal["user"]
    content: Optional[Union[str, List[ChatCompletionRequestMessageContentPart]]]


class ChatCompletionMessageToolCallFunction(TypedDict):
    """Called function: name plus JSON-encoded arguments string."""

    name: str
    arguments: str


class ChatCompletionMessageToolCall(TypedDict):
    """A single tool call recorded in an assistant message."""

    id: str
    type: Literal["function"]
    function: ChatCompletionMessageToolCallFunction


ChatCompletionMessageToolCalls = List[ChatCompletionMessageToolCall]


class ChatCompletionRequestAssistantMessageFunctionCall(TypedDict):
    """DEPRECATED: legacy function-call record on assistant messages."""

    name: str
    arguments: str


class ChatCompletionRequestAssistantMessage(TypedDict):
    """Assistant turn supplied back to the model as history."""

    role: Literal["assistant"]
    content: NotRequired[str]
    tool_calls: NotRequired[ChatCompletionMessageToolCalls]
    function_call: NotRequired[
        ChatCompletionRequestAssistantMessageFunctionCall
    ]  # DEPRECATED


class ChatCompletionRequestToolMessage(TypedDict):
    """Tool result message, linked to its call by tool_call_id."""

    role: Literal["tool"]
    content: Optional[str]
    tool_call_id: str


class ChatCompletionRequestFunctionMessage(TypedDict):
    """DEPRECATED: legacy function result message."""

    role: Literal["function"]
    content: Optional[str]
    name: str


# Any message accepted by the chat completion endpoint.
# FIX: the original Union listed ChatCompletionRequestUserMessage twice;
# typing.Union de-duplicates members, so dropping the repeat is
# behavior-preserving.
ChatCompletionRequestMessage = Union[
    ChatCompletionRequestSystemMessage,
    ChatCompletionRequestUserMessage,
    ChatCompletionRequestAssistantMessage,
    ChatCompletionRequestToolMessage,
    ChatCompletionRequestFunctionMessage,
]
262
+
263
+
264
class ChatCompletionRequestFunctionCallOption(TypedDict):
    """Forces the model to call the named function."""

    name: str


# "none" disables calls, "auto" lets the model decide, or force one function.
ChatCompletionRequestFunctionCall = Union[
    Literal["none", "auto"], ChatCompletionRequestFunctionCallOption
]

# JSON-schema-like description of a function's parameters.
ChatCompletionFunctionParameters = Dict[str, JsonType]  # TODO: make this more specific


class ChatCompletionToolFunction(TypedDict):
    """Function declaration wrapped by a tool definition."""

    name: str
    description: NotRequired[str]
    parameters: ChatCompletionFunctionParameters


class ChatCompletionTool(TypedDict):
    """A tool (currently only functions) the model may call."""

    type: Literal["function"]
    function: ChatCompletionToolFunction


class ChatCompletionNamedToolChoiceFunction(TypedDict):
    """Names the function selected by a named tool choice."""

    name: str


class ChatCompletionNamedToolChoice(TypedDict):
    """Forces the model to call one specific tool."""

    type: Literal["function"]
    function: ChatCompletionNamedToolChoiceFunction


# "none"/"auto"/"required", or force one specific named tool.
ChatCompletionToolChoiceOption = Union[
    Literal["none", "auto", "required"], ChatCompletionNamedToolChoice
]
298
+
299
+
300
# NOTE: The following type names are not part of the OpenAI OpenAPI specification
# and will be removed in a future major release.
# Backwards-compatible aliases mapping legacy names onto the current types.

EmbeddingData = Embedding
CompletionChunk = CreateCompletionResponse
Completion = CreateCompletionResponse
CreateCompletionStreamResponse = CreateCompletionResponse
ChatCompletionMessage = ChatCompletionResponseMessage
ChatCompletionChoice = ChatCompletionResponseChoice
ChatCompletion = CreateChatCompletionResponse
ChatCompletionChunkDeltaEmpty = ChatCompletionStreamResponseDeltaEmpty
ChatCompletionChunkChoice = ChatCompletionStreamResponseChoice
ChatCompletionChunkDelta = ChatCompletionStreamResponseDelta
ChatCompletionChunk = CreateChatCompletionStreamResponse
ChatCompletionStreamResponse = CreateChatCompletionStreamResponse
ChatCompletionResponseFunction = ChatCompletionFunction
ChatCompletionFunctionCall = ChatCompletionResponseFunctionCall
llama_cpp/llava_cpp.py ADDED
@@ -0,0 +1,158 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from ctypes import (
5
+ c_bool,
6
+ c_char_p,
7
+ c_int,
8
+ c_uint8,
9
+ c_float,
10
+ c_void_p,
11
+ POINTER,
12
+ _Pointer, # type: ignore
13
+ Structure,
14
+ )
15
+ import pathlib
16
+ from typing import (
17
+ Union,
18
+ NewType,
19
+ Optional,
20
+ TYPE_CHECKING,
21
+ )
22
+
23
+ import llama_cpp.llama_cpp as llama_cpp
24
+
25
+ from llama_cpp._ctypes_extensions import (
26
+ load_shared_library,
27
+ ctypes_function_for_shared_library,
28
+ )
29
+
30
+ if TYPE_CHECKING:
31
+ from llama_cpp._ctypes_extensions import (
32
+ CtypesArray,
33
+ )
34
+
35
+
36
# Specify the base name of the shared library to load
_libllava_base_name = "llava"
# NOTE(review): when LLAVA_CPP_LIB is set the search base collapses to an
# empty Path() — presumably load_shared_library honors the override itself;
# confirm against _ctypes_extensions.
_libllava_override_path = os.environ.get("LLAVA_CPP_LIB")
_libllava_base_path = pathlib.Path(os.path.abspath(os.path.dirname(__file__))) / "lib" if _libllava_override_path is None else pathlib.Path()

# Load the library
_libllava = load_shared_library(_libllava_base_name, _libllava_base_path)

# Decorator factory binding the python stubs below to symbols in _libllava.
ctypes_function = ctypes_function_for_shared_library(_libllava)
45
+
46
+
47
+ ################################################
48
+ # llava.h
49
+ ################################################
50
+
51
# struct clip_ctx;
# Opaque handle to a CLIP model context (pointer value carried as int).
clip_ctx_p = NewType("clip_ctx_p", int)
clip_ctx_p_ctypes = c_void_p


# struct llava_image_embed {
#     float * embed;
#     int n_image_pos;
# };
class llava_image_embed(Structure):
    # ctypes mirror of the C struct: `embed` points at the embedding floats,
    # `n_image_pos` is the number of image positions.
    _fields_ = [
        ("embed", POINTER(c_float)),
        ("n_image_pos", c_int),
    ]
65
+
66
+
67
# /** sanity check for clip <-> llava embed size match */
# LLAVA_API bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx * ctx_clip);
@ctypes_function(
    "llava_validate_embed_size",
    [llama_cpp.llama_context_p_ctypes, clip_ctx_p_ctypes],
    c_bool,
)
def llava_validate_embed_size(
    ctx_llama: llama_cpp.llama_context_p, ctx_clip: clip_ctx_p, /
) -> bool:
    """Sanity check that the clip and llava embedding sizes match."""
    ...


# /** build an image embed from image file bytes */
# LLAVA_API struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length);
@ctypes_function(
    "llava_image_embed_make_with_bytes",
    [clip_ctx_p_ctypes, c_int, POINTER(c_uint8), c_int],
    POINTER(llava_image_embed),
)
def llava_image_embed_make_with_bytes(
    ctx_clip: clip_ctx_p,
    n_threads: Union[c_int, int],
    image_bytes: CtypesArray[c_uint8],
    image_bytes_length: Union[c_int, int],
    /,
) -> "_Pointer[llava_image_embed]":
    """Build an image embed from raw image file bytes.

    The returned pointer must be released with llava_image_embed_free.
    """
    ...


# /** build an image embed from a path to an image filename */
# LLAVA_API struct llava_image_embed * llava_image_embed_make_with_filename(struct clip_ctx * ctx_clip, int n_threads, const char * image_path);
@ctypes_function(
    "llava_image_embed_make_with_filename",
    [clip_ctx_p_ctypes, c_int, c_char_p],
    POINTER(llava_image_embed),
)
def llava_image_embed_make_with_filename(
    ctx_clip: clip_ctx_p, n_threads: Union[c_int, int], image_path: bytes, /
) -> "_Pointer[llava_image_embed]":
    """Build an image embed from a path to an image file.

    The returned pointer must be released with llava_image_embed_free.
    """
    ...


# LLAVA_API void llava_image_embed_free(struct llava_image_embed * embed);
# /** free an embedding made with llava_image_embed_make_* */
@ctypes_function("llava_image_embed_free", [POINTER(llava_image_embed)], None)
def llava_image_embed_free(embed: "_Pointer[llava_image_embed]", /):
    """Free an embedding made with one of the llava_image_embed_make_* calls."""
    ...


# /** write the image represented by embed into the llama context with batch size n_batch, starting at context pos n_past. on completion, n_past points to the next position in the context after the image embed. */
# LLAVA_API bool llava_eval_image_embed(struct llama_context * ctx_llama, const struct llava_image_embed * embed, int n_batch, int * n_past);
@ctypes_function(
    "llava_eval_image_embed",
    [
        llama_cpp.llama_context_p_ctypes,
        POINTER(llava_image_embed),
        c_int,
        POINTER(c_int),
    ],
    c_bool,
)
def llava_eval_image_embed(
    ctx_llama: llama_cpp.llama_context_p,
    embed: "_Pointer[llava_image_embed]",
    n_batch: Union[c_int, int],
    n_past: "_Pointer[c_int]",
    /,
) -> bool:
    """Write the image embed into the llama context starting at *n_past*.

    On completion the C API advances *n_past* past the image embed.
    """
    ...
137
+
138
+
139
+ ################################################
140
+ # clip.h
141
+ ################################################
142
+
143
+
144
# /** load mmproj model */
# CLIP_API struct clip_ctx * clip_model_load (const char * fname, int verbosity);
@ctypes_function("clip_model_load", [c_char_p, c_int], clip_ctx_p_ctypes)
def clip_model_load(
    fname: bytes, verbosity: Union[c_int, int], /
) -> Optional[clip_ctx_p]:
    """Load the CLIP/mmproj model at *fname*.

    A NULL result from the C API surfaces as None (c_void_p behavior).
    """
    ...


# /** free mmproj model */
# CLIP_API void clip_free(struct clip_ctx * ctx);
@ctypes_function("clip_free", [clip_ctx_p_ctypes], None)
def clip_free(ctx: clip_ctx_p, /):
    """Free a context returned by clip_model_load."""
    ...
158
+