ik-llama-cpp-python 0.1.0__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
include/ggml-cann.h ADDED
@@ -0,0 +1,125 @@
1
+ /*
2
+ * Copyright (c) 2023-2024 The ggml authors
3
+ *
4
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
5
+ * of this software and associated documentation files (the "Software"), to
6
+ * deal in the Software without restriction, including without limitation the
7
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8
+ * sell copies of the Software, and to permit persons to whom the Software is
9
+ * furnished to do so, subject to the following conditions:
10
+ *
11
+ * The above copyright notice and this permission notice shall be included in
12
+ * all copies or substantial portions of the Software.
13
+ *
14
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20
+ * IN THE SOFTWARE.
21
+ */
22
+
23
+ #pragma once
24
+
25
+ #include "ggml-backend.h"
26
+ #include "ggml.h"
27
+
28
+ #ifdef __cplusplus
29
+ extern "C" {
30
+ #endif
31
+
32
+ /**
33
+ * @brief Maximum number of CANN devices supported.
34
+ */
35
+ #define GGML_CANN_MAX_DEVICES 16
36
+
37
+ /**
38
+ * @brief Initializes the CANN backend for a specified device.
39
+ *
40
+ * This function initializes the CANN backend for the given device.
41
+ * It verifies the device index, allocates a context, and creates a backend
42
+ * instance.
43
+ *
44
+ * @param device The index of the device to initialize.
45
+ * @return A pointer to the initialized backend instance, or nullptr on failure.
46
+ */
47
+ GGML_API GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device);
48
+
49
+ /**
50
+ * @brief Checks if a given backend is a CANN backend.
51
+ *
52
+ * This function verifies if the provided backend is a CANN backend by comparing
53
+ * its GUID with the CANN backend's GUID.
54
+ *
55
+ * @param backend The backend instance to check.
56
+ * @return True if the backend is a CANN backend, false otherwise.
57
+ */
58
+ GGML_API GGML_CALL bool ggml_backend_is_cann(ggml_backend_t backend);
59
+
60
+ /**
61
+ * @brief Retrieves the CANN buffer type for a specified device.
62
+ *
63
+ * This function initializes and returns the buffer type interface associated
64
+ * with the given device. It ensures thread-safe access using a mutex.
65
+ *
66
+ * @param device The device index for which to retrieve the buffer type.
67
+ * @return A pointer to the buffer type interface for the specified device, or
68
+ * nullptr if the device index is out of range.
69
+ */
70
+ GGML_API GGML_CALL ggml_backend_buffer_type_t
71
+ ggml_backend_cann_buffer_type(int32_t device);
72
+
73
+ /**
74
+ * @brief Retrieves the number of CANN devices available.
75
+ *
76
+ * This function returns the number of CANN devices available based on
77
+ * information obtained from `ggml_cann_info()`.
78
+ *
79
+ * @return The number of CANN devices available.
80
+ */
81
+ GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void);
82
+
83
+ /**
84
+ * @brief Retrieves the description of a specific CANN device.
85
+ *
86
+ * This function sets the specified device, retrieves the SoC name,
87
+ * and writes it into the provided description buffer.
88
+ *
89
+ * @param device The device index to retrieve the description for.
90
+ * @param description Pointer to a buffer where the description will be written.
91
+ * @param description_size Size of the description buffer.
92
+ */
93
+ GGML_API GGML_CALL void ggml_backend_cann_get_device_description(
94
+ int32_t device, char* description, size_t description_size);
95
+
96
+ /**
97
+ * @brief Retrieves the memory information of a specific CANN device.
98
+ *
99
+ * This function sets the specified device, retrieves the free and total
100
+ * memory information of the specified type (ACL_HBM_MEM), and stores them
101
+ * in the provided pointers.
102
+ *
103
+ * @param device The device index to retrieve memory information for.
104
+ * @param free Pointer to a variable where the free memory size will be stored.
105
+ * @param total Pointer to a variable where the total memory size will be
106
+ * stored.
107
+ */
108
+ GGML_API GGML_CALL void ggml_backend_cann_get_device_memory(int32_t device,
109
+ size_t* free,
110
+ size_t* total);
111
+
112
+ /**
113
+ * @brief Set the logging callback for GGML.
114
+ *
115
+ * This function sets the logging callback and user data for logging.
116
+ *
117
+ * @param log_callback The logging callback to set.
118
+ * @param user_data User data to pass to the logging callback.
119
+ */
120
+ GGML_API void ggml_backend_cann_log_set_callback(ggml_log_callback log_callback,
121
+ void* user_data);
122
+
123
+ #ifdef __cplusplus
124
+ }
125
+ #endif
include/ggml-cuda.h ADDED
@@ -0,0 +1,47 @@
1
+ #pragma once
2
+
3
+ #include "ggml.h"
4
+ #include "ggml-backend.h"
5
+
6
+ #ifdef GGML_USE_HIPBLAS
7
+ #define GGML_CUDA_NAME "ROCm"
8
+ #define GGML_CUBLAS_NAME "hipBLAS"
9
+ #elif defined(GGML_USE_MUSA)
10
+ #define GGML_CUDA_NAME "MUSA"
11
+ #define GGML_CUBLAS_NAME "muBLAS"
12
+ #else
13
+ #define GGML_CUDA_NAME "CUDA"
14
+ #define GGML_CUBLAS_NAME "cuBLAS"
15
+ #endif
16
+
17
+ #ifdef __cplusplus
18
+ extern "C" {
19
+ #endif
20
+
21
+ #define GGML_CUDA_MAX_DEVICES 16
22
+
23
+ // backend API
24
+ GGML_API GGML_CALL ggml_backend_t ggml_backend_cuda_init(int device, const void * params);
25
+
26
+ GGML_API GGML_CALL bool ggml_backend_is_cuda(ggml_backend_t backend);
27
+
28
+ // device buffer
29
+ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
30
+
31
+ // split tensor buffer that splits matrices by rows across multiple devices
32
+ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(const float * tensor_split);
33
+
34
+ // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
35
+ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
36
+
37
+ GGML_API GGML_CALL int ggml_backend_cuda_get_device_count(void);
38
+ GGML_API GGML_CALL void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
39
+ GGML_API GGML_CALL void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
40
+
41
+ GGML_API GGML_CALL bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
42
+ GGML_API GGML_CALL void ggml_backend_cuda_unregister_host_buffer(void * buffer);
43
+
44
+ GGML_API void ggml_backend_cuda_log_set_callback(ggml_log_callback log_callback, void * user_data);
45
+ #ifdef __cplusplus
46
+ }
47
+ #endif
include/ggml-kompute.h ADDED
@@ -0,0 +1,46 @@
1
+ #pragma once
2
+
3
+ #include "ggml.h"
4
+ #include "ggml-backend.h"
5
+
6
+ #include <stdbool.h>
7
+ #include <stddef.h>
8
+ #include <stdint.h>
9
+
10
+ #ifdef __cplusplus
11
+ extern "C" {
12
+ #endif
13
+
14
+ struct ggml_vk_device {
15
+ int index;
16
+ int type; // same as VkPhysicalDeviceType
17
+ size_t heapSize;
18
+ const char * name;
19
+ const char * vendor;
20
+ int subgroupSize;
21
+ uint64_t bufferAlignment;
22
+ uint64_t maxAlloc;
23
+ };
24
+
25
+ struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count);
26
+ bool ggml_vk_get_device(struct ggml_vk_device * device, size_t memoryRequired, const char * name);
27
+ bool ggml_vk_has_vulkan(void);
28
+ bool ggml_vk_has_device(void);
29
+ struct ggml_vk_device ggml_vk_current_device(void);
30
+
31
+ //
32
+ // backend API
33
+ //
34
+
35
+ // forward declaration
36
+ typedef struct ggml_backend * ggml_backend_t;
37
+
38
+ GGML_API ggml_backend_t ggml_backend_kompute_init(int device);
39
+
40
+ GGML_API bool ggml_backend_is_kompute(ggml_backend_t backend);
41
+
42
+ GGML_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
43
+
44
+ #ifdef __cplusplus
45
+ }
46
+ #endif
include/ggml-metal.h ADDED
@@ -0,0 +1,67 @@
1
+ // An interface allowing to compute ggml_cgraph with Metal
2
+ //
3
+ // This is a fully functional interface that extends ggml with GPU support for Apple devices.
4
+ // A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, etc.)
5
+ //
6
+ // How it works?
7
+ //
8
+ // As long as your program can create and evaluate a ggml_cgraph on the CPU, you can use this
9
+ // interface to evaluate the same graph on the GPU. Instead of using ggml_graph_compute(), you
10
+ // use ggml_metal_graph_compute() (or ggml_vulkan_graph_compute(), etc.)
11
+ //
12
+ // You only need to make sure that all memory buffers that you used during the graph creation
13
+ // are mapped to the device memory with the ggml_metal_add_buffer() function. This mapping is
14
+ // used during the graph evaluation to determine the arguments of the compute kernels.
15
+ //
16
+ // Synchronization between device and host memory (for example for input and output tensors)
17
+ // is done with the ggml_metal_set_tensor() and ggml_metal_get_tensor() functions.
18
+ //
19
+
20
+ #pragma once
21
+
22
+ #include "ggml.h"
23
+ #include "ggml-backend.h"
24
+
25
+ #include <stddef.h>
26
+ #include <stdbool.h>
27
+
28
+ // max memory buffers that can be mapped to the device
29
+ #define GGML_METAL_MAX_BUFFERS 64
30
+
31
+ struct ggml_tensor;
32
+ struct ggml_cgraph;
33
+
34
+ #ifdef __cplusplus
35
+ extern "C" {
36
+ #endif
37
+
38
+ //
39
+ // backend API
40
+ // user-code should use only these functions
41
+ //
42
+
43
+ GGML_API void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
44
+
45
+ GGML_API ggml_backend_t ggml_backend_metal_init(void);
46
+
47
+ GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
48
+
49
+ GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size);
50
+
51
+ GGML_API void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb);
52
+
53
+ GGML_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
54
+
55
+ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
56
+
57
+ // helper to check if the device supports a specific family
58
+ // ideally, the user code should be doing these checks
59
+ // ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
60
+ GGML_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family);
61
+
62
+ // capture all command buffers committed the next time `ggml_backend_graph_compute` is called
63
+ GGML_API void ggml_backend_metal_capture_next_compute(ggml_backend_t backend);
64
+
65
+ #ifdef __cplusplus
66
+ }
67
+ #endif
include/ggml-rpc.h ADDED
@@ -0,0 +1,29 @@
1
+ #pragma once
2
+
3
+ #include "ggml.h"
4
+ #include "ggml-backend.h"
5
+
6
+ #ifdef __cplusplus
7
+ extern "C" {
8
+ #endif
9
+
10
+ #define RPC_PROTO_MAJOR_VERSION 3
11
+ #define RPC_PROTO_MINOR_VERSION 5
12
+ #define RPC_PROTO_PATCH_VERSION 2
13
+ #define GGML_RPC_MAX_SERVERS 16
14
+
15
+ // backend API
16
+ GGML_API GGML_CALL ggml_backend_t ggml_backend_rpc_init(const char * endpoint, uint32_t device);
17
+ GGML_API GGML_CALL bool ggml_backend_is_rpc(ggml_backend_t backend);
18
+
19
+ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint, uint32_t device);
20
+
21
+ GGML_API GGML_CALL uint32_t ggml_backend_rpc_get_device_count(const char* endpoint);
22
+
23
+ GGML_API GGML_CALL void ggml_backend_rpc_get_device_memory(const char * endpoint, uint32_t device, size_t * free, size_t * total);
24
+
25
+ GGML_API GGML_CALL void ggml_backend_rpc_start_server(const char * endpoint, const char* cache_dir, size_t device, ggml_backend_t * devices, size_t* free_mem, size_t* total_mem);
26
+
27
+ #ifdef __cplusplus
28
+ }
29
+ #endif
include/ggml-sycl.h ADDED
@@ -0,0 +1,42 @@
1
+ //
2
+ // MIT license
3
+ // Copyright (C) 2024 Intel Corporation
4
+ // SPDX-License-Identifier: MIT
5
+ //
6
+
7
+ #pragma once
8
+
9
+ #include "ggml.h"
10
+ #include "ggml-backend.h"
11
+
12
+ #define GGML_SYCL_NAME "SYCL"
13
+ #define GGML_SYCL_MAX_DEVICES 48
14
+
15
+ #ifdef __cplusplus
16
+ extern "C" {
17
+ #endif
18
+
19
+ // backend API
20
+ GGML_API ggml_backend_t ggml_backend_sycl_init(int device);
21
+
22
+ // device buffer
23
+ GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);
24
+
25
+ // split tensor buffer that splits matrices by rows across multiple devices
26
+ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
27
+
28
+ // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
29
+ GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
30
+
31
+ GGML_API void ggml_backend_sycl_print_sycl_devices(void);
32
+ GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len);
33
+ GGML_API GGML_CALL void ggml_sycl_get_device_description(int device, char *description, size_t description_size);
34
+ GGML_API GGML_CALL int ggml_backend_sycl_get_device_count();
35
+ GGML_API GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
36
+
37
+ // SYCL doesn't support registering host memory, keep here for reference
38
+ // GGML_API GGML_CALL bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
39
+ // GGML_API GGML_CALL void ggml_backend_sycl_unregister_host_buffer(void * buffer);
40
+ #ifdef __cplusplus
41
+ }
42
+ #endif
include/ggml-vulkan.h ADDED
@@ -0,0 +1,29 @@
1
+ #pragma once
2
+
3
+ #include "ggml.h"
4
+ #include "ggml-backend.h"
5
+
6
+ #ifdef __cplusplus
7
+ extern "C" {
8
+ #endif
9
+
10
+ #define GGML_VK_NAME "Vulkan"
11
+ #define GGML_VK_MAX_DEVICES 16
12
+
13
+ //GGML_API GGML_CALL void ggml_vk_instance_init(void);
14
+
15
+ // backend API
16
+ GGML_API GGML_CALL ggml_backend_t ggml_backend_vk_init(size_t dev_num);
17
+
18
+ GGML_API GGML_CALL bool ggml_backend_is_vk(ggml_backend_t backend);
19
+ GGML_API GGML_CALL int ggml_backend_vk_get_device_count(void);
20
+ GGML_API GGML_CALL void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size);
21
+ GGML_API GGML_CALL void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total);
22
+
23
+ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num);
24
+ // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
25
+ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void);
26
+
27
+ #ifdef __cplusplus
28
+ }
29
+ #endif