whispercpp 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +5 -0
  3. data/LICENSE +1 -1
  4. data/README.md +165 -434
  5. data/Rakefile +60 -11
  6. data/ext/.gitignore +13 -0
  7. data/ext/cpu.mk +9 -0
  8. data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
  9. data/ext/extconf.rb +185 -16
  10. data/ext/ggml/include/ggml-alloc.h +76 -0
  11. data/ext/ggml/include/ggml-backend.h +352 -0
  12. data/ext/ggml/include/ggml-blas.h +25 -0
  13. data/ext/ggml/include/ggml-cann.h +123 -0
  14. data/ext/ggml/include/ggml-cpp.h +38 -0
  15. data/ext/ggml/include/ggml-cpu.h +135 -0
  16. data/ext/ggml/include/ggml-cuda.h +47 -0
  17. data/ext/ggml/include/ggml-kompute.h +50 -0
  18. data/ext/ggml/include/ggml-metal.h +66 -0
  19. data/ext/ggml/include/ggml-opencl.h +26 -0
  20. data/ext/ggml/include/ggml-opt.h +216 -0
  21. data/ext/ggml/include/ggml-rpc.h +28 -0
  22. data/ext/ggml/include/ggml-sycl.h +49 -0
  23. data/ext/ggml/include/ggml-vulkan.h +31 -0
  24. data/ext/{ggml.h → ggml/include/ggml.h} +479 -596
  25. data/ext/ggml/src/ggml-alloc.c +1037 -0
  26. data/ext/ggml/src/ggml-amx/common.h +94 -0
  27. data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  28. data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
  29. data/ext/ggml/src/ggml-amx/mmq.h +17 -0
  30. data/ext/ggml/src/ggml-backend-impl.h +256 -0
  31. data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
  32. data/ext/ggml/src/ggml-backend.cpp +1999 -0
  33. data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  34. data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
  35. data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
  36. data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
  37. data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  38. data/ext/ggml/src/ggml-cann/common.h +286 -0
  39. data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
  40. data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
  41. data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
  42. data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
  43. data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
  44. data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
  45. data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  46. data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
  47. data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
  48. data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
  49. data/ext/ggml/src/ggml-common.h +1853 -0
  50. data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  51. data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
  52. data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
  53. data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  54. data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  55. data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  56. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
  57. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  58. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  59. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  60. data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
  61. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  62. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  63. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  64. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  65. data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  66. data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
  67. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
  68. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  69. data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  70. data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
  71. data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  72. data/ext/ggml/src/ggml-impl.h +556 -0
  73. data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  74. data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  75. data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
  76. data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
  77. data/ext/ggml/src/ggml-opt.cpp +854 -0
  78. data/ext/ggml/src/ggml-quants.c +5238 -0
  79. data/ext/ggml/src/ggml-quants.h +100 -0
  80. data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
  81. data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
  82. data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
  83. data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
  84. data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
  85. data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  86. data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  87. data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
  88. data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
  89. data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  90. data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
  91. data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
  92. data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
  93. data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
  94. data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
  95. data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
  96. data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  97. data/ext/ggml/src/ggml-threading.cpp +12 -0
  98. data/ext/ggml/src/ggml-threading.h +14 -0
  99. data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
  100. data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  101. data/ext/ggml/src/ggml.c +7694 -0
  102. data/ext/{whisper.h → include/whisper.h} +23 -22
  103. data/ext/metal-embed.mk +17 -0
  104. data/ext/metal.mk +6 -0
  105. data/ext/ruby_whisper.cpp +1492 -9
  106. data/ext/ruby_whisper.h +10 -0
  107. data/ext/scripts/get-flags.mk +38 -0
  108. data/ext/src/coreml/whisper-decoder-impl.h +146 -0
  109. data/ext/src/coreml/whisper-decoder-impl.m +201 -0
  110. data/ext/src/coreml/whisper-encoder-impl.h +142 -0
  111. data/ext/src/coreml/whisper-encoder-impl.m +197 -0
  112. data/ext/src/coreml/whisper-encoder.h +26 -0
  113. data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
  114. data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
  115. data/ext/{whisper.cpp → src/whisper.cpp} +661 -492
  116. data/extsources.rb +6 -0
  117. data/lib/whisper/model/uri.rb +157 -0
  118. data/lib/whisper.rb +2 -0
  119. data/tests/helper.rb +7 -0
  120. data/tests/jfk_reader/.gitignore +5 -0
  121. data/tests/jfk_reader/extconf.rb +3 -0
  122. data/tests/jfk_reader/jfk_reader.c +68 -0
  123. data/tests/test_callback.rb +160 -0
  124. data/tests/test_error.rb +20 -0
  125. data/tests/test_model.rb +71 -0
  126. data/tests/test_package.rb +31 -0
  127. data/tests/test_params.rb +160 -0
  128. data/tests/test_segment.rb +83 -0
  129. data/tests/test_whisper.rb +211 -123
  130. data/whispercpp.gemspec +36 -0
  131. metadata +137 -11
  132. data/ext/ggml.c +0 -21755
@@ -0,0 +1,123 @@
1
+ /*
2
+ * Copyright (c) 2023-2024 The ggml authors
3
+ *
4
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
5
+ * of this software and associated documentation files (the "Software"), to
6
+ * deal in the Software without restriction, including without limitation the
7
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8
+ * sell copies of the Software, and to permit persons to whom the Software is
9
+ * furnished to do so, subject to the following conditions:
10
+ *
11
+ * The above copyright notice and this permission notice shall be included in
12
+ * all copies or substantial portions of the Software.
13
+ *
14
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20
+ * IN THE SOFTWARE.
21
+ */
22
+
23
+ #pragma once
24
+
25
+ #include "ggml-backend.h"
26
+ #include "ggml.h"
27
+
28
+ #ifdef __cplusplus
29
+ extern "C" {
30
+ #endif
31
+
32
+ /**
33
+ * @brief Maximum number of CANN devices supported.
34
+ */
35
+ #define GGML_CANN_MAX_DEVICES 16
36
+
37
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cann_reg(void);
38
+
39
+ /**
40
+ * @brief Initializes the CANN backend for a specified device.
41
+ *
42
+ * This function initializes the CANN backend for the given device.
43
+ * It verifies the device index, allocates a context, and creates a backend
44
+ * instance.
45
+ *
46
+ * @param device The index of the device to initialize.
47
+ * @return A pointer to the initialized backend instance, or nullptr on failure.
48
+ */
49
+ GGML_BACKEND_API ggml_backend_t ggml_backend_cann_init(int32_t device);
50
+
51
+ /**
52
+ * @brief Checks if a given backend is a CANN backend.
53
+ *
54
+ * This function verifies if the provided backend is a CANN backend by comparing
55
+ * its GUID with the CANN backend's GUID.
56
+ *
57
+ * @param backend The backend instance to check.
58
+ * @return True if the backend is a CANN backend, false otherwise.
59
+ */
60
+ GGML_BACKEND_API bool ggml_backend_is_cann(ggml_backend_t backend);
61
+
62
+ /**
63
+ * @brief Retrieves the CANN buffer type for a specified device.
64
+ *
65
+ * This function initializes and returns the buffer type interface associated
66
+ * with the given device. It ensures thread-safe access using a mutex.
67
+ *
68
+ * @param device The device index for which to retrieve the buffer type.
69
+ * @return A pointer to the buffer type interface for the specified device, or
70
+ * nullptr if the device index is out of range.
71
+ */
72
+ GGML_BACKEND_API ggml_backend_buffer_type_t
73
+ ggml_backend_cann_buffer_type(int32_t device);
74
+
75
+ /**
76
+ * @brief Retrieves the number of CANN devices available.
77
+ *
78
+ * This function returns the number of CANN devices available based on
79
+ * information obtained from `ggml_cann_info()`.
80
+ *
81
+ * @return The number of CANN devices available.
82
+ */
83
+ GGML_BACKEND_API int32_t ggml_backend_cann_get_device_count(void);
84
+
85
+ /**
86
+ * @brief Retrieves the pinned host buffer type, for use with the CPU backend for faster copies between CPU and NPU.
87
+ *
88
+ * @return A pointer to the host buffer type interface.
89
+ */
90
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
91
+
92
+ /**
93
+ * @brief Retrieves the description of a specific CANN device.
94
+ *
95
+ * This function sets the specified device, retrieves the SoC name,
96
+ * and writes it into the provided description buffer.
97
+ *
98
+ * @param device The device index to retrieve the description for.
99
+ * @param description Pointer to a buffer where the description will be written.
100
+ * @param description_size Size of the description buffer.
101
+ */
102
+ GGML_BACKEND_API void ggml_backend_cann_get_device_description(
103
+ int32_t device, char* description, size_t description_size);
104
+
105
+ /**
106
+ * @brief Retrieves the memory information of a specific CANN device.
107
+ *
108
+ * This function sets the specified device, retrieves the free and total
109
+ * memory information of the specified type (ACL_HBM_MEM), and stores them
110
+ * in the provided pointers.
111
+ *
112
+ * @param device The device index to retrieve memory information for.
113
+ * @param free Pointer to a variable where the free memory size will be stored.
114
+ * @param total Pointer to a variable where the total memory size will be
115
+ * stored.
116
+ */
117
+ GGML_BACKEND_API void ggml_backend_cann_get_device_memory(int32_t device,
118
+ size_t* free,
119
+ size_t* total);
120
+
121
+ #ifdef __cplusplus
122
+ }
123
+ #endif
@@ -0,0 +1,38 @@
1
+ #pragma once
2
+
3
+ #ifndef __cplusplus
4
+ #error "This header is for C++ only"
5
+ #endif
6
+
7
+ #include "ggml.h"
8
+ #include "ggml-alloc.h"
9
+ #include "ggml-backend.h"
10
+ #include <memory>
11
+
12
+ // Smart pointers for ggml types
13
+
14
+ // ggml
15
+
16
+ struct ggml_context_deleter { void operator()(ggml_context * ctx) { ggml_free(ctx); } };
17
+ struct gguf_context_deleter { void operator()(gguf_context * ctx) { gguf_free(ctx); } };
18
+
19
+ typedef std::unique_ptr<ggml_context, ggml_context_deleter> ggml_context_ptr;
20
+ typedef std::unique_ptr<gguf_context, gguf_context_deleter> gguf_context_ptr;
21
+
22
+ // ggml-alloc
23
+
24
+ struct ggml_gallocr_deleter { void operator()(ggml_gallocr_t galloc) { ggml_gallocr_free(galloc); } };
25
+
26
+ typedef std::unique_ptr<ggml_gallocr, ggml_gallocr_deleter> ggml_gallocr_ptr; // owns a ggml_gallocr; the element type is the struct because ggml_gallocr_t (what the deleter receives) is already a pointer
27
+
28
+ // ggml-backend
29
+
30
+ struct ggml_backend_deleter { void operator()(ggml_backend_t backend) { ggml_backend_free(backend); } };
31
+ struct ggml_backend_buffer_deleter { void operator()(ggml_backend_buffer_t buffer) { ggml_backend_buffer_free(buffer); } };
32
+ struct ggml_backend_event_deleter { void operator()(ggml_backend_event_t event) { ggml_backend_event_free(event); } };
33
+ struct ggml_backend_sched_deleter { void operator()(ggml_backend_sched_t sched) { ggml_backend_sched_free(sched); } };
34
+
35
+ typedef std::unique_ptr<ggml_backend, ggml_backend_deleter> ggml_backend_ptr;
36
+ typedef std::unique_ptr<ggml_backend_buffer, ggml_backend_buffer_deleter> ggml_backend_buffer_ptr;
37
+ typedef std::unique_ptr<ggml_backend_event, ggml_backend_event_deleter> ggml_backend_event_ptr;
38
+ typedef std::unique_ptr<ggml_backend_sched, ggml_backend_sched_deleter> ggml_backend_sched_ptr;
@@ -0,0 +1,135 @@
1
+ #pragma once
2
+
3
+ #include "ggml.h"
4
+ #include "ggml-backend.h"
5
+
6
+ #ifdef __cplusplus
7
+ extern "C" {
8
+ #endif
9
+
10
+ // the compute plan that needs to be prepared for ggml_graph_compute()
11
+ // since https://github.com/ggerganov/ggml/issues/287
12
+ struct ggml_cplan {
13
+ size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
14
+ uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
15
+
16
+ int n_threads;
17
+ struct ggml_threadpool * threadpool;
18
+
19
+ // abort ggml_graph_compute when true
20
+ ggml_abort_callback abort_callback;
21
+ void * abort_callback_data;
22
+ };
23
+
24
+ // numa strategies
25
+ enum ggml_numa_strategy {
26
+ GGML_NUMA_STRATEGY_DISABLED = 0,
27
+ GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
28
+ GGML_NUMA_STRATEGY_ISOLATE = 2,
29
+ GGML_NUMA_STRATEGY_NUMACTL = 3,
30
+ GGML_NUMA_STRATEGY_MIRROR = 4,
31
+ GGML_NUMA_STRATEGY_COUNT
32
+ };
33
+
34
+ GGML_BACKEND_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
35
+ GGML_BACKEND_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
36
+
37
+ GGML_BACKEND_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
38
+ GGML_BACKEND_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
39
+
40
+ GGML_BACKEND_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
41
+ GGML_BACKEND_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
42
+
43
+ GGML_BACKEND_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
44
+ GGML_BACKEND_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
45
+
46
+ GGML_BACKEND_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
47
+ GGML_BACKEND_API void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
48
+
49
+ GGML_BACKEND_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
50
+ GGML_BACKEND_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
51
+
52
+ GGML_BACKEND_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
53
+ GGML_BACKEND_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
54
+
55
+ GGML_BACKEND_API struct ggml_threadpool * ggml_threadpool_new (struct ggml_threadpool_params * params);
56
+ GGML_BACKEND_API void ggml_threadpool_free (struct ggml_threadpool * threadpool);
57
+ GGML_BACKEND_API int ggml_threadpool_get_n_threads (struct ggml_threadpool * threadpool);
58
+ GGML_BACKEND_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
59
+ GGML_BACKEND_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);
60
+
61
+ // ggml_graph_plan() has to be called before ggml_graph_compute()
62
+ // when plan.work_size > 0, caller must allocate memory for plan.work_data
63
+ GGML_BACKEND_API struct ggml_cplan ggml_graph_plan(
64
+ const struct ggml_cgraph * cgraph,
65
+ int n_threads, /* = GGML_DEFAULT_N_THREADS */
66
+ struct ggml_threadpool * threadpool /* = NULL */ );
67
+ GGML_BACKEND_API enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
68
+
69
+ // same as ggml_graph_compute() but the work data is allocated as a part of the context
70
+ // note: the drawback of this API is that the caller must ensure the context has enough memory for the work data
71
+ GGML_BACKEND_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
72
+
73
+ //
74
+ // system info
75
+ //
76
+
77
+ // x86
78
+ GGML_BACKEND_API int ggml_cpu_has_sse3 (void);
79
+ GGML_BACKEND_API int ggml_cpu_has_ssse3 (void);
80
+ GGML_BACKEND_API int ggml_cpu_has_avx (void);
81
+ GGML_BACKEND_API int ggml_cpu_has_avx_vnni (void);
82
+ GGML_BACKEND_API int ggml_cpu_has_avx2 (void);
83
+ GGML_BACKEND_API int ggml_cpu_has_f16c (void);
84
+ GGML_BACKEND_API int ggml_cpu_has_fma (void);
85
+ GGML_BACKEND_API int ggml_cpu_has_avx512 (void);
86
+ GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void);
87
+ GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void);
88
+ GGML_BACKEND_API int ggml_cpu_has_avx512_bf16(void);
89
+ GGML_BACKEND_API int ggml_cpu_has_amx_int8 (void);
90
+ // ARM
91
+ GGML_BACKEND_API int ggml_cpu_has_neon (void);
92
+ GGML_BACKEND_API int ggml_cpu_has_arm_fma (void);
93
+ GGML_BACKEND_API int ggml_cpu_has_fp16_va (void);
94
+ GGML_BACKEND_API int ggml_cpu_has_dotprod (void);
95
+ GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
96
+ GGML_BACKEND_API int ggml_cpu_has_sve (void);
97
+ GGML_BACKEND_API int ggml_cpu_get_sve_cnt (void); // sve vector length in bytes
98
+ // other
99
+ GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
100
+ GGML_BACKEND_API int ggml_cpu_has_vsx (void);
101
+ GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
102
+ GGML_BACKEND_API int ggml_cpu_has_llamafile (void);
103
+
104
+ // Internal types and functions exposed for tests and benchmarks
105
+
106
+ typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
107
+ const void * GGML_RESTRICT y, size_t by, int nrc);
108
+
109
+ struct ggml_type_traits_cpu { // per-type CPU routines; obtained via ggml_get_type_traits_cpu(type)
110
+ ggml_from_float_t from_float; // conversion routine of type ggml_from_float_t (presumably float -> this type; confirm against ggml.h)
111
+ ggml_vec_dot_t vec_dot; // dot-product routine; see the ggml_vec_dot_t signature above
112
+ enum ggml_type vec_dot_type; // NOTE(review): presumably the type vec_dot expects for its other operand - confirm
113
+ int64_t nrows; // number of rows to process simultaneously
114
+ };
115
+
116
+ GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);
117
+
118
+ GGML_BACKEND_API void ggml_cpu_init(void);
119
+
120
+ //
121
+ // CPU backend
122
+ //
123
+
124
+ GGML_BACKEND_API ggml_backend_t ggml_backend_cpu_init(void);
125
+
126
+ GGML_BACKEND_API bool ggml_backend_is_cpu (ggml_backend_t backend);
127
+ GGML_BACKEND_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
128
+ GGML_BACKEND_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
129
+ GGML_BACKEND_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
130
+
131
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
132
+
133
+ #ifdef __cplusplus
134
+ }
135
+ #endif
@@ -0,0 +1,47 @@
1
+ #pragma once
2
+
3
+ #include "ggml.h"
4
+ #include "ggml-backend.h"
5
+
6
+ #ifdef __cplusplus
7
+ extern "C" {
8
+ #endif
9
+
10
+ #ifdef GGML_USE_HIP
11
+ #define GGML_CUDA_NAME "ROCm"
12
+ #define GGML_CUBLAS_NAME "hipBLAS"
13
+ #elif defined(GGML_USE_MUSA)
14
+ #define GGML_CUDA_NAME "MUSA"
15
+ #define GGML_CUBLAS_NAME "muBLAS"
16
+ #else
17
+ #define GGML_CUDA_NAME "CUDA"
18
+ #define GGML_CUBLAS_NAME "cuBLAS"
19
+ #endif
20
+ #define GGML_CUDA_MAX_DEVICES 16
21
+
22
+ // backend API
23
+ GGML_BACKEND_API ggml_backend_t ggml_backend_cuda_init(int device);
24
+
25
+ GGML_BACKEND_API bool ggml_backend_is_cuda(ggml_backend_t backend);
26
+
27
+ // device buffer
28
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
29
+
30
+ // split tensor buffer that splits matrices by rows across multiple devices
31
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(int main_device, const float * tensor_split);
32
+
33
+ // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
34
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
35
+
36
+ GGML_BACKEND_API int ggml_backend_cuda_get_device_count(void);
37
+ GGML_BACKEND_API void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
38
+ GGML_BACKEND_API void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
39
+
40
+ GGML_BACKEND_API bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
41
+ GGML_BACKEND_API void ggml_backend_cuda_unregister_host_buffer(void * buffer);
42
+
43
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cuda_reg(void);
44
+
45
+ #ifdef __cplusplus
46
+ }
47
+ #endif
@@ -0,0 +1,50 @@
1
+ #pragma once
2
+
3
+ #include "ggml.h"
4
+ #include "ggml-backend.h"
5
+
6
+ #include <stdbool.h>
7
+ #include <stddef.h>
8
+ #include <stdint.h>
9
+
10
+ #ifdef __cplusplus
11
+ extern "C" {
12
+ #endif
13
+
14
+ #define GGML_KOMPUTE_MAX_DEVICES 16
15
+
16
+ struct ggml_vk_device {
17
+ int index;
18
+ int type; // same as VkPhysicalDeviceType
19
+ size_t heapSize;
20
+ const char * name;
21
+ const char * vendor;
22
+ int subgroupSize;
23
+ uint64_t bufferAlignment;
24
+ uint64_t maxAlloc;
25
+ };
26
+
27
+ struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count);
28
+ bool ggml_vk_get_device(struct ggml_vk_device * device, size_t memoryRequired, const char * name);
29
+ bool ggml_vk_has_vulkan(void);
30
+ bool ggml_vk_has_device(void);
31
+ struct ggml_vk_device ggml_vk_current_device(void);
32
+
33
+ //
34
+ // backend API
35
+ //
36
+
37
+ // forward declaration
38
+ typedef struct ggml_backend * ggml_backend_t;
39
+
40
+ GGML_BACKEND_API ggml_backend_t ggml_backend_kompute_init(int device);
41
+
42
+ GGML_BACKEND_API bool ggml_backend_is_kompute(ggml_backend_t backend);
43
+
44
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
45
+
46
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_kompute_reg(void);
47
+
48
+ #ifdef __cplusplus
49
+ }
50
+ #endif
@@ -0,0 +1,66 @@
1
+ // Note: this description is outdated
2
+ //
3
+ // An interface allowing to compute ggml_cgraph with Metal
4
+ //
5
+ // This is a fully functional interface that extends ggml with GPU support for Apple devices.
6
+ // A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, etc.)
7
+ //
8
+ // How does it work?
9
+ //
10
+ // As long as your program can create and evaluate a ggml_cgraph on the CPU, you can use this
11
+ // interface to evaluate the same graph on the GPU. Instead of using ggml_graph_compute(), you
12
+ // use ggml_metal_graph_compute() (or ggml_vulkan_graph_compute(), etc.)
13
+ //
14
+ // You only need to make sure that all memory buffers that you used during the graph creation
15
+ // are mapped to the device memory with the ggml_metal_add_buffer() function. This mapping is
16
+ // used during the graph evaluation to determine the arguments of the compute kernels.
17
+ //
18
+ // Synchronization between device and host memory (for example for input and output tensors)
19
+ // is done with the ggml_metal_set_tensor() and ggml_metal_get_tensor() functions.
20
+ //
21
+
22
+ #pragma once
23
+
24
+ #include "ggml.h"
25
+ #include "ggml-backend.h"
26
+
27
+ #include <stddef.h>
28
+ #include <stdbool.h>
29
+
30
+ struct ggml_tensor;
31
+ struct ggml_cgraph;
32
+
33
+ #ifdef __cplusplus
34
+ extern "C" {
35
+ #endif
36
+
37
+ //
38
+ // backend API
39
+ // user-code should use only these functions
40
+ //
41
+
42
+ GGML_BACKEND_API ggml_backend_t ggml_backend_metal_init(void);
43
+
44
+ GGML_BACKEND_API bool ggml_backend_is_metal(ggml_backend_t backend);
45
+
46
+ GGML_DEPRECATED(
47
+ GGML_BACKEND_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size),
48
+ "obsoleted by the new device interface - https://github.com/ggerganov/llama.cpp/pull/9713");
49
+
50
+ GGML_BACKEND_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
51
+
52
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
53
+
54
+ // helper to check if the device supports a specific family
55
+ // ideally, the user code should be doing these checks
56
+ // ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
57
+ GGML_BACKEND_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family);
58
+
59
+ // capture all command buffers committed the next time `ggml_backend_graph_compute` is called
60
+ GGML_BACKEND_API void ggml_backend_metal_capture_next_compute(ggml_backend_t backend);
61
+
62
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_metal_reg(void);
63
+
64
+ #ifdef __cplusplus
65
+ }
66
+ #endif
@@ -0,0 +1,26 @@
1
+ #ifndef GGML_OPENCL_H
2
+ #define GGML_OPENCL_H
3
+
4
+ #include "ggml.h"
5
+ #include "ggml-backend.h"
6
+
7
+ #ifdef __cplusplus
8
+ extern "C" {
9
+ #endif
10
+
11
+ //
12
+ // backend API
13
+ //
14
+ GGML_BACKEND_API ggml_backend_t ggml_backend_opencl_init(void);
15
+ GGML_BACKEND_API bool ggml_backend_is_opencl(ggml_backend_t backend);
16
+
17
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type(void);
18
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type(void);
19
+
20
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_opencl_reg(void);
21
+
22
+ #ifdef __cplusplus
23
+ }
24
+ #endif
25
+
26
+ #endif // GGML_OPENCL_H