whispercpp 1.2.0.2 → 1.3.1

Sign up to get free protection for your applications and access to all the features.
Files changed (135):
  1. checksums.yaml +4 -4
  2. data/.gitignore +5 -0
  3. data/LICENSE +1 -1
  4. data/README.md +165 -434
  5. data/Rakefile +46 -86
  6. data/ext/.gitignore +13 -0
  7. data/ext/cpu.mk +9 -0
  8. data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
  9. data/ext/extconf.rb +185 -7
  10. data/ext/ggml/include/ggml-alloc.h +76 -0
  11. data/ext/ggml/include/ggml-backend.h +352 -0
  12. data/ext/ggml/include/ggml-blas.h +25 -0
  13. data/ext/ggml/include/ggml-cann.h +123 -0
  14. data/ext/ggml/include/ggml-cpp.h +38 -0
  15. data/ext/ggml/include/ggml-cpu.h +135 -0
  16. data/ext/ggml/include/ggml-cuda.h +47 -0
  17. data/ext/ggml/include/ggml-kompute.h +50 -0
  18. data/ext/ggml/include/ggml-metal.h +66 -0
  19. data/ext/ggml/include/ggml-opencl.h +26 -0
  20. data/ext/ggml/include/ggml-opt.h +216 -0
  21. data/ext/ggml/include/ggml-rpc.h +28 -0
  22. data/ext/ggml/include/ggml-sycl.h +49 -0
  23. data/ext/ggml/include/ggml-vulkan.h +31 -0
  24. data/ext/ggml/include/ggml.h +2285 -0
  25. data/ext/ggml/src/ggml-alloc.c +1037 -0
  26. data/ext/ggml/src/ggml-amx/common.h +94 -0
  27. data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  28. data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
  29. data/ext/ggml/src/ggml-amx/mmq.h +17 -0
  30. data/ext/ggml/src/ggml-backend-impl.h +256 -0
  31. data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
  32. data/ext/ggml/src/ggml-backend.cpp +1999 -0
  33. data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  34. data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
  35. data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
  36. data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
  37. data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  38. data/ext/ggml/src/ggml-cann/common.h +286 -0
  39. data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
  40. data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
  41. data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
  42. data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
  43. data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
  44. data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
  45. data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  46. data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
  47. data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
  48. data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
  49. data/ext/ggml/src/ggml-common.h +1853 -0
  50. data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  51. data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
  52. data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
  53. data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  54. data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  55. data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  56. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
  57. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  58. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  59. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  60. data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
  61. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  62. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  63. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  64. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  65. data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  66. data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
  67. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
  68. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  69. data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  70. data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
  71. data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  72. data/ext/ggml/src/ggml-impl.h +556 -0
  73. data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  74. data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  75. data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
  76. data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
  77. data/ext/ggml/src/ggml-opt.cpp +854 -0
  78. data/ext/ggml/src/ggml-quants.c +5238 -0
  79. data/ext/ggml/src/ggml-quants.h +100 -0
  80. data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
  81. data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
  82. data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
  83. data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
  84. data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
  85. data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  86. data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  87. data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
  88. data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
  89. data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  90. data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
  91. data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
  92. data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
  93. data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
  94. data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
  95. data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
  96. data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  97. data/ext/ggml/src/ggml-threading.cpp +12 -0
  98. data/ext/ggml/src/ggml-threading.h +14 -0
  99. data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
  100. data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  101. data/ext/ggml/src/ggml.c +7694 -0
  102. data/ext/include/whisper.h +672 -0
  103. data/ext/metal-embed.mk +17 -0
  104. data/ext/metal.mk +6 -0
  105. data/ext/ruby_whisper.cpp +1608 -159
  106. data/ext/ruby_whisper.h +10 -0
  107. data/ext/scripts/get-flags.mk +38 -0
  108. data/ext/src/coreml/whisper-decoder-impl.h +146 -0
  109. data/ext/src/coreml/whisper-decoder-impl.m +201 -0
  110. data/ext/src/coreml/whisper-encoder-impl.h +142 -0
  111. data/ext/src/coreml/whisper-encoder-impl.m +197 -0
  112. data/ext/src/coreml/whisper-encoder.h +26 -0
  113. data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
  114. data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
  115. data/ext/src/whisper.cpp +7393 -0
  116. data/extsources.rb +6 -0
  117. data/lib/whisper/model/uri.rb +157 -0
  118. data/lib/whisper.rb +2 -0
  119. data/tests/helper.rb +7 -0
  120. data/tests/jfk_reader/.gitignore +5 -0
  121. data/tests/jfk_reader/extconf.rb +3 -0
  122. data/tests/jfk_reader/jfk_reader.c +68 -0
  123. data/tests/test_callback.rb +160 -0
  124. data/tests/test_error.rb +20 -0
  125. data/tests/test_model.rb +71 -0
  126. data/tests/test_package.rb +31 -0
  127. data/tests/test_params.rb +160 -0
  128. data/tests/test_segment.rb +83 -0
  129. data/tests/test_whisper.rb +211 -123
  130. data/whispercpp.gemspec +36 -0
  131. metadata +137 -11
  132. data/ext/ggml.c +0 -8616
  133. data/ext/ggml.h +0 -748
  134. data/ext/whisper.cpp +0 -4829
  135. data/ext/whisper.h +0 -402
@@ -0,0 +1,123 @@
1
+ /*
2
+ * Copyright (c) 2023-2024 The ggml authors
3
+ *
4
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
5
+ * of this software and associated documentation files (the "Software"), to
6
+ * deal in the Software without restriction, including without limitation the
7
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8
+ * sell copies of the Software, and to permit persons to whom the Software is
9
+ * furnished to do so, subject to the following conditions:
10
+ *
11
+ * The above copyright notice and this permission notice shall be included in
12
+ * all copies or substantial portions of the Software.
13
+ *
14
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20
+ * IN THE SOFTWARE.
21
+ */
22
+
23
+ #pragma once
24
+
25
+ #include "ggml-backend.h"
26
+ #include "ggml.h"
27
+
28
+ #ifdef __cplusplus
29
+ extern "C" {
30
+ #endif
31
+
32
+ /**
33
+ * @brief Maximum number of CANN devices supported.
34
+ */
35
+ #define GGML_CANN_MAX_DEVICES 16
36
+
37
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cann_reg(void);
38
+
39
+ /**
40
+ * @brief Initializes the CANN backend for a specified device.
41
+ *
42
+ * This function initializes the CANN backend for the given device.
43
+ * It verifies the device index, allocates a context, and creates a backend
44
+ * instance.
45
+ *
46
+ * @param device The index of the device to initialize.
47
+ * @return A pointer to the initialized backend instance, or nullptr on failure.
48
+ */
49
+ GGML_BACKEND_API ggml_backend_t ggml_backend_cann_init(int32_t device);
50
+
51
+ /**
52
+ * @brief Checks if a given backend is a CANN backend.
53
+ *
54
+ * This function verifies if the provided backend is a CANN backend by comparing
55
+ * its GUID with the CANN backend's GUID.
56
+ *
57
+ * @param backend The backend instance to check.
58
+ * @return True if the backend is a CANN backend, false otherwise.
59
+ */
60
+ GGML_BACKEND_API bool ggml_backend_is_cann(ggml_backend_t backend);
61
+
62
+ /**
63
+ * @brief Retrieves the CANN buffer type for a specified device.
64
+ *
65
+ * This function initializes and returns the buffer type interface associated
66
+ * with the given device. It ensures thread-safe access using a mutex.
67
+ *
68
+ * @param device The device index for which to retrieve the buffer type.
69
+ * @return A pointer to the buffer type interface for the specified device, or
70
+ * nullptr if the device index is out of range.
71
+ */
72
+ GGML_BACKEND_API ggml_backend_buffer_type_t
73
+ ggml_backend_cann_buffer_type(int32_t device);
74
+
75
+ /**
76
+ * @brief Retrieves the number of CANN devices available.
77
+ *
78
+ * This function returns the number of CANN devices available based on
79
+ * information obtained from `ggml_cann_info()`.
80
+ *
81
+ * @return The number of CANN devices available.
82
+ */
83
+ GGML_BACKEND_API int32_t ggml_backend_cann_get_device_count(void);
84
+
85
+ /**
86
+ * @brief pinned host buffer for use with the CPU backend for faster copies between CPU and NPU.
87
+ *
88
+ * @return A pointer to the host buffer type interface.
89
+ */
90
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
91
+
92
+ /**
93
+ * @brief Retrieves the description of a specific CANN device.
94
+ *
95
+ * This function sets the specified device, retrieves the SoC name,
96
+ * and writes it into the provided description buffer.
97
+ *
98
+ * @param device The device index to retrieve the description for.
99
+ * @param description Pointer to a buffer where the description will be written.
100
+ * @param description_size Size of the description buffer.
101
+ */
102
+ GGML_BACKEND_API void ggml_backend_cann_get_device_description(
103
+ int32_t device, char* description, size_t description_size);
104
+
105
+ /**
106
+ * @brief Retrieves the memory information of a specific CANN device.
107
+ *
108
+ * This function sets the specified device, retrieves the free and total
109
+ * memory information of the specified type (ACL_HBM_MEM), and stores them
110
+ * in the provided pointers.
111
+ *
112
+ * @param device The device index to retrieve memory information for.
113
+ * @param free Pointer to a variable where the free memory size will be stored.
114
+ * @param total Pointer to a variable where the total memory size will be
115
+ * stored.
116
+ */
117
+ GGML_BACKEND_API void ggml_backend_cann_get_device_memory(int32_t device,
118
+ size_t* free,
119
+ size_t* total);
120
+
121
+ #ifdef __cplusplus
122
+ }
123
+ #endif
@@ -0,0 +1,38 @@
1
+ #pragma once
2
+
3
+ #ifndef __cplusplus
4
+ #error "This header is for C++ only"
5
+ #endif
6
+
7
+ #include "ggml.h"
8
+ #include "ggml-alloc.h"
9
+ #include "ggml-backend.h"
10
+ #include <memory>
11
+
12
+ // Smart pointers for ggml types
13
+
14
+ // ggml
15
+
16
+ struct ggml_context_deleter { void operator()(ggml_context * ctx) { ggml_free(ctx); } };
17
+ struct gguf_context_deleter { void operator()(gguf_context * ctx) { gguf_free(ctx); } };
18
+
19
+ typedef std::unique_ptr<ggml_context, ggml_context_deleter> ggml_context_ptr;
20
+ typedef std::unique_ptr<gguf_context, gguf_context_deleter> gguf_context_ptr;
21
+
22
+ // ggml-alloc
23
+
24
+ struct ggml_gallocr_deleter { void operator()(ggml_gallocr_t galloc) { ggml_gallocr_free(galloc); } };
25
+
26
+ typedef std::unique_ptr<ggml_gallocr_t, ggml_gallocr_deleter> ggml_gallocr_ptr;
27
+
28
+ // ggml-backend
29
+
30
+ struct ggml_backend_deleter { void operator()(ggml_backend_t backend) { ggml_backend_free(backend); } };
31
+ struct ggml_backend_buffer_deleter { void operator()(ggml_backend_buffer_t buffer) { ggml_backend_buffer_free(buffer); } };
32
+ struct ggml_backend_event_deleter { void operator()(ggml_backend_event_t event) { ggml_backend_event_free(event); } };
33
+ struct ggml_backend_sched_deleter { void operator()(ggml_backend_sched_t sched) { ggml_backend_sched_free(sched); } };
34
+
35
+ typedef std::unique_ptr<ggml_backend, ggml_backend_deleter> ggml_backend_ptr;
36
+ typedef std::unique_ptr<ggml_backend_buffer, ggml_backend_buffer_deleter> ggml_backend_buffer_ptr;
37
+ typedef std::unique_ptr<ggml_backend_event, ggml_backend_event_deleter> ggml_backend_event_ptr;
38
+ typedef std::unique_ptr<ggml_backend_sched, ggml_backend_sched_deleter> ggml_backend_sched_ptr;
@@ -0,0 +1,135 @@
1
+ #pragma once
2
+
3
+ #include "ggml.h"
4
+ #include "ggml-backend.h"
5
+
6
+ #ifdef __cplusplus
7
+ extern "C" {
8
+ #endif
9
+
10
+ // the compute plan that needs to be prepared for ggml_graph_compute()
11
+ // since https://github.com/ggerganov/ggml/issues/287
12
+ struct ggml_cplan {
13
+ size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
14
+ uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
15
+
16
+ int n_threads;
17
+ struct ggml_threadpool * threadpool;
18
+
19
+ // abort ggml_graph_compute when true
20
+ ggml_abort_callback abort_callback;
21
+ void * abort_callback_data;
22
+ };
23
+
24
+ // numa strategies
25
+ enum ggml_numa_strategy {
26
+ GGML_NUMA_STRATEGY_DISABLED = 0,
27
+ GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
28
+ GGML_NUMA_STRATEGY_ISOLATE = 2,
29
+ GGML_NUMA_STRATEGY_NUMACTL = 3,
30
+ GGML_NUMA_STRATEGY_MIRROR = 4,
31
+ GGML_NUMA_STRATEGY_COUNT
32
+ };
33
+
34
+ GGML_BACKEND_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
35
+ GGML_BACKEND_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
36
+
37
+ GGML_BACKEND_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
38
+ GGML_BACKEND_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
39
+
40
+ GGML_BACKEND_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
41
+ GGML_BACKEND_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
42
+
43
+ GGML_BACKEND_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
44
+ GGML_BACKEND_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
45
+
46
+ GGML_BACKEND_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
47
+ GGML_BACKEND_API void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
48
+
49
+ GGML_BACKEND_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
50
+ GGML_BACKEND_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
51
+
52
+ GGML_BACKEND_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
53
+ GGML_BACKEND_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
54
+
55
+ GGML_BACKEND_API struct ggml_threadpool * ggml_threadpool_new (struct ggml_threadpool_params * params);
56
+ GGML_BACKEND_API void ggml_threadpool_free (struct ggml_threadpool * threadpool);
57
+ GGML_BACKEND_API int ggml_threadpool_get_n_threads (struct ggml_threadpool * threadpool);
58
+ GGML_BACKEND_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
59
+ GGML_BACKEND_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);
60
+
61
+ // ggml_graph_plan() has to be called before ggml_graph_compute()
62
+ // when plan.work_size > 0, caller must allocate memory for plan.work_data
63
+ GGML_BACKEND_API struct ggml_cplan ggml_graph_plan(
64
+ const struct ggml_cgraph * cgraph,
65
+ int n_threads, /* = GGML_DEFAULT_N_THREADS */
66
+ struct ggml_threadpool * threadpool /* = NULL */ );
67
+ GGML_BACKEND_API enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
68
+
69
+ // same as ggml_graph_compute() but the work data is allocated as a part of the context
70
+ // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
71
+ GGML_BACKEND_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
72
+
73
+ //
74
+ // system info
75
+ //
76
+
77
+ // x86
78
+ GGML_BACKEND_API int ggml_cpu_has_sse3 (void);
79
+ GGML_BACKEND_API int ggml_cpu_has_ssse3 (void);
80
+ GGML_BACKEND_API int ggml_cpu_has_avx (void);
81
+ GGML_BACKEND_API int ggml_cpu_has_avx_vnni (void);
82
+ GGML_BACKEND_API int ggml_cpu_has_avx2 (void);
83
+ GGML_BACKEND_API int ggml_cpu_has_f16c (void);
84
+ GGML_BACKEND_API int ggml_cpu_has_fma (void);
85
+ GGML_BACKEND_API int ggml_cpu_has_avx512 (void);
86
+ GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void);
87
+ GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void);
88
+ GGML_BACKEND_API int ggml_cpu_has_avx512_bf16(void);
89
+ GGML_BACKEND_API int ggml_cpu_has_amx_int8 (void);
90
+ // ARM
91
+ GGML_BACKEND_API int ggml_cpu_has_neon (void);
92
+ GGML_BACKEND_API int ggml_cpu_has_arm_fma (void);
93
+ GGML_BACKEND_API int ggml_cpu_has_fp16_va (void);
94
+ GGML_BACKEND_API int ggml_cpu_has_dotprod (void);
95
+ GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
96
+ GGML_BACKEND_API int ggml_cpu_has_sve (void);
97
+ GGML_BACKEND_API int ggml_cpu_get_sve_cnt (void); // sve vector length in bytes
98
+ // other
99
+ GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
100
+ GGML_BACKEND_API int ggml_cpu_has_vsx (void);
101
+ GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
102
+ GGML_BACKEND_API int ggml_cpu_has_llamafile (void);
103
+
104
+ // Internal types and functions exposed for tests and benchmarks
105
+
106
+ typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
107
+ const void * GGML_RESTRICT y, size_t by, int nrc);
108
+
109
+ struct ggml_type_traits_cpu {
110
+ ggml_from_float_t from_float;
111
+ ggml_vec_dot_t vec_dot;
112
+ enum ggml_type vec_dot_type;
113
+ int64_t nrows; // number of rows to process simultaneously
114
+ };
115
+
116
+ GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);
117
+
118
+ GGML_BACKEND_API void ggml_cpu_init(void);
119
+
120
+ //
121
+ // CPU backend
122
+ //
123
+
124
+ GGML_BACKEND_API ggml_backend_t ggml_backend_cpu_init(void);
125
+
126
+ GGML_BACKEND_API bool ggml_backend_is_cpu (ggml_backend_t backend);
127
+ GGML_BACKEND_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
128
+ GGML_BACKEND_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
129
+ GGML_BACKEND_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
130
+
131
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
132
+
133
+ #ifdef __cplusplus
134
+ }
135
+ #endif
@@ -0,0 +1,47 @@
1
+ #pragma once
2
+
3
+ #include "ggml.h"
4
+ #include "ggml-backend.h"
5
+
6
+ #ifdef __cplusplus
7
+ extern "C" {
8
+ #endif
9
+
10
+ #ifdef GGML_USE_HIP
11
+ #define GGML_CUDA_NAME "ROCm"
12
+ #define GGML_CUBLAS_NAME "hipBLAS"
13
+ #elif defined(GGML_USE_MUSA)
14
+ #define GGML_CUDA_NAME "MUSA"
15
+ #define GGML_CUBLAS_NAME "muBLAS"
16
+ #else
17
+ #define GGML_CUDA_NAME "CUDA"
18
+ #define GGML_CUBLAS_NAME "cuBLAS"
19
+ #endif
20
+ #define GGML_CUDA_MAX_DEVICES 16
21
+
22
+ // backend API
23
+ GGML_BACKEND_API ggml_backend_t ggml_backend_cuda_init(int device);
24
+
25
+ GGML_BACKEND_API bool ggml_backend_is_cuda(ggml_backend_t backend);
26
+
27
+ // device buffer
28
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
29
+
30
+ // split tensor buffer that splits matrices by rows across multiple devices
31
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(int main_device, const float * tensor_split);
32
+
33
+ // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
34
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
35
+
36
+ GGML_BACKEND_API int ggml_backend_cuda_get_device_count(void);
37
+ GGML_BACKEND_API void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
38
+ GGML_BACKEND_API void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
39
+
40
+ GGML_BACKEND_API bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
41
+ GGML_BACKEND_API void ggml_backend_cuda_unregister_host_buffer(void * buffer);
42
+
43
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cuda_reg(void);
44
+
45
+ #ifdef __cplusplus
46
+ }
47
+ #endif
@@ -0,0 +1,50 @@
1
+ #pragma once
2
+
3
+ #include "ggml.h"
4
+ #include "ggml-backend.h"
5
+
6
+ #include <stdbool.h>
7
+ #include <stddef.h>
8
+ #include <stdint.h>
9
+
10
+ #ifdef __cplusplus
11
+ extern "C" {
12
+ #endif
13
+
14
+ #define GGML_KOMPUTE_MAX_DEVICES 16
15
+
16
+ struct ggml_vk_device {
17
+ int index;
18
+ int type; // same as VkPhysicalDeviceType
19
+ size_t heapSize;
20
+ const char * name;
21
+ const char * vendor;
22
+ int subgroupSize;
23
+ uint64_t bufferAlignment;
24
+ uint64_t maxAlloc;
25
+ };
26
+
27
+ struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count);
28
+ bool ggml_vk_get_device(struct ggml_vk_device * device, size_t memoryRequired, const char * name);
29
+ bool ggml_vk_has_vulkan(void);
30
+ bool ggml_vk_has_device(void);
31
+ struct ggml_vk_device ggml_vk_current_device(void);
32
+
33
+ //
34
+ // backend API
35
+ //
36
+
37
+ // forward declaration
38
+ typedef struct ggml_backend * ggml_backend_t;
39
+
40
+ GGML_BACKEND_API ggml_backend_t ggml_backend_kompute_init(int device);
41
+
42
+ GGML_BACKEND_API bool ggml_backend_is_kompute(ggml_backend_t backend);
43
+
44
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
45
+
46
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_kompute_reg(void);
47
+
48
+ #ifdef __cplusplus
49
+ }
50
+ #endif
@@ -0,0 +1,66 @@
1
+ // Note: this description is outdated
2
+ //
3
+ // An interface allowing to compute ggml_cgraph with Metal
4
+ //
5
+ // This is a fully functional interface that extends ggml with GPU support for Apple devices.
6
+ // A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, etc.)
7
+ //
8
+ // How it works?
9
+ //
10
+ // As long as your program can create and evaluate a ggml_cgraph on the CPU, you can use this
11
+ // interface to evaluate the same graph on the GPU. Instead of using ggml_graph_compute(), you
12
+ // use ggml_metal_graph_compute() (or ggml_vulkan_graph_compute(), etc.)
13
+ //
14
+ // You only need to make sure that all memory buffers that you used during the graph creation
15
+ // are mapped to the device memory with the ggml_metal_add_buffer() function. This mapping is
16
+ // used during the graph evaluation to determine the arguments of the compute kernels.
17
+ //
18
+ // Synchronization between device and host memory (for example for input and output tensors)
19
+ // is done with the ggml_metal_set_tensor() and ggml_metal_get_tensor() functions.
20
+ //
21
+
22
+ #pragma once
23
+
24
+ #include "ggml.h"
25
+ #include "ggml-backend.h"
26
+
27
+ #include <stddef.h>
28
+ #include <stdbool.h>
29
+
30
+ struct ggml_tensor;
31
+ struct ggml_cgraph;
32
+
33
+ #ifdef __cplusplus
34
+ extern "C" {
35
+ #endif
36
+
37
+ //
38
+ // backend API
39
+ // user-code should use only these functions
40
+ //
41
+
42
+ GGML_BACKEND_API ggml_backend_t ggml_backend_metal_init(void);
43
+
44
+ GGML_BACKEND_API bool ggml_backend_is_metal(ggml_backend_t backend);
45
+
46
+ GGML_DEPRECATED(
47
+ GGML_BACKEND_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size),
48
+ "obsoleted by the new device interface - https://github.com/ggerganov/llama.cpp/pull/9713");
49
+
50
+ GGML_BACKEND_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
51
+
52
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
53
+
54
+ // helper to check if the device supports a specific family
55
+ // ideally, the user code should be doing these checks
56
+ // ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
57
+ GGML_BACKEND_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family);
58
+
59
+ // capture all command buffers committed the next time `ggml_backend_graph_compute` is called
60
+ GGML_BACKEND_API void ggml_backend_metal_capture_next_compute(ggml_backend_t backend);
61
+
62
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_metal_reg(void);
63
+
64
+ #ifdef __cplusplus
65
+ }
66
+ #endif
@@ -0,0 +1,26 @@
1
+ #ifndef GGML_OPENCL_H
2
+ #define GGML_OPENCL_H
3
+
4
+ #include "ggml.h"
5
+ #include "ggml-backend.h"
6
+
7
+ #ifdef __cplusplus
8
+ extern "C" {
9
+ #endif
10
+
11
+ //
12
+ // backend API
13
+ //
14
+ GGML_BACKEND_API ggml_backend_t ggml_backend_opencl_init(void);
15
+ GGML_BACKEND_API bool ggml_backend_is_opencl(ggml_backend_t backend);
16
+
17
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type(void);
18
+ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type(void);
19
+
20
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_opencl_reg(void);
21
+
22
+ #ifdef __cplusplus
23
+ }
24
+ #endif
25
+
26
+ #endif // GGML_OPENCL_H