whispercpp 1.2.0.2 → 1.3.1

Files changed (135)
  1. checksums.yaml +4 -4
  2. data/.gitignore +5 -0
  3. data/LICENSE +1 -1
  4. data/README.md +165 -434
  5. data/Rakefile +46 -86
  6. data/ext/.gitignore +13 -0
  7. data/ext/cpu.mk +9 -0
  8. data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
  9. data/ext/extconf.rb +185 -7
  10. data/ext/ggml/include/ggml-alloc.h +76 -0
  11. data/ext/ggml/include/ggml-backend.h +352 -0
  12. data/ext/ggml/include/ggml-blas.h +25 -0
  13. data/ext/ggml/include/ggml-cann.h +123 -0
  14. data/ext/ggml/include/ggml-cpp.h +38 -0
  15. data/ext/ggml/include/ggml-cpu.h +135 -0
  16. data/ext/ggml/include/ggml-cuda.h +47 -0
  17. data/ext/ggml/include/ggml-kompute.h +50 -0
  18. data/ext/ggml/include/ggml-metal.h +66 -0
  19. data/ext/ggml/include/ggml-opencl.h +26 -0
  20. data/ext/ggml/include/ggml-opt.h +216 -0
  21. data/ext/ggml/include/ggml-rpc.h +28 -0
  22. data/ext/ggml/include/ggml-sycl.h +49 -0
  23. data/ext/ggml/include/ggml-vulkan.h +31 -0
  24. data/ext/ggml/include/ggml.h +2285 -0
  25. data/ext/ggml/src/ggml-alloc.c +1037 -0
  26. data/ext/ggml/src/ggml-amx/common.h +94 -0
  27. data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  28. data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
  29. data/ext/ggml/src/ggml-amx/mmq.h +17 -0
  30. data/ext/ggml/src/ggml-backend-impl.h +256 -0
  31. data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
  32. data/ext/ggml/src/ggml-backend.cpp +1999 -0
  33. data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  34. data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
  35. data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
  36. data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
  37. data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  38. data/ext/ggml/src/ggml-cann/common.h +286 -0
  39. data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
  40. data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
  41. data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
  42. data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
  43. data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
  44. data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
  45. data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  46. data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
  47. data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
  48. data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
  49. data/ext/ggml/src/ggml-common.h +1853 -0
  50. data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  51. data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
  52. data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
  53. data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  54. data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  55. data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  56. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
  57. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  58. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  59. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  60. data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
  61. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  62. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  63. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  64. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  65. data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  66. data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
  67. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
  68. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  69. data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  70. data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
  71. data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  72. data/ext/ggml/src/ggml-impl.h +556 -0
  73. data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  74. data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  75. data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
  76. data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
  77. data/ext/ggml/src/ggml-opt.cpp +854 -0
  78. data/ext/ggml/src/ggml-quants.c +5238 -0
  79. data/ext/ggml/src/ggml-quants.h +100 -0
  80. data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
  81. data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
  82. data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
  83. data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
  84. data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
  85. data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  86. data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  87. data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
  88. data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
  89. data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  90. data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
  91. data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
  92. data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
  93. data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
  94. data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
  95. data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
  96. data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  97. data/ext/ggml/src/ggml-threading.cpp +12 -0
  98. data/ext/ggml/src/ggml-threading.h +14 -0
  99. data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
  100. data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  101. data/ext/ggml/src/ggml.c +7694 -0
  102. data/ext/include/whisper.h +672 -0
  103. data/ext/metal-embed.mk +17 -0
  104. data/ext/metal.mk +6 -0
  105. data/ext/ruby_whisper.cpp +1608 -159
  106. data/ext/ruby_whisper.h +10 -0
  107. data/ext/scripts/get-flags.mk +38 -0
  108. data/ext/src/coreml/whisper-decoder-impl.h +146 -0
  109. data/ext/src/coreml/whisper-decoder-impl.m +201 -0
  110. data/ext/src/coreml/whisper-encoder-impl.h +142 -0
  111. data/ext/src/coreml/whisper-encoder-impl.m +197 -0
  112. data/ext/src/coreml/whisper-encoder.h +26 -0
  113. data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
  114. data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
  115. data/ext/src/whisper.cpp +7393 -0
  116. data/extsources.rb +6 -0
  117. data/lib/whisper/model/uri.rb +157 -0
  118. data/lib/whisper.rb +2 -0
  119. data/tests/helper.rb +7 -0
  120. data/tests/jfk_reader/.gitignore +5 -0
  121. data/tests/jfk_reader/extconf.rb +3 -0
  122. data/tests/jfk_reader/jfk_reader.c +68 -0
  123. data/tests/test_callback.rb +160 -0
  124. data/tests/test_error.rb +20 -0
  125. data/tests/test_model.rb +71 -0
  126. data/tests/test_package.rb +31 -0
  127. data/tests/test_params.rb +160 -0
  128. data/tests/test_segment.rb +83 -0
  129. data/tests/test_whisper.rb +211 -123
  130. data/whispercpp.gemspec +36 -0
  131. metadata +137 -11
  132. data/ext/ggml.c +0 -8616
  133. data/ext/ggml.h +0 -748
  134. data/ext/whisper.cpp +0 -4829
  135. data/ext/whisper.h +0 -402
data/ext/ggml.h DELETED
@@ -1,748 +0,0 @@
- #pragma once
-
- //
- // GGML Tensor Library
- //
- // This documentation is still a work in progress.
- // If you wish some specific topics to be covered, feel free to drop a comment:
- //
- // https://github.com/ggerganov/whisper.cpp/issues/40
- //
- // ## Overview
- //
- // This library implements:
- //
- // - a set of tensor operations
- // - automatic differentiation
- // - basic optimization algorithms
- //
- // The aim of this library is to provide a minimalistic approach for various machine learning tasks. This includes,
- // but is not limited to, the following:
- //
- // - linear regression
- // - support vector machines
- // - neural networks
- //
- // The library allows the user to define a certain function using the available tensor operations. This function
- // definition is represented internally via a computation graph. Each tensor operation in the function definition
- // corresponds to a node in the graph. Having the computation graph defined, the user can choose to compute the
- // function's value and/or its gradient with respect to the input variables. Optionally, the function can be optimized
- // using one of the available optimization algorithms.
- //
- // For example, here we define the function: f(x) = a*x^2 + b
- //
- // {
- // struct ggml_init_params params = {
- // .mem_size = 16*1024*1024,
- // .mem_buffer = NULL,
- // };
- //
- // // memory allocation happens here
- // struct ggml_context * ctx = ggml_init(params);
- //
- // struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
- //
- // ggml_set_param(ctx, x); // x is an input variable
- //
- // struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
- // struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
- // struct ggml_tensor * x2 = ggml_mul(ctx, x, x);
- // struct ggml_tensor * f = ggml_add(ctx, ggml_mul(ctx, a, x2), b);
- //
- // ...
- // }
- //
- // Notice that the function definition above does not involve any actual computation. The computation is performed only
- // when the user explicitly requests it. For example, to compute the function's value at x = 2.0:
- //
- // {
- // ...
- //
- // struct ggml_cgraph gf = ggml_build_forward(f);
- //
- // // set the input variable and parameter values
- // ggml_set_f32(x, 2.0f);
- // ggml_set_f32(a, 3.0f);
- // ggml_set_f32(b, 4.0f);
- //
- // ggml_graph_compute(ctx0, &gf);
- //
- // printf("f = %f\n", ggml_get_f32_1d(f, 0));
- //
- // ...
- // }
- //
- // The actual computation is performed in the ggml_graph_compute() function.
- //
- // The ggml_new_tensor_...() functions create new tensors. They are allocated in the memory buffer provided to the
- // ggml_init() function. You have to be careful not to exceed the memory buffer size. Therefore, you have to know
- // in advance how much memory you need for your computation. Alternatively, you can allocate a large enough memory
- // and after defining the computation graph, call the ggml_used_mem() function to find out how much memory was
- // actually needed.
- //
- // The ggml_set_param() function marks a tensor as an input variable. This is used by the automatic
- // differentiation and optimization algorithms.
- //
- // The described approach allows to define the function graph once and then compute its forward or backward graphs
- // multiple times. All computations will use the same memory buffer allocated in the ggml_init() function. This way
- // the user can avoid the memory allocation overhead at runtime.
- //
- // The library supports multi-dimensional tensors - up to 4 dimensions. The FP16 and FP32 data types are first class
- // citizens, but in theory the library can be extended to support FP8 and integer data types.
- //
- // Each tensor operation produces a new tensor. Initially the library was envisioned to support only the use of unary
- // and binary operations. Most of the available operations fall into one of these two categories. With time, it became
- // clear that the library needs to support more complex operations. The way to support these operations is not clear
- // yet, but a few examples are demonstrated in the following operations:
- //
- // - ggml_permute()
- // - ggml_conv_1d_1s()
- // - ggml_conv_1d_2s()
- //
- // For each tensor operator, the library implements a forward and backward computation function. The forward function
- // computes the output tensor value given the input tensor values. The backward function computes the adjoint of the
- // input tensors given the adjoint of the output tensor. For a detailed explanation of what this means, take a
- // calculus class, or watch the following video:
- //
- // What is Automatic Differentiation?
- // https://www.youtube.com/watch?v=wG_nF1awSSY
- //
- //
- // ## Tensor data (struct ggml_tensor)
- //
- // The tensors are stored in memory via the ggml_tensor struct. The structure provides information about the size of
- // the tensor, the data type, and the memory buffer where the tensor data is stored. Additionally, it contains
- // pointers to the "source" tensors - i.e. the tensors that were used to compute the current tensor. For example:
- //
- // {
- // struct ggml_tensor * c = ggml_add(ctx, a, b);
- //
- // assert(c->src[0] == a);
- // assert(c->src[1] == b);
- // }
- //
- // The multi-dimensional tensors are stored in row-major order. The ggml_tensor struct contains fields for the
- // number of elements in each dimension ("ne") as well as the number of bytes ("nb", a.k.a. stride). This allows
- // to store tensors that are not contiguous in memory, which is useful for operations such as transposition and
- // permutation. All tensor operations have to take the stride into account and not assume that the tensor is
- // contiguous in memory.
- //
- // The data of the tensor is accessed via the "data" pointer. For example:
- //
- // {
- // struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 3);
- //
- // // a[1, 2] = 1.0f;
- // *(float *) ((char *) a->data + 2*a->nb[1] + 1*a->nb[0]) = 1.0f;
- //
- // // a[2, 0] = 2.0f;
- // *(float *) ((char *) a->data + 0*a->nb[1] + 2*a->nb[0]) = 2.0f;
- //
- // ...
- // }
- //
- // Alternatively, there are helper functions, such as ggml_get_f32_1d() and ggml_set_f32_1d() that can be used.
- //
- // ## The matrix multiplication operator (ggml_mul_mat)
- //
- // TODO
- //
- //
- // ## Multi-threading
- //
- // TODO
- //
- //
- // ## Overview of ggml.c
- //
- // TODO
- //
- //
- // ## SIMD optimizations
- //
- // TODO
- //
- //
- // ## Debugging ggml
- //
- // TODO
- //
- //
-
- #ifdef __cplusplus
- extern "C" {
- #endif
-
- #include <stdint.h>
- #include <stddef.h>
- #include <stdbool.h>
-
- #define GGML_MAX_DIMS 4
- #define GGML_MAX_NODES 4096
- #define GGML_MAX_PARAMS 16
- #define GGML_MAX_CONTEXTS 64
- #define GGML_MAX_OPT 4
-
- #ifdef __ARM_NEON
- // we use the built-in 16-bit float type
- typedef __fp16 ggml_fp16_t;
- #else
- typedef uint16_t ggml_fp16_t;
- #endif
-
- // convert FP16 <-> FP32
- float ggml_fp16_to_fp32(ggml_fp16_t x);
- ggml_fp16_t ggml_fp32_to_fp16(float x);
-
- struct ggml_object;
- struct ggml_context;
-
- enum ggml_type {
- GGML_TYPE_I8,
- GGML_TYPE_I16,
- GGML_TYPE_I32,
- GGML_TYPE_F16,
- GGML_TYPE_F32,
- GGML_TYPE_COUNT,
- };
-
- // available tensor operations:
- enum ggml_op {
- GGML_OP_NONE = 0,
-
- GGML_OP_DUP,
- GGML_OP_ADD,
- GGML_OP_SUB,
- GGML_OP_MUL,
- GGML_OP_DIV,
- GGML_OP_SQR,
- GGML_OP_SQRT,
- GGML_OP_SUM,
- GGML_OP_MEAN,
- GGML_OP_REPEAT,
- GGML_OP_ABS,
- GGML_OP_SGN,
- GGML_OP_NEG,
- GGML_OP_STEP,
- GGML_OP_RELU,
- GGML_OP_GELU,
- GGML_OP_NORM, // normalize
-
- GGML_OP_MUL_MAT,
-
- GGML_OP_SCALE,
- GGML_OP_CPY,
- GGML_OP_RESHAPE,
- GGML_OP_VIEW,
- GGML_OP_PERMUTE,
- GGML_OP_TRANSPOSE,
- GGML_OP_GET_ROWS,
- GGML_OP_DIAG_MASK_INF,
- GGML_OP_SOFT_MAX,
- GGML_OP_ROPE,
- GGML_OP_CONV_1D_1S,
- GGML_OP_CONV_1D_2S,
-
- GGML_OP_FLASH_ATTN,
- GGML_OP_FLASH_FF,
-
- GGML_OP_COUNT,
- };
-
- // n-dimensional tensor
- struct ggml_tensor {
- enum ggml_type type;
-
- int n_dims;
- int ne[GGML_MAX_DIMS]; // number of elements
- size_t nb[GGML_MAX_DIMS]; // stride in bytes:
- // nb[0] = sizeof(type)
- // nb[1] = nb[0] * ne[0] + padding
- // nb[i] = nb[i-1] * ne[i-1]
-
- // compute data
- enum ggml_op op;
-
- bool is_param;
-
- struct ggml_tensor * grad;
- struct ggml_tensor * src0;
- struct ggml_tensor * src1;
- struct ggml_tensor * opt[GGML_MAX_OPT];
-
- // thread scheduling
- int n_tasks;
-
- // performance
- int perf_runs;
- int64_t perf_cycles;
- int64_t perf_time_us;
-
- void * data;
- char padding[8];
- };
-
- // computation graph
- struct ggml_cgraph {
- int n_nodes;
- int n_leafs;
- int n_threads;
-
- size_t work_size;
- struct ggml_tensor * work;
-
- struct ggml_tensor * nodes[GGML_MAX_NODES];
- struct ggml_tensor * grads[GGML_MAX_NODES];
- struct ggml_tensor * leafs[GGML_MAX_NODES];
-
- // performance
- int perf_runs;
- int64_t perf_cycles;
- int64_t perf_time_us;
- };
-
- // scratch buffer
- struct ggml_scratch {
- size_t offs;
- size_t size;
- void * data;
- };
-
- struct ggml_init_params {
- // memory pool
- size_t mem_size; // bytes
- void * mem_buffer; // if NULL, memory will be allocated internally
- };
-
- void ggml_time_init(void); // call this once at the beginning of the program
- int64_t ggml_time_ms(void);
- int64_t ggml_time_us(void);
- int64_t ggml_cycles(void);
- int64_t ggml_cycles_per_ms(void);
-
- void ggml_print_object (const struct ggml_object * obj);
- void ggml_print_objects(const struct ggml_context * ctx);
-
- int ggml_nelements(const struct ggml_tensor * tensor);
- size_t ggml_nbytes (const struct ggml_tensor * tensor);
-
- size_t ggml_type_size (enum ggml_type type);
- size_t ggml_element_size(const struct ggml_tensor * tensor);
-
- struct ggml_context * ggml_init(struct ggml_init_params params);
- void ggml_free(struct ggml_context * ctx);
-
- size_t ggml_used_mem(const struct ggml_context * ctx);
-
- size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
-
- struct ggml_tensor * ggml_new_tensor(
- struct ggml_context * ctx,
- enum ggml_type type,
- int n_dims,
- const int *ne);
-
- struct ggml_tensor * ggml_new_tensor_1d(
- struct ggml_context * ctx,
- enum ggml_type type,
- int ne0);
-
- struct ggml_tensor * ggml_new_tensor_2d(
- struct ggml_context * ctx,
- enum ggml_type type,
- int ne0,
- int ne1);
-
- struct ggml_tensor * ggml_new_tensor_3d(
- struct ggml_context * ctx,
- enum ggml_type type,
- int ne0,
- int ne1,
- int ne2);
-
- struct ggml_tensor * ggml_new_tensor_4d(
- struct ggml_context * ctx,
- enum ggml_type type,
- int ne0,
- int ne1,
- int ne2,
- int ne3);
-
- struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
- struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
-
- struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
- struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, const struct ggml_tensor * src);
-
- struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
- struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
- struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
-
- int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
- void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
-
- float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
- void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
-
- void * ggml_get_data (const struct ggml_tensor * tensor);
- float * ggml_get_data_f32(const struct ggml_tensor * tensor);
-
- //
- // operations on tensors with backpropagation
- //
-
- struct ggml_tensor * ggml_dup(
- struct ggml_context * ctx,
- struct ggml_tensor * a);
-
- struct ggml_tensor * ggml_add(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b);
-
- struct ggml_tensor * ggml_sub(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b);
-
- struct ggml_tensor * ggml_mul(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b);
-
- struct ggml_tensor * ggml_div(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b);
-
- struct ggml_tensor * ggml_sqr(
- struct ggml_context * ctx,
- struct ggml_tensor * a);
-
- struct ggml_tensor * ggml_sqrt(
- struct ggml_context * ctx,
- struct ggml_tensor * a);
-
- // return scalar
- // TODO: compute sum along rows
- struct ggml_tensor * ggml_sum(
- struct ggml_context * ctx,
- struct ggml_tensor * a);
-
- // mean along rows
- struct ggml_tensor * ggml_mean(
- struct ggml_context * ctx,
- struct ggml_tensor * a);
-
- // if a is the same shape as b, and a is not parameter, return a
- // otherwise, return a new tensor: repeat(a) to fit in b
- struct ggml_tensor * ggml_repeat(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b);
-
- struct ggml_tensor * ggml_abs(
- struct ggml_context * ctx,
- struct ggml_tensor * a);
-
- struct ggml_tensor * ggml_sgn(
- struct ggml_context * ctx,
- struct ggml_tensor * a);
-
- struct ggml_tensor * ggml_neg(
- struct ggml_context * ctx,
- struct ggml_tensor * a);
-
- struct ggml_tensor * ggml_step(
- struct ggml_context * ctx,
- struct ggml_tensor * a);
-
- struct ggml_tensor * ggml_relu(
- struct ggml_context * ctx,
- struct ggml_tensor * a);
-
- // TODO: double-check this computation is correct
- struct ggml_tensor * ggml_gelu(
- struct ggml_context * ctx,
- struct ggml_tensor * a);
-
- // normalize along rows
- // TODO: eps is hardcoded to 1e-5 for now
- struct ggml_tensor * ggml_norm(
- struct ggml_context * ctx,
- struct ggml_tensor * a);
-
- // A: m rows, n columns
- // B: p rows, n columns (i.e. we transpose it internally)
- // result is m columns, p rows
- struct ggml_tensor * ggml_mul_mat(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b);
-
- //
- // operations on tensors without backpropagation
- //
-
- // in-place, returns view(a)
- struct ggml_tensor * ggml_scale(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b);
-
- // a -> b, return view(b)
- struct ggml_tensor * ggml_cpy(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b);
-
- // return view(a), b specifies the new shape
- // TODO: when we start computing gradient, make a copy instead of view
- struct ggml_tensor * ggml_reshape(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b);
-
- // return view(a)
- // TODO: when we start computing gradient, make a copy instead of view
- struct ggml_tensor * ggml_reshape_2d(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- int ne0,
- int ne1);
-
- // return view(a)
- // TODO: when we start computing gradient, make a copy instead of view
- struct ggml_tensor * ggml_reshape_3d(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- int ne0,
- int ne1,
- int ne2);
-
- // offset in bytes
- struct ggml_tensor * ggml_view_1d(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- int ne0,
- size_t offset);
-
- struct ggml_tensor * ggml_view_2d(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- int ne0,
- int ne1,
- size_t nb1, // row stride in bytes
- size_t offset);
-
- struct ggml_tensor * ggml_permute(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- int axis0,
- int axis1,
- int axis2,
- int axis3);
-
- // alias for ggml_permute(ctx, a, 1, 0, 2, 3)
- struct ggml_tensor * ggml_transpose(
- struct ggml_context * ctx,
- struct ggml_tensor * a);
-
- struct ggml_tensor * ggml_get_rows(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b);
-
- // set elements above the diagonal to -INF
- // in-place, returns view(a)
- struct ggml_tensor * ggml_diag_mask_inf(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- int n_past);
-
- // in-place, returns view(a)
- struct ggml_tensor * ggml_soft_max(
- struct ggml_context * ctx,
- struct ggml_tensor * a);
-
- // rotary position embedding
- // in-place, returns view(a)
- // if mode == 1, skip n_past elements
- // TODO: avoid creating a new tensor every time
- struct ggml_tensor * ggml_rope(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- int n_past,
- int n_dims,
- int mode);
-
- // padding = 1
- // TODO: we don't support extra parameters for now
- // that's why we are hard-coding the stride, padding, and dilation
- // not great ..
- struct ggml_tensor * ggml_conv_1d_1s(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b);
-
- struct ggml_tensor * ggml_conv_1d_2s(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b);
-
- struct ggml_tensor * ggml_flash_attn(
- struct ggml_context * ctx,
- struct ggml_tensor * q,
- struct ggml_tensor * k,
- struct ggml_tensor * v,
- bool masked);
-
- struct ggml_tensor * ggml_flash_ff(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b0,
- struct ggml_tensor * b1,
- struct ggml_tensor * c0,
- struct ggml_tensor * c1);
-
- //
- // automatic differentiation
- //
-
- void ggml_set_param(
- struct ggml_context * ctx,
- struct ggml_tensor * tensor);
-
- void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
-
- struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
- struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
-
- void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph);
- void ggml_graph_reset (struct ggml_cgraph * cgraph);
-
- // print info and performance information for the graph
- void ggml_graph_print(const struct ggml_cgraph * cgraph);
-
- // dump the graph into a file using the dot format
- void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);
-
- //
- // optimization
- //
-
- // optimization methods
- enum ggml_opt_type {
- GGML_OPT_ADAM,
- GGML_OPT_LBFGS,
- };
-
- // linesearch methods
- enum ggml_linesearch {
- GGML_LINESEARCH_DEFAULT = 1,
-
- GGML_LINESEARCH_BACKTRACKING_ARMIJO = 0,
- GGML_LINESEARCH_BACKTRACKING_WOLFE = 1,
- GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2,
- };
-
- // optimization return values
- enum ggml_opt_result {
- GGML_OPT_OK = 0,
- GGML_OPT_DID_NOT_CONVERGE,
- GGML_OPT_NO_CONTEXT,
- GGML_OPT_INVALID_WOLFE,
- GGML_OPT_FAIL,
-
- GGML_LINESEARCH_FAIL = -128,
- GGML_LINESEARCH_MINIMUM_STEP,
- GGML_LINESEARCH_MAXIMUM_STEP,
- GGML_LINESEARCH_MAXIMUM_ITERATIONS,
- GGML_LINESEARCH_INVALID_PARAMETERS,
- };
-
- // optimization parameters
- //
- // see ggml.c (ggml_opt_default_params) for default values
- //
- struct ggml_opt_params {
- enum ggml_opt_type type;
-
- int n_threads;
-
- // delta-based convergence test
- //
- // if past == 0 - disabled
- // if past > 0:
- // stop if |f(x) - f(x_past)| < delta * max(1, |f(x)|)
- //
- int past;
- float delta;
-
- // maximum number of iterations without improvement
- //
- // if 0 - disabled
- // if > 0:
- // assume convergence if no cost improvement in this number of iterations
- //
- int max_no_improvement;
-
- bool print_forward_graph;
- bool print_backward_graph;
-
- // ADAM parameters
- struct {
- int n_iter;
-
- float alpha; // learning rate
- float beta1;
- float beta2;
- float eps; // epsilon for numerical stability
- float eps_f; // epsilon for convergence test
- float eps_g; // epsilon for convergence test
- } adam;
-
- // LBFGS parameters
- struct {
- int m; // number of corrections to approximate the inv. Hessian
- int n_iter;
- int max_linesearch;
-
- float eps; // convergence tolerance
- float ftol; // line search tolerance
- float wolfe;
- float min_step;
- float max_step;
-
- enum ggml_linesearch linesearch;
- } lbfgs;
- };
-
- struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);
-
- // optimize the function defined by the tensor f
- enum ggml_opt_result ggml_opt(
- struct ggml_context * ctx,
- struct ggml_opt_params params,
- struct ggml_tensor * f);
-
- //
- // system info
- //
-
- int ggml_cpu_has_avx(void);
- int ggml_cpu_has_avx2(void);
- int ggml_cpu_has_avx512(void);
- int ggml_cpu_has_fma(void);
- int ggml_cpu_has_neon(void);
- int ggml_cpu_has_arm_fma(void);
- int ggml_cpu_has_f16c(void);
- int ggml_cpu_has_fp16_va(void);
- int ggml_cpu_has_wasm_simd(void);
- int ggml_cpu_has_blas(void);
- int ggml_cpu_has_sse3(void);
- int ggml_cpu_has_vsx(void);
-
- #ifdef __cplusplus
- }
- #endif