llama_cpp 0.0.1 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/README.md +3 -0
- data/ext/llama_cpp/llama_cpp.cpp +39 -1
- data/ext/llama_cpp/src/ggml.c +914 -509
- data/ext/llama_cpp/src/ggml.h +42 -27
- data/ext/llama_cpp/src/llama.cpp +293 -303
- data/ext/llama_cpp/src/llama.h +19 -2
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +6 -2
- data/sig/llama_cpp.rbs +52 -0
- metadata +3 -2
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -258,11 +258,11 @@ struct ggml_tensor {
|
|
258
258
|
enum ggml_type type;
|
259
259
|
|
260
260
|
int n_dims;
|
261
|
-
|
262
|
-
size_t
|
263
|
-
|
264
|
-
|
265
|
-
|
261
|
+
int64_t ne[GGML_MAX_DIMS]; // number of elements
|
262
|
+
size_t nb[GGML_MAX_DIMS]; // stride in bytes:
|
263
|
+
// nb[0] = sizeof(type)
|
264
|
+
// nb[1] = nb[0] * ne[0] + padding
|
265
|
+
// nb[i] = nb[i-1] * ne[i-1]
|
266
266
|
|
267
267
|
// compute data
|
268
268
|
enum ggml_op op;
|
@@ -316,6 +316,7 @@ struct ggml_init_params {
|
|
316
316
|
// memory pool
|
317
317
|
size_t mem_size; // bytes
|
318
318
|
void * mem_buffer; // if NULL, memory will be allocated internally
|
319
|
+
bool no_alloc; // don't allocate memory for the tensor data
|
319
320
|
};
|
320
321
|
|
321
322
|
void ggml_time_init(void); // call this once at the beginning of the program
|
@@ -327,8 +328,8 @@ int64_t ggml_cycles_per_ms(void);
|
|
327
328
|
void ggml_print_object (const struct ggml_object * obj);
|
328
329
|
void ggml_print_objects(const struct ggml_context * ctx);
|
329
330
|
|
330
|
-
|
331
|
-
size_t
|
331
|
+
int64_t ggml_nelements(const struct ggml_tensor * tensor);
|
332
|
+
size_t ggml_nbytes (const struct ggml_tensor * tensor);
|
332
333
|
|
333
334
|
int ggml_blck_size (enum ggml_type type);
|
334
335
|
size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
|
@@ -344,39 +345,43 @@ size_t ggml_used_mem(const struct ggml_context * ctx);
|
|
344
345
|
size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
|
345
346
|
|
346
347
|
bool ggml_mlock_supported(void);
|
347
|
-
bool ggml_mlock(
|
348
|
+
bool ggml_mlock(
|
349
|
+
struct ggml_context * ctx,
|
350
|
+
const void *opt_extra_addr,
|
351
|
+
size_t opt_extra_len,
|
352
|
+
char **err_p);
|
348
353
|
|
349
354
|
struct ggml_tensor * ggml_new_tensor(
|
350
355
|
struct ggml_context * ctx,
|
351
356
|
enum ggml_type type,
|
352
357
|
int n_dims,
|
353
|
-
const
|
358
|
+
const int64_t *ne);
|
354
359
|
|
355
360
|
struct ggml_tensor * ggml_new_tensor_1d(
|
356
361
|
struct ggml_context * ctx,
|
357
362
|
enum ggml_type type,
|
358
|
-
|
363
|
+
int64_t ne0);
|
359
364
|
|
360
365
|
struct ggml_tensor * ggml_new_tensor_2d(
|
361
366
|
struct ggml_context * ctx,
|
362
367
|
enum ggml_type type,
|
363
|
-
|
364
|
-
|
368
|
+
int64_t ne0,
|
369
|
+
int64_t ne1);
|
365
370
|
|
366
371
|
struct ggml_tensor * ggml_new_tensor_3d(
|
367
372
|
struct ggml_context * ctx,
|
368
373
|
enum ggml_type type,
|
369
|
-
|
370
|
-
|
371
|
-
|
374
|
+
int64_t ne0,
|
375
|
+
int64_t ne1,
|
376
|
+
int64_t ne2);
|
372
377
|
|
373
378
|
struct ggml_tensor * ggml_new_tensor_4d(
|
374
379
|
struct ggml_context * ctx,
|
375
380
|
enum ggml_type type,
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
381
|
+
int64_t ne0,
|
382
|
+
int64_t ne1,
|
383
|
+
int64_t ne2,
|
384
|
+
int64_t ne3);
|
380
385
|
|
381
386
|
struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
|
382
387
|
struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
|
@@ -526,33 +531,43 @@ struct ggml_tensor * ggml_reshape(
|
|
526
531
|
struct ggml_tensor * ggml_reshape_2d(
|
527
532
|
struct ggml_context * ctx,
|
528
533
|
struct ggml_tensor * a,
|
529
|
-
|
530
|
-
|
534
|
+
int64_t ne0,
|
535
|
+
int64_t ne1);
|
531
536
|
|
532
537
|
// return view(a)
|
533
538
|
// TODO: when we start computing gradient, make a copy instead of view
|
534
539
|
struct ggml_tensor * ggml_reshape_3d(
|
535
540
|
struct ggml_context * ctx,
|
536
541
|
struct ggml_tensor * a,
|
537
|
-
|
538
|
-
|
539
|
-
|
542
|
+
int64_t ne0,
|
543
|
+
int64_t ne1,
|
544
|
+
int64_t ne2);
|
540
545
|
|
541
546
|
// offset in bytes
|
542
547
|
struct ggml_tensor * ggml_view_1d(
|
543
548
|
struct ggml_context * ctx,
|
544
549
|
struct ggml_tensor * a,
|
545
|
-
|
550
|
+
int64_t ne0,
|
546
551
|
size_t offset);
|
547
552
|
|
548
553
|
struct ggml_tensor * ggml_view_2d(
|
549
554
|
struct ggml_context * ctx,
|
550
555
|
struct ggml_tensor * a,
|
551
|
-
|
552
|
-
|
556
|
+
int64_t ne0,
|
557
|
+
int64_t ne1,
|
553
558
|
size_t nb1, // row stride in bytes
|
554
559
|
size_t offset);
|
555
560
|
|
561
|
+
struct ggml_tensor * ggml_view_3d(
|
562
|
+
struct ggml_context * ctx,
|
563
|
+
struct ggml_tensor * a,
|
564
|
+
int64_t ne0,
|
565
|
+
int64_t ne1,
|
566
|
+
int64_t ne2,
|
567
|
+
size_t nb1, // row stride in bytes
|
568
|
+
size_t nb2, // slice stride in bytes
|
569
|
+
size_t offset);
|
570
|
+
|
556
571
|
struct ggml_tensor * ggml_permute(
|
557
572
|
struct ggml_context * ctx,
|
558
573
|
struct ggml_tensor * a,
|