llama_cpp 0.0.1 → 0.0.3
This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/README.md +3 -0
- data/ext/llama_cpp/llama_cpp.cpp +39 -1
- data/ext/llama_cpp/src/ggml.c +914 -509
- data/ext/llama_cpp/src/ggml.h +42 -27
- data/ext/llama_cpp/src/llama.cpp +293 -303
- data/ext/llama_cpp/src/llama.h +19 -2
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +6 -2
- data/sig/llama_cpp.rbs +52 -0
- metadata +3 -2
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -258,11 +258,11 @@ struct ggml_tensor {
     enum ggml_type type;

     int    n_dims;
-    int    ne[GGML_MAX_DIMS]; // number of elements
-    size_t nb[GGML_MAX_DIMS]; // stride in bytes:
-                              // nb[0] = sizeof(type)
-                              // nb[1] = nb[0] * ne[0] + padding
-                              // nb[i] = nb[i-1] * ne[i-1]
+    int64_t ne[GGML_MAX_DIMS]; // number of elements
+    size_t  nb[GGML_MAX_DIMS]; // stride in bytes:
+                               // nb[0] = sizeof(type)
+                               // nb[1] = nb[0] * ne[0] + padding
+                               // nb[i] = nb[i-1] * ne[i-1]

     // compute data
     enum ggml_op op;
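For orientation (not part of the diff): the `nb[]` invariants documented above mean an element's address is a plain byte-offset computation. A minimal sketch, using a hypothetical helper name and assuming a non-quantized F32 tensor:

    // Illustrative sketch only (not code from the package): locate element
    // (i0, i1, i2, i3) of an F32 tensor via the nb[] stride invariants above.
    static float * tensor_elem_f32(const struct ggml_tensor * t,
                                   int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
        char * base = (char *) t->data;
        return (float *) (base + i0*t->nb[0] + i1*t->nb[1]
                               + i2*t->nb[2] + i3*t->nb[3]);
    }

Widening `ne` to `int64_t` is what allows these offsets to stay correct for tensors whose element counts exceed the 32-bit range.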
@@ -316,6 +316,7 @@ struct ggml_init_params {
     // memory pool
     size_t mem_size;   // bytes
     void * mem_buffer; // if NULL, memory will be allocated internally
+    bool   no_alloc;   // don't allocate memory for the tensor data

 };

 void ggml_time_init(void); // call this once at the beginning of the program
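The new `no_alloc` flag lets a context hold tensor metadata without allocating backing data buffers, e.g. when tensor data lives in an externally managed buffer. A minimal usage sketch with hypothetical sizes (not code from the package):

    struct ggml_init_params params = {
        .mem_size   = 16*1024*1024, // hypothetical 16 MiB pool
        .mem_buffer = NULL,         // let ggml allocate the pool itself
        .no_alloc   = false,        // set true to skip tensor data allocation
    };
    struct ggml_context * ctx = ggml_init(params);
    // ... build tensors and graphs ...
    ggml_free(ctx);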
@@ -327,8 +328,8 @@ int64_t ggml_cycles_per_ms(void);
 void ggml_print_object (const struct ggml_object * obj);
 void ggml_print_objects(const struct ggml_context * ctx);

-int    ggml_nelements(const struct ggml_tensor * tensor);
-size_t ggml_nbytes   (const struct ggml_tensor * tensor);
+int64_t ggml_nelements(const struct ggml_tensor * tensor);
+size_t  ggml_nbytes   (const struct ggml_tensor * tensor);

 int    ggml_blck_size (enum ggml_type type);
 size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
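Callers should widen their receiving types accordingly, since large model tensors can overflow a 32-bit element count. A sketch, assuming `t` is a valid `struct ggml_tensor *`:

    #include <inttypes.h>
    #include <stdio.h>

    int64_t n     = ggml_nelements(t); // returned int before this change
    size_t  bytes = ggml_nbytes(t);
    printf("%" PRId64 " elements, %zu bytes\n", n, bytes);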
@@ -344,39 +345,43 @@ size_t ggml_used_mem(const struct ggml_context * ctx);
 size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);

 bool ggml_mlock_supported(void);
-bool ggml_mlock(struct ggml_context * ctx, char ** err_p);
+bool ggml_mlock(
+        struct ggml_context * ctx,
+        const void *opt_extra_addr,
+        size_t opt_extra_len,
+        char **err_p);

 struct ggml_tensor * ggml_new_tensor(
         struct ggml_context * ctx,
         enum   ggml_type type,
         int    n_dims,
-        const int *ne);
+        const int64_t *ne);

 struct ggml_tensor * ggml_new_tensor_1d(
         struct ggml_context * ctx,
         enum   ggml_type type,
-        int    ne0);
+        int64_t ne0);

 struct ggml_tensor * ggml_new_tensor_2d(
         struct ggml_context * ctx,
         enum   ggml_type type,
-        int    ne0,
-        int    ne1);
+        int64_t ne0,
+        int64_t ne1);

 struct ggml_tensor * ggml_new_tensor_3d(
         struct ggml_context * ctx,
         enum   ggml_type type,
-        int    ne0,
-        int    ne1,
-        int    ne2);
+        int64_t ne0,
+        int64_t ne1,
+        int64_t ne2);

 struct ggml_tensor * ggml_new_tensor_4d(
         struct ggml_context * ctx,
         enum   ggml_type type,
-        int    ne0,
-        int    ne1,
-        int    ne2,
-        int    ne3);
+        int64_t ne0,
+        int64_t ne1,
+        int64_t ne2,
+        int64_t ne3);

 struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
 struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
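All tensor constructors now take `int64_t` dimensions. A minimal sketch of the updated call shape, with hypothetical sizes and an assumed existing `ctx`:

    // Sketch (hypothetical sizes): dimension arguments are now int64_t,
    // so element counts above INT_MAX no longer truncate.
    struct ggml_tensor * w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4096, 32000);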
@@ -526,33 +531,43 @@ struct ggml_tensor * ggml_reshape(
 struct ggml_tensor * ggml_reshape_2d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        int    ne0,
-        int    ne1);
+        int64_t ne0,
+        int64_t ne1);

 // return view(a)
 // TODO: when we start computing gradient, make a copy instead of view
 struct ggml_tensor * ggml_reshape_3d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        int    ne0,
-        int    ne1,
-        int    ne2);
+        int64_t ne0,
+        int64_t ne1,
+        int64_t ne2);

 // offset in bytes
 struct ggml_tensor * ggml_view_1d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        int    ne0,
+        int64_t ne0,
         size_t offset);

 struct ggml_tensor * ggml_view_2d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        int    ne0,
-        int    ne1,
+        int64_t ne0,
+        int64_t ne1,
         size_t nb1, // row stride in bytes
         size_t offset);

+struct ggml_tensor * ggml_view_3d(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        int64_t ne0,
+        int64_t ne1,
+        int64_t ne2,
+        size_t  nb1, // row stride in bytes
+        size_t  nb2, // slice stride in bytes
+        size_t  offset);
+
 struct ggml_tensor * ggml_permute(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
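`ggml_view_3d` is new in this release; it generalizes `ggml_view_2d` with a third dimension and a slice stride. A hedged usage sketch, assuming `ctx` and a contiguous 8x8x4 F32 tensor `a` already exist:

    // Sketch (not from the package): take a 4x4x2 sub-view of `a`,
    // reusing a's own strides so the view aliases the original data.
    struct ggml_tensor * v = ggml_view_3d(ctx, a,
                                          4, 4, 2,  // ne0, ne1, ne2
                                          a->nb[1], // row stride in bytes
                                          a->nb[2], // slice stride in bytes
                                          0);       // byte offset into a

Because the view shares `a`'s data, writes through `v` are visible in `a`; only the tensor metadata is newly allocated from the context pool.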