cui-llama.rn 1.4.3 → 1.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134):
  1. package/README.md +93 -114
  2. package/android/src/main/CMakeLists.txt +5 -0
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +91 -17
  4. package/android/src/main/java/com/rnllama/RNLlama.java +37 -4
  5. package/android/src/main/jni-utils.h +6 -0
  6. package/android/src/main/jni.cpp +289 -31
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  14. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  15. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +7 -2
  16. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +7 -2
  17. package/cpp/chat-template.hpp +529 -0
  18. package/cpp/chat.cpp +1779 -0
  19. package/cpp/chat.h +135 -0
  20. package/cpp/common.cpp +2064 -1873
  21. package/cpp/common.h +700 -699
  22. package/cpp/ggml-alloc.c +1039 -1042
  23. package/cpp/ggml-alloc.h +1 -1
  24. package/cpp/ggml-backend-impl.h +255 -255
  25. package/cpp/ggml-backend-reg.cpp +586 -582
  26. package/cpp/ggml-backend.cpp +2004 -2002
  27. package/cpp/ggml-backend.h +354 -354
  28. package/cpp/ggml-common.h +1851 -1853
  29. package/cpp/ggml-cpp.h +39 -39
  30. package/cpp/ggml-cpu-aarch64.cpp +4248 -4247
  31. package/cpp/ggml-cpu-aarch64.h +8 -8
  32. package/cpp/ggml-cpu-impl.h +531 -386
  33. package/cpp/ggml-cpu-quants.c +12527 -10920
  34. package/cpp/ggml-cpu-traits.cpp +36 -36
  35. package/cpp/ggml-cpu-traits.h +38 -38
  36. package/cpp/ggml-cpu.c +15766 -14391
  37. package/cpp/ggml-cpu.cpp +655 -635
  38. package/cpp/ggml-cpu.h +138 -135
  39. package/cpp/ggml-impl.h +567 -567
  40. package/cpp/ggml-metal-impl.h +235 -0
  41. package/cpp/ggml-metal.h +1 -1
  42. package/cpp/ggml-metal.m +5146 -4884
  43. package/cpp/ggml-opt.cpp +854 -854
  44. package/cpp/ggml-opt.h +216 -216
  45. package/cpp/ggml-quants.c +5238 -5238
  46. package/cpp/ggml-threading.h +14 -14
  47. package/cpp/ggml.c +6529 -6514
  48. package/cpp/ggml.h +2198 -2194
  49. package/cpp/gguf.cpp +1329 -1329
  50. package/cpp/gguf.h +202 -202
  51. package/cpp/json-schema-to-grammar.cpp +1024 -1045
  52. package/cpp/json-schema-to-grammar.h +21 -8
  53. package/cpp/json.hpp +24766 -24766
  54. package/cpp/llama-adapter.cpp +347 -347
  55. package/cpp/llama-adapter.h +74 -74
  56. package/cpp/llama-arch.cpp +1513 -1487
  57. package/cpp/llama-arch.h +403 -400
  58. package/cpp/llama-batch.cpp +368 -368
  59. package/cpp/llama-batch.h +88 -88
  60. package/cpp/llama-chat.cpp +588 -578
  61. package/cpp/llama-chat.h +53 -52
  62. package/cpp/llama-context.cpp +1775 -1775
  63. package/cpp/llama-context.h +128 -128
  64. package/cpp/llama-cparams.cpp +1 -1
  65. package/cpp/llama-cparams.h +37 -37
  66. package/cpp/llama-cpp.h +30 -30
  67. package/cpp/llama-grammar.cpp +1219 -1139
  68. package/cpp/llama-grammar.h +173 -143
  69. package/cpp/llama-hparams.cpp +71 -71
  70. package/cpp/llama-hparams.h +139 -139
  71. package/cpp/llama-impl.cpp +167 -167
  72. package/cpp/llama-impl.h +61 -61
  73. package/cpp/llama-kv-cache.cpp +718 -718
  74. package/cpp/llama-kv-cache.h +219 -218
  75. package/cpp/llama-mmap.cpp +600 -590
  76. package/cpp/llama-mmap.h +68 -67
  77. package/cpp/llama-model-loader.cpp +1124 -1124
  78. package/cpp/llama-model-loader.h +167 -167
  79. package/cpp/llama-model.cpp +4087 -3997
  80. package/cpp/llama-model.h +370 -370
  81. package/cpp/llama-sampling.cpp +2558 -2408
  82. package/cpp/llama-sampling.h +32 -32
  83. package/cpp/llama-vocab.cpp +3264 -3247
  84. package/cpp/llama-vocab.h +125 -125
  85. package/cpp/llama.cpp +10284 -10077
  86. package/cpp/llama.h +1354 -1323
  87. package/cpp/log.cpp +393 -401
  88. package/cpp/log.h +132 -121
  89. package/cpp/minja/chat-template.hpp +529 -0
  90. package/cpp/minja/minja.hpp +2915 -0
  91. package/cpp/minja.hpp +2915 -0
  92. package/cpp/rn-llama.cpp +66 -6
  93. package/cpp/rn-llama.h +26 -1
  94. package/cpp/sampling.cpp +570 -505
  95. package/cpp/sampling.h +3 -0
  96. package/cpp/sgemm.cpp +2598 -2597
  97. package/cpp/sgemm.h +14 -14
  98. package/cpp/speculative.cpp +278 -277
  99. package/cpp/speculative.h +28 -28
  100. package/cpp/unicode.cpp +9 -2
  101. package/ios/CMakeLists.txt +6 -0
  102. package/ios/RNLlama.h +0 -8
  103. package/ios/RNLlama.mm +27 -3
  104. package/ios/RNLlamaContext.h +10 -1
  105. package/ios/RNLlamaContext.mm +269 -57
  106. package/jest/mock.js +21 -2
  107. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  108. package/lib/commonjs/grammar.js +3 -0
  109. package/lib/commonjs/grammar.js.map +1 -1
  110. package/lib/commonjs/index.js +87 -13
  111. package/lib/commonjs/index.js.map +1 -1
  112. package/lib/module/NativeRNLlama.js.map +1 -1
  113. package/lib/module/grammar.js +3 -0
  114. package/lib/module/grammar.js.map +1 -1
  115. package/lib/module/index.js +86 -13
  116. package/lib/module/index.js.map +1 -1
  117. package/lib/typescript/NativeRNLlama.d.ts +107 -2
  118. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  119. package/lib/typescript/grammar.d.ts.map +1 -1
  120. package/lib/typescript/index.d.ts +32 -7
  121. package/lib/typescript/index.d.ts.map +1 -1
  122. package/llama-rn.podspec +1 -1
  123. package/package.json +3 -2
  124. package/src/NativeRNLlama.ts +115 -3
  125. package/src/grammar.ts +3 -0
  126. package/src/index.ts +138 -21
  127. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +0 -81
  128. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +0 -15
  129. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +0 -904
  130. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
  131. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +0 -919
  132. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
  133. package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +0 -55
  134. package/cpp/rn-llama.hpp +0 -913
@@ -285,4 +285,239 @@ typedef struct {
285
285
  float eps;
286
286
  } ggml_metal_kargs_rms_norm;
287
287
 
288
+ typedef struct {
289
+ int64_t ne00;
290
+ int64_t ne01;
291
+ int64_t ne02;
292
+ uint64_t nb00;
293
+ uint64_t nb01;
294
+ uint64_t nb02;
295
+ int32_t n_groups;
296
+ float eps;
297
+ } ggml_metal_kargs_group_norm;
298
+
299
+ typedef struct {
300
+ int32_t IC;
301
+ int32_t IL;
302
+ int32_t K;
303
+ int32_t s0;
304
+ uint64_t nb0;
305
+ uint64_t nb1;
306
+ } ggml_metal_kargs_conv_transpose_1d;
307
+
308
+ typedef struct {
309
+ uint64_t ofs0;
310
+ uint64_t ofs1;
311
+ int32_t IW;
312
+ int32_t IH;
313
+ int32_t CHW;
314
+ int32_t s0;
315
+ int32_t s1;
316
+ int32_t p0;
317
+ int32_t p1;
318
+ int32_t d0;
319
+ int32_t d1;
320
+ int32_t N;
321
+ int32_t KH;
322
+ int32_t KW;
323
+ int32_t KHW; // KH * KW, pre-computed on CPU to save GPU resources
324
+ } ggml_metal_kargs_im2col;
325
+
326
+ typedef struct {
327
+ int64_t ne00;
328
+ int64_t ne01;
329
+ int64_t ne02;
330
+ int64_t ne03;
331
+ uint64_t nb00;
332
+ uint64_t nb01;
333
+ uint64_t nb02;
334
+ uint64_t nb03;
335
+ int64_t ne10;
336
+ int64_t ne11;
337
+ int64_t ne12;
338
+ int64_t ne13;
339
+ uint64_t nb10;
340
+ uint64_t nb11;
341
+ uint64_t nb12;
342
+ uint64_t nb13;
343
+ int64_t ne0;
344
+ int64_t ne1;
345
+ int64_t ne2;
346
+ int64_t ne3;
347
+ uint64_t nb0;
348
+ uint64_t nb1;
349
+ uint64_t nb2;
350
+ uint64_t nb3;
351
+ } ggml_metal_kargs_sum_rows;
352
+
353
+ typedef struct {
354
+ int64_t ne00;
355
+ int64_t ne01;
356
+ int64_t ne02;
357
+ float scale;
358
+ float max_bias;
359
+ float m0;
360
+ float m1;
361
+ uint32_t n_head_log2;
362
+ } ggml_metal_kargs_soft_max;
363
+
364
+ typedef struct {
365
+ int64_t ne00;
366
+ int64_t ne01;
367
+ int n_past;
368
+ } ggml_metal_kargs_diag_mask_inf;
369
+
370
+ typedef struct {
371
+ int64_t ne00;
372
+ int64_t ne01;
373
+ int64_t ne02;
374
+ uint64_t nb00;
375
+ uint64_t nb01;
376
+ uint64_t nb02;
377
+ int64_t ne10;
378
+ int64_t ne11;
379
+ uint64_t nb10;
380
+ uint64_t nb11;
381
+ int64_t ne0;
382
+ int64_t ne1;
383
+ int64_t ne2;
384
+ uint64_t nb0;
385
+ uint64_t nb1;
386
+ uint64_t nb2;
387
+ } ggml_metal_kargs_ssm_conv;
388
+
389
+ typedef struct {
390
+ int64_t d_state;
391
+ int64_t d_inner;
392
+ int64_t n_seq_tokens;
393
+ int64_t n_seqs;
394
+ uint64_t nb00;
395
+ uint64_t nb01;
396
+ uint64_t nb02;
397
+ uint64_t nb10;
398
+ uint64_t nb11;
399
+ uint64_t nb12;
400
+ uint64_t nb13;
401
+ uint64_t nb20;
402
+ uint64_t nb21;
403
+ uint64_t nb22;
404
+ uint64_t nb30;
405
+ uint64_t nb31;
406
+ uint64_t nb40;
407
+ uint64_t nb41;
408
+ uint64_t nb42;
409
+ uint64_t nb50;
410
+ uint64_t nb51;
411
+ uint64_t nb52;
412
+ } ggml_metal_kargs_ssm_scan;
413
+
414
+ typedef struct {
415
+ int64_t ne00;
416
+ uint64_t nb01;
417
+ uint64_t nb02;
418
+ int64_t ne10;
419
+ uint64_t nb10;
420
+ uint64_t nb11;
421
+ uint64_t nb1;
422
+ uint64_t nb2;
423
+ } ggml_metal_kargs_get_rows;
424
+
425
+ typedef struct {
426
+ int64_t ne00;
427
+ int64_t ne01;
428
+ int64_t ne02;
429
+ int64_t ne03;
430
+ uint64_t nb00;
431
+ uint64_t nb01;
432
+ uint64_t nb02;
433
+ uint64_t nb03;
434
+ int64_t ne0;
435
+ int64_t ne1;
436
+ int64_t ne2;
437
+ int64_t ne3;
438
+ uint64_t nb0;
439
+ uint64_t nb1;
440
+ uint64_t nb2;
441
+ uint64_t nb3;
442
+ float sf0;
443
+ float sf1;
444
+ float sf2;
445
+ float sf3;
446
+ } ggml_metal_kargs_upscale;
447
+
448
+ typedef struct {
449
+ int64_t ne00;
450
+ int64_t ne01;
451
+ int64_t ne02;
452
+ int64_t ne03;
453
+ uint64_t nb00;
454
+ uint64_t nb01;
455
+ uint64_t nb02;
456
+ uint64_t nb03;
457
+ int64_t ne0;
458
+ int64_t ne1;
459
+ int64_t ne2;
460
+ int64_t ne3;
461
+ uint64_t nb0;
462
+ uint64_t nb1;
463
+ uint64_t nb2;
464
+ uint64_t nb3;
465
+ } ggml_metal_kargs_pad;
466
+
467
+ typedef struct {
468
+ int64_t ne00;
469
+ int64_t ne01;
470
+ int64_t ne02;
471
+ int64_t ne03;
472
+ uint64_t nb00;
473
+ uint64_t nb01;
474
+ uint64_t nb02;
475
+ uint64_t nb03;
476
+ int64_t ne0;
477
+ int64_t ne1;
478
+ int64_t ne2;
479
+ int64_t ne3;
480
+ uint64_t nb0;
481
+ uint64_t nb1;
482
+ uint64_t nb2;
483
+ uint64_t nb3;
484
+ int32_t p0;
485
+ int32_t p1;
486
+ } ggml_metal_kargs_pad_reflect_1d;
487
+
488
+ typedef struct {
489
+ uint64_t nb1;
490
+ int dim;
491
+ int max_period;
492
+ } ggml_metal_kargs_timestep_embedding;
493
+
494
+ typedef struct {
495
+ float slope;
496
+ } ggml_metal_kargs_leaky_relu;
497
+
498
+ typedef struct {
499
+ int64_t ncols;
500
+ int64_t ncols_pad;
501
+ } ggml_metal_kargs_argsort;
502
+
503
+ typedef struct {
504
+ int64_t ne0;
505
+ float start;
506
+ float step;
507
+ } ggml_metal_kargs_arange;
508
+
509
+ typedef struct {
510
+ int32_t k0;
511
+ int32_t k1;
512
+ int32_t s0;
513
+ int32_t s1;
514
+ int32_t p0;
515
+ int32_t p1;
516
+ int64_t IH;
517
+ int64_t IW;
518
+ int64_t OH;
519
+ int64_t OW;
520
+ int64_t parallel_elements;
521
+ } ggml_metal_kargs_pool_2d;
522
+
288
523
  #endif // GGML_METAL_IMPL
package/cpp/ggml-metal.h CHANGED
@@ -45,7 +45,7 @@ LM_GGML_BACKEND_API bool lm_ggml_backend_is_metal(lm_ggml_backend_t backend);
45
45
 
46
46
  LM_GGML_DEPRECATED(
47
47
  LM_GGML_BACKEND_API lm_ggml_backend_buffer_t lm_ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size),
48
- "obsoleted by the new device interface - https://github.com/ggerganov/llama.cpp/pull/9713");
48
+ "obsoleted by the new device interface - https://github.com/ggml-org/llama.cpp/pull/9713");
49
49
 
50
50
  LM_GGML_BACKEND_API void lm_ggml_backend_metal_set_abort_callback(lm_ggml_backend_t backend, lm_ggml_abort_callback abort_callback, void * user_data);
51
51