cui-llama.rn 1.1.4 → 1.1.6
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
- package/android/src/main/CMakeLists.txt +1 -0
- package/android/src/main/jni.cpp +3 -4
- package/cpp/common.cpp +183 -1990
- package/cpp/common.h +101 -130
- package/cpp/ggml-impl.h +32 -0
- package/cpp/ggml-metal.m +38 -28
- package/cpp/ggml-quants.c +275 -84
- package/cpp/ggml.c +89 -35
- package/cpp/ggml.h +30 -67
- package/cpp/llama-impl.h +1 -0
- package/cpp/llama-sampling.cpp +218 -102
- package/cpp/llama.cpp +599 -120
- package/cpp/llama.h +33 -25
- package/cpp/log.cpp +401 -0
- package/cpp/log.h +85 -703
- package/cpp/rn-llama.hpp +9 -11
- package/cpp/sampling.cpp +12 -9
- package/cpp/sampling.h +4 -56
- package/cpp/sgemm.cpp +38 -0
- package/package.json +1 -1
package/android/src/main/jni.cpp
CHANGED
@@ -523,7 +523,7 @@ Java_com_rnllama_LlamaContext_doCompletion(
|
|
523
523
|
}
|
524
524
|
}
|
525
525
|
|
526
|
-
|
526
|
+
llama_perf_context_print(llama->ctx);
|
527
527
|
llama->is_predicting = false;
|
528
528
|
|
529
529
|
auto result = createWriteableMap(env);
|
@@ -538,7 +538,7 @@ Java_com_rnllama_LlamaContext_doCompletion(
|
|
538
538
|
putString(env, result, "stopping_word", llama->stopping_word.c_str());
|
539
539
|
putInt(env, result, "tokens_cached", llama->n_past);
|
540
540
|
|
541
|
-
const auto timings_token =
|
541
|
+
const auto timings_token = llama_perf_context(llama -> ctx);
|
542
542
|
|
543
543
|
auto timingsResult = createWriteableMap(env);
|
544
544
|
putInt(env, timingsResult, "prompt_n", timings_token.n_p_eval);
|
@@ -635,8 +635,7 @@ Java_com_rnllama_LlamaContext_embedding(
|
|
635
635
|
|
636
636
|
llama->rewind();
|
637
637
|
|
638
|
-
|
639
|
-
llama_perf_reset(llama->ctx, LLAMA_PERF_TYPE_CONTEXT);
|
638
|
+
llama_perf_context_reset(llama->ctx);
|
640
639
|
gpt_sampler_reset(llama->ctx_sampling);
|
641
640
|
|
642
641
|
|