cui-llama.rn 1.0.6 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cpp/llama.h CHANGED
@@ -345,7 +345,7 @@ extern "C" {
     int32_t nthread; // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
     enum llama_ftype ftype; // quantize to this llama_ftype
     enum lm_ggml_type output_tensor_type; // output tensor type
-    enum lm_ggml_type token_embedding_type; // itoken embeddings tensor type
+    enum lm_ggml_type token_embedding_type; // token embeddings tensor type
     bool allow_requantize; // allow quantizing non-f32/f16 tensors
     bool quantize_output_tensor; // quantize output.weight
     bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
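These fields belong to llama_model_quantize_params. A minimal sketch of how a caller might fill them and pass them to llama_model_quantize(); the file names and the Q4_K_M target are placeholders, not part of this diff:

    #include "llama.h"

    // Hypothetical helper: quantize a GGUF file using the parameter block above.
    static bool quantize_to_q4_k_m(const char * fname_in, const char * fname_out) {
        llama_model_quantize_params qparams = llama_model_quantize_default_params();
        qparams.nthread = 0;                          // <= 0: use std::thread::hardware_concurrency()
        qparams.ftype   = LLAMA_FTYPE_MOSTLY_Q4_K_M;  // quantize to this llama_ftype
        qparams.quantize_output_tensor = true;        // also quantize output.weight
        return llama_model_quantize(fname_in, fname_out, &qparams) == 0;
    }
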
@@ -504,6 +504,9 @@ extern "C" {
504
504
  // Returns true if the model contains an encoder that requires llama_encode() call
505
505
  LLAMA_API bool llama_model_has_encoder(const struct llama_model * model);
506
506
 
507
+ // Returns true if the model contains a decoder that requires llama_decode() call
508
+ LLAMA_API bool llama_model_has_decoder(const struct llama_model * model);
509
+
507
510
  // For encoder-decoder models, this function returns id of the token that must be provided
508
511
  // to the decoder to start generating output sequence. For other models, it returns -1.
509
512
  LLAMA_API llama_token llama_model_decoder_start_token(const struct llama_model * model);
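The new llama_model_has_decoder() complements llama_model_has_encoder(). A sketch of how a caller might branch on the pair; the BOS fallback is an assumption, mirroring upstream encoder-decoder examples:

    #include "llama.h"

    // Hypothetical usage of the encoder/decoder queries for a loaded model.
    static void describe_model(const llama_model * model) {
        if (llama_model_has_encoder(model)) {
            // encoder-decoder model: the prompt goes through llama_encode() first,
            // then generation starts from the decoder start token
            llama_token dec_start = llama_model_decoder_start_token(model);
            if (dec_start == -1) {
                dec_start = llama_token_bos(model); // assumed fallback when no start token is defined
            }
            (void) dec_start;
        }
        if (llama_model_has_decoder(model)) {
            // decoder present: subsequent tokens are produced through llama_decode()
        }
    }
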
package/cpp/rn-llama.hpp CHANGED
@@ -6,13 +6,10 @@
 #include "common.h"
 #include "llama.h"
 
-
 #include <android/log.h>
 #define LLAMA_ANDROID_TAG "RNLLAMA_LOG_ANDROID"
 #define LLAMA_LOG_INFO(...) __android_log_print(ANDROID_LOG_INFO , LLAMA_ANDROID_TAG, __VA_ARGS__)
 
-
-
 namespace rnllama {
 
 static void llama_batch_clear(llama_batch *batch) {
@@ -227,7 +224,9 @@ struct llama_rn_context
     bool loadModel(gpt_params &params_)
     {
         params = params_;
-        std::tie(model, ctx) = llama_init_from_gpt_params(params);
+        llama_init_result result = llama_init_from_gpt_params(params);
+        model = result.model;
+        ctx = result.context;
         if (model == nullptr)
         {
             LOG_ERROR("unable to load model: %s", params_.model.c_str());
@@ -240,9 +239,11 @@ struct llama_rn_context
     bool validateModelChatTemplate() const {
         llama_chat_message chat[] = {{"user", "test"}};
 
-        const int res = llama_chat_apply_template(model, nullptr, chat, 1, true, nullptr, 0);
+        std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
+        std::string template_key = "tokenizer.chat_template";
+        int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
 
-        return res > 0;
+        return res >= 0;
     }
 
     void truncatePrompt(std::vector<llama_token> &prompt_tokens) {
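validateModelChatTemplate() now checks for the tokenizer.chat_template GGUF key directly instead of dry-running llama_chat_apply_template(). A minimal sketch of the same metadata lookup for another key (general.name chosen purely as an illustration):

    #include "llama.h"
    #include <string>
    #include <vector>

    // Sketch: llama_model_meta_val_str() writes the value into the buffer and returns
    // its length, or a negative value when the key is absent, hence `res >= 0` above.
    static std::string model_name(const llama_model * model) {
        std::vector<char> buf(2048, 0);
        int32_t res = llama_model_meta_val_str(model, "general.name", buf.data(), buf.size());
        return res >= 0 ? std::string(buf.data()) : std::string();
    }
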
@@ -102,7 +102,7 @@
   for (int i = 0; i < count; i++) {
       char key[256];
       llama_model_meta_key_by_index(llama->model, i, key, sizeof(key));
-      char val[256];
+      char val[2048];
       llama_model_meta_val_str_by_index(llama->model, i, val, sizeof(val));
 
       NSString *keyStr = [NSString stringWithUTF8String:key];
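The value buffer grows from 256 to 2048 bytes so long metadata values (chat templates in particular) are not truncated. The same enumeration written as a C++ sketch, with buffer sizes mirroring the change above:

    #include "llama.h"

    // Sketch: enumerate GGUF metadata with the index-based accessors; values longer
    // than the buffer are truncated, so the value buffer is sized generously.
    static void dump_metadata(const llama_model * model) {
        const int32_t count = llama_model_meta_count(model);
        for (int32_t i = 0; i < count; i++) {
            char key[256];
            char val[2048];
            llama_model_meta_key_by_index(model, i, key, sizeof(key));
            llama_model_meta_val_str_by_index(model, i, val, sizeof(val));
            // key and val are NUL-terminated C strings at this point
        }
    }
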
package/jest/mock.js CHANGED
@@ -10,6 +10,9 @@ if (!NativeModules.RNLlama) {
       }),
     ),
 
+    // TODO: Use jinja parser
+    getFormattedChat: jest.fn(() => ''),
+
     completion: jest.fn(async (contextId, jobId) => {
       const testResult = {
         text: '*giggles*',
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "cui-llama.rn",
-  "version": "1.0.6",
+  "version": "1.0.9",
   "description": "Fork of llama.rn for ChatterUI",
   "main": "lib/commonjs/index",
   "module": "lib/module/index",