@novastera-oss/llamarn 0.2.2 → 0.2.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -157,7 +157,7 @@ jsi::Value PureCppImpl::initLlama(jsi::Runtime &runtime, jsi::Object options) {
157
157
  SystemUtils::setIfExists(runtime, options, "use_mmap", params.use_mmap);
158
158
  SystemUtils::setIfExists(runtime, options, "use_mlock", params.use_mlock);
159
159
  SystemUtils::setIfExists(runtime, options, "use_jinja", params.use_jinja);
160
-
160
+
161
161
  // Extract threading parameters (preserve custom thread logic)
162
162
  int n_threads = 0; // 0 = auto
163
163
  if (options.hasProperty(runtime, "n_threads")) {
@@ -283,42 +283,24 @@ jsi::Value PureCppImpl::initLlama(jsi::Runtime &runtime, jsi::Object options) {
283
283
  // Set additional fields
284
284
  rn_params.use_jinja = params.use_jinja;
285
285
  rn_params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
286
- // Use chatml format by default instead of content-only for better tool support
287
- rn_params.chat_format = COMMON_CHAT_FORMAT_GENERIC;
286
+ // Don't force a specific chat format - let the template system auto-detect based on model and tools
287
+ // rn_params.chat_format = COMMON_CHAT_FORMAT_GENERIC;
288
288
  // Now assign to the context
289
289
  rn_ctx_->params = rn_params;
290
290
 
291
- // Initialize chat templates with proper error handling
291
+ rn_ctx_->chat_templates = common_chat_templates_init(rn_ctx_->model, params.chat_template);
292
292
  try {
293
- // Get BOS and EOS tokens if provided in options
294
- std::string bos_token_override;
295
- std::string eos_token_override;
296
-
297
- SystemUtils::setIfExists(runtime, options, "bos_token", bos_token_override);
298
- SystemUtils::setIfExists(runtime, options, "eos_token", eos_token_override);
299
-
300
- rn_ctx_->chat_templates = common_chat_templates_init(
301
- rn_ctx_->model,
302
- params.chat_template,
303
- bos_token_override,
304
- eos_token_override
305
- );
306
-
307
- if (!rn_ctx_->chat_templates) {
308
- throw std::runtime_error("Failed to initialize chat templates");
309
- }
310
- } catch (const std::exception& e) {
311
- // Log warning and fallback to chatml
312
- fprintf(stderr, "Warning: Failed to initialize chat template: %s. Falling back to chatml.\n", e.what());
293
+ common_chat_format_example(rn_ctx_->chat_templates.get(), params.use_jinja);
294
+ } catch (const std::exception & e) {
295
+ // Fallback to chatml if the original template parsing fails
313
296
  rn_ctx_->chat_templates = common_chat_templates_init(rn_ctx_->model, "chatml");
314
- if (!rn_ctx_->chat_templates) {
315
- throw std::runtime_error("Failed to initialize fallback chatml template");
316
- }
317
297
  }
298
+
318
299
 
319
300
  // Create the model object and return it
320
301
  return createModelObject(runtime, rn_ctx_.get());
321
302
  } catch (const std::exception& e) {
303
+ // We can keep this top-level error log as it's for initialization failure
322
304
  fprintf(stderr, "initLlama error: %s\n", e.what());
323
305
  throw jsi::JSError(runtime, e.what());
324
306
  }
package/cpp/SystemUtils.h CHANGED
@@ -44,8 +44,8 @@ public:
44
44
  * Helper functions to easily set values from a JSI object if the property exists.
45
45
  * Returns true if the property was found and the value was set.
46
46
  */
47
- // Template for all numeric types
48
- template<typename T, typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>
47
+ // Template for numeric types (excluding bool so bool specialization is used)
48
+ template<typename T, typename = typename std::enable_if<std::is_arithmetic<T>::value && !std::is_same<T, bool>::value>::type>
49
49
  static bool setIfExists(jsi::Runtime& rt, const jsi::Object& options, const std::string& key, T& outValue) {
50
50
  if (options.hasProperty(rt, key.c_str())) {
51
51
  jsi::Value val = options.getProperty(rt, key.c_str());
@@ -350,7 +350,7 @@ CompletionResult run_chat_completion(
350
350
  common_chat_templates_inputs template_inputs;
351
351
  template_inputs.messages = chat_msgs;
352
352
  template_inputs.add_generation_prompt = true;
353
- template_inputs.use_jinja = options.use_jinja;
353
+ template_inputs.use_jinja = rn_ctx->params.use_jinja;
354
354
  // Note: extract_reasoning field doesn't exist in current llama.cpp version
355
355
  // template_inputs.extract_reasoning = true; // Default to true to extract reasoning content if available
356
356
 
@@ -391,6 +391,31 @@ CompletionResult run_chat_completion(
391
391
  result = run_completion(rn_ctx, cmpl_options, callback);
392
392
 
393
393
  if (result.success) {
394
+ // Parse the generated content for tool calls and structured responses
395
+ common_chat_msg parsed_msg;
396
+ bool has_parsed_content = false;
397
+
398
+ // Only parse if we have tools available and the response isn't empty
399
+ if (!template_inputs.tools.empty() && !result.content.empty()) {
400
+ try {
401
+ // Construct the chat syntax for parsing using the format from template application
402
+ common_chat_syntax syntax;
403
+ syntax.format = chat_params.format; // Use format from template, not from params
404
+ syntax.reasoning_format = rn_ctx->params.reasoning_format;
405
+ syntax.reasoning_in_content = true;
406
+ syntax.thinking_forced_open = false;
407
+ syntax.parse_tool_calls = true;
408
+
409
+ // Parse the generated content for tool calls
410
+ parsed_msg = common_chat_parse(result.content, false, syntax);
411
+ has_parsed_content = true;
412
+
413
+ } catch (const std::exception& e) {
414
+ // If parsing fails, treat as regular content
415
+ has_parsed_content = false;
416
+ }
417
+ }
418
+
394
419
  // Create OpenAI-compatible response
395
420
  json response = {
396
421
  {"id", gen_chatcmplid()},
@@ -403,11 +428,39 @@ CompletionResult run_chat_completion(
403
428
  json choice = {
404
429
  {"index", 0},
405
430
  {"message", {
406
- {"role", "assistant"},
407
- {"content", result.content}
431
+ {"role", "assistant"}
408
432
  }},
409
433
  {"finish_reason", "stop"}
410
434
  };
435
+
436
+ // Add parsed content and tool calls if available
437
+ if (has_parsed_content && !parsed_msg.tool_calls.empty()) {
438
+ // Set content to the parsed content (may be null for tool-only responses)
439
+ if (!parsed_msg.content.empty()) {
440
+ choice["message"]["content"] = parsed_msg.content;
441
+ } else {
442
+ choice["message"]["content"] = nullptr;
443
+ }
444
+
445
+ // Add tool calls to the message
446
+ json tool_calls = json::array();
447
+ for (const auto& tool_call : parsed_msg.tool_calls) {
448
+ json tc = {
449
+ {"id", tool_call.id.empty() ? ("call_" + std::to_string(std::rand())) : tool_call.id},
450
+ {"type", "function"},
451
+ {"function", {
452
+ {"name", tool_call.name},
453
+ {"arguments", tool_call.arguments}
454
+ }}
455
+ };
456
+ tool_calls.push_back(tc);
457
+ }
458
+ choice["message"]["tool_calls"] = tool_calls;
459
+ choice["finish_reason"] = "tool_calls";
460
+ } else {
461
+ // Regular text response
462
+ choice["message"]["content"] = has_parsed_content ? parsed_msg.content : result.content;
463
+ }
411
464
 
412
465
  choices.push_back(choice);
413
466
  response["choices"] = choices;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@novastera-oss/llamarn",
3
- "version": "0.2.2",
3
+ "version": "0.2.3",
4
4
  "description": "An attempt at a pure cpp turbo module library",
5
5
  "source": "./src/index.tsx",
6
6
  "main": "./lib/module/index.js",