npm - @novastera-oss/llamarn - Versions diffs - 0.2.2 → 0.2.3 - Mend

@novastera-oss/llamarn 0.2.2 → 0.2.3

This diff shows the differences between the contents of two publicly available package versions, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (4) [hide] [show]

package/cpp/PureCppImpl.cpp CHANGED Viewed

@@ -157,7 +157,7 @@ jsi::Value PureCppImpl::initLlama(jsi::Runtime &runtime, jsi::Object options) {
     SystemUtils::setIfExists(runtime, options, "use_mmap", params.use_mmap);
     SystemUtils::setIfExists(runtime, options, "use_mlock", params.use_mlock);
     SystemUtils::setIfExists(runtime, options, "use_jinja", params.use_jinja);
     // Extract threading parameters (preserve custom thread logic)
     int n_threads = 0; // 0 = auto
     if (options.hasProperty(runtime, "n_threads")) {
@@ -283,42 +283,24 @@ jsi::Value PureCppImpl::initLlama(jsi::Runtime &runtime, jsi::Object options) {
     // Set additional fields
     rn_params.use_jinja = params.use_jinja;
     rn_params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
-    // Use chatml format by default instead of content-only for better tool support
-    rn_params.chat_format = COMMON_CHAT_FORMAT_GENERIC;
+    // Don't force a specific chat format - let the template system auto-detect based on model and tools
+    // rn_params.chat_format = COMMON_CHAT_FORMAT_GENERIC;
     // Now assign to the context
     rn_ctx_->params = rn_params;
-    // Initialize chat templates with proper error handling
+    rn_ctx_->chat_templates = common_chat_templates_init(rn_ctx_->model, params.chat_template);
     try {
-        // Get BOS and EOS tokens if provided in options
-        std::string bos_token_override;
-        std::string eos_token_override;
-        SystemUtils::setIfExists(runtime, options, "bos_token", bos_token_override);
-        SystemUtils::setIfExists(runtime, options, "eos_token", eos_token_override);
-        rn_ctx_->chat_templates = common_chat_templates_init(
-            rn_ctx_->model,
-            params.chat_template,
-            bos_token_override,
-            eos_token_override
-        );
-        if (!rn_ctx_->chat_templates) {
-            throw std::runtime_error("Failed to initialize chat templates");
-        }
-    } catch (const std::exception& e) {
-        // Log warning and fallback to chatml
-        fprintf(stderr, "Warning: Failed to initialize chat template: %s. Falling back to chatml.\n", e.what());
+        common_chat_format_example(rn_ctx_->chat_templates.get(), params.use_jinja);
+    } catch (const std::exception & e) {
+        // Fallback to chatml if the original template parsing fails
         rn_ctx_->chat_templates = common_chat_templates_init(rn_ctx_->model, "chatml");
-        if (!rn_ctx_->chat_templates) {
-            throw std::runtime_error("Failed to initialize fallback chatml template");
-        }
     }
     // Create the model object and return it
     return createModelObject(runtime, rn_ctx_.get());
   } catch (const std::exception& e) {
+    // We can keep this top-level error log as it's for initialization failure
     fprintf(stderr, "initLlama error: %s\n", e.what());
     throw jsi::JSError(runtime, e.what());
   }

package/cpp/SystemUtils.h CHANGED Viewed

@@ -44,8 +44,8 @@ public:
    * Helper functions to easily set values from a JSI object if the property exists.
    * Returns true if the property was found and the value was set.
    */
-  // Template for all numeric types
-  template<typename T, typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>
+  // Template for numeric types (excluding bool so bool specialization is used)
+  template<typename T, typename = typename std::enable_if<std::is_arithmetic<T>::value && !std::is_same<T, bool>::value>::type>
   static bool setIfExists(jsi::Runtime& rt, const jsi::Object& options, const std::string& key, T& outValue) {
     if (options.hasProperty(rt, key.c_str())) {
       jsi::Value val = options.getProperty(rt, key.c_str());

package/cpp/rn-completion.cpp CHANGED Viewed

@@ -350,7 +350,7 @@ CompletionResult run_chat_completion(
         common_chat_templates_inputs template_inputs;
         template_inputs.messages = chat_msgs;
         template_inputs.add_generation_prompt = true;
-        template_inputs.use_jinja = options.use_jinja;
+        template_inputs.use_jinja = rn_ctx->params.use_jinja;
         // Note: extract_reasoning field doesn't exist in current llama.cpp version
         // template_inputs.extract_reasoning = true; // Default to true to extract reasoning content if available
@@ -391,6 +391,31 @@ CompletionResult run_chat_completion(
         result = run_completion(rn_ctx, cmpl_options, callback);
         if (result.success) {
+            // Parse the generated content for tool calls and structured responses
+            common_chat_msg parsed_msg;
+            bool has_parsed_content = false;
+            // Only parse if we have tools available and the response isn't empty
+            if (!template_inputs.tools.empty() && !result.content.empty()) {
+                try {
+                    // Construct the chat syntax for parsing using the format from template application
+                    common_chat_syntax syntax;
+                    syntax.format = chat_params.format;  // Use format from template, not from params
+                    syntax.reasoning_format = rn_ctx->params.reasoning_format;
+                    syntax.reasoning_in_content = true;
+                    syntax.thinking_forced_open = false;
+                    syntax.parse_tool_calls = true;
+                    // Parse the generated content for tool calls
+                    parsed_msg = common_chat_parse(result.content, false, syntax);
+                    has_parsed_content = true;
+                } catch (const std::exception& e) {
+                    // If parsing fails, treat as regular content
+                    has_parsed_content = false;
+                }
+            }
             // Create OpenAI-compatible response
             json response = {
                 {"id", gen_chatcmplid()},
@@ -403,11 +428,39 @@ CompletionResult run_chat_completion(
             json choice = {
                 {"index", 0},
                 {"message", {
-                    {"role", "assistant"},
-                    {"content", result.content}
+                    {"role", "assistant"}
                 }},
                 {"finish_reason", "stop"}
             };
+            // Add parsed content and tool calls if available
+            if (has_parsed_content && !parsed_msg.tool_calls.empty()) {
+                // Set content to the parsed content (may be null for tool-only responses)
+                if (!parsed_msg.content.empty()) {
+                    choice["message"]["content"] = parsed_msg.content;
+                } else {
+                    choice["message"]["content"] = nullptr;
+                }
+                // Add tool calls to the message
+                json tool_calls = json::array();
+                for (const auto& tool_call : parsed_msg.tool_calls) {
+                    json tc = {
+                        {"id", tool_call.id.empty() ? ("call_" + std::to_string(std::rand())) : tool_call.id},
+                        {"type", "function"},
+                        {"function", {
+                            {"name", tool_call.name},
+                            {"arguments", tool_call.arguments}
+                        }}
+                    };
+                    tool_calls.push_back(tc);
+                }
+                choice["message"]["tool_calls"] = tool_calls;
+                choice["finish_reason"] = "tool_calls";
+            } else {
+                // Regular text response
+                choice["message"]["content"] = has_parsed_content ? parsed_msg.content : result.content;
+            }
             choices.push_back(choice);
             response["choices"] = choices;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@novastera-oss/llamarn",
-  "version": "0.2.2",
+  "version": "0.2.3",
   "description": "An attempt at a pure cpp turbo module library",
   "source": "./src/index.tsx",
   "main": "./lib/module/index.js",