@novastera-oss/llamarn 0.2.4 → 0.2.5

This diff shows the changes between publicly released versions of this package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
@@ -147,30 +147,13 @@ CompletionResult run_completion(
147
147
  json data = options.to_json();
148
148
  // Prepare the sampling parameters
149
149
  const auto& params = rn_ctx->params;
150
-
151
- // Set the prompt
152
- if (data.contains("prompt")) {
153
- // Tokenize the prompt
154
- const auto& tokenized_prompts = tokenize_input_prompts(rn_ctx->vocab, data["prompt"], true, true);
155
- if (tokenized_prompts.empty() || tokenized_prompts[0].empty()) {
156
- result.success = false;
157
- result.error_msg = "Empty prompt";
158
- result.error_type = RN_ERROR_INVALID_PARAM;
159
- return result;
160
- }
161
- state.prompt_tokens = std::move(tokenized_prompts[0]);
162
- } else {
163
- result.success = false;
164
- result.error_msg = "No prompt provided";
165
- result.error_type = RN_ERROR_INVALID_PARAM;
166
- return result;
150
+
151
+ // Create a copy of sampling parameters and apply grammar if provided
152
+ common_params_sampling sampling_params = params.sampling;
153
+ if (!options.grammar.empty()) {
154
+ sampling_params.grammar = options.grammar;
167
155
  }
168
156
 
169
- // Configure state
170
- state.n_ctx = llama_n_ctx(rn_ctx->ctx);
171
- state.n_predict = options.n_predict > 0 ? options.n_predict : params.n_predict;
172
- state.n_remaining = state.n_predict;
173
-
174
157
  // Parse tool_choice
175
158
  if (options.tool_choice == "auto") {
176
159
  state.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
@@ -179,8 +162,8 @@ CompletionResult run_completion(
179
162
  } else if (options.tool_choice == "required") {
180
163
  state.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED;
181
164
  }
182
- // Initialize the sampler
183
- state.sampler = common_sampler_init(rn_ctx->model, params.sampling);
165
+ // Initialize the sampler with the updated sampling parameters
166
+ state.sampler = common_sampler_init(rn_ctx->model, sampling_params);
184
167
  if (!state.sampler) {
185
168
  result.success = false;
186
169
  result.error_msg = "Failed to initialize sampler";
@@ -201,6 +184,29 @@ CompletionResult run_completion(
201
184
  }
202
185
  }
203
186
 
187
+ // Set the prompt
188
+ if (data.contains("prompt")) {
189
+ // Tokenize the prompt
190
+ const auto& tokenized_prompts = tokenize_input_prompts(rn_ctx->vocab, data["prompt"], true, true);
191
+ if (tokenized_prompts.empty() || tokenized_prompts[0].empty()) {
192
+ result.success = false;
193
+ result.error_msg = "Empty prompt";
194
+ result.error_type = RN_ERROR_INVALID_PARAM;
195
+ return result;
196
+ }
197
+ state.prompt_tokens = std::move(tokenized_prompts[0]);
198
+ } else {
199
+ result.success = false;
200
+ result.error_msg = "No prompt provided";
201
+ result.error_type = RN_ERROR_INVALID_PARAM;
202
+ return result;
203
+ }
204
+
205
+ // Configure state
206
+ state.n_ctx = llama_n_ctx(rn_ctx->ctx);
207
+ state.n_predict = options.n_predict > 0 ? options.n_predict : params.n_predict;
208
+ state.n_remaining = state.n_predict;
209
+
204
210
  // Process the prompt
205
211
  for (int i = 0; i < (int)state.prompt_tokens.size(); ++i) {
206
212
  llama_token token = state.prompt_tokens[i];
@@ -222,7 +228,11 @@ CompletionResult run_completion(
222
228
  return result;
223
229
  }
224
230
 
225
- common_sampler_accept(state.sampler, token, true);
231
+ // Only accept tokens during prompt processing if no grammar is present
232
+ // Grammar-based sampling needs to start fresh from the generation phase
233
+ if (sampling_params.grammar.empty()) {
234
+ common_sampler_accept(state.sampler, token, true);
235
+ }
226
236
  state.n_past++;
227
237
  }
228
238
 
@@ -435,31 +445,15 @@ CompletionResult run_chat_completion(
435
445
 
436
446
  // Add parsed content and tool calls if available
437
447
  if (has_parsed_content && !parsed_msg.tool_calls.empty()) {
438
- // Set content to the parsed content (may be null for tool-only responses)
439
- if (!parsed_msg.content.empty()) {
440
- choice["message"]["content"] = parsed_msg.content;
441
- } else {
442
- choice["message"]["content"] = nullptr;
443
- }
444
-
445
- // Add tool calls to the message
446
- json tool_calls = json::array();
447
- for (const auto& tool_call : parsed_msg.tool_calls) {
448
- json tc = {
449
- {"id", tool_call.id.empty() ? ("call_" + std::to_string(std::rand())) : tool_call.id},
450
- {"type", "function"},
451
- {"function", {
452
- {"name", tool_call.name},
453
- {"arguments", tool_call.arguments}
454
- }}
455
- };
456
- tool_calls.push_back(tc);
457
- }
458
- choice["message"]["tool_calls"] = tool_calls;
448
+ // Use the server.cpp approach: let the common_chat_msg handle the JSON conversion
449
+ choice["message"] = parsed_msg.to_json_oaicompat<json>();
459
450
  choice["finish_reason"] = "tool_calls";
451
+ } else if (has_parsed_content && !parsed_msg.content.empty()) {
452
+ // Regular text response with parsed content
453
+ choice["message"]["content"] = parsed_msg.content;
460
454
  } else {
461
- // Regular text response
462
- choice["message"]["content"] = has_parsed_content ? parsed_msg.content : result.content;
455
+ // Fallback to raw content if parsing failed or no tools
456
+ choice["message"]["content"] = result.content;
463
457
  }
464
458
 
465
459
  choices.push_back(choice);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@novastera-oss/llamarn",
3
- "version": "0.2.4",
3
+ "version": "0.2.5",
4
4
  "description": "An attempt at a pure cpp turbo module library",
5
5
  "source": "./src/index.tsx",
6
6
  "main": "./lib/module/index.js",