@novastera-oss/llamarn 0.2.4 → 0.2.5

This diff shows the changes between publicly released versions of this package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
@@ -147,30 +147,13 @@ CompletionResult run_completion(
147
147
  json data = options.to_json();
148
148
  // Prepare the sampling parameters
149
149
  const auto& params = rn_ctx->params;
150
-
151
- // Set the prompt
152
- if (data.contains("prompt")) {
153
- // Tokenize the prompt
154
- const auto& tokenized_prompts = tokenize_input_prompts(rn_ctx->vocab, data["prompt"], true, true);
155
- if (tokenized_prompts.empty() || tokenized_prompts[0].empty()) {
156
- result.success = false;
157
- result.error_msg = "Empty prompt";
158
- result.error_type = RN_ERROR_INVALID_PARAM;
159
- return result;
160
- }
161
- state.prompt_tokens = std::move(tokenized_prompts[0]);
162
- } else {
163
- result.success = false;
164
- result.error_msg = "No prompt provided";
165
- result.error_type = RN_ERROR_INVALID_PARAM;
166
- return result;
150
+
151
+ // Create a copy of sampling parameters and apply grammar if provided
152
+ common_params_sampling sampling_params = params.sampling;
153
+ if (!options.grammar.empty()) {
154
+ sampling_params.grammar = options.grammar;
167
155
  }
168
156
 
169
- // Configure state
170
- state.n_ctx = llama_n_ctx(rn_ctx->ctx);
171
- state.n_predict = options.n_predict > 0 ? options.n_predict : params.n_predict;
172
- state.n_remaining = state.n_predict;
173
-
174
157
  // Parse tool_choice
175
158
  if (options.tool_choice == "auto") {
176
159
  state.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
@@ -179,8 +162,8 @@ CompletionResult run_completion(
179
162
  } else if (options.tool_choice == "required") {
180
163
  state.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED;
181
164
  }
182
- // Initialize the sampler
183
- state.sampler = common_sampler_init(rn_ctx->model, params.sampling);
165
+ // Initialize the sampler with the updated sampling parameters
166
+ state.sampler = common_sampler_init(rn_ctx->model, sampling_params);
184
167
  if (!state.sampler) {
185
168
  result.success = false;
186
169
  result.error_msg = "Failed to initialize sampler";
@@ -201,6 +184,29 @@ CompletionResult run_completion(
201
184
  }
202
185
  }
203
186
 
187
+ // Set the prompt
188
+ if (data.contains("prompt")) {
189
+ // Tokenize the prompt
190
+ const auto& tokenized_prompts = tokenize_input_prompts(rn_ctx->vocab, data["prompt"], true, true);
191
+ if (tokenized_prompts.empty() || tokenized_prompts[0].empty()) {
192
+ result.success = false;
193
+ result.error_msg = "Empty prompt";
194
+ result.error_type = RN_ERROR_INVALID_PARAM;
195
+ return result;
196
+ }
197
+ state.prompt_tokens = std::move(tokenized_prompts[0]);
198
+ } else {
199
+ result.success = false;
200
+ result.error_msg = "No prompt provided";
201
+ result.error_type = RN_ERROR_INVALID_PARAM;
202
+ return result;
203
+ }
204
+
205
+ // Configure state
206
+ state.n_ctx = llama_n_ctx(rn_ctx->ctx);
207
+ state.n_predict = options.n_predict > 0 ? options.n_predict : params.n_predict;
208
+ state.n_remaining = state.n_predict;
209
+
204
210
  // Process the prompt
205
211
  for (int i = 0; i < (int)state.prompt_tokens.size(); ++i) {
206
212
  llama_token token = state.prompt_tokens[i];
@@ -222,7 +228,11 @@ CompletionResult run_completion(
222
228
  return result;
223
229
  }
224
230
 
225
- common_sampler_accept(state.sampler, token, true);
231
+ // Only accept tokens during prompt processing if no grammar is present
232
+ // Grammar-based sampling needs to start fresh from the generation phase
233
+ if (sampling_params.grammar.empty()) {
234
+ common_sampler_accept(state.sampler, token, true);
235
+ }
226
236
  state.n_past++;
227
237
  }
228
238
 
@@ -435,31 +445,15 @@ CompletionResult run_chat_completion(
435
445
 
436
446
  // Add parsed content and tool calls if available
437
447
  if (has_parsed_content && !parsed_msg.tool_calls.empty()) {
438
- // Set content to the parsed content (may be null for tool-only responses)
439
- if (!parsed_msg.content.empty()) {
440
- choice["message"]["content"] = parsed_msg.content;
441
- } else {
442
- choice["message"]["content"] = nullptr;
443
- }
444
-
445
- // Add tool calls to the message
446
- json tool_calls = json::array();
447
- for (const auto& tool_call : parsed_msg.tool_calls) {
448
- json tc = {
449
- {"id", tool_call.id.empty() ? ("call_" + std::to_string(std::rand())) : tool_call.id},
450
- {"type", "function"},
451
- {"function", {
452
- {"name", tool_call.name},
453
- {"arguments", tool_call.arguments}
454
- }}
455
- };
456
- tool_calls.push_back(tc);
457
- }
458
- choice["message"]["tool_calls"] = tool_calls;
448
+ // Use the server.cpp approach: let the common_chat_msg handle the JSON conversion
449
+ choice["message"] = parsed_msg.to_json_oaicompat<json>();
459
450
  choice["finish_reason"] = "tool_calls";
451
+ } else if (has_parsed_content && !parsed_msg.content.empty()) {
452
+ // Regular text response with parsed content
453
+ choice["message"]["content"] = parsed_msg.content;
460
454
  } else {
461
- // Regular text response
462
- choice["message"]["content"] = has_parsed_content ? parsed_msg.content : result.content;
455
+ // Fallback to raw content if parsing failed or no tools
456
+ choice["message"]["content"] = result.content;
463
457
  }
464
458
 
465
459
  choices.push_back(choice);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@novastera-oss/llamarn",
3
- "version": "0.2.4",
3
+ "version": "0.2.5",
4
4
  "description": "An attempt at a pure cpp turbo module library",
5
5
  "source": "./src/index.tsx",
6
6
  "main": "./lib/module/index.js",