@novastera-oss/llamarn 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cpp/rn-completion.cpp +42 -48
- package/package.json +1 -1
package/cpp/rn-completion.cpp
CHANGED
|
@@ -147,30 +147,13 @@ CompletionResult run_completion(
     json data = options.to_json();
     // Prepare the sampling parameters
     const auto& params = rn_ctx->params;
-
-    // Set the prompt
-    if (data.contains("prompt")) {
-        // Tokenize the prompt
-        const auto& tokenized_prompts = tokenize_input_prompts(rn_ctx->vocab, data["prompt"], true, true);
-        if (tokenized_prompts.empty() || tokenized_prompts[0].empty()) {
-            result.success = false;
-            result.error_msg = "Empty prompt";
-            result.error_type = RN_ERROR_INVALID_PARAM;
-            return result;
-        }
-        state.prompt_tokens = std::move(tokenized_prompts[0]);
-    } else {
-        result.success = false;
-        result.error_msg = "No prompt provided";
-        result.error_type = RN_ERROR_INVALID_PARAM;
-        return result;
+
+    // Create a copy of sampling parameters and apply grammar if provided
+    common_params_sampling sampling_params = params.sampling;
+    if (!options.grammar.empty()) {
+        sampling_params.grammar = options.grammar;
     }
 
-    // Configure state
-    state.n_ctx = llama_n_ctx(rn_ctx->ctx);
-    state.n_predict = options.n_predict > 0 ? options.n_predict : params.n_predict;
-    state.n_remaining = state.n_predict;
-
     // Parse tool_choice
     if (options.tool_choice == "auto") {
         state.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
@@ -179,8 +162,8 @@ CompletionResult run_completion(
     } else if (options.tool_choice == "required") {
         state.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED;
     }
-    // Initialize the sampler
-    state.sampler = common_sampler_init(rn_ctx->model, params.sampling);
+    // Initialize the sampler with the updated sampling parameters
+    state.sampler = common_sampler_init(rn_ctx->model, sampling_params);
     if (!state.sampler) {
         result.success = false;
         result.error_msg = "Failed to initialize sampler";
@@ -201,6 +184,29 @@ CompletionResult run_completion(
         }
     }
 
+    // Set the prompt
+    if (data.contains("prompt")) {
+        // Tokenize the prompt
+        const auto& tokenized_prompts = tokenize_input_prompts(rn_ctx->vocab, data["prompt"], true, true);
+        if (tokenized_prompts.empty() || tokenized_prompts[0].empty()) {
+            result.success = false;
+            result.error_msg = "Empty prompt";
+            result.error_type = RN_ERROR_INVALID_PARAM;
+            return result;
+        }
+        state.prompt_tokens = std::move(tokenized_prompts[0]);
+    } else {
+        result.success = false;
+        result.error_msg = "No prompt provided";
+        result.error_type = RN_ERROR_INVALID_PARAM;
+        return result;
+    }
+
+    // Configure state
+    state.n_ctx = llama_n_ctx(rn_ctx->ctx);
+    state.n_predict = options.n_predict > 0 ? options.n_predict : params.n_predict;
+    state.n_remaining = state.n_predict;
+
     // Process the prompt
     for (int i = 0; i < (int)state.prompt_tokens.size(); ++i) {
         llama_token token = state.prompt_tokens[i];
@@ -222,7 +228,11 @@ CompletionResult run_completion(
             return result;
         }
 
-        common_sampler_accept(state.sampler, token, true);
+        // Only accept tokens during prompt processing if no grammar is present
+        // Grammar-based sampling needs to start fresh from the generation phase
+        if (sampling_params.grammar.empty()) {
+            common_sampler_accept(state.sampler, token, true);
+        }
         state.n_past++;
     }
 
@@ -435,31 +445,15 @@ CompletionResult run_chat_completion(
 
         // Add parsed content and tool calls if available
         if (has_parsed_content && !parsed_msg.tool_calls.empty()) {
-            //
-
-            choice["message"]["content"] = parsed_msg.content;
-            } else {
-                choice["message"]["content"] = nullptr;
-            }
-
-            // Add tool calls to the message
-            json tool_calls = json::array();
-            for (const auto& tool_call : parsed_msg.tool_calls) {
-                json tc = {
-                    {"id", tool_call.id.empty() ? ("call_" + std::to_string(std::rand())) : tool_call.id},
-                    {"type", "function"},
-                    {"function", {
-                        {"name", tool_call.name},
-                        {"arguments", tool_call.arguments}
-                    }}
-                };
-                tool_calls.push_back(tc);
-            }
-            choice["message"]["tool_calls"] = tool_calls;
+            // Use the server.cpp approach: let the common_chat_msg handle the JSON conversion
+            choice["message"] = parsed_msg.to_json_oaicompat<json>();
             choice["finish_reason"] = "tool_calls";
+        } else if (has_parsed_content && !parsed_msg.content.empty()) {
+            // Regular text response with parsed content
+            choice["message"]["content"] = parsed_msg.content;
         } else {
-            //
-            choice["message"]["content"] =
+            // Fallback to raw content if parsing failed or no tools
+            choice["message"]["content"] = result.content;
         }
 
         choices.push_back(choice);