@novastera-oss/llamarn 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cpp/PureCppImpl.cpp +9 -27
- package/cpp/SystemUtils.h +2 -2
- package/cpp/rn-completion.cpp +56 -3
- package/package.json +1 -1
package/cpp/PureCppImpl.cpp
CHANGED
|
@@ -157,7 +157,7 @@ jsi::Value PureCppImpl::initLlama(jsi::Runtime &runtime, jsi::Object options) {
|
|
|
157
157
|
SystemUtils::setIfExists(runtime, options, "use_mmap", params.use_mmap);
|
|
158
158
|
SystemUtils::setIfExists(runtime, options, "use_mlock", params.use_mlock);
|
|
159
159
|
SystemUtils::setIfExists(runtime, options, "use_jinja", params.use_jinja);
|
|
160
|
-
|
|
160
|
+
|
|
161
161
|
// Extract threading parameters (preserve custom thread logic)
|
|
162
162
|
int n_threads = 0; // 0 = auto
|
|
163
163
|
if (options.hasProperty(runtime, "n_threads")) {
|
|
@@ -283,42 +283,24 @@ jsi::Value PureCppImpl::initLlama(jsi::Runtime &runtime, jsi::Object options) {
|
|
|
283
283
|
// Set additional fields
|
|
284
284
|
rn_params.use_jinja = params.use_jinja;
|
|
285
285
|
rn_params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
|
|
286
|
-
//
|
|
287
|
-
rn_params.chat_format = COMMON_CHAT_FORMAT_GENERIC;
|
|
286
|
+
// Don't force a specific chat format - let the template system auto-detect based on model and tools
|
|
287
|
+
// rn_params.chat_format = COMMON_CHAT_FORMAT_GENERIC;
|
|
288
288
|
// Now assign to the context
|
|
289
289
|
rn_ctx_->params = rn_params;
|
|
290
290
|
|
|
291
|
-
|
|
291
|
+
rn_ctx_->chat_templates = common_chat_templates_init(rn_ctx_->model, params.chat_template);
|
|
292
292
|
try {
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
SystemUtils::setIfExists(runtime, options, "bos_token", bos_token_override);
|
|
298
|
-
SystemUtils::setIfExists(runtime, options, "eos_token", eos_token_override);
|
|
299
|
-
|
|
300
|
-
rn_ctx_->chat_templates = common_chat_templates_init(
|
|
301
|
-
rn_ctx_->model,
|
|
302
|
-
params.chat_template,
|
|
303
|
-
bos_token_override,
|
|
304
|
-
eos_token_override
|
|
305
|
-
);
|
|
306
|
-
|
|
307
|
-
if (!rn_ctx_->chat_templates) {
|
|
308
|
-
throw std::runtime_error("Failed to initialize chat templates");
|
|
309
|
-
}
|
|
310
|
-
} catch (const std::exception& e) {
|
|
311
|
-
// Log warning and fallback to chatml
|
|
312
|
-
fprintf(stderr, "Warning: Failed to initialize chat template: %s. Falling back to chatml.\n", e.what());
|
|
293
|
+
common_chat_format_example(rn_ctx_->chat_templates.get(), params.use_jinja);
|
|
294
|
+
} catch (const std::exception & e) {
|
|
295
|
+
// Fallback to chatml if the original template parsing fails
|
|
313
296
|
rn_ctx_->chat_templates = common_chat_templates_init(rn_ctx_->model, "chatml");
|
|
314
|
-
if (!rn_ctx_->chat_templates) {
|
|
315
|
-
throw std::runtime_error("Failed to initialize fallback chatml template");
|
|
316
|
-
}
|
|
317
297
|
}
|
|
298
|
+
|
|
318
299
|
|
|
319
300
|
// Create the model object and return it
|
|
320
301
|
return createModelObject(runtime, rn_ctx_.get());
|
|
321
302
|
} catch (const std::exception& e) {
|
|
303
|
+
// We can keep this top-level error log as it's for initialization failure
|
|
322
304
|
fprintf(stderr, "initLlama error: %s\n", e.what());
|
|
323
305
|
throw jsi::JSError(runtime, e.what());
|
|
324
306
|
}
|
package/cpp/SystemUtils.h
CHANGED
|
@@ -44,8 +44,8 @@ public:
|
|
|
44
44
|
* Helper functions to easily set values from a JSI object if the property exists.
|
|
45
45
|
* Returns true if the property was found and the value was set.
|
|
46
46
|
*/
|
|
47
|
-
// Template for
|
|
48
|
-
template<typename T, typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>
|
|
47
|
+
// Template for numeric types (excluding bool so bool specialization is used)
|
|
48
|
+
template<typename T, typename = typename std::enable_if<std::is_arithmetic<T>::value && !std::is_same<T, bool>::value>::type>
|
|
49
49
|
static bool setIfExists(jsi::Runtime& rt, const jsi::Object& options, const std::string& key, T& outValue) {
|
|
50
50
|
if (options.hasProperty(rt, key.c_str())) {
|
|
51
51
|
jsi::Value val = options.getProperty(rt, key.c_str());
|
package/cpp/rn-completion.cpp
CHANGED
|
@@ -350,7 +350,7 @@ CompletionResult run_chat_completion(
|
|
|
350
350
|
common_chat_templates_inputs template_inputs;
|
|
351
351
|
template_inputs.messages = chat_msgs;
|
|
352
352
|
template_inputs.add_generation_prompt = true;
|
|
353
|
-
template_inputs.use_jinja =
|
|
353
|
+
template_inputs.use_jinja = rn_ctx->params.use_jinja;
|
|
354
354
|
// Note: extract_reasoning field doesn't exist in current llama.cpp version
|
|
355
355
|
// template_inputs.extract_reasoning = true; // Default to true to extract reasoning content if available
|
|
356
356
|
|
|
@@ -391,6 +391,31 @@ CompletionResult run_chat_completion(
|
|
|
391
391
|
result = run_completion(rn_ctx, cmpl_options, callback);
|
|
392
392
|
|
|
393
393
|
if (result.success) {
|
|
394
|
+
// Parse the generated content for tool calls and structured responses
|
|
395
|
+
common_chat_msg parsed_msg;
|
|
396
|
+
bool has_parsed_content = false;
|
|
397
|
+
|
|
398
|
+
// Only parse if we have tools available and the response isn't empty
|
|
399
|
+
if (!template_inputs.tools.empty() && !result.content.empty()) {
|
|
400
|
+
try {
|
|
401
|
+
// Construct the chat syntax for parsing using the format from template application
|
|
402
|
+
common_chat_syntax syntax;
|
|
403
|
+
syntax.format = chat_params.format; // Use format from template, not from params
|
|
404
|
+
syntax.reasoning_format = rn_ctx->params.reasoning_format;
|
|
405
|
+
syntax.reasoning_in_content = true;
|
|
406
|
+
syntax.thinking_forced_open = false;
|
|
407
|
+
syntax.parse_tool_calls = true;
|
|
408
|
+
|
|
409
|
+
// Parse the generated content for tool calls
|
|
410
|
+
parsed_msg = common_chat_parse(result.content, false, syntax);
|
|
411
|
+
has_parsed_content = true;
|
|
412
|
+
|
|
413
|
+
} catch (const std::exception& e) {
|
|
414
|
+
// If parsing fails, treat as regular content
|
|
415
|
+
has_parsed_content = false;
|
|
416
|
+
}
|
|
417
|
+
}
|
|
418
|
+
|
|
394
419
|
// Create OpenAI-compatible response
|
|
395
420
|
json response = {
|
|
396
421
|
{"id", gen_chatcmplid()},
|
|
@@ -403,11 +428,39 @@ CompletionResult run_chat_completion(
|
|
|
403
428
|
json choice = {
|
|
404
429
|
{"index", 0},
|
|
405
430
|
{"message", {
|
|
406
|
-
{"role", "assistant"}
|
|
407
|
-
{"content", result.content}
|
|
431
|
+
{"role", "assistant"}
|
|
408
432
|
}},
|
|
409
433
|
{"finish_reason", "stop"}
|
|
410
434
|
};
|
|
435
|
+
|
|
436
|
+
// Add parsed content and tool calls if available
|
|
437
|
+
if (has_parsed_content && !parsed_msg.tool_calls.empty()) {
|
|
438
|
+
// Set content to the parsed content (may be null for tool-only responses)
|
|
439
|
+
if (!parsed_msg.content.empty()) {
|
|
440
|
+
choice["message"]["content"] = parsed_msg.content;
|
|
441
|
+
} else {
|
|
442
|
+
choice["message"]["content"] = nullptr;
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
// Add tool calls to the message
|
|
446
|
+
json tool_calls = json::array();
|
|
447
|
+
for (const auto& tool_call : parsed_msg.tool_calls) {
|
|
448
|
+
json tc = {
|
|
449
|
+
{"id", tool_call.id.empty() ? ("call_" + std::to_string(std::rand())) : tool_call.id},
|
|
450
|
+
{"type", "function"},
|
|
451
|
+
{"function", {
|
|
452
|
+
{"name", tool_call.name},
|
|
453
|
+
{"arguments", tool_call.arguments}
|
|
454
|
+
}}
|
|
455
|
+
};
|
|
456
|
+
tool_calls.push_back(tc);
|
|
457
|
+
}
|
|
458
|
+
choice["message"]["tool_calls"] = tool_calls;
|
|
459
|
+
choice["finish_reason"] = "tool_calls";
|
|
460
|
+
} else {
|
|
461
|
+
// Regular text response
|
|
462
|
+
choice["message"]["content"] = has_parsed_content ? parsed_msg.content : result.content;
|
|
463
|
+
}
|
|
411
464
|
|
|
412
465
|
choices.push_back(choice);
|
|
413
466
|
response["choices"] = choices;
|