@fugood/llama.node 1.0.0-beta.4 → 1.0.0-beta.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +7 -4
- package/lib/binding.ts +1 -1
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +27 -26
- package/src/LlamaCompletionWorker.cpp +21 -4
- package/src/LlamaCompletionWorker.h +2 -0
- package/src/LlamaContext.cpp +3 -12
- package/src/common.hpp +6 -5
- package/src/llama.cpp/CMakeLists.txt +15 -4
- package/src/llama.cpp/common/CMakeLists.txt +15 -24
- package/src/llama.cpp/common/arg.cpp +172 -110
- package/src/llama.cpp/common/chat-parser.cpp +385 -0
- package/src/llama.cpp/common/chat-parser.h +120 -0
- package/src/llama.cpp/common/chat.cpp +726 -596
- package/src/llama.cpp/common/chat.h +74 -8
- package/src/llama.cpp/common/common.cpp +56 -38
- package/src/llama.cpp/common/common.h +9 -3
- package/src/llama.cpp/common/json-partial.cpp +256 -0
- package/src/llama.cpp/common/json-partial.h +38 -0
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -1
- package/src/llama.cpp/common/json-schema-to-grammar.h +4 -4
- package/src/llama.cpp/common/sampling.cpp +7 -8
- package/src/llama.cpp/common/speculative.cpp +6 -4
- package/src/llama.cpp/ggml/CMakeLists.txt +48 -3
- package/src/llama.cpp/ggml/include/ggml.h +22 -3
- package/src/llama.cpp/ggml/src/CMakeLists.txt +81 -22
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +131 -49
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4113 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2162 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2638 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2731 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2068 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +396 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1299 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1480 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4310 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +59 -3206
- package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +12 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +64 -88
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +8 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +56 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +282 -100
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/quants.c +1157 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +1570 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +98 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +119 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +204 -49
- package/src/llama.cpp/include/llama.h +145 -40
- package/src/llama.cpp/src/CMakeLists.txt +5 -1
- package/src/llama.cpp/src/llama-arch.cpp +99 -3
- package/src/llama.cpp/src/llama-arch.h +10 -1
- package/src/llama.cpp/src/llama-batch.cpp +728 -272
- package/src/llama.cpp/src/llama-batch.h +112 -54
- package/src/llama.cpp/src/llama-chat.cpp +19 -2
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +525 -339
- package/src/llama.cpp/src/llama-context.h +38 -17
- package/src/llama.cpp/src/llama-cparams.cpp +4 -0
- package/src/llama.cpp/src/llama-cparams.h +2 -0
- package/src/llama.cpp/src/llama-grammar.cpp +12 -2
- package/src/llama.cpp/src/llama-graph.cpp +413 -353
- package/src/llama.cpp/src/llama-graph.h +112 -56
- package/src/llama.cpp/src/llama-hparams.cpp +10 -2
- package/src/llama.cpp/src/llama-hparams.h +13 -2
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +279 -0
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.h +128 -0
- package/src/llama.cpp/src/llama-kv-cache-unified.cpp +1815 -0
- package/src/llama.cpp/src/llama-kv-cache-unified.h +303 -0
- package/src/llama.cpp/src/llama-kv-cells.h +415 -0
- package/src/llama.cpp/src/llama-memory-hybrid.cpp +246 -0
- package/src/llama.cpp/src/llama-memory-hybrid.h +138 -0
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +1112 -0
- package/src/llama.cpp/src/llama-memory-recurrent.h +183 -0
- package/src/llama.cpp/src/llama-memory.cpp +41 -0
- package/src/llama.cpp/src/llama-memory.h +86 -5
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +42 -17
- package/src/llama.cpp/src/llama-model-saver.cpp +1 -0
- package/src/llama.cpp/src/llama-model.cpp +1137 -528
- package/src/llama.cpp/src/llama-model.h +4 -0
- package/src/llama.cpp/src/llama-quant.cpp +2 -1
- package/src/llama.cpp/src/llama-sampling.cpp +2 -2
- package/src/llama.cpp/src/llama-vocab.cpp +69 -32
- package/src/llama.cpp/src/llama-vocab.h +1 -0
- package/src/llama.cpp/src/llama.cpp +11 -7
- package/src/llama.cpp/src/unicode.cpp +5 -0
- package/src/tts_utils.h +1 -1
- package/src/llama.cpp/common/json.hpp +0 -24766
- package/src/llama.cpp/common/minja/chat-template.hpp +0 -541
- package/src/llama.cpp/common/minja/minja.hpp +0 -2974
- package/src/llama.cpp/common/stb_image.h +0 -7988
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13326
- package/src/llama.cpp/src/llama-kv-cache.cpp +0 -2827
- package/src/llama.cpp/src/llama-kv-cache.h +0 -515
- /package/src/llama.cpp/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
- /package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
- /package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
|
@@ -3,11 +3,13 @@
|
|
|
3
3
|
#pragma once
|
|
4
4
|
|
|
5
5
|
#include "common.h"
|
|
6
|
+
#include <functional>
|
|
6
7
|
#include <chrono>
|
|
7
8
|
#include <string>
|
|
8
9
|
#include <vector>
|
|
9
|
-
|
|
10
|
-
#include
|
|
10
|
+
|
|
11
|
+
#include <minja/chat-template.hpp>
|
|
12
|
+
#include <minja/minja.hpp>
|
|
11
13
|
|
|
12
14
|
typedef minja::chat_template common_chat_template;
|
|
13
15
|
|
|
@@ -21,11 +23,19 @@ struct common_chat_tool_call {
|
|
|
21
23
|
std::string name;
|
|
22
24
|
std::string arguments;
|
|
23
25
|
std::string id;
|
|
26
|
+
|
|
27
|
+
bool operator==(const common_chat_tool_call & other) const {
|
|
28
|
+
return name == other.name && arguments == other.arguments && id == other.id;
|
|
29
|
+
}
|
|
24
30
|
};
|
|
25
31
|
|
|
26
32
|
struct common_chat_msg_content_part {
|
|
27
33
|
std::string type;
|
|
28
34
|
std::string text;
|
|
35
|
+
|
|
36
|
+
bool operator==(const common_chat_msg_content_part & other) const {
|
|
37
|
+
return type == other.type && text == other.text;
|
|
38
|
+
}
|
|
29
39
|
};
|
|
30
40
|
|
|
31
41
|
struct common_chat_msg {
|
|
@@ -36,6 +46,51 @@ struct common_chat_msg {
|
|
|
36
46
|
std::string reasoning_content;
|
|
37
47
|
std::string tool_name;
|
|
38
48
|
std::string tool_call_id;
|
|
49
|
+
|
|
50
|
+
template <class T> T to_json_oaicompat() const;
|
|
51
|
+
|
|
52
|
+
bool empty() const {
|
|
53
|
+
return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() && tool_name.empty() && tool_call_id.empty();
|
|
54
|
+
}
|
|
55
|
+
void ensure_tool_call_ids_set(std::vector<std::string> & ids_cache, const std::function<std::string()> & gen_tool_call_id) {
|
|
56
|
+
for (auto i = 0u; i < tool_calls.size(); i++) {
|
|
57
|
+
if (ids_cache.size() <= i) {
|
|
58
|
+
auto id = tool_calls[i].id;
|
|
59
|
+
if (id.empty()) {
|
|
60
|
+
id = gen_tool_call_id();
|
|
61
|
+
}
|
|
62
|
+
ids_cache.push_back(id);
|
|
63
|
+
}
|
|
64
|
+
tool_calls[i].id = ids_cache[i];
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
bool operator==(const common_chat_msg & other) const {
|
|
68
|
+
return role == other.role
|
|
69
|
+
&& content == other.content
|
|
70
|
+
&& content_parts == other.content_parts
|
|
71
|
+
&& tool_calls == other.tool_calls
|
|
72
|
+
&& reasoning_content == other.reasoning_content
|
|
73
|
+
&& tool_name == other.tool_name
|
|
74
|
+
&& tool_call_id == other.tool_call_id;
|
|
75
|
+
}
|
|
76
|
+
bool operator!=(const common_chat_msg & other) const {
|
|
77
|
+
return !(*this == other);
|
|
78
|
+
}
|
|
79
|
+
};
|
|
80
|
+
|
|
81
|
+
struct common_chat_msg_diff {
|
|
82
|
+
std::string reasoning_content_delta;
|
|
83
|
+
std::string content_delta;
|
|
84
|
+
size_t tool_call_index = std::string::npos;
|
|
85
|
+
common_chat_tool_call tool_call_delta;
|
|
86
|
+
|
|
87
|
+
static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg);
|
|
88
|
+
|
|
89
|
+
bool operator==(const common_chat_msg_diff & other) const {
|
|
90
|
+
return content_delta == other.content_delta
|
|
91
|
+
&& tool_call_index == other.tool_call_index
|
|
92
|
+
&& tool_call_delta == other.tool_call_delta;
|
|
93
|
+
}
|
|
39
94
|
};
|
|
40
95
|
|
|
41
96
|
struct common_chat_tool {
|
|
@@ -57,14 +112,11 @@ enum common_chat_format {
|
|
|
57
112
|
COMMON_CHAT_FORMAT_LLAMA_3_X,
|
|
58
113
|
COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
|
|
59
114
|
COMMON_CHAT_FORMAT_DEEPSEEK_R1,
|
|
60
|
-
COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING,
|
|
61
115
|
COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
|
|
62
116
|
COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
|
|
63
117
|
COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
|
|
64
118
|
COMMON_CHAT_FORMAT_HERMES_2_PRO,
|
|
65
|
-
COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING,
|
|
66
119
|
COMMON_CHAT_FORMAT_COMMAND_R7B,
|
|
67
|
-
COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING,
|
|
68
120
|
|
|
69
121
|
COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
|
|
70
122
|
};
|
|
@@ -79,7 +131,8 @@ struct common_chat_templates_inputs {
|
|
|
79
131
|
std::vector<common_chat_tool> tools;
|
|
80
132
|
common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
|
|
81
133
|
bool parallel_tool_calls = false;
|
|
82
|
-
|
|
134
|
+
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
|
|
135
|
+
bool enable_thinking = true;
|
|
83
136
|
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
|
|
84
137
|
};
|
|
85
138
|
|
|
@@ -88,11 +141,21 @@ struct common_chat_params {
|
|
|
88
141
|
std::string prompt;
|
|
89
142
|
std::string grammar;
|
|
90
143
|
bool grammar_lazy = false;
|
|
144
|
+
bool thinking_forced_open = false;
|
|
91
145
|
std::vector<common_grammar_trigger> grammar_triggers;
|
|
92
146
|
std::vector<std::string> preserved_tokens;
|
|
93
147
|
std::vector<std::string> additional_stops;
|
|
94
148
|
};
|
|
95
149
|
|
|
150
|
+
struct common_chat_syntax {
|
|
151
|
+
common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
|
|
152
|
+
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
|
|
153
|
+
// Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
|
|
154
|
+
bool reasoning_in_content = false;
|
|
155
|
+
bool thinking_forced_open = false;
|
|
156
|
+
bool parse_tool_calls = true;
|
|
157
|
+
};
|
|
158
|
+
|
|
96
159
|
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
|
|
97
160
|
bool common_chat_verify_template(const std::string & tmpl, bool use_jinja);
|
|
98
161
|
|
|
@@ -129,8 +192,9 @@ std::string common_chat_format_example(
|
|
|
129
192
|
const struct common_chat_templates * tmpls,
|
|
130
193
|
bool use_jinja);
|
|
131
194
|
|
|
132
|
-
|
|
133
|
-
|
|
195
|
+
const char* common_chat_format_name(common_chat_format format);
|
|
196
|
+
const char* common_reasoning_format_name(common_reasoning_format format);
|
|
197
|
+
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
|
|
134
198
|
|
|
135
199
|
common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
|
|
136
200
|
|
|
@@ -143,3 +207,5 @@ template <class T> T common_chat_msgs_to_json_oaicompat(const std::vector<common
|
|
|
143
207
|
// T can be std::string containing JSON or nlohmann::ordered_json
|
|
144
208
|
template <class T> std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const T & tools);
|
|
145
209
|
template <class T> T common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & tools);
|
|
210
|
+
|
|
211
|
+
template <class T> T common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff);
|
|
@@ -203,6 +203,7 @@ bool set_process_priority(enum ggml_sched_priority prio) {
|
|
|
203
203
|
|
|
204
204
|
DWORD p = NORMAL_PRIORITY_CLASS;
|
|
205
205
|
switch (prio) {
|
|
206
|
+
case GGML_SCHED_PRIO_LOW: p = BELOW_NORMAL_PRIORITY_CLASS; break;
|
|
206
207
|
case GGML_SCHED_PRIO_NORMAL: p = NORMAL_PRIORITY_CLASS; break;
|
|
207
208
|
case GGML_SCHED_PRIO_MEDIUM: p = ABOVE_NORMAL_PRIORITY_CLASS; break;
|
|
208
209
|
case GGML_SCHED_PRIO_HIGH: p = HIGH_PRIORITY_CLASS; break;
|
|
@@ -228,6 +229,7 @@ bool set_process_priority(enum ggml_sched_priority prio) {
|
|
|
228
229
|
|
|
229
230
|
int p = 0;
|
|
230
231
|
switch (prio) {
|
|
232
|
+
case GGML_SCHED_PRIO_LOW: p = 5; break;
|
|
231
233
|
case GGML_SCHED_PRIO_NORMAL: p = 0; break;
|
|
232
234
|
case GGML_SCHED_PRIO_MEDIUM: p = -5; break;
|
|
233
235
|
case GGML_SCHED_PRIO_HIGH: p = -10; break;
|
|
@@ -464,7 +466,7 @@ size_t string_find_partial_stop(const std::string_view & str, const std::string_
|
|
|
464
466
|
|
|
465
467
|
std::string regex_escape(const std::string & s) {
|
|
466
468
|
static const std::regex special_chars("[.^$|()*+?\\[\\]{}\\\\]");
|
|
467
|
-
return std::regex_replace(s, special_chars, "
|
|
469
|
+
return std::regex_replace(s, special_chars, "\\$&");
|
|
468
470
|
}
|
|
469
471
|
|
|
470
472
|
std::string string_join(const std::vector<std::string> & values, const std::string & separator) {
|
|
@@ -704,11 +706,17 @@ bool fs_validate_filename(const std::string & filename) {
|
|
|
704
706
|
// disable C++17 deprecation warning for std::codecvt_utf8
|
|
705
707
|
# pragma clang diagnostic push
|
|
706
708
|
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
|
709
|
+
#elif defined(__GNUC__)
|
|
710
|
+
# pragma GCC diagnostic push
|
|
711
|
+
# pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
|
707
712
|
#endif
|
|
713
|
+
|
|
708
714
|
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
|
|
709
715
|
|
|
710
716
|
#if defined(__clang__)
|
|
711
717
|
# pragma clang diagnostic pop
|
|
718
|
+
#elif defined(__GNUC__)
|
|
719
|
+
# pragma GCC diagnostic pop
|
|
712
720
|
#endif
|
|
713
721
|
|
|
714
722
|
filename_utf32 = converter.from_bytes(filename);
|
|
@@ -765,6 +773,9 @@ bool fs_validate_filename(const std::string & filename) {
|
|
|
765
773
|
return true;
|
|
766
774
|
}
|
|
767
775
|
|
|
776
|
+
#include <iostream>
|
|
777
|
+
|
|
778
|
+
|
|
768
779
|
// returns true if successful, false otherwise
|
|
769
780
|
bool fs_create_directory_with_parents(const std::string & path) {
|
|
770
781
|
#ifdef _WIN32
|
|
@@ -782,9 +793,16 @@ bool fs_create_directory_with_parents(const std::string & path) {
|
|
|
782
793
|
// process path from front to back, procedurally creating directories
|
|
783
794
|
while ((pos_slash = path.find('\\', pos_slash)) != std::string::npos) {
|
|
784
795
|
const std::wstring subpath = wpath.substr(0, pos_slash);
|
|
785
|
-
const wchar_t * test = subpath.c_str();
|
|
786
796
|
|
|
787
|
-
|
|
797
|
+
pos_slash += 1;
|
|
798
|
+
|
|
799
|
+
// skip the drive letter, in some systems it can return an access denied error
|
|
800
|
+
if (subpath.length() == 2 && subpath[1] == ':') {
|
|
801
|
+
continue;
|
|
802
|
+
}
|
|
803
|
+
|
|
804
|
+
const bool success = CreateDirectoryW(subpath.c_str(), NULL);
|
|
805
|
+
|
|
788
806
|
if (!success) {
|
|
789
807
|
const DWORD error = GetLastError();
|
|
790
808
|
|
|
@@ -798,8 +816,6 @@ bool fs_create_directory_with_parents(const std::string & path) {
|
|
|
798
816
|
return false;
|
|
799
817
|
}
|
|
800
818
|
}
|
|
801
|
-
|
|
802
|
-
pos_slash += 1;
|
|
803
819
|
}
|
|
804
820
|
|
|
805
821
|
return true;
|
|
@@ -849,7 +865,7 @@ std::string fs_get_cache_directory() {
|
|
|
849
865
|
if (getenv("LLAMA_CACHE")) {
|
|
850
866
|
cache_directory = std::getenv("LLAMA_CACHE");
|
|
851
867
|
} else {
|
|
852
|
-
#if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)
|
|
868
|
+
#if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX) || defined(__OpenBSD__)
|
|
853
869
|
if (std::getenv("XDG_CACHE_HOME")) {
|
|
854
870
|
cache_directory = std::getenv("XDG_CACHE_HOME");
|
|
855
871
|
} else {
|
|
@@ -895,31 +911,6 @@ struct common_init_result common_init_from_params(common_params & params) {
|
|
|
895
911
|
|
|
896
912
|
const llama_vocab * vocab = llama_model_get_vocab(model);
|
|
897
913
|
|
|
898
|
-
if (params.reranking) {
|
|
899
|
-
bool ok = true;
|
|
900
|
-
|
|
901
|
-
if (llama_vocab_bos(vocab) == LLAMA_TOKEN_NULL) {
|
|
902
|
-
LOG_WRN("%s: warning: vocab does not have a BOS token, reranking will not work\n", __func__);
|
|
903
|
-
ok = false;
|
|
904
|
-
}
|
|
905
|
-
|
|
906
|
-
if (llama_vocab_eos(vocab) == LLAMA_TOKEN_NULL) {
|
|
907
|
-
LOG_WRN("%s: warning: vocab does not have an EOS token, reranking will not work\n", __func__);
|
|
908
|
-
ok = false;
|
|
909
|
-
}
|
|
910
|
-
|
|
911
|
-
if (llama_vocab_sep(vocab) == LLAMA_TOKEN_NULL) {
|
|
912
|
-
LOG_WRN("%s: warning: vocab does not have a SEP token, reranking will not work\n", __func__);
|
|
913
|
-
ok = false;
|
|
914
|
-
}
|
|
915
|
-
|
|
916
|
-
if (!ok) {
|
|
917
|
-
llama_model_free(model);
|
|
918
|
-
|
|
919
|
-
return iparams;
|
|
920
|
-
}
|
|
921
|
-
}
|
|
922
|
-
|
|
923
914
|
auto cparams = common_context_params_to_llama(params);
|
|
924
915
|
|
|
925
916
|
llama_context * lctx = llama_init_from_model(model, cparams);
|
|
@@ -929,7 +920,7 @@ struct common_init_result common_init_from_params(common_params & params) {
|
|
|
929
920
|
return iparams;
|
|
930
921
|
}
|
|
931
922
|
|
|
932
|
-
if (params.ctx_shift && !
|
|
923
|
+
if (params.ctx_shift && !llama_memory_can_shift(llama_get_memory(lctx))) {
|
|
933
924
|
LOG_WRN("%s: KV cache shifting is not supported for this context, disabling KV cache shifting\n", __func__);
|
|
934
925
|
params.ctx_shift = false;
|
|
935
926
|
}
|
|
@@ -961,6 +952,35 @@ struct common_init_result common_init_from_params(common_params & params) {
|
|
|
961
952
|
}
|
|
962
953
|
}
|
|
963
954
|
|
|
955
|
+
if (llama_pooling_type(lctx) == LLAMA_POOLING_TYPE_RANK) {
|
|
956
|
+
bool ok = true;
|
|
957
|
+
|
|
958
|
+
if (llama_vocab_bos(vocab) == LLAMA_TOKEN_NULL) {
|
|
959
|
+
LOG_WRN("%s: warning: vocab does not have a BOS token, reranking will not work\n", __func__);
|
|
960
|
+
ok = false;
|
|
961
|
+
}
|
|
962
|
+
|
|
963
|
+
bool has_eos = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL;
|
|
964
|
+
bool has_sep = llama_vocab_sep(vocab) != LLAMA_TOKEN_NULL;
|
|
965
|
+
|
|
966
|
+
if (!has_eos && !has_sep) {
|
|
967
|
+
LOG_WRN("%s: warning: vocab does not have an EOS token or SEP token, reranking will not work\n", __func__);
|
|
968
|
+
ok = false;
|
|
969
|
+
} else if (!has_eos) {
|
|
970
|
+
LOG_WRN("%s: warning: vocab does not have an EOS token, using SEP token as fallback\n", __func__);
|
|
971
|
+
} else if (!has_sep) {
|
|
972
|
+
LOG_WRN("%s: warning: vocab does not have a SEP token, reranking will not work\n", __func__);
|
|
973
|
+
ok = false;
|
|
974
|
+
}
|
|
975
|
+
|
|
976
|
+
if (!ok) {
|
|
977
|
+
llama_free(lctx);
|
|
978
|
+
llama_model_free(model);
|
|
979
|
+
|
|
980
|
+
return iparams;
|
|
981
|
+
}
|
|
982
|
+
}
|
|
983
|
+
|
|
964
984
|
// load and optionally apply lora adapters
|
|
965
985
|
for (auto & la : params.lora_adapters) {
|
|
966
986
|
llama_adapter_lora_ptr lora;
|
|
@@ -1036,7 +1056,7 @@ struct common_init_result common_init_from_params(common_params & params) {
|
|
|
1036
1056
|
if (llama_model_has_decoder(model)) {
|
|
1037
1057
|
llama_decode(lctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) params.n_batch)));
|
|
1038
1058
|
}
|
|
1039
|
-
|
|
1059
|
+
llama_memory_clear(llama_get_memory(lctx), true);
|
|
1040
1060
|
llama_synchronize(lctx);
|
|
1041
1061
|
llama_perf_context_reset(lctx);
|
|
1042
1062
|
llama_set_warmup(lctx, false);
|
|
@@ -1139,11 +1159,6 @@ struct llama_context_params common_context_params_to_llama(const common_params &
|
|
|
1139
1159
|
cparams.op_offload = !params.no_op_offload;
|
|
1140
1160
|
cparams.swa_full = params.swa_full;
|
|
1141
1161
|
|
|
1142
|
-
if (params.reranking) {
|
|
1143
|
-
cparams.embeddings = true;
|
|
1144
|
-
cparams.pooling_type = LLAMA_POOLING_TYPE_RANK;
|
|
1145
|
-
}
|
|
1146
|
-
|
|
1147
1162
|
cparams.type_k = params.cache_type_k;
|
|
1148
1163
|
cparams.type_v = params.cache_type_v;
|
|
1149
1164
|
|
|
@@ -1276,6 +1291,9 @@ std::vector<llama_token> common_tokenize(
|
|
|
1276
1291
|
int n_tokens = text.length() + 2 * add_special;
|
|
1277
1292
|
std::vector<llama_token> result(n_tokens);
|
|
1278
1293
|
n_tokens = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
|
|
1294
|
+
if (n_tokens == std::numeric_limits<int32_t>::min()) {
|
|
1295
|
+
throw std::runtime_error("Tokenization failed: input text too large, tokenization result exceeds int32_t limit");
|
|
1296
|
+
}
|
|
1279
1297
|
if (n_tokens < 0) {
|
|
1280
1298
|
result.resize(-n_tokens);
|
|
1281
1299
|
int check = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
|
|
@@ -115,7 +115,7 @@ enum common_grammar_trigger_type {
|
|
|
115
115
|
COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN,
|
|
116
116
|
COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
|
|
117
117
|
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
|
|
118
|
-
|
|
118
|
+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
|
|
119
119
|
};
|
|
120
120
|
|
|
121
121
|
struct common_grammar_trigger {
|
|
@@ -199,6 +199,9 @@ struct common_params_speculative {
|
|
|
199
199
|
float p_split = 0.1f; // speculative decoding split probability
|
|
200
200
|
float p_min = 0.75f; // minimum speculative decoding probability (greedy)
|
|
201
201
|
|
|
202
|
+
ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K
|
|
203
|
+
ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V
|
|
204
|
+
|
|
202
205
|
struct cpu_params cpuparams;
|
|
203
206
|
struct cpu_params cpuparams_batch;
|
|
204
207
|
|
|
@@ -215,7 +218,8 @@ struct common_params_vocoder {
|
|
|
215
218
|
|
|
216
219
|
enum common_reasoning_format {
|
|
217
220
|
COMMON_REASONING_FORMAT_NONE,
|
|
218
|
-
|
|
221
|
+
COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY, // Extract thinking tag contents and return as `message.reasoning_content`, or leave inline in <think> tags in stream mode
|
|
222
|
+
COMMON_REASONING_FORMAT_DEEPSEEK, // Extract thinking tag contents and return as `message.reasoning_content`, including in streaming deltas.
|
|
219
223
|
};
|
|
220
224
|
|
|
221
225
|
struct common_params {
|
|
@@ -292,6 +296,7 @@ struct common_params {
|
|
|
292
296
|
int32_t verbosity = 0;
|
|
293
297
|
int32_t control_vector_layer_start = -1; // layer range for control vector
|
|
294
298
|
int32_t control_vector_layer_end = -1; // layer range for control vector
|
|
299
|
+
bool offline = false;
|
|
295
300
|
|
|
296
301
|
int32_t ppl_stride = 0; // stride for perplexity calculations. If left at 0, the pre-existing approach will be used.
|
|
297
302
|
int32_t ppl_output_type = 0; // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line
|
|
@@ -354,7 +359,7 @@ struct common_params {
|
|
|
354
359
|
int32_t embd_normalize = 2; // normalisation for embeddings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)
|
|
355
360
|
std::string embd_out = ""; // empty = default, "array" = [[],[]...], "json" = openai style, "json+" = same "json" + cosine similarity matrix
|
|
356
361
|
std::string embd_sep = "\n"; // separator of embeddings
|
|
357
|
-
|
|
362
|
+
std::string cls_sep = "\t"; // separator of classification sequences
|
|
358
363
|
|
|
359
364
|
// server params
|
|
360
365
|
int32_t port = 8080; // server listens on this network port
|
|
@@ -369,6 +374,7 @@ struct common_params {
|
|
|
369
374
|
bool use_jinja = false; // NOLINT
|
|
370
375
|
bool enable_chat_template = true;
|
|
371
376
|
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
|
377
|
+
int reasoning_budget = -1;
|
|
372
378
|
bool prefill_assistant = true; // if true, any trailing assistant message will be prefilled into the response
|
|
373
379
|
|
|
374
380
|
std::vector<std::string> api_keys;
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
#include "json-partial.h"
|
|
2
|
+
|
|
3
|
+
#include "log.h"
|
|
4
|
+
|
|
5
|
+
#include <nlohmann/json.hpp>
|
|
6
|
+
|
|
7
|
+
#include <string>
|
|
8
|
+
|
|
9
|
+
using json = nlohmann::ordered_json;
|
|
10
|
+
|
|
11
|
+
enum common_json_stack_element_type {
|
|
12
|
+
COMMON_JSON_STACK_ELEMENT_OBJECT,
|
|
13
|
+
COMMON_JSON_STACK_ELEMENT_KEY,
|
|
14
|
+
COMMON_JSON_STACK_ELEMENT_ARRAY,
|
|
15
|
+
};
|
|
16
|
+
|
|
17
|
+
struct common_json_stack_element {
|
|
18
|
+
common_json_stack_element_type type;
|
|
19
|
+
std::string key;
|
|
20
|
+
};
|
|
21
|
+
|
|
22
|
+
bool common_json_parse(
|
|
23
|
+
const std::string & input,
|
|
24
|
+
const std::string & healing_marker,
|
|
25
|
+
common_json & out)
|
|
26
|
+
{
|
|
27
|
+
std::string::const_iterator it = input.begin();
|
|
28
|
+
const auto end = input.end();
|
|
29
|
+
return common_json_parse(it, end, healing_marker, out);
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
bool common_json_parse(
|
|
33
|
+
std::string::const_iterator & it,
|
|
34
|
+
const std::string::const_iterator & end,
|
|
35
|
+
const std::string & healing_marker,
|
|
36
|
+
common_json & out)
|
|
37
|
+
{
|
|
38
|
+
// // https://json.nlohmann.me/features/parsing/sax_interface/
|
|
39
|
+
struct json_error_locator : public nlohmann::json_sax<json> {
|
|
40
|
+
std::size_t position;
|
|
41
|
+
bool found_error;
|
|
42
|
+
std::string last_token;
|
|
43
|
+
std::string exception_message;
|
|
44
|
+
std::vector<common_json_stack_element> stack;
|
|
45
|
+
|
|
46
|
+
json_error_locator() : position(0), found_error(false) {}
|
|
47
|
+
|
|
48
|
+
bool parse_error(std::size_t position, const std::string & last_token, const json::exception & ex) override { // NOLINT
|
|
49
|
+
this->position = position - 1;
|
|
50
|
+
this->found_error = true;
|
|
51
|
+
this->last_token = last_token;
|
|
52
|
+
this->exception_message = ex.what();
|
|
53
|
+
return false;
|
|
54
|
+
}
|
|
55
|
+
void close_value() {
|
|
56
|
+
if (!stack.empty() && (stack.back().type == COMMON_JSON_STACK_ELEMENT_KEY)) {
|
|
57
|
+
stack.pop_back();
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
bool null() override { // NOLINT
|
|
61
|
+
close_value();
|
|
62
|
+
return true;
|
|
63
|
+
}
|
|
64
|
+
bool boolean(bool) override { // NOLINT
|
|
65
|
+
close_value();
|
|
66
|
+
return true;
|
|
67
|
+
}
|
|
68
|
+
bool number_integer(number_integer_t) override { // NOLINT
|
|
69
|
+
close_value();
|
|
70
|
+
return true;
|
|
71
|
+
}
|
|
72
|
+
bool number_unsigned(number_unsigned_t) override { // NOLINT
|
|
73
|
+
close_value();
|
|
74
|
+
return true;
|
|
75
|
+
}
|
|
76
|
+
bool number_float(number_float_t, const string_t &) override { // NOLINT
|
|
77
|
+
close_value();
|
|
78
|
+
return true;
|
|
79
|
+
}
|
|
80
|
+
bool string(string_t &) override { // NOLINT
|
|
81
|
+
close_value();
|
|
82
|
+
return true;
|
|
83
|
+
}
|
|
84
|
+
bool binary(binary_t &) override { // NOLINT
|
|
85
|
+
close_value();
|
|
86
|
+
return true;
|
|
87
|
+
}
|
|
88
|
+
bool start_object(std::size_t) override { // NOLINT
|
|
89
|
+
stack.push_back({COMMON_JSON_STACK_ELEMENT_OBJECT, ""});
|
|
90
|
+
return true;
|
|
91
|
+
}
|
|
92
|
+
bool end_object() override {
|
|
93
|
+
GGML_ASSERT(!stack.empty() && stack.back().type == COMMON_JSON_STACK_ELEMENT_OBJECT);
|
|
94
|
+
stack.pop_back();
|
|
95
|
+
close_value();
|
|
96
|
+
return true;
|
|
97
|
+
}
|
|
98
|
+
bool key(string_t & key) override { // NOLINT
|
|
99
|
+
stack.push_back({COMMON_JSON_STACK_ELEMENT_KEY, key});
|
|
100
|
+
return true;
|
|
101
|
+
}
|
|
102
|
+
bool start_array(std::size_t) override { // NOLINT
|
|
103
|
+
stack.push_back({COMMON_JSON_STACK_ELEMENT_ARRAY, ""});
|
|
104
|
+
return true;
|
|
105
|
+
}
|
|
106
|
+
bool end_array() override {
|
|
107
|
+
GGML_ASSERT(!stack.empty() && stack.back().type == COMMON_JSON_STACK_ELEMENT_ARRAY);
|
|
108
|
+
stack.pop_back();
|
|
109
|
+
close_value();
|
|
110
|
+
return true;
|
|
111
|
+
}
|
|
112
|
+
};
|
|
113
|
+
json_error_locator err_loc;
|
|
114
|
+
auto start = it;
|
|
115
|
+
json::sax_parse(it, end, &err_loc);
|
|
116
|
+
|
|
117
|
+
if (err_loc.found_error) {
|
|
118
|
+
it = start;
|
|
119
|
+
auto temptative_end = it + err_loc.position;
|
|
120
|
+
// LOG_DBG("Error at position %zu (is_end = %s): %s\n", err_loc.position, temptative_end == end ? "true" : "false", err_loc.exception_message.c_str());
|
|
121
|
+
|
|
122
|
+
auto input = std::string(it, temptative_end);
|
|
123
|
+
try {
|
|
124
|
+
out.json = json::parse(input);
|
|
125
|
+
// out.json = json::parse(it, temptative_end);
|
|
126
|
+
it = temptative_end;
|
|
127
|
+
return true;
|
|
128
|
+
} catch (const std::exception & ex) {
|
|
129
|
+
// No, needs healing.
|
|
130
|
+
LOG_DBG("Failed to parse up to error: %s: <<<%s>>>\n", ex.what(), std::string(it, temptative_end).c_str());
|
|
131
|
+
}
|
|
132
|
+
auto can_parse = [](const std::string & str) {
|
|
133
|
+
try {
|
|
134
|
+
auto _ = json::parse(str); // NOLINT
|
|
135
|
+
return true;
|
|
136
|
+
} catch (const std::exception &) {
|
|
137
|
+
return false;
|
|
138
|
+
}
|
|
139
|
+
};
|
|
140
|
+
if (!healing_marker.empty() && !err_loc.stack.empty()) {
|
|
141
|
+
std::string str(it, temptative_end);
|
|
142
|
+
auto last_non_sp_pos = str.find_last_not_of(" \n\r\t");
|
|
143
|
+
if (last_non_sp_pos == std::string::npos) {
|
|
144
|
+
throw std::runtime_error("Cannot heal a truncated JSON that stopped in an unknown location");
|
|
145
|
+
}
|
|
146
|
+
auto last_non_sp_char = str[last_non_sp_pos];
|
|
147
|
+
// Used to detect stops on a number, which may not be complete.
|
|
148
|
+
auto was_maybe_number = [&]() {
|
|
149
|
+
if (!str.empty() && std::isspace(str.back())) {
|
|
150
|
+
return false;
|
|
151
|
+
}
|
|
152
|
+
return std::isdigit(last_non_sp_char) ||
|
|
153
|
+
last_non_sp_char == '.' ||
|
|
154
|
+
last_non_sp_char == 'e' ||
|
|
155
|
+
last_non_sp_char == 'E' ||
|
|
156
|
+
last_non_sp_char == '-';
|
|
157
|
+
};
|
|
158
|
+
|
|
159
|
+
std::string closing;
|
|
160
|
+
for (size_t i = err_loc.stack.size(); i > 0; i--) {
|
|
161
|
+
auto & el = err_loc.stack[i - 1];
|
|
162
|
+
if (el.type == COMMON_JSON_STACK_ELEMENT_OBJECT) {
|
|
163
|
+
closing += "}";
|
|
164
|
+
} else if (el.type == COMMON_JSON_STACK_ELEMENT_ARRAY) {
|
|
165
|
+
closing += "]";
|
|
166
|
+
} else if (el.type != COMMON_JSON_STACK_ELEMENT_KEY) {
|
|
167
|
+
throw std::runtime_error("Unexpected stack element type");
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
const auto & magic_seed = out.healing_marker.marker = healing_marker;//"$llama.cpp.json$";
|
|
172
|
+
|
|
173
|
+
if (err_loc.stack.back().type == COMMON_JSON_STACK_ELEMENT_KEY) {
|
|
174
|
+
// We're inside an object value
|
|
175
|
+
if (last_non_sp_char == ':' && can_parse(str + "1" + closing)) {
|
|
176
|
+
// Was about to create an object value
|
|
177
|
+
str += (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
|
|
178
|
+
} else if (can_parse(str + ": 1" + closing)) {
|
|
179
|
+
str += (out.healing_marker.json_dump_marker = ":\"" + magic_seed) + "\"" + closing;
|
|
180
|
+
} else if (last_non_sp_char == '{' && can_parse(str + closing)) {
|
|
181
|
+
// Was about to create an object
|
|
182
|
+
str += (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\": 1" + closing;
|
|
183
|
+
} else if (can_parse(str + "\"" + closing)) {
|
|
184
|
+
// Was inside an object value string
|
|
185
|
+
str += (out.healing_marker.json_dump_marker = magic_seed) + "\"" + closing;
|
|
186
|
+
} else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\"" + closing)) {
|
|
187
|
+
// Was inside an object value string after an escape
|
|
188
|
+
str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\"" + closing;
|
|
189
|
+
} else {
|
|
190
|
+
// find last :
|
|
191
|
+
auto last_pos = str.find_last_of(':');
|
|
192
|
+
if (last_pos == std::string::npos) {
|
|
193
|
+
throw std::runtime_error("Cannot heal a truncated JSON that stopped in an unknown location");
|
|
194
|
+
}
|
|
195
|
+
// Cutting back to opening : for object value
|
|
196
|
+
str = str.substr(0, last_pos + 1) + (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
|
|
197
|
+
}
|
|
198
|
+
} else if (err_loc.stack.back().type == COMMON_JSON_STACK_ELEMENT_ARRAY) {
|
|
199
|
+
if ((last_non_sp_char == ',' || last_non_sp_char == '[') && can_parse(str + "1" + closing)) {
|
|
200
|
+
// Was about to create an array value
|
|
201
|
+
str += (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
|
|
202
|
+
} else if (can_parse(str + "\"" + closing)) {
|
|
203
|
+
// Was inside an array value string
|
|
204
|
+
str += (out.healing_marker.json_dump_marker = magic_seed) + "\"" + closing;
|
|
205
|
+
} else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\"" + closing)) {
|
|
206
|
+
// Was inside an array value string after an escape
|
|
207
|
+
str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\"" + closing;
|
|
208
|
+
} else if (!was_maybe_number() && can_parse(str + ", 1" + closing)) {
|
|
209
|
+
// Had just finished a value
|
|
210
|
+
str += (out.healing_marker.json_dump_marker = ",\"" + magic_seed) + "\"" + closing;
|
|
211
|
+
} else {
|
|
212
|
+
auto last_pos = str.find_last_of("[,");
|
|
213
|
+
if (last_pos == std::string::npos) {
|
|
214
|
+
throw std::runtime_error("Cannot heal a truncated JSON array stopped in an unknown location");
|
|
215
|
+
}
|
|
216
|
+
// Cutting back to last [ or , for array value
|
|
217
|
+
str = str.substr(0, last_pos + 1) + (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
|
|
218
|
+
}
|
|
219
|
+
} else if (err_loc.stack.back().type == COMMON_JSON_STACK_ELEMENT_OBJECT) {
|
|
220
|
+
if ((last_non_sp_char == '{' && can_parse(str + closing)) ||
|
|
221
|
+
(last_non_sp_char == ',' && can_parse(str + "\"\": 1" + closing))) {
|
|
222
|
+
// Was about to create an object key+value
|
|
223
|
+
str += (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\": 1" + closing;
|
|
224
|
+
} else if (!was_maybe_number() && can_parse(str + ",\"\": 1" + closing)) {
|
|
225
|
+
// Had just finished a value: the next member would start with a comma followed by a new key+value
|
|
226
|
+
str += (out.healing_marker.json_dump_marker = ",\"" + magic_seed) + "\": 1" + closing;
|
|
227
|
+
} else if (can_parse(str + "\": 1" + closing)) {
|
|
228
|
+
// Was inside an object key string
|
|
229
|
+
str += (out.healing_marker.json_dump_marker = magic_seed) + "\": 1" + closing;
|
|
230
|
+
} else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\": 1" + closing)) {
|
|
231
|
+
// Was inside an object key string after an escape
|
|
232
|
+
str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\": 1" + closing;
|
|
233
|
+
} else {
|
|
234
|
+
auto last_pos = str.find_last_of(':');
|
|
235
|
+
if (last_pos == std::string::npos) {
|
|
236
|
+
throw std::runtime_error("Cannot heal a truncated JSON object stopped in an unknown location");
|
|
237
|
+
}
|
|
238
|
+
// fprintf(stderr, "Cutting back to last : for object key+value\n");
|
|
239
|
+
str = str.substr(0, last_pos + 1) + (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
|
|
240
|
+
}
|
|
241
|
+
} else {
|
|
242
|
+
throw std::runtime_error("Cannot heal a truncated JSON object stopped in an unknown location");
|
|
243
|
+
}
|
|
244
|
+
// fprintf(stderr, "HEALED:\nSTRING <<<\n%s\n>>>\n\nmagic_cut: <<<\n%s\n>>>\n\n", str.c_str(), out.healing_marker.json_dump_marker.c_str());
|
|
245
|
+
out.json = json::parse(str);
|
|
246
|
+
it = temptative_end;
|
|
247
|
+
return true;
|
|
248
|
+
}
|
|
249
|
+
// TODO: handle unclosed top-level primitive if the stack was empty but we got an error (e.g. "tru", "\"", etc...)
|
|
250
|
+
// fprintf(stderr, "Closing: TODO\n");
|
|
251
|
+
return false;
|
|
252
|
+
}
|
|
253
|
+
out.json = json::parse(it, end);
|
|
254
|
+
it = end;
|
|
255
|
+
return true;
|
|
256
|
+
}
|