cui-llama.rn 1.7.3 → 1.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276)
  1. package/README.md +217 -17
  2. package/android/src/main/CMakeLists.txt +34 -15
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +94 -8
  4. package/android/src/main/java/com/rnllama/RNLlama.java +247 -0
  5. package/android/src/main/jni.cpp +213 -14
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  14. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +35 -0
  15. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +34 -0
  16. package/cpp/README.md +1 -1
  17. package/cpp/chat-parser.cpp +385 -0
  18. package/cpp/chat-parser.h +120 -0
  19. package/cpp/chat.cpp +726 -596
  20. package/cpp/chat.h +71 -6
  21. package/cpp/common.cpp +56 -38
  22. package/cpp/common.h +9 -3
  23. package/cpp/ggml-backend-reg.cpp +5 -0
  24. package/cpp/ggml-backend.cpp +10 -2
  25. package/cpp/ggml-common.h +4 -0
  26. package/cpp/ggml-cpu/amx/amx.cpp +1 -1
  27. package/cpp/ggml-cpu/amx/mmq.cpp +11 -10
  28. package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  29. package/cpp/ggml-cpu/arch/arm/quants.c +4114 -0
  30. package/cpp/ggml-cpu/arch/arm/repack.cpp +2163 -0
  31. package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  32. package/cpp/ggml-cpu/arch/x86/quants.c +4311 -0
  33. package/cpp/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
  34. package/cpp/ggml-cpu/arch-fallback.h +184 -0
  35. package/cpp/ggml-cpu/common.h +4 -3
  36. package/cpp/ggml-cpu/ggml-cpu-impl.h +21 -16
  37. package/cpp/ggml-cpu/ggml-cpu.c +123 -104
  38. package/cpp/ggml-cpu/ggml-cpu.cpp +11 -8
  39. package/cpp/ggml-cpu/ops.cpp +330 -148
  40. package/cpp/ggml-cpu/ops.h +1 -0
  41. package/cpp/ggml-cpu/quants.c +1158 -0
  42. package/cpp/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
  43. package/cpp/ggml-cpu/repack.cpp +1571 -0
  44. package/cpp/ggml-cpu/repack.h +98 -0
  45. package/cpp/ggml-cpu/simd-mappings.h +330 -38
  46. package/cpp/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
  47. package/cpp/ggml-cpu/vec.cpp +87 -18
  48. package/cpp/ggml-cpu/vec.h +249 -94
  49. package/cpp/ggml-cpu.h +1 -0
  50. package/cpp/ggml-impl.h +63 -183
  51. package/cpp/ggml-llama-sim.metallib +0 -0
  52. package/cpp/ggml-llama.metallib +0 -0
  53. package/cpp/ggml-metal.m +152 -45
  54. package/cpp/ggml-quants.c +0 -2
  55. package/cpp/ggml.c +61 -21
  56. package/cpp/ggml.h +22 -3
  57. package/cpp/gguf.cpp +24 -3
  58. package/cpp/json-partial.cpp +256 -0
  59. package/cpp/json-partial.h +38 -0
  60. package/cpp/json-schema-to-grammar.cpp +5 -47
  61. package/cpp/json-schema-to-grammar.h +4 -4
  62. package/cpp/llama-arch.cpp +153 -3
  63. package/cpp/llama-arch.h +27 -1
  64. package/cpp/llama-batch.cpp +741 -272
  65. package/cpp/llama-batch.h +112 -54
  66. package/cpp/llama-chat.cpp +30 -8
  67. package/cpp/llama-chat.h +1 -0
  68. package/cpp/llama-context.cpp +524 -339
  69. package/cpp/llama-context.h +38 -17
  70. package/cpp/llama-cparams.cpp +4 -0
  71. package/cpp/llama-cparams.h +2 -0
  72. package/cpp/llama-grammar.cpp +12 -2
  73. package/cpp/llama-graph.cpp +431 -356
  74. package/cpp/llama-graph.h +126 -58
  75. package/cpp/llama-hparams.cpp +10 -2
  76. package/cpp/llama-hparams.h +19 -2
  77. package/cpp/llama-kv-cache-unified-iswa.cpp +279 -0
  78. package/cpp/llama-kv-cache-unified-iswa.h +128 -0
  79. package/cpp/llama-kv-cache-unified.cpp +1841 -0
  80. package/cpp/llama-kv-cache-unified.h +303 -0
  81. package/cpp/llama-kv-cells.h +439 -0
  82. package/cpp/llama-memory-hybrid.cpp +246 -0
  83. package/cpp/llama-memory-hybrid.h +138 -0
  84. package/cpp/llama-memory-recurrent.cpp +1112 -0
  85. package/cpp/llama-memory-recurrent.h +183 -0
  86. package/cpp/llama-memory.cpp +41 -0
  87. package/cpp/llama-memory.h +86 -5
  88. package/cpp/llama-mmap.cpp +1 -1
  89. package/cpp/llama-model-loader.cpp +42 -17
  90. package/cpp/llama-model-saver.cpp +1 -0
  91. package/cpp/llama-model.cpp +1639 -513
  92. package/cpp/llama-model.h +26 -0
  93. package/cpp/llama-sampling.cpp +2 -2
  94. package/cpp/llama-vocab.cpp +65 -28
  95. package/cpp/llama-vocab.h +1 -0
  96. package/cpp/llama.cpp +11 -7
  97. package/cpp/llama.h +150 -42
  98. package/cpp/minja/chat-template.hpp +1 -1
  99. package/cpp/minja/minja.hpp +1 -1
  100. package/cpp/{json.hpp → nlohmann/json.hpp} +3027 -2267
  101. package/cpp/nlohmann/json_fwd.hpp +187 -0
  102. package/cpp/regex-partial.cpp +204 -0
  103. package/cpp/regex-partial.h +56 -0
  104. package/cpp/rn-llama.cpp +646 -35
  105. package/cpp/rn-llama.h +32 -1
  106. package/cpp/rn-tts.h +39 -0
  107. package/cpp/sampling.cpp +7 -8
  108. package/cpp/tools/mtmd/clip-impl.h +5 -0
  109. package/cpp/tools/mtmd/clip.cpp +572 -436
  110. package/cpp/tools/mtmd/clip.h +14 -4
  111. package/cpp/tools/mtmd/mtmd-audio.cpp +0 -86
  112. package/cpp/tools/mtmd/mtmd-audio.h +2 -17
  113. package/cpp/tools/mtmd/mtmd-helper.cpp +175 -12
  114. package/cpp/tools/mtmd/mtmd-helper.h +91 -0
  115. package/cpp/tools/mtmd/mtmd.cpp +368 -248
  116. package/cpp/tools/mtmd/mtmd.h +6 -70
  117. package/cpp/unicode.cpp +5 -0
  118. package/ios/CMakeLists.txt +26 -6
  119. package/ios/RNLlama.h +1 -1
  120. package/ios/RNLlama.mm +153 -3
  121. package/ios/RNLlamaContext.h +9 -1
  122. package/ios/RNLlamaContext.mm +112 -9
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +71 -6
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +9 -3
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +22 -3
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-partial.h +38 -0
  131. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  132. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
  133. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
  134. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  135. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +38 -17
  136. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
  137. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
  138. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
  139. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  140. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  141. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  142. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  143. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  144. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
  145. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +26 -0
  146. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
  147. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +150 -42
  148. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  149. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
  150. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/{json.hpp → nlohmann/json.hpp} +3027 -2267
  151. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  152. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
  153. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
  154. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
  155. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  156. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
  160. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
  161. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
  162. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
  163. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
  164. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
  165. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  166. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
  167. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
  168. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  169. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
  170. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
  171. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
  172. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
  173. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  174. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  175. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  176. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  177. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  178. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
  179. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
  180. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
  181. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
  182. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  183. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
  184. package/ios/rnllama.xcframework/{tvos-arm64/rnllama.framework/Headers → ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
  185. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  186. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
  187. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
  188. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
  189. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  190. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  191. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
  192. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +71 -6
  193. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +9 -3
  194. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
  195. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
  196. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
  197. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +22 -3
  198. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-partial.h +38 -0
  199. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  200. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
  201. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
  202. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  203. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +38 -17
  204. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
  205. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
  206. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
  207. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  208. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  209. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  210. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  211. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  212. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
  213. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +26 -0
  214. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
  215. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +150 -42
  216. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  217. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
  218. package/ios/rnllama.xcframework/{ios-arm64_x86_64-simulator/rnllama.framework/Headers → tvos-arm64/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
  219. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  220. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
  221. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
  222. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
  223. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  224. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  225. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
  226. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
  227. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
  228. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
  229. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
  230. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
  231. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
  232. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
  233. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
  234. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
  235. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
  236. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  237. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
  238. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
  239. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
  240. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
  241. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
  242. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
  243. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
  244. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
  245. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
  246. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
  247. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
  248. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
  249. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
  250. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
  251. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
  252. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json.hpp +25526 -0
  253. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
  254. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
  255. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
  256. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
  257. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  258. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  259. package/jest/mock.js +24 -0
  260. package/package.json +1 -1
  261. package/src/NativeRNLlama.ts +46 -2
  262. package/src/index.ts +105 -1
  263. package/cpp/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  264. package/cpp/ggml-cpu/ggml-cpu-quants.c +0 -13326
  265. package/cpp/ggml-cpu/sgemm.cpp +0 -3544
  266. package/cpp/ggml-cpu/sgemm.h +0 -14
  267. package/cpp/llama-kv-cache.cpp +0 -2827
  268. package/cpp/llama-kv-cache.h +0 -515
  269. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  270. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  271. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  272. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +0 -24766
  273. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
  274. /package/cpp/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
  275. /package/cpp/tools/mtmd/{miniaudio.h → miniaudio/miniaudio.h} +0 -0
  276. /package/cpp/tools/mtmd/{stb_image.h → stb/stb_image.h} +0 -0
package/cpp/ggml.c CHANGED
@@ -61,9 +61,6 @@
 #define m512i(p) (__m512i)(p)
 #endif
 
-// precomputed f32 table for f16 (256 KB) (ggml-impl.h)
-float lm_ggml_table_f32_f16[1 << 16];
-
 #if defined(__linux__) || \
     defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
     (defined(__APPLE__) && !TARGET_OS_TV && !TARGET_OS_WATCH)
@@ -133,7 +130,7 @@ static void lm_ggml_print_backtrace_symbols(void) {
 }
 #endif
 
-static void lm_ggml_print_backtrace(void) {
+void lm_ggml_print_backtrace(void) {
     const char * LM_GGML_NO_BACKTRACE = getenv("LM_GGML_NO_BACKTRACE");
     if (LM_GGML_NO_BACKTRACE) {
         return;
@@ -160,6 +157,10 @@ static void lm_ggml_print_backtrace(void) {
     const int parent_pid = getpid();
     const int child_pid = fork();
     if (child_pid < 0) { // error
+#if defined(__linux__)
+        close(lock[1]);
+        close(lock[0]);
+#endif
         return;
     } else if (child_pid == 0) { // child
         char attach[32];
@@ -167,6 +168,7 @@ static void lm_ggml_print_backtrace(void) {
 #if defined(__linux__)
         close(lock[1]);
         (void) !read(lock[0], lock, 1);
+        close(lock[0]);
 #endif
         // try gdb
         execlp("gdb", "gdb", "--batch",
@@ -195,7 +197,7 @@ static void lm_ggml_print_backtrace(void) {
     }
 }
 #else
-static void lm_ggml_print_backtrace(void) {
+void lm_ggml_print_backtrace(void) {
     // platform not supported
 }
 #endif
@@ -216,6 +218,8 @@ void lm_ggml_abort(const char * file, int line, const char * fmt, ...) {
     abort();
 }
 
+// lm_ggml_print_backtrace is registered with std::set_terminate by ggml.cpp
+
 //
 // logging
 //
@@ -894,12 +898,6 @@ struct lm_ggml_context {
     struct lm_ggml_object * objects_end;
 };
 
-struct lm_ggml_context_container {
-    bool used;
-
-    struct lm_ggml_context context;
-};
-
 //
 // data types
 //
@@ -967,6 +965,7 @@ static const char * LM_GGML_OP_NAME[LM_GGML_OP_COUNT] = {
     "UPSCALE",
     "PAD",
     "PAD_REFLECT_1D",
+    "ROLL",
     "ARANGE",
     "TIMESTEP_EMBEDDING",
     "ARGSORT",
@@ -997,7 +996,7 @@ static const char * LM_GGML_OP_NAME[LM_GGML_OP_COUNT] = {
     "OPT_STEP_ADAMW",
 };
 
-static_assert(LM_GGML_OP_COUNT == 82, "LM_GGML_OP_COUNT != 82");
+static_assert(LM_GGML_OP_COUNT == 83, "LM_GGML_OP_COUNT != 83");
 
 static const char * LM_GGML_OP_SYMBOL[LM_GGML_OP_COUNT] = {
     "none",
@@ -1062,6 +1061,7 @@ static const char * LM_GGML_OP_SYMBOL[LM_GGML_OP_COUNT] = {
     "upscale(x)",
     "pad(x)",
     "pad_reflect_1d(x)",
+    "roll(x)",
     "arange(start, stop, step)",
     "timestep_embedding(timesteps, dim, max_period)",
     "argsort(x)",
@@ -1092,7 +1092,7 @@ static const char * LM_GGML_OP_SYMBOL[LM_GGML_OP_COUNT] = {
     "adamw(x)",
 };
 
-static_assert(LM_GGML_OP_COUNT == 82, "LM_GGML_OP_COUNT != 82");
+static_assert(LM_GGML_OP_COUNT == 83, "LM_GGML_OP_COUNT != 83");
 
 static_assert(LM_GGML_OP_POOL_COUNT == 2, "LM_GGML_OP_POOL_COUNT != 2");
 
@@ -1432,14 +1432,6 @@ struct lm_ggml_context * lm_ggml_init(struct lm_ggml_init_params params) {
         // initialize time system (required on Windows)
         lm_ggml_time_init();
 
-        for (int i = 0; i < (1 << 16); ++i) {
-            union {
-                uint16_t u16;
-                lm_ggml_fp16_t fp16;
-            } u = {i};
-            lm_ggml_table_f32_f16[i] = LM_GGML_COMPUTE_FP16_TO_FP32(u.fp16);
-        }
-
         is_first_call = false;
     }
 
@@ -2325,6 +2317,26 @@ struct lm_ggml_tensor * lm_ggml_repeat(
     return result;
 }
 
+struct lm_ggml_tensor * lm_ggml_repeat_4d(
+        struct lm_ggml_context * ctx,
+        struct lm_ggml_tensor * a,
+        int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
+    const bool can_repeat = lm_ggml_is_empty(a) || (
+        (ne0 % a->ne[0] == 0) &&
+        (ne1 % a->ne[1] == 0) &&
+        (ne2 % a->ne[2] == 0) &&
+        (ne3 % a->ne[3] == 0)
+    );
+    LM_GGML_ASSERT(can_repeat);
+
+    struct lm_ggml_tensor * result = lm_ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
+
+    result->op = LM_GGML_OP_REPEAT;
+    result->src[0] = a;
+
+    return result;
+}
+
 // lm_ggml_repeat_back
 
 struct lm_ggml_tensor * lm_ggml_repeat_back(
@@ -4333,6 +4345,34 @@ struct lm_ggml_tensor * lm_ggml_pad_reflect_1d(
     return result;
 }
 
+// lm_ggml_roll
+
+struct lm_ggml_tensor * lm_ggml_roll(
+        struct lm_ggml_context * ctx,
+        struct lm_ggml_tensor * a,
+        int shift0,
+        int shift1,
+        int shift2,
+        int shift3) {
+    LM_GGML_ASSERT(a->nb[0] == lm_ggml_type_size(a->type));
+    LM_GGML_ASSERT(abs(shift0) < a->ne[0]);
+    LM_GGML_ASSERT(abs(shift1) < a->ne[1]);
+    LM_GGML_ASSERT(abs(shift2) < a->ne[2]);
+    LM_GGML_ASSERT(abs(shift3) < a->ne[3]);
+
+    struct lm_ggml_tensor * result = lm_ggml_dup_tensor(ctx, a);
+
+    lm_ggml_set_op_params_i32(result, 0, shift0);
+    lm_ggml_set_op_params_i32(result, 1, shift1);
+    lm_ggml_set_op_params_i32(result, 2, shift2);
+    lm_ggml_set_op_params_i32(result, 3, shift3);
+
+    result->op = LM_GGML_OP_ROLL;
+    result->src[0] = a;
+
+    return result;
+}
+
 // lm_ggml_arange
 
 struct lm_ggml_tensor * lm_ggml_arange(
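Note: lm_ggml_roll above only records the per-dimension shifts as op params on the result node; the actual wrap-around copy is performed by the backend kernels updated elsewhere in this diff (e.g. ggml-cpu/ops.cpp). A minimal sketch of the index mapping the operator describes, assuming the usual modular wrap:

// Illustration only (not part of the package): the wrap-around mapping implied
// by lm_ggml_roll for a single dimension of size n shifted by s. The element at
// source index i lands at destination ((i + s) mod n + n) mod n, so anything
// pushed past the end re-enters at index 0. The helper name is hypothetical.
static inline int rolled_dst_index(int i, int s, int n) {
    return ((i + s) % n + n) % n;
}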
package/cpp/ggml.h CHANGED
@@ -490,6 +490,7 @@ extern "C" {
         LM_GGML_OP_UPSCALE, // nearest interpolate
         LM_GGML_OP_PAD,
         LM_GGML_OP_PAD_REFLECT_1D,
+        LM_GGML_OP_ROLL,
         LM_GGML_OP_ARANGE,
         LM_GGML_OP_TIMESTEP_EMBEDDING,
         LM_GGML_OP_ARGSORT,
@@ -936,6 +937,15 @@ extern "C" {
             struct lm_ggml_tensor * a,
             struct lm_ggml_tensor * b);
 
+    // repeat a to the specified shape
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_repeat_4d(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor * a,
+            int64_t ne0,
+            int64_t ne1,
+            int64_t ne2,
+            int64_t ne3);
+
     // sums repetitions in a into shape of b
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_repeat_back(
             struct lm_ggml_context * ctx,
@@ -1793,6 +1803,17 @@ extern "C" {
             int p0,
             int p1);
 
+    // Move tensor elements by an offset given for each dimension. Elements that
+    // are shifted beyond the last position are wrapped around to the beginning.
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_roll(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor * a,
+            int shift0,
+            int shift1,
+            int shift2,
+            int shift3);
+
+
     // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
     // timesteps: [N,]
     // return: [N, dim]
@@ -2087,9 +2108,6 @@ extern "C" {
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_graph_get_grad (const struct lm_ggml_cgraph * cgraph, const struct lm_ggml_tensor * node);
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_graph_get_grad_acc(const struct lm_ggml_cgraph * cgraph, const struct lm_ggml_tensor * node);
 
-    LM_GGML_API void lm_ggml_graph_export(const struct lm_ggml_cgraph * cgraph, const char * fname);
-    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_graph_import(const char * fname, struct lm_ggml_context ** ctx_data, struct lm_ggml_context ** ctx_eval);
-
     // print info and performance information for the graph
     LM_GGML_API void lm_ggml_graph_print(const struct lm_ggml_cgraph * cgraph);
 
@@ -2173,6 +2191,7 @@ extern "C" {
 
     // scheduling priorities
     enum lm_ggml_sched_priority {
+        LM_GGML_SCHED_PRIO_LOW = -1,
         LM_GGML_SCHED_PRIO_NORMAL,
         LM_GGML_SCHED_PRIO_MEDIUM,
         LM_GGML_SCHED_PRIO_HIGH,
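A hedged usage sketch of the two API additions declared above (lm_ggml_repeat_4d and lm_ggml_roll). The lm_ggml_init_params fields, lm_ggml_new_tensor_2d, LM_GGML_TYPE_F32 and lm_ggml_free are assumed to follow the usual lm_-prefixed ggml naming used throughout this package; they are not shown in this diff.

// Sketch only: building the new ops on a small context.
#include "ggml.h"

static void roll_and_repeat_sketch(void) {
    struct lm_ggml_init_params params = {
        /*.mem_size   =*/ 16u * 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct lm_ggml_context * ctx = lm_ggml_init(params);

    // 4x3 F32 tensor (assumed helper lm_ggml_new_tensor_2d)
    struct lm_ggml_tensor * a = lm_ggml_new_tensor_2d(ctx, LM_GGML_TYPE_F32, 4, 3);

    // shift dim 0 by one position, wrapping the last element back to the front
    struct lm_ggml_tensor * rolled = lm_ggml_roll(ctx, a, 1, 0, 0, 0);

    // tile a to 8x6x1x1; each target extent must be a multiple of a's extent
    struct lm_ggml_tensor * tiled = lm_ggml_repeat_4d(ctx, a, 8, 6, 1, 1);

    (void) rolled;
    (void) tiled;
    lm_ggml_free(ctx);
}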
package/cpp/gguf.cpp CHANGED
@@ -335,7 +335,11 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
 
     for (uint32_t i = 0; i < magic.size(); i++) {
         if (magic[i] != LM_GGUF_MAGIC[i]) {
-            LM_GGML_LOG_ERROR("%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
+            char c0 = isprint(magic[0]) ? magic[0] : '?';
+            char c1 = isprint(magic[1]) ? magic[1] : '?';
+            char c2 = isprint(magic[2]) ? magic[2] : '?';
+            char c3 = isprint(magic[3]) ? magic[3] : '?';
+            LM_GGML_LOG_ERROR("%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, c0, c1, c2, c3);
             lm_gguf_free(ctx);
             return nullptr;
         }
@@ -347,11 +351,28 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
     int64_t n_tensors = 0;
 
     if (ok && gr.read(ctx->version)) {
-        if (ctx->version == 1) {
+        if (ok && ctx->version == 0) {
+            LM_GGML_LOG_ERROR("%s: bad GGUF version: %" PRIu32 "\n", __func__, ctx->version);
+            ok = false;
+        }
+
+        /*
+         * bit layout is different when reading non-native endian models.
+         * assuming that the GGUF version is 3, the non-native endian model
+         * would read it as 0x30000000. we can use the AND operation against
+         * the last 4 hexadecimal digits to check if the model is the same
+         * endianness as the host system.
+         */
+        if (ok && (ctx->version & 0x0000FFFF) == 0x00000000) {
+            LM_GGML_LOG_ERROR("%s: failed to load model: this GGUF file version %" PRIu32 " is extremely large, is there a mismatch between the host and model endianness?\n", __func__, ctx->version);
+            ok = false;
+        }
+
+        if (ok && ctx->version == 1) {
             LM_GGML_LOG_ERROR("%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
             ok = false;
         }
-        if (ctx->version > LM_GGUF_VERSION) {
+        if (ok && ctx->version > LM_GGUF_VERSION) {
             LM_GGML_LOG_ERROR("%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
                 __func__, ctx->version, LM_GGUF_VERSION);
             ok = false;
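A standalone illustration (not from the package) of why the new (version & 0x0000FFFF) == 0 guard catches endianness mismatches: a small version number written in the opposite byte order comes back with all of its low 16 bits clear.

#include <cstdint>
#include <cstdio>

// byte-swap a 32-bit value, i.e. what a host with mismatched endianness effectively reads
static uint32_t byteswap32(uint32_t v) {
    return (v >> 24) | ((v >> 8) & 0x0000FF00u) | ((v << 8) & 0x00FF0000u) | (v << 24);
}

int main() {
    const uint32_t native  = 3;                   // GGUF version 3, matching endianness
    const uint32_t swapped = byteswap32(native);  // 0x03000000 on a mismatched host
    std::printf("native rejected: %d, swapped rejected: %d\n",
                (native  & 0x0000FFFFu) == 0,     // 0: accepted
                (swapped & 0x0000FFFFu) == 0);    // 1: rejected as endianness mismatch
    return 0;
}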
package/cpp/json-partial.cpp ADDED
@@ -0,0 +1,256 @@
+#include "json-partial.h"
+
+#include "log.h"
+
+#include "nlohmann/json.hpp"
+
+#include <string>
+
+using json = nlohmann::ordered_json;
+
+enum common_json_stack_element_type {
+    COMMON_JSON_STACK_ELEMENT_OBJECT,
+    COMMON_JSON_STACK_ELEMENT_KEY,
+    COMMON_JSON_STACK_ELEMENT_ARRAY,
+};
+
+struct common_json_stack_element {
+    common_json_stack_element_type type;
+    std::string key;
+};
+
+bool common_json_parse(
+    const std::string & input,
+    const std::string & healing_marker,
+    common_json & out)
+{
+    std::string::const_iterator it = input.begin();
+    const auto end = input.end();
+    return common_json_parse(it, end, healing_marker, out);
+}
+
+bool common_json_parse(
+    std::string::const_iterator & it,
+    const std::string::const_iterator & end,
+    const std::string & healing_marker,
+    common_json & out)
+{
+    // // https://json.nlohmann.me/features/parsing/sax_interface/
+    struct json_error_locator : public nlohmann::json_sax<json> {
+        std::size_t position;
+        bool found_error;
+        std::string last_token;
+        std::string exception_message;
+        std::vector<common_json_stack_element> stack;
+
+        json_error_locator() : position(0), found_error(false) {}
+
+        bool parse_error(std::size_t position, const std::string & last_token, const json::exception & ex) override { // NOLINT
+            this->position = position - 1;
+            this->found_error = true;
+            this->last_token = last_token;
+            this->exception_message = ex.what();
+            return false;
+        }
+        void close_value() {
+            if (!stack.empty() && (stack.back().type == COMMON_JSON_STACK_ELEMENT_KEY)) {
+                stack.pop_back();
+            }
+        }
+        bool null() override { // NOLINT
+            close_value();
+            return true;
+        }
+        bool boolean(bool) override { // NOLINT
+            close_value();
+            return true;
+        }
+        bool number_integer(number_integer_t) override { // NOLINT
+            close_value();
+            return true;
+        }
+        bool number_unsigned(number_unsigned_t) override { // NOLINT
+            close_value();
+            return true;
+        }
+        bool number_float(number_float_t, const string_t &) override { // NOLINT
+            close_value();
+            return true;
+        }
+        bool string(string_t &) override { // NOLINT
+            close_value();
+            return true;
+        }
+        bool binary(binary_t &) override { // NOLINT
+            close_value();
+            return true;
+        }
+        bool start_object(std::size_t) override { // NOLINT
+            stack.push_back({COMMON_JSON_STACK_ELEMENT_OBJECT, ""});
+            return true;
+        }
+        bool end_object() override {
+            LM_GGML_ASSERT(!stack.empty() && stack.back().type == COMMON_JSON_STACK_ELEMENT_OBJECT);
+            stack.pop_back();
+            close_value();
+            return true;
+        }
+        bool key(string_t & key) override { // NOLINT
+            stack.push_back({COMMON_JSON_STACK_ELEMENT_KEY, key});
+            return true;
+        }
+        bool start_array(std::size_t) override { // NOLINT
+            stack.push_back({COMMON_JSON_STACK_ELEMENT_ARRAY, ""});
+            return true;
+        }
+        bool end_array() override {
+            LM_GGML_ASSERT(!stack.empty() && stack.back().type == COMMON_JSON_STACK_ELEMENT_ARRAY);
+            stack.pop_back();
+            close_value();
+            return true;
+        }
+    };
+    json_error_locator err_loc;
+    auto start = it;
+    json::sax_parse(it, end, &err_loc);
+
+    if (err_loc.found_error) {
+        it = start;
+        auto temptative_end = it + err_loc.position;
+        // LOG_DBG("Error at position %zu (is_end = %s): %s\n", err_loc.position, temptative_end == end ? "true" : "false", err_loc.exception_message.c_str());
+
+        auto input = std::string(it, temptative_end);
+        try {
+            out.json = json::parse(input);
+            // out.json = json::parse(it, temptative_end);
+            it = temptative_end;
+            return true;
+        } catch (const std::exception & ex) {
+            // No, needs healing.
+            LOG_DBG("Failed to parse up to error: %s: <<<%s>>>\n", ex.what(), std::string(it, temptative_end).c_str());
+        }
+        auto can_parse = [](const std::string & str) {
+            try {
+                auto _ = json::parse(str); // NOLINT
+                return true;
+            } catch (const std::exception &) {
+                return false;
+            }
+        };
+        if (!healing_marker.empty() && !err_loc.stack.empty()) {
+            std::string str(it, temptative_end);
+            auto last_non_sp_pos = str.find_last_not_of(" \n\r\t");
+            if (last_non_sp_pos == std::string::npos) {
+                throw std::runtime_error("Cannot heal a truncated JSON that stopped in an unknown location");
+            }
+            auto last_non_sp_char = str[last_non_sp_pos];
+            // Used to detect stops on a number, which may not be complete.
+            auto was_maybe_number = [&]() {
+                if (!str.empty() && std::isspace(str.back())) {
+                    return false;
+                }
+                return std::isdigit(last_non_sp_char) ||
+                    last_non_sp_char == '.' ||
+                    last_non_sp_char == 'e' ||
+                    last_non_sp_char == 'E' ||
+                    last_non_sp_char == '-';
+            };
+
+            std::string closing;
+            for (size_t i = err_loc.stack.size(); i > 0; i--) {
+                auto & el = err_loc.stack[i - 1];
+                if (el.type == COMMON_JSON_STACK_ELEMENT_OBJECT) {
+                    closing += "}";
+                } else if (el.type == COMMON_JSON_STACK_ELEMENT_ARRAY) {
+                    closing += "]";
+                } else if (el.type != COMMON_JSON_STACK_ELEMENT_KEY) {
+                    throw std::runtime_error("Unexpected stack element type");
+                }
+            }
+
+            const auto & magic_seed = out.healing_marker.marker = healing_marker;//"$llama.cpp.json$";
+
+            if (err_loc.stack.back().type == COMMON_JSON_STACK_ELEMENT_KEY) {
+                // We're inside an object value
+                if (last_non_sp_char == ':' && can_parse(str + "1" + closing)) {
+                    // Was about to create an object value
+                    str += (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
+                } else if (can_parse(str + ": 1" + closing)) {
+                    str += (out.healing_marker.json_dump_marker = ":\"" + magic_seed) + "\"" + closing;
+                } else if (last_non_sp_char == '{' && can_parse(str + closing)) {
+                    // Was about to create an object
+                    str += (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\": 1" + closing;
+                } else if (can_parse(str + "\"" + closing)) {
+                    // Was inside an object value string
+                    str += (out.healing_marker.json_dump_marker = magic_seed) + "\"" + closing;
+                } else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\"" + closing)) {
+                    // Was inside an object value string after an escape
+                    str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\"" + closing;
+                } else {
+                    // find last :
+                    auto last_pos = str.find_last_of(':');
+                    if (last_pos == std::string::npos) {
+                        throw std::runtime_error("Cannot heal a truncated JSON that stopped in an unknown location");
+                    }
+                    // Cutting back to opening : for object value
+                    str = str.substr(0, last_pos + 1) + (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
+                }
+            } else if (err_loc.stack.back().type == COMMON_JSON_STACK_ELEMENT_ARRAY) {
+                if ((last_non_sp_char == ',' || last_non_sp_char == '[') && can_parse(str + "1" + closing)) {
+                    // Was about to create an array value
+                    str += (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
+                } else if (can_parse(str + "\"" + closing)) {
+                    // Was inside an array value string
+                    str += (out.healing_marker.json_dump_marker = magic_seed) + "\"" + closing;
+                } else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\"" + closing)) {
+                    // Was inside an array value string after an escape
+                    str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\"" + closing;
+                } else if (!was_maybe_number() && can_parse(str + ", 1" + closing)) {
+                    // Had just finished a value
+                    str += (out.healing_marker.json_dump_marker = ",\"" + magic_seed) + "\"" + closing;
+                } else {
+                    auto last_pos = str.find_last_of("[,");
+                    if (last_pos == std::string::npos) {
+                        throw std::runtime_error("Cannot heal a truncated JSON array stopped in an unknown location");
+                    }
+                    // Cutting back to last [ or , for array value
+                    str = str.substr(0, last_pos + 1) + (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
+                }
+            } else if (err_loc.stack.back().type == COMMON_JSON_STACK_ELEMENT_OBJECT) {
+                if ((last_non_sp_char == '{' && can_parse(str + closing)) ||
+                        (last_non_sp_char == ',' && can_parse(str + "\"\": 1" + closing))) {
+                    // Was about to create an object key+value
+                    str += (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\": 1" + closing;
+                } else if (!was_maybe_number() && can_parse(str + ",\"\": 1" + closing)) {
+                    // Was about to create an object key+value
+                    str += (out.healing_marker.json_dump_marker = ",\"" + magic_seed) + "\": 1" + closing;
+                } else if (can_parse(str + "\": 1" + closing)) {
+                    // Was inside an object key string
+                    str += (out.healing_marker.json_dump_marker = magic_seed) + "\": 1" + closing;
+                } else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\": 1" + closing)) {
+                    // Was inside an object key string after an escape
+                    str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\": 1" + closing;
+                } else {
+                    auto last_pos = str.find_last_of(':');
+                    if (last_pos == std::string::npos) {
+                        throw std::runtime_error("Cannot heal a truncated JSON object stopped in an unknown location");
+                    }
+                    // fprintf(stderr, "Cutting back to last : for object key+value\n");
+                    str = str.substr(0, last_pos + 1) + (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
+                }
+            } else {
+                throw std::runtime_error("Cannot heal a truncated JSON object stopped in an unknown location");
+            }
+            // fprintf(stderr, "HEALED:\nSTRING <<<\n%s\n>>>\n\nmagic_cut: <<<\n%s\n>>>\n\n", str.c_str(), out.healing_marker.json_dump_marker.c_str());
+            out.json = json::parse(str);
+            it = temptative_end;
+            return true;
+        }
+        // TODO: handle unclosed top-level primitive if the stack was empty but we got an error (e.g. "tru", "\"", etc...)
+        // fprintf(stderr, "Closing: TODO\n");
+        return false;
+    }
+    out.json = json::parse(it, end);
+    it = end;
+    return true;
+}
package/cpp/json-partial.h ADDED
@@ -0,0 +1,38 @@
+#pragma once
+
+#include "nlohmann/json.hpp"
+
+// Healing marker (empty if the JSON was fully parsed / wasn't healed).
+struct common_healing_marker {
+    // Raw marker.
+    std::string marker;
+
+    // Cutting the `common_json.json.dump()` string at the (only) occurrence of this marker should yield the original partial JSON string (modulo spaces / if it had the same dump format).
+    std::string json_dump_marker;
+};
+
+// Represents a parsed JSON object, with its optional healing marker (a JSON dump fragment that can be used to find the position of healing in the JSON dump string)
+struct common_json {
+    nlohmann::ordered_json json;
+
+    common_healing_marker healing_marker;
+};
+
+// Parse the JSON string, healing (closing) any partial JSON if `healing_marker` is not empty.
+//
+// Healing completes partial JSON strings by adding a (possibly modified) healing marker, then whatever is needed to close the JSON.
+// This allows to parse the resulting healed JSON string, yet be able to cut it again if needed at the healing marker.
+// (this is used when parsing JSON outputs from the models, then crafting partial JSONs for the partial tool calls in OAI format).
+//
+// For instance, parsing `{` with a healing marker `foo` will produce a healed JSON `{"foo":1}`, w/ json_dump_marker = `"foo"` (which can be used to break the JSON again).
+bool common_json_parse(
+    const std::string & input,
+    const std::string & healing_marker,
+    common_json & out);
+
+// Parse the JSON string (see overload above), but advancing an iterator to the end of the input when the (potentially partial) parsing succeeds.
+bool common_json_parse(
+    std::string::const_iterator & it,
+    const std::string::const_iterator & end,
+    const std::string & healing_marker,
+    common_json & out);
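A hedged usage sketch of the common_json_parse API declared above, following its own documentation: heal a truncated JSON fragment (e.g. a partially streamed tool call), then use json_dump_marker to cut the healed part back out of the dump. The fragment and marker strings below are made up for illustration.

#include "json-partial.h"

#include <cstdio>
#include <string>

static void partial_json_sketch() {
    // Truncated object, e.g. the prefix of a streamed tool-call argument.
    const std::string partial = "{\"name\": \"get_weather\", \"arguments\": {\"city\": \"Par";

    common_json out;
    if (common_json_parse(partial, /* healing_marker = */ "$MARKER$", out)) {
        const std::string dump = out.json.dump();
        // Everything from json_dump_marker onwards was synthesized to close the JSON.
        const auto pos = dump.find(out.healing_marker.json_dump_marker);
        const std::string recovered = (pos == std::string::npos) ? dump : dump.substr(0, pos);
        std::printf("healed: %s\nrecovered prefix: %s\n", dump.c_str(), recovered.c_str());
    }
}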
package/cpp/json-schema-to-grammar.cpp CHANGED
@@ -1,8 +1,9 @@
 #include "json-schema-to-grammar.h"
 #include "common.h"
 
+#include "nlohmann/json.hpp"
+
 #include <algorithm>
-#include <fstream>
 #include <map>
 #include <regex>
 #include <sstream>
@@ -40,49 +41,6 @@ static std::string build_repetition(const std::string & item_rule, int min_items
     return result;
 }
 
-/* Minimalistic replacement for std::string_view, which is only available from C++17 onwards */
-class string_view {
-    const std::string & _str;
-    const size_t _start;
-    const size_t _end;
-  public:
-    string_view(const std::string & str, size_t start = 0, size_t end = std::string::npos) : _str(str), _start(start), _end(end == std::string::npos ? str.length() : end) {}
-
-    size_t size() const {
-        return _end - _start;
-    }
-
-    size_t length() const {
-        return size();
-    }
-
-    operator std::string() const {
-        return str();
-    }
-
-    std::string str() const {
-        return _str.substr(_start, _end - _start);
-    }
-
-    string_view substr(size_t pos, size_t len = std::string::npos) const {
-        return string_view(_str, _start + pos, len == std::string::npos ? _end : _start + pos + len);
-    }
-
-    char operator[](size_t pos) const {
-        auto index = _start + pos;
-        if (index >= _end) {
-            throw std::out_of_range("string_view index out of range");
-        }
-        return _str[_start + pos];
-    }
-
-    bool operator==(const string_view & other) const {
-        std::string this_str = *this;
-        std::string other_str = other;
-        return this_str == other_str;
-    }
-};
-
 static void _build_min_max_int(int min_value, int max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
     auto has_min = min_value != std::numeric_limits<int>::min();
     auto has_max = max_value != std::numeric_limits<int>::max();
@@ -111,14 +69,14 @@ static void _build_min_max_int(int min_value, int max_value, std::stringstream &
         }
         out << "}";
     };
-    std::function<void(const string_view &, const string_view &)> uniform_range =
-        [&](const string_view & from, const string_view & to) {
+    std::function<void(const std::string_view &, const std::string_view &)> uniform_range =
+        [&](const std::string_view & from, const std::string_view & to) {
            size_t i = 0;
            while (i < from.length() && i < to.length() && from[i] == to[i]) {
                i++;
            }
            if (i > 0) {
-               out << "\"" << from.substr(0, i).str() << "\"";
+               out << "\"" << from.substr(0, i) << "\"";
            }
            if (i < from.length() && i < to.length()) {
                if (i > 0) {
package/cpp/json-schema-to-grammar.h CHANGED
@@ -1,9 +1,9 @@
 #pragma once
 
-#include "ggml.h"
-// Change JSON_ASSERT from assert() to LM_GGML_ASSERT:
-#define JSON_ASSERT LM_GGML_ASSERT
-#include "json.hpp"
+#include "nlohmann/json_fwd.hpp"
+
+#include <functional>
+#include <string>
 
 std::string json_schema_to_grammar(const nlohmann::ordered_json & schema,
     bool force_gbnf = false);