@fugood/llama.node 0.0.1-alpha.4 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/CMakeLists.txt +36 -7
  2. package/README.md +9 -0
  3. package/bin/darwin/arm64/default.metallib +0 -0
  4. package/bin/darwin/arm64/llama-node.node +0 -0
  5. package/bin/darwin/x64/default.metallib +0 -0
  6. package/bin/darwin/x64/llama-node.node +0 -0
  7. package/bin/linux/arm64/llama-node.node +0 -0
  8. package/bin/linux/x64/llama-node.node +0 -0
  9. package/bin/win32/arm64/llama-node.node +0 -0
  10. package/bin/win32/arm64/node.lib +0 -0
  11. package/bin/win32/x64/llama-node.node +0 -0
  12. package/bin/win32/x64/node.lib +0 -0
  13. package/lib/binding.js +1 -1
  14. package/lib/binding.ts +5 -2
  15. package/lib/index.ts +2 -2
  16. package/package.json +15 -3
  17. package/src/LlamaCompletionWorker.cpp +5 -1
  18. package/src/LlamaCompletionWorker.h +4 -0
  19. package/src/LlamaContext.cpp +18 -1
  20. package/src/common.hpp +11 -7
  21. package/src/llama.cpp/CMakeLists.txt +13 -7
  22. package/src/llama.cpp/common/common.cpp +221 -173
  23. package/src/llama.cpp/common/common.h +19 -8
  24. package/src/llama.cpp/common/json-schema-to-grammar.h +4 -0
  25. package/src/llama.cpp/common/log.h +2 -2
  26. package/src/llama.cpp/common/sampling.cpp +17 -1
  27. package/src/llama.cpp/common/sampling.h +28 -20
  28. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +17 -11
  29. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +5 -5
  30. package/src/llama.cpp/examples/finetune/finetune.cpp +1 -1
  31. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +15 -4
  32. package/src/llama.cpp/examples/imatrix/imatrix.cpp +72 -39
  33. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +42 -3
  34. package/src/llama.cpp/examples/llava/clip.cpp +74 -23
  35. package/src/llama.cpp/examples/llava/llava-cli.cpp +37 -28
  36. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -1
  37. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -1
  38. package/src/llama.cpp/examples/main/main.cpp +10 -8
  39. package/src/llama.cpp/examples/perplexity/perplexity.cpp +175 -55
  40. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  41. package/src/llama.cpp/examples/quantize/quantize.cpp +74 -47
  42. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +1 -1
  43. package/src/llama.cpp/examples/server/server.cpp +97 -86
  44. package/src/llama.cpp/examples/server/utils.hpp +17 -15
  45. package/src/llama.cpp/ggml-backend.c +7 -5
  46. package/src/llama.cpp/ggml-impl.h +339 -4
  47. package/src/llama.cpp/ggml-kompute.cpp +7 -0
  48. package/src/llama.cpp/ggml-opencl.cpp +1 -0
  49. package/src/llama.cpp/ggml-quants.c +302 -293
  50. package/src/llama.cpp/ggml-sycl.cpp +28 -16
  51. package/src/llama.cpp/ggml-vulkan-shaders.hpp +46843 -39205
  52. package/src/llama.cpp/ggml-vulkan.cpp +951 -263
  53. package/src/llama.cpp/ggml.c +1469 -116
  54. package/src/llama.cpp/ggml.h +37 -7
  55. package/src/llama.cpp/llama.cpp +969 -432
  56. package/src/llama.cpp/llama.h +46 -14
  57. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf-update.txt +2 -0
  58. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +0 -1
  59. package/src/llama.cpp/requirements/requirements-convert.txt +2 -2
  60. package/src/llama.cpp/requirements.txt +1 -0
  61. package/src/llama.cpp/sgemm.cpp +134 -103
  62. package/src/llama.cpp/sgemm.h +4 -2
  63. package/src/llama.cpp/tests/CMakeLists.txt +96 -36
  64. package/src/llama.cpp/tests/test-backend-ops.cpp +56 -6
  65. package/src/llama.cpp/tests/test-chat-template.cpp +4 -0
  66. package/src/llama.cpp/tests/test-grammar-integration.cpp +225 -136
  67. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +1 -0
  68. package/src/llama.cpp/tests/test-tokenizer-0.cpp +292 -0
  69. package/src/llama.cpp/tests/{test-tokenizer-1-llama.cpp → test-tokenizer-1-spm.cpp} +1 -1
  70. package/src/llama.cpp/unicode-data.cpp +1188 -656
  71. package/src/llama.cpp/unicode-data.h +4 -3
  72. package/src/llama.cpp/unicode.cpp +590 -49
  73. package/src/llama.cpp/unicode.h +6 -3
  74. package/src/llama.cpp/tests/test-tokenizer-0-falcon.cpp +0 -187
  75. package/src/llama.cpp/tests/test-tokenizer-0-llama.cpp +0 -190
package/src/llama.cpp/unicode.h
@@ -5,9 +5,9 @@
 #include <vector>
 
 #define CODEPOINT_TYPE_UNIDENTIFIED 0
-#define CODEPOINT_TYPE_DIGIT 1
+#define CODEPOINT_TYPE_NUMBER 1
 #define CODEPOINT_TYPE_LETTER 2
-#define CODEPOINT_TYPE_WHITESPACE 3
+#define CODEPOINT_TYPE_SEPARATOR 3
 #define CODEPOINT_TYPE_ACCENT_MARK 4
 #define CODEPOINT_TYPE_PUNCTUATION 5
 #define CODEPOINT_TYPE_SYMBOL 6
@@ -21,8 +21,11 @@ std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & c
 int unicode_cpt_type(uint32_t cp);
 int unicode_cpt_type(const std::string & utf8);
 
+bool unicode_cpt_is_whitespace(uint32_t cp);
+
 std::string unicode_byte_to_utf8(uint8_t byte);
 uint8_t unicode_utf8_to_byte(const std::string & utf8);
 
-// simple tolower that only implements one-to-one mapping, not one-to-many
 char32_t unicode_tolower(char32_t cp);
+
+std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs);
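The new unicode_regex_split declaration above backs the rewritten pretokenizer (see unicode.cpp +590 -49 in the file list). A minimal usage sketch, assuming only the signature shown in this hunk; the regex is the commonly used GPT-2 pretokenizer pattern and the expected pieces are illustrative, neither is pinned down by this diff:

// sketch: split text into pretokenizer pieces before BPE merging
#include "unicode.h"
#include <cstdio>
#include <string>
#include <vector>

int main() {
    // GPT-2 style pretokenizer pattern (assumption, not taken from this diff)
    const std::vector<std::string> regex_exprs = {
        "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+",
    };
    for (const std::string & piece : unicode_regex_split("Hello, world!", regex_exprs)) {
        printf("'%s'\n", piece.c_str()); // expected pieces: 'Hello' ',' ' world' '!'
    }
    return 0;
}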
package/src/llama.cpp/tests/test-tokenizer-0-falcon.cpp
@@ -1,187 +0,0 @@
-#include "llama.h"
-#include "common.h"
-#include "console.h"
-
-#include <cstdio>
-#include <string>
-#include <map>
-#include <vector>
-#include <fstream>
-
-// generate using test-tokenizer-0-falcon.py
-static const std::map<std::string, std::vector<llama_token>> & k_tests() {
-    static std::map<std::string, std::vector<llama_token>> _k_tests = {
-        { "" , { }, },
-        { " " , { 204, }, },
-        { "  " , { 258, }, },
-        { "   " , { 466, }, },
-        { "\t" , { 192, }, },
-        { "\n" , { 193, }, },
-        { "\t\n" , { 19125, }, },
-        { "Hello world" , { 9856, 1079, }, },
-        { " Hello world" , { 23090, 1079, }, },
-        { "Hello World" , { 9856, 2889, }, },
-        { " Hello World" , { 23090, 2889, }, },
-        { " Hello World!" , { 23090, 2889, 12, }, },
-        { "Hello, world!" , { 9856, 23, 1079, 12, }, },
-        { " Hello, world!" , { 23090, 23, 1079, 12, }, },
-        { " this is 🦙.cpp" , { 414, 304, 3346, 111, 231, 25, 29247, }, },
-        { "w048 7tuijk dsdfhu" , { 98, 55866, 204, 34, 16682, 7149, 36190, 6869, 11481, }, },
-        { "нещо на Български" , { 150, 133, 6207, 151, 215, 150, 134, 5052, 133, 6279, 5052, 223, 151, 216, 49679, 123, 53110, 47043, 7795, }, },
-        { "កាន់តែពិសេសអាចខលចេញ" , { 38154, 206, 38154, 126, 38154, 225, 167, 237, 217, 38154, 221, 167, 237, 208, 38154, 228, 38154, 127, 38154, 237, 167, 237, 207, 38154, 237, 38154, 107, 38154, 126, 38154, 211, 38154, 207, 38154, 233, 38154, 211, 167, 237, 207, 38154, 215, }, },
-        { "🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)", { 2571, 232, 206, 204, 19, 11003, 20, 8196, 126, 283, 219, 48778, 116, 13392, 204, 19, 51831, 732, 63209, 1741, 7955, 522, 20, 22438, 211, 204, 19, 7927, 53360, 325, 504, 701, 946, 10930, 20, }, },
-        { "Hello" , { 9856, }, },
-        { " Hello" , { 23090, }, },
-        { "  Hello" , { 204, 23090, }, },
-        { "   Hello" , { 258, 23090, }, },
-        { "    Hello" , { 466, 23090, }, },
-        { "    Hello\n    Hello" , { 466, 23090, 742, 23090, }, },
-        { "\n =" , { 1212, 40, }, },
-        { "' era" , { 18, 4932, }, },
-    };
-
-    return _k_tests;
-}
-
-int main(int argc, char **argv) {
-    if (argc < 2) {
-        fprintf(stderr, "Usage: %s vocab-file [text-file]\n", argv[0]);
-        return 1;
-    }
-
-    const std::string fname = argv[1];
-
-    std::string fname_text;
-    if (argc > 2) {
-        fname_text = argv[2];
-    }
-
-    fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str());
-
-    llama_model * model;
-    llama_context * ctx;
-
-    llama_backend_init();
-
-    // load the vocab
-    {
-        auto mparams = llama_model_default_params();
-
-        mparams.vocab_only = true;
-
-        model = llama_load_model_from_file(fname.c_str(), mparams);
-
-        if (model == NULL) {
-            fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
-            return 1;
-        }
-
-        auto cparams = llama_context_default_params();
-
-        ctx = llama_new_context_with_model(model, cparams);
-
-        if (ctx == NULL) {
-            fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
-            llama_free_model(model);
-            return 1;
-        }
-    }
-
-    if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_BPE) {
-        fprintf(stderr, "%s : error: vocab type is not BPE\n", __func__);
-        llama_free_model(model);
-        llama_free(ctx);
-        return 2;
-    }
-
-#ifdef _WIN32
-    // We need this for unicode console support
-    console::init(false, false);
-    atexit([]() { console::cleanup(); });
-#endif
-
-    bool success = true;
-
-    for (const auto & test_kv : k_tests()) {
-        const std::vector<llama_token> res = llama_tokenize(ctx, test_kv.first, false);
-
-        printf("\n");
-        printf("src: '%s'\n", test_kv.first.c_str());
-        printf("res: '%s'\n", llama_detokenize_bpe(ctx, res).c_str());
-        printf("tok: ");
-        for (const auto & tok : res) {
-            printf("%d ", tok);
-        }
-        printf("\n");
-
-        bool correct = res.size() == test_kv.second.size();
-
-        for (int i = 0; i < (int) res.size() && correct; ++i) {
-            if (test_kv.second[i] != res[i]) {
-                correct = false;
-            }
-        }
-
-        if (!correct) {
-            fprintf(stderr, "%s : failed test: '%s'\n", __func__, test_kv.first.c_str());
-            fprintf(stderr, "%s : detokenized to: '%s' instead of '%s'\n", __func__,
-                llama_detokenize_bpe(ctx, res).c_str(),
-                llama_detokenize_bpe(ctx, test_kv.second).c_str());
-            fprintf(stderr, "%s : expected tokens: ", __func__);
-            for (const auto & t : test_kv.second) {
-                fprintf(stderr, "%6d, ", t);
-            }
-            fprintf(stderr, "\n");
-            fprintf(stderr, "%s : got tokens: ", __func__);
-            for (const auto & t : res) {
-                fprintf(stderr, "%6d, ", t);
-            }
-            fprintf(stderr, "\n");
-
-            success = false;
-        }
-    }
-
-    if (!fname_text.empty()) {
-        fprintf(stderr, "%s : tokenizing: '%s'\n", __func__, fname_text.c_str());
-
-        std::string text;
-        {
-            std::ifstream ifs(fname_text);
-            if (!ifs) {
-                fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_text.c_str());
-                return 1;
-            }
-            text = std::string(std::istreambuf_iterator<char>(ifs), std::istreambuf_iterator<char>());
-        }
-
-        fprintf(stderr, "%s : text size: %zu\n", __func__, text.size());
-
-        const std::vector<llama_token> res = llama_tokenize(ctx, text, false);
-
-        fprintf(stderr, "%s : tokens: %zu\n", __func__, res.size());
-
-        {
-            const std::string fname_out = fname_text + ".tokcpp";
-
-            std::ofstream ofs(fname_out);
-            if (!ofs) {
-                fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_out.c_str());
-                return 1;
-            }
-
-            for (const auto & tok : res) {
-                ofs << tok << " '" << llama_detokenize_bpe(ctx, std::vector<int>{tok}) << "'" << std::endl;
-            }
-        }
-
-        fprintf(stderr, "%s : tokens written to '%s'\n", __func__, (fname_text + ".tokcpp").c_str());
-    }
-
-    llama_free_model(model);
-    llama_free(ctx);
-
-    llama_backend_free();
-
-    return success ? 0 : 3;
-}
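The hard-coded expectations above move into the consolidated test-tokenizer-0.cpp (+292 in the file list). For reference, a minimal sketch of the core check this driver implemented, under the same llama.h/common.h assumptions as the deleted code (llama_tokenize is the common.h helper used above):

// sketch: the token-equality check the deleted driver performed per test case
static bool check_tokens(llama_context * ctx,
                         const std::string & src,
                         const std::vector<llama_token> & expected) {
    // tokenize without BOS, then require an exact id-for-id match
    const std::vector<llama_token> res = llama_tokenize(ctx, src, false);
    if (res.size() != expected.size()) {
        return false;
    }
    for (size_t i = 0; i < res.size(); ++i) {
        if (res[i] != expected[i]) {
            return false;
        }
    }
    return true;
}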
package/src/llama.cpp/tests/test-tokenizer-0-llama.cpp
@@ -1,190 +0,0 @@
-#include "llama.h"
-#include "common.h"
-#include "console.h"
-
-#include <cstdio>
-#include <string>
-#include <map>
-#include <vector>
-#include <fstream>
-
-// generate using test-tokenizer-0-llama.py
-static const std::map<std::string, std::vector<llama_token>> & k_tests() {
-    static std::map<std::string, std::vector<llama_token>> _k_tests = {
-        { "" , { }, },
-        { " " , { 259, }, },
-        { "  " , { 1678, }, },
-        { "   " , { 268, }, },
-        { "\t" , { 29871, 12, }, },
-        { "\n" , { 29871, 13, }, },
-        { "\t\n" , { 29871, 12, 13, }, },
-        { "Hello world" , { 15043, 3186, }, },
-        { " Hello world" , { 29871, 15043, 3186, }, },
-        { "Hello World" , { 15043, 2787, }, },
-        { " Hello World" , { 29871, 15043, 2787, }, },
-        { " Hello World!" , { 29871, 15043, 2787, 29991, }, },
-        { "Hello, world!" , { 15043, 29892, 3186, 29991, }, },
-        { " Hello, world!" , { 29871, 15043, 29892, 3186, 29991, }, },
-        { " this is 🦙.cpp" , { 29871, 445, 338, 29871, 243, 162, 169, 156, 29889, 8223, }, },
-        { "w048 7tuijk dsdfhu" , { 281, 29900, 29946, 29947, 29871, 29955, 9161, 13535, 18031, 2176, 6905, }, },
-        { "нещо на Български" , { 1538, 4851, 665, 1386, 29713, 1305, }, },
-        { "កាន់តែពិសេសអាចខលចេញ" , { 29871, 31849, 31324, 31934, 228, 162, 142, 228, 161, 146, 228, 162, 133, 228, 161, 153, 228, 161, 186, 31708, 228, 162, 132, 31708, 228, 161, 165, 31324, 228, 161, 136, 228, 161, 132, 228, 161, 158, 228, 161, 136, 228, 162, 132, 228, 161, 140, }, },
-        { "🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)", { 29871, 243, 162, 157, 131, 313, 8945, 29897, 29871, 243, 162, 155, 185, 30722, 243, 162, 143, 174, 30598, 313, 20787, 953, 3848, 275, 16125, 630, 29897, 29871, 31681, 313, 6194, 953, 29877, 2397, 393, 756, 967, 1914, 5993, 29897, }, },
-        { "Hello" , { 15043, }, },
-        { " Hello" , { 29871, 15043, }, },
-        { "  Hello" , { 259, 15043, }, },
-        { "   Hello" , { 1678, 15043, }, },
-        { "    Hello" , { 268, 15043, }, },
-        { "    Hello\n    Hello" , { 268, 15043, 13, 1678, 15043, }, },
-        { " (" , { 29871, 313, }, },
-    };
-
-    return _k_tests;
-}
-
-int main(int argc, char **argv) {
-    if (argc < 2) {
-        fprintf(stderr, "Usage: %s vocab-file [text-file]\n", argv[0]);
-        return 1;
-    }
-
-    const std::string fname = argv[1];
-
-    std::string fname_text;
-    if (argc > 2) {
-        fname_text = argv[2];
-    }
-
-    fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str());
-
-    llama_model * model;
-    llama_context * ctx;
-
-    llama_backend_init();
-
-    // load the vocab
-    {
-        auto mparams = llama_model_default_params();
-
-        mparams.vocab_only = true;
-
-        model = llama_load_model_from_file(fname.c_str(), mparams);
-
-        if (model == NULL) {
-            fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
-            return 1;
-        }
-
-        auto cparams = llama_context_default_params();
-
-        ctx = llama_new_context_with_model(model, cparams);
-
-        if (ctx == NULL) {
-            fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
-            llama_free_model(model);
-            return 1;
-        }
-    }
-
-    if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_SPM) {
-        fprintf(stderr, "%s : error: vocab type is not SPM\n", __func__);
-        llama_free_model(model);
-        llama_free(ctx);
-        return 2;
-    }
-
-#ifdef _WIN32
-    // We need this for unicode console support
-    console::init(false, false);
-    atexit([]() { console::cleanup(); });
-#endif
-
-    bool success = true;
-
-    for (const auto & test_kv : k_tests()) {
-        const std::vector<llama_token> res_bos   = llama_tokenize(ctx, test_kv.first, true);
-        const std::vector<llama_token> res_nobos = llama_tokenize(ctx, test_kv.first, false);
-
-        printf("\n");
-        printf("src: '%s'\n", test_kv.first.c_str());
-        printf("res: '%s'\n", llama_detokenize_spm(ctx, res_bos).c_str());
-        printf("tok: ");
-        for (const auto & tok : res_bos) {
-            printf("%d ", tok);
-        }
-        printf("\n");
-
-        bool correct = res_nobos.size() == test_kv.second.size() && res_bos.size() == res_nobos.size() + 1 && res_bos[0] == 1;
-
-        for (int i = 0; i < (int) res_nobos.size() && correct; ++i) {
-            if (test_kv.second[i] != res_bos[i + 1]) {
-                correct = false;
-            }
-            if (test_kv.second[i] != res_nobos[i]) {
-                correct = false;
-            }
-        }
-
-        if (!correct) {
-            fprintf(stderr, "%s : failed test: '%s'\n", __func__, test_kv.first.c_str());
-            fprintf(stderr, "%s : detokenized to: '%s' instead of '%s'\n", __func__,
-                llama_detokenize_spm(ctx, res_nobos).c_str(),
-                llama_detokenize_spm(ctx, test_kv.second).c_str());
-            fprintf(stderr, "%s : expected tokens: ", __func__);
-            for (const auto & t : test_kv.second) {
-                fprintf(stderr, "%6d, ", t);
-            }
-            fprintf(stderr, "\n");
-            fprintf(stderr, "%s : got tokens: ", __func__);
-            for (const auto & t : res_nobos) {
-                fprintf(stderr, "%6d, ", t);
-            }
-            fprintf(stderr, "\n");
-
-            success = false;
-        }
-    }
-
-    if (!fname_text.empty()) {
-        fprintf(stderr, "%s : tokenizing: '%s'\n", __func__, fname_text.c_str());
-
-        std::string text;
-        {
-            std::ifstream ifs(fname_text);
-            if (!ifs) {
-                fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_text.c_str());
-                return 1;
-            }
-            text = std::string(std::istreambuf_iterator<char>(ifs), std::istreambuf_iterator<char>());
-        }
-
-        fprintf(stderr, "%s : text size: %zu\n", __func__, text.size());
-
-        const std::vector<llama_token> res = llama_tokenize(ctx, text, true);
-
-        fprintf(stderr, "%s : tokens: %zu\n", __func__, res.size());
-
-        {
-            const std::string fname_out = fname_text + ".tokcpp";
-
-            std::ofstream ofs(fname_out);
-            if (!ofs) {
-                fprintf(stderr, "%s : error: could not open file '%s'\n", __func__, fname_out.c_str());
-                return 1;
-            }
-
-            for (const auto & tok : res) {
-                ofs << tok << " '" << llama_detokenize_spm(ctx, std::vector<int>{tok}) << "'" << std::endl;
-            }
-        }
-
-        fprintf(stderr, "%s : tokens written to '%s'\n", __func__, (fname_text + ".tokcpp").c_str());
-    }
-
-    llama_free_model(model);
-    llama_free(ctx);
-
-    llama_backend_free();
-
-    return success ? 0 : 3;
-}
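Unlike the BPE driver, this SPM driver also pinned down BOS handling: tokenizing with add_bos must yield the no-BOS sequence with the BOS id (1 for LLaMA) prepended. A sketch of that invariant in isolation, under the same llama.h/common.h assumptions as the deleted code:

// sketch: the BOS-consistency invariant from the deleted SPM driver
static bool check_bos(llama_context * ctx, const std::string & src) {
    const std::vector<llama_token> res_bos   = llama_tokenize(ctx, src, true);
    const std::vector<llama_token> res_nobos = llama_tokenize(ctx, src, false);
    // the with-BOS result must be the no-BOS result plus a leading BOS id (1)
    if (res_bos.size() != res_nobos.size() + 1 || res_bos.empty() || res_bos[0] != 1) {
        return false;
    }
    for (size_t i = 0; i < res_nobos.size(); ++i) {
        if (res_bos[i + 1] != res_nobos[i]) {
            return false;
        }
    }
    return true;
}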