@fugood/llama.node 0.4.7 → 0.5.0
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +20 -6
- package/lib/index.js +41 -17
- package/lib/index.ts +50 -23
- package/package.json +1 -1
- package/src/LlamaCompletionWorker.cpp +9 -9
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +37 -18
- package/src/LlamaContext.h +1 -0
- package/src/TokenizeWorker.cpp +16 -12
- package/src/TokenizeWorker.h +2 -2
- package/src/common.hpp +54 -50
- package/src/llama.cpp/.github/workflows/build.yml +2 -2
- package/src/llama.cpp/.github/workflows/release.yml +152 -129
- package/src/llama.cpp/.github/workflows/winget.yml +42 -0
- package/src/llama.cpp/common/arg.cpp +14 -13
- package/src/llama.cpp/common/common.cpp +4 -75
- package/src/llama.cpp/common/common.h +7 -12
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -13
- package/src/llama.cpp/examples/lookup/lookup.cpp +0 -11
- package/src/llama.cpp/examples/parallel/parallel.cpp +0 -9
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +6 -6
- package/src/llama.cpp/examples/simple/simple.cpp +1 -1
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
- package/src/llama.cpp/examples/sycl/run-llama2.sh +4 -4
- package/src/llama.cpp/examples/sycl/run-llama3.sh +28 -0
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/sycl/win-run-llama3.bat +9 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +2 -0
- package/src/llama.cpp/ggml/include/ggml.h +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +274 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +27 -0
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +18 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +107 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -155
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +5 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +43 -12
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +171 -112
- package/src/llama.cpp/ggml/src/ggml.c +64 -18
- package/src/llama.cpp/include/llama.h +24 -124
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
- package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/src/llama.cpp/src/llama-batch.cpp +3 -1
- package/src/llama.cpp/src/llama-context.cpp +60 -110
- package/src/llama.cpp/src/llama-graph.cpp +137 -233
- package/src/llama.cpp/src/llama-graph.h +49 -7
- package/src/llama.cpp/src/llama-hparams.cpp +17 -1
- package/src/llama.cpp/src/llama-hparams.h +34 -5
- package/src/llama.cpp/src/llama-kv-cache.cpp +654 -321
- package/src/llama.cpp/src/llama-kv-cache.h +201 -85
- package/src/llama.cpp/src/llama-memory.h +3 -2
- package/src/llama.cpp/src/llama-model.cpp +273 -94
- package/src/llama.cpp/src/llama-model.h +4 -1
- package/src/llama.cpp/tests/test-arg-parser.cpp +1 -1
- package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +1 -0
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +13 -2
- package/src/llama.cpp/tools/mtmd/clip-impl.h +108 -11
- package/src/llama.cpp/tools/mtmd/clip.cpp +466 -88
- package/src/llama.cpp/tools/mtmd/clip.h +6 -4
- package/src/llama.cpp/tools/mtmd/miniaudio.h +93468 -0
- package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +855 -0
- package/src/llama.cpp/tools/mtmd/mtmd-audio.h +62 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +21 -14
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +36 -49
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +362 -98
- package/src/llama.cpp/tools/mtmd/mtmd.h +52 -21
- package/src/llama.cpp/tools/run/run.cpp +2 -2
- package/src/llama.cpp/tools/server/server.cpp +158 -47
- package/src/llama.cpp/tools/server/utils.hpp +71 -43
- package/src/llama.cpp/tools/tts/tts.cpp +4 -2
--- /dev/null
+++ b/package/src/llama.cpp/tools/mtmd/mtmd-audio.h
@@ -0,0 +1,62 @@
+#pragma once
+
+#include "ggml.h"
+
+#include <cstdint>
+#include <vector>
+#include <string>
+
+#define WHISPER_ASSERT GGML_ASSERT
+
+#define WHISPER_SAMPLE_RATE 16000
+#define WHISPER_N_FFT       400
+#define WHISPER_HOP_LENGTH  160
+#define WHISPER_CHUNK_SIZE  30
+
+#define COMMON_SAMPLE_RATE 16000
+
+namespace whisper_preprocessor {
+
+struct whisper_mel {
+    int n_len;
+    int n_len_org;
+    int n_mel;
+
+    std::vector<float> data;
+};
+
+struct whisper_filters {
+    int32_t n_mel;
+    int32_t n_fft;
+
+    std::vector<float> data;
+};
+
+extern bool preprocess_audio(
+        const float * samples,
+        size_t n_samples,
+        const whisper_filters & filters,
+        std::vector<whisper_mel> & output);
+
+} // namespace whisper_preprocessor
+
+
+// TODO @ngxson : move this helper to mtmd-helpers.cpp
+namespace audio_helpers {
+
+extern bool is_audio_file(const char * buf, size_t len);
+
+extern bool decode_audio_from_buf(
+        const unsigned char * buf_in,
+        size_t len,
+        int target_sampler_rate,
+        std::vector<float> & pcmf32_mono);
+
+} // namespace audio_helpers
+
+
+namespace whisper_precalc_filters {
+
+extern whisper_preprocessor::whisper_filters get_128_bins();
+
+} // namespace whisper_precalc_filters
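Note: the new header above only declares the audio front end. A minimal sketch of how the declarations compose, going from encoded file bytes (decoded by the bundled miniaudio.h) to mel-spectrogram chunks; the wrapper function itself is illustrative, not part of the package:

#include "mtmd-audio.h"

#include <vector>

// Hypothetical caller of the API declared in mtmd-audio.h above.
static bool mel_from_bytes(const unsigned char * buf, size_t len,
                           std::vector<whisper_preprocessor::whisper_mel> & mels) {
    if (!audio_helpers::is_audio_file((const char *) buf, len)) {
        return false; // not a recognized audio container
    }
    // Decode and resample to mono PCM at the rate the mel filters expect.
    std::vector<float> pcmf32_mono;
    if (!audio_helpers::decode_audio_from_buf(buf, len, COMMON_SAMPLE_RATE, pcmf32_mono)) {
        return false;
    }
    // Precomputed 128-bin mel filter bank (see whisper_precalc_filters).
    const whisper_preprocessor::whisper_filters filters = whisper_precalc_filters::get_128_bins();
    // 400-sample FFT window, 160-sample hop: one mel frame per 10 ms at 16 kHz.
    return whisper_preprocessor::preprocess_audio(pcmf32_mono.data(), pcmf32_mono.size(),
                                                  filters, mels);
}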
--- a/package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp
+++ b/package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp
@@ -37,10 +37,10 @@ static volatile bool g_is_interrupted = false;
 static void show_additional_info(int /*argc*/, char ** argv) {
     LOG(
         "Experimental CLI for multimodal\n\n"
-        "Usage: %s [options] -m <model> --mmproj <mmproj> --image <image> -p <prompt>\n\n"
+        "Usage: %s [options] -m <model> --mmproj <mmproj> --image <image> --audio <audio> -p <prompt>\n\n"
         "  -m and --mmproj are required\n"
         "  -hf user/repo can replace both -m and --mmproj in most cases\n"
-        "  --image and -p are optional, if NOT provided, the CLI will run in chat mode\n"
+        "  --image, --audio and -p are optional, if NOT provided, the CLI will run in chat mode\n"
         "  to disable using GPU for mmproj model, add --no-mmproj-offload\n",
         argv[0]
     );
@@ -142,7 +142,7 @@ struct mtmd_cli_context {
         );
     }
 
-    bool load_image(const std::string & fname) {
+    bool load_media(const std::string & fname) {
         mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(fname.c_str()));
         if (!bmp.ptr) {
             return false;
@@ -243,7 +243,7 @@ int main(int argc, char ** argv) {
     common_params params;
     params.sampling.temp = 0.2; // lower temp by default for better quality
 
-    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_LLAVA, show_additional_info)) {
+    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_MTMD, show_additional_info)) {
         return 1;
     }
 
@@ -283,14 +283,14 @@
 
     if (is_single_turn) {
         g_is_generating = true;
-        if (params.prompt.find("<__image__>") == std::string::npos) {
-            params.prompt += " <__image__>";
+        if (params.prompt.find(mtmd_default_marker()) == std::string::npos) {
+            params.prompt += mtmd_default_marker();
         }
         common_chat_msg msg;
         msg.role = "user";
         msg.content = params.prompt;
         for (const auto & image : params.image) {
-            if (!ctx.load_image(image)) {
+            if (!ctx.load_media(image)) {
                 return 1; // error is already printed by libmtmd
             }
         }
@@ -303,7 +303,12 @@
 
     } else {
         LOG("\n Running in chat mode, available commands:");
-        LOG("\n   /image <path>    load an image");
+        if (mtmd_support_vision(ctx.ctx_vision.get())) {
+            LOG("\n   /image <path>    load an image");
+        }
+        if (mtmd_support_audio(ctx.ctx_vision.get())) {
+            LOG("\n   /audio <path>    load an audio");
+        }
         LOG("\n   /clear           clear the chat history");
         LOG("\n   /quit or /exit   exit the program");
         LOG("\n");
@@ -333,15 +338,17 @@
             continue;
         }
         g_is_generating = true;
-        if (line.find("/image") == 0) {
+        bool is_image = line == "/image" || line.find("/image ") == 0;
+        bool is_audio = line == "/audio" || line.find("/audio ") == 0;
+        if (is_image || is_audio) {
            if (line.size() < 8) {
-                LOG_ERR("ERR: Missing image filename\n");
+                LOG_ERR("ERR: Missing media filename\n");
                 continue;
             }
-            std::string image = line.substr(7);
-            if (ctx.load_image(image)) {
-                LOG("Image %s loaded\n", image.c_str());
-                content += "<__image__>";
+            std::string media_path = line.substr(7);
+            if (ctx.load_media(media_path)) {
+                LOG("%s %s loaded\n", media_path.c_str(), is_image ? "image" : "audio");
+                content += mtmd_default_marker();
             }
             // else, error is already printed by libmtmd
             continue;
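Note on the CLI hunks above: the hardcoded image-marker literal is replaced by mtmd_default_marker(), a single placeholder that now stands in for any media type. A reduced sketch of the single-turn check; only mtmd_default_marker() is real API here, the helper is illustrative:

#include <string>

extern "C" const char * mtmd_default_marker(void); // real declaration lives in mtmd.h

// Ensure the prompt contains at least one media placeholder, so the loaded
// image/audio embeddings have a splice point when the prompt is tokenized.
static void ensure_media_marker(std::string & prompt) {
    if (prompt.find(mtmd_default_marker()) == std::string::npos) {
        prompt += mtmd_default_marker();
    }
}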
--- a/package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp
+++ b/package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp
@@ -12,17 +12,7 @@ size_t mtmd_helper_get_n_tokens(const mtmd_input_chunks * chunks) {
     size_t n_tokens = 0;
     for (size_t i = 0; i < mtmd_input_chunks_size(chunks); i++) {
         auto chunk = mtmd_input_chunks_get(chunks, i);
-        auto chunk_type = mtmd_input_chunk_get_type(chunk);
-        if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
-            size_t n_tokens_text;
-            mtmd_input_chunk_get_tokens_text(chunk, &n_tokens_text);
-            n_tokens += n_tokens_text;
-        } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
-            auto tokens_image = mtmd_input_chunk_get_tokens_image(chunk);
-            n_tokens += mtmd_image_tokens_get_n_tokens(tokens_image);
-        } else {
-            GGML_ASSERT(false && "chunk type not supported");
-        }
+        n_tokens += mtmd_input_chunk_get_n_tokens(chunk);
     }
     return n_tokens;
 }
@@ -31,17 +21,7 @@ llama_pos mtmd_helper_get_n_pos(const mtmd_input_chunks * chunks) {
     llama_pos n_pos = 0;
     for (size_t i = 0; i < mtmd_input_chunks_size(chunks); i++) {
         auto chunk = mtmd_input_chunks_get(chunks, i);
-        auto chunk_type = mtmd_input_chunk_get_type(chunk);
-        if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
-            size_t n_tokens_text;
-            mtmd_input_chunk_get_tokens_text(chunk, &n_tokens_text);
-            n_pos += n_tokens_text;
-        } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
-            auto tokens_image = mtmd_input_chunk_get_tokens_image(chunk);
-            n_pos += mtmd_image_tokens_get_n_pos(tokens_image);
-        } else {
-            GGML_ASSERT(false && "chunk type not supported");
-        }
+        n_pos += mtmd_input_chunk_get_n_pos(chunk);
    }
     return n_pos;
 }
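Note: both helpers above collapse their per-type branching into the unified chunk accessors. Token count and position count stay separate helpers because they can differ for non-text chunks (with M-RoPE, an image's position span is not simply its token count). A sketch of caller-side usage, assuming only the accessors visible in this diff:

#include <cstdio>

#include "mtmd.h"

// Walk a tokenized multimodal prompt; the same two calls work for text,
// image and audio chunks alike.
static void report_chunks(const mtmd_input_chunks * chunks) {
    size_t    n_tokens = 0; // embeddings that must be decoded
    llama_pos n_pos    = 0; // positions consumed in the KV cache
    for (size_t i = 0; i < mtmd_input_chunks_size(chunks); i++) {
        const mtmd_input_chunk * chunk = mtmd_input_chunks_get(chunks, i);
        n_tokens += mtmd_input_chunk_get_n_tokens(chunk);
        n_pos    += mtmd_input_chunk_get_n_pos(chunk);
    }
    printf("%zu tokens over %d positions\n", n_tokens, (int) n_pos);
}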
@@ -149,13 +129,10 @@ int32_t mtmd_helper_decode_image_chunk(
         llama_seq_id seq_id,
         int32_t n_batch,
         llama_pos * new_n_past) {
-    if (mtmd_input_chunk_get_type(chunk) != MTMD_INPUT_CHUNK_TYPE_IMAGE) {
-        LOG_ERR("failed to decode image chunk: input chunk not of image type\n");
-        return -1;
-    }
-    const auto image_tokens = mtmd_input_chunk_get_tokens_image(chunk);
-    if (!image_tokens) {
-        LOG_ERR("failed to decode image chunk: image tokens are null\n");
+    auto chunk_type = mtmd_input_chunk_get_type(chunk);
+    const char * name = chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE ? "image" : "audio";
+    if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
+        LOG_ERR("failed to decode chunk: input chunk not of image/audio type\n");
         return -1;
     }
 
@@ -163,15 +140,23 @@ int32_t mtmd_helper_decode_image_chunk(
     int n_mmproj_embd = llama_model_n_embd(model);
     int n_pos_per_embd = mtmd_decode_use_mrope(ctx) ? 4 : 1;
 
-    int32_t n_tokens = mtmd_image_tokens_get_n_tokens(image_tokens);
+    int32_t n_tokens = mtmd_input_chunk_get_n_tokens(chunk);
     int32_t i_batch = 0;
     int32_t n_img_batches = GGML_PAD(n_tokens, n_batch) / n_batch;
     decode_embd_batch batch_embd(encoded_embd, n_tokens, n_pos_per_embd, n_mmproj_embd);
 
-    const int nx = mtmd_image_tokens_get_nx(image_tokens);
-    const int ny = mtmd_image_tokens_get_ny(image_tokens);
-
     if (mtmd_decode_use_mrope(ctx)) {
+        const auto image_tokens = mtmd_input_chunk_get_tokens_image(chunk);
+        if (chunk_type != MTMD_INPUT_CHUNK_TYPE_IMAGE) {
+            LOG_ERR("failed to decode chunk: M-RoPE only accepts image chunk\n");
+            return -1;
+        }
+        if (!image_tokens) {
+            LOG_ERR("failed to decode chunk: image tokens are null\n");
+            return -1;
+        }
+        const int nx = mtmd_image_tokens_get_nx(image_tokens);
+        const int ny = mtmd_image_tokens_get_ny(image_tokens);
         batch_embd.set_position_mrope(n_past, nx, ny, seq_id);
     } else {
         batch_embd.set_position_normal(n_past, seq_id);
@@ -187,22 +172,22 @@ int32_t mtmd_helper_decode_image_chunk(
         int n_tokens_batch = std::min(n_batch, n_tokens - pos_offset);
         llama_batch batch_embd_view = batch_embd.get_view(pos_offset, n_tokens_batch);
 
-        LOG_INF("decoding image batch %d/%d, n_tokens_batch = %d\n", i_batch+1, n_img_batches, n_tokens_batch);
+        LOG_INF("decoding %s batch %d/%d, n_tokens_batch = %d\n", name, i_batch+1, n_img_batches, n_tokens_batch);
 
         int64_t t1 = ggml_time_ms();
         int32_t ret = llama_decode(lctx, batch_embd_view);
         if (ret != 0) {
-            LOG_ERR("failed to decode image\n");
+            LOG_ERR("failed to decode %s\n", name);
             llama_set_causal_attn(lctx, true); // restore causal attn
             return ret;
         }
 
-        LOG_INF("image decoded (batch %d/%d) in %" PRId64 " ms\n", i_batch+1, n_img_batches, ggml_time_ms() - t1);
+        LOG_INF("%s decoded (batch %d/%d) in %" PRId64 " ms\n", name, i_batch+1, n_img_batches, ggml_time_ms() - t1);
 
         i_batch++;
     }
 
-    n_past += mtmd_image_tokens_get_n_pos(image_tokens);
+    n_past += mtmd_input_chunk_get_n_pos(chunk);
     *new_n_past = n_past;
 
     if (mtmd_decode_use_non_causal(ctx)) {
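Note on n_img_batches = GGML_PAD(n_tokens, n_batch) / n_batch in the loop above: rounding up to a multiple and dividing back is ceiling division, i.e. the number of llama_decode calls one image/audio chunk needs. A self-contained check, using a local stand-in for ggml.h's GGML_PAD:

#include <cstdint>

// Stand-in mirroring GGML_PAD's round-up-to-a-multiple behavior.
constexpr int32_t pad_to_multiple(int32_t x, int32_t n) { return (x + n - 1) / n * n; }

static_assert(pad_to_multiple(577, 512) / 512 == 2, "577 embeddings -> 2 batches");
static_assert(pad_to_multiple(512, 512) / 512 == 1, "exact multiple -> 1 batch");
static_assert(pad_to_multiple(  1, 512) / 512 == 1, "a single token still costs a batch");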
@@ -231,12 +216,14 @@ int32_t mtmd_helper_eval_chunk_single(mtmd_context * ctx,
         while (i < n_tokens) { // split into batches
             text_batch.n_tokens = 0; // clear the batch
             for (; i < n_tokens && text_batch.n_tokens < n_batch; i++) {
+                int32_t j = text_batch.n_tokens;
+                text_batch.token   [j]    = tokens[i];
+                text_batch.pos     [j]    = n_past++;
+                text_batch.n_seq_id[j]    = 1;
+                text_batch.seq_id  [j][0] = seq_id;
+                text_batch.logits  [j]    = false;
+
                 text_batch.n_tokens++;
-                text_batch.token   [i]    = tokens[i];
-                text_batch.pos     [i]    = n_past++;
-                text_batch.n_seq_id[i]    = 1;
-                text_batch.seq_id  [i][0] = seq_id;
-                text_batch.logits  [i]    = false;
             }
             bool is_last_token = (i == n_tokens);
             if (logits_last && is_last_token) {
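Note: the hunk above is a bug fix, not just a cleanup. The removed lines indexed the batch arrays with i, the position in the whole token stream, so as soon as a text chunk spilled into a second batch (i >= n_batch) the writes landed past arrays sized for n_batch entries. The new batch-local index j always stays below n_batch. A reduced sketch of the corrected pattern (llama_batch fields as in llama.h; the decode call is elided):

#include "llama.h"

// Fill a long token stream into a llama_batch in slices of at most n_batch.
static void fill_in_slices(llama_batch & text_batch, const llama_token * tokens,
                           int32_t n_tokens, int32_t n_batch,
                           llama_pos & n_past, llama_seq_id seq_id) {
    int32_t i = 0; // global: index into the token stream
    while (i < n_tokens) {
        text_batch.n_tokens = 0;
        for (; i < n_tokens && text_batch.n_tokens < n_batch; i++) {
            const int32_t j = text_batch.n_tokens; // local: slot in this batch, always < n_batch
            text_batch.token   [j]    = tokens[i];
            text_batch.pos     [j]    = n_past++;
            text_batch.n_seq_id[j]    = 1;
            text_batch.seq_id  [j][0] = seq_id;
            text_batch.logits  [j]    = false;
            text_batch.n_tokens++;
        }
        // llama_decode(lctx, text_batch) runs here for each slice.
    }
}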
@@ -251,25 +238,25 @@ int32_t mtmd_helper_eval_chunk_single(mtmd_context * ctx,
             *new_n_past += text_batch.n_tokens;
         }
 
-    } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
-        const auto image_tokens = mtmd_input_chunk_get_tokens_image(chunk);
+    } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE || chunk_type == MTMD_INPUT_CHUNK_TYPE_AUDIO) {
+        const char * name = chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE ? "image" : "audio";
         int64_t t0 = ggml_time_ms();
 
-        LOG_INF("encoding image or slice...\n");
+        LOG_INF("encoding %s slice...\n", name);
 
-        ret = mtmd_encode(ctx, image_tokens);
+        ret = mtmd_encode_chunk(ctx, chunk);
         if (ret != 0) {
-            LOG_ERR("failed to encode image\n");
+            LOG_ERR("failed to encode %s slice\n", name);
             llama_batch_free(text_batch);
             return ret;
         }
 
-        LOG_INF("image/slice encoded in %" PRId64 " ms\n", ggml_time_ms() - t0);
+        LOG_INF("%s slice encoded in %" PRId64 " ms\n", name, ggml_time_ms() - t0);
 
         float * embd = mtmd_get_output_embd(ctx);
         ret = mtmd_helper_decode_image_chunk(ctx, lctx, chunk, embd, n_past, seq_id, n_batch, new_n_past);
         if (ret != 0) {
-            LOG_ERR("failed to decode image\n");
+            LOG_ERR("failed to decode %s\n", name);
             llama_batch_free(text_batch);
             return ret;
         }