npm - @fugood/llama.node - Versions diffs - 1.4.13 → 1.4.15 - Mend

@fugood/llama.node 1.4.13 → 1.4.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)

package/lib/binding.ts +23 -2
package/lib/index.js +2 -1
package/lib/index.ts +8 -1
package/lib/parallel.ts +2 -2
package/package.json +15 -15
package/scripts/llama.cpp.patch +9 -12
package/src/LlamaContext.cpp +16 -4
package/src/llama.cpp/CMakeLists.txt +24 -8
package/src/llama.cpp/common/CMakeLists.txt +3 -34
package/src/llama.cpp/common/arg.cpp +183 -60
package/src/llama.cpp/common/arg.h +0 -8
package/src/llama.cpp/common/chat-parser.cpp +115 -0
package/src/llama.cpp/common/chat.cpp +67 -0
package/src/llama.cpp/common/chat.h +1 -0
package/src/llama.cpp/common/common.cpp +2 -1
package/src/llama.cpp/common/common.h +12 -7
package/src/llama.cpp/common/debug.cpp +165 -0
package/src/llama.cpp/common/debug.h +43 -0
package/src/llama.cpp/common/download.cpp +88 -369
package/src/llama.cpp/common/download.h +32 -5
package/src/llama.cpp/common/preset.cpp +87 -2
package/src/llama.cpp/common/preset.h +10 -1
package/src/llama.cpp/ggml/include/ggml.h +5 -0
package/src/llama.cpp/include/llama.h +5 -2
package/src/llama.cpp/src/CMakeLists.txt +1 -0
package/src/llama.cpp/src/llama-arch.cpp +35 -0
package/src/llama.cpp/src/llama-arch.h +1 -0
package/src/llama.cpp/src/llama-chat.cpp +20 -0
package/src/llama.cpp/src/llama-chat.h +1 -0
package/src/llama.cpp/src/llama-graph.cpp +31 -43
package/src/llama.cpp/src/llama-mmap.cpp +78 -42
package/src/llama.cpp/src/llama-mmap.h +5 -4
package/src/llama.cpp/src/llama-model-loader.cpp +17 -5
package/src/llama.cpp/src/llama-model-loader.h +2 -0
package/src/llama.cpp/src/llama-model.cpp +225 -101
package/src/llama.cpp/src/llama-quant.cpp +1 -1
package/src/llama.cpp/src/llama-sampling.cpp +1 -1
package/src/llama.cpp/src/llama-vocab.cpp +37 -24
package/src/llama.cpp/src/llama-vocab.h +1 -0
package/src/llama.cpp/src/llama.cpp +63 -27
package/src/llama.cpp/src/models/exaone-moe.cpp +146 -0
package/src/llama.cpp/src/models/gemma3n-iswa.cpp +13 -3
package/src/llama.cpp/src/models/models.h +13 -2
package/src/llama.cpp/src/models/qwen3next.cpp +198 -182

package/src/llama.cpp/common/download.h CHANGED Viewed

@@ -1,12 +1,27 @@
 #pragma once
 #include <string>
+#include <vector>
 struct common_params_model;
-//
-// download functionalities
-//
+using common_header      = std::pair<std::string, std::string>;
+using common_header_list = std::vector<common_header>;
+struct common_remote_params {
+    common_header_list headers;
+    long timeout  = 0;           // in seconds, 0 means no timeout
+    long max_size = 0;           // unlimited if 0
+};
+// get remote file content, returns <http_code, raw_response_body>
+std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params);
+// split HF repo with tag into <repo, tag>
+// for example: "user/model:tag" -> <"user/model", "tag">
+// if tag is not present, default to "latest"
+// example: "user/model" -> <"user/model", "latest">
+std::pair<std::string, std::string> common_download_split_repo_tag(const std::string & hf_repo_with_tag);
 struct common_cached_model_info {
     std::string manifest_path;
@@ -41,17 +56,29 @@ struct common_hf_file_res {
 common_hf_file_res common_get_hf_file(
     const std::string & hf_repo_with_tag,
     const std::string & bearer_token,
-    bool offline);
+    bool offline,
+    const common_header_list & headers = {}
+);
 // returns true if download succeeded
 bool common_download_model(
     const common_params_model & model,
     const std::string & bearer_token,
-    bool offline);
+    bool offline,
+    const common_header_list & headers = {}
+);
 // returns list of cached models
 std::vector<common_cached_model_info> common_list_cached_models();
+// download single file from url to local path
+// returns status code or -1 on error
+int common_download_file_single(const std::string & url,
+                                const std::string & path,
+                                const std::string & bearer_token,
+                                bool offline,
+                                const common_header_list & headers = {});
 // resolve and download model from Docker registry
 // return local path to downloaded model file
 std::string common_docker_resolve_model(const std::string & docker);

package/src/llama.cpp/common/preset.cpp CHANGED Viewed

@@ -16,6 +16,48 @@ static std::string rm_leading_dashes(const std::string & str) {
     return str.substr(pos);
 }
+// only allow a subset of args for remote presets for security reasons
+// do not add more args unless absolutely necessary
+// args that output to files are strictly prohibited
+static std::set<std::string> get_remote_preset_whitelist(const std::map<std::string, common_arg> & key_to_opt) {
+    static const std::set<std::string> allowed_options = {
+        "model-url",
+        "hf-repo",
+        "hf-repo-draft",
+        "hf-repo-v", // vocoder
+        "hf-file-v", // vocoder
+        "mmproj-url",
+        "pooling",
+        "jinja",
+        "batch-size",
+        "ubatch-size",
+        "cache-reuse",
+        "chat-template-kwargs",
+        "mmap",
+        // note: sampling params are automatically allowed by default
+        // negated args will be added automatically if the positive arg is specified above
+    };
+    std::set<std::string> allowed_keys;
+    for (const auto & it : key_to_opt) {
+        const std::string & key = it.first;
+        const common_arg & opt = it.second;
+        if (allowed_options.find(key) != allowed_options.end() || opt.is_sparam) {
+            allowed_keys.insert(key);
+            // also add variant keys (args without leading dashes and env vars)
+            for (const auto & arg : opt.get_args()) {
+                allowed_keys.insert(rm_leading_dashes(arg));
+            }
+            for (const auto & env : opt.get_env()) {
+                allowed_keys.insert(env);
+            }
+        }
+    }
+    return allowed_keys;
+}
 std::vector<std::string> common_preset::to_args(const std::string & bin_path) const {
     std::vector<std::string> args;
@@ -121,6 +163,29 @@ void common_preset::merge(const common_preset & other) {
     }
 }
+void common_preset::apply_to_params(common_params & params) const {
+    for (const auto & [opt, val] : options) {
+        // apply each option to params
+        if (opt.handler_string) {
+            opt.handler_string(params, val);
+        } else if (opt.handler_int) {
+            opt.handler_int(params, std::stoi(val));
+        } else if (opt.handler_bool) {
+            opt.handler_bool(params, common_arg_utils::is_truthy(val));
+        } else if (opt.handler_str_str) {
+            // not supported yet
+            throw std::runtime_error(string_format(
+                "%s: option with two values is not supported yet",
+                __func__
+            ));
+        } else if (opt.handler_void) {
+            opt.handler_void(params);
+        } else {
+            GGML_ABORT("unknown handler type");
+        }
+    }
+}
 static std::map<std::string, std::map<std::string, std::string>> parse_ini_from_file(const std::string & path) {
     std::map<std::string, std::map<std::string, std::string>> parsed;
@@ -230,10 +295,16 @@ static std::string parse_bool_arg(const common_arg & arg, const std::string & ke
     return value;
 }
-common_preset_context::common_preset_context(llama_example ex)
+common_preset_context::common_preset_context(llama_example ex, bool only_remote_allowed)
         : ctx_params(common_params_parser_init(default_params, ex)) {
     common_params_add_preset_options(ctx_params.options);
     key_to_opt = get_map_key_opt(ctx_params);
+    // setup allowed keys if only_remote_allowed is true
+    if (only_remote_allowed) {
+        filter_allowed_keys = true;
+        allowed_keys = get_remote_preset_whitelist(key_to_opt);
+    }
 }
 common_presets common_preset_context::load_from_ini(const std::string & path, common_preset & global) const {
@@ -249,7 +320,18 @@ common_presets common_preset_context::load_from_ini(const std::string & path, co
         }
         LOG_DBG("loading preset: %s\n", preset.name.c_str());
         for (const auto & [key, value] : section.second) {
+            if (key == "version") {
+                // skip version key (reserved for future use)
+                continue;
+            }
             LOG_DBG("option: %s = %s\n", key.c_str(), value.c_str());
+            if (filter_allowed_keys && allowed_keys.find(key) == allowed_keys.end()) {
+                throw std::runtime_error(string_format(
+                    "option '%s' is not allowed in remote presets",
+                    key.c_str()
+                ));
+            }
             if (key_to_opt.find(key) != key_to_opt.end()) {
                 const auto & opt = key_to_opt.at(key);
                 if (is_bool_arg(opt)) {
@@ -259,7 +341,10 @@ common_presets common_preset_context::load_from_ini(const std::string & path, co
                 }
                 LOG_DBG("accepted option: %s = %s\n", key.c_str(), preset.options[opt].c_str());
             } else {
-                // TODO: maybe warn about unknown key?
+                throw std::runtime_error(string_format(
+                    "option '%s' not recognized in preset '%s'",
+                    key.c_str(), preset.name.c_str()
+                ));
             }
         }

package/src/llama.cpp/common/preset.h CHANGED Viewed

@@ -6,6 +6,7 @@
 #include <string>
 #include <vector>
 #include <map>
+#include <set>
 //
 // INI preset parser and writer
@@ -40,6 +41,9 @@ struct common_preset {
     // merge another preset into this one, overwriting existing options
     void merge(const common_preset & other);
+    // apply preset options to common_params
+    void apply_to_params(common_params & params) const;
 };
 // interface for multiple presets in one file
@@ -50,7 +54,12 @@ struct common_preset_context {
     common_params default_params; // unused for now
     common_params_context ctx_params;
     std::map<std::string, common_arg> key_to_opt;
-    common_preset_context(llama_example ex);
+    bool filter_allowed_keys = false;
+    std::set<std::string> allowed_keys;
+    // if only_remote_allowed is true, only accept whitelisted keys
+    common_preset_context(llama_example ex, bool only_remote_allowed = false);
     // load presets from INI file
     common_presets load_from_ini(const std::string & path, common_preset & global) const;

package/src/llama.cpp/ggml/include/ggml.h CHANGED Viewed

@@ -234,6 +234,11 @@
 #if UINTPTR_MAX == 0xFFFFFFFF
     #define GGML_MEM_ALIGN 4
+#elif defined(__EMSCRIPTEN__)
+// emscripten uses max_align_t == 8, so we need GGML_MEM_ALIGN == 8 for 64-bit wasm.
+// (for 32-bit wasm, the first conditional is true and GGML_MEM_ALIGN stays 4.)
+// ref: https://github.com/ggml-org/llama.cpp/pull/18628
+    #define GGML_MEM_ALIGN 8
 #else
     #define GGML_MEM_ALIGN 16
 #endif

package/src/llama.cpp/include/llama.h CHANGED Viewed

@@ -309,6 +309,7 @@ extern "C" {
         // Keep the booleans together to avoid misalignment during copy-by-value.
         bool vocab_only;      // only load the vocabulary, no weights
         bool use_mmap;        // use mmap if possible
+        bool use_direct_io;   // use direct io, takes precedence over use_mmap
         bool use_mlock;       // force system to keep model in RAM
         bool check_tensors;   // validate model tensor data
         bool use_extra_bufts; // use extra buffer types (used for weight repacking)
@@ -494,7 +495,7 @@ extern "C" {
                     struct llama_context_params * cparams,
                                           float * tensor_split,          // writable buffer for tensor split, needs at least llama_max_devices elements
         struct llama_model_tensor_buft_override * tensor_buft_overrides, // writable buffer for overrides, needs at least llama_max_tensor_buft_overrides elements
-                                         size_t   margin,                // margin of memory to leave per device in bytes
+                                         size_t * margins,               // margins of memory to leave per device in bytes
                                        uint32_t   n_ctx_min,             // minimum context size to set when trying to reduce memory use
                             enum ggml_log_level   log_level);            // minimum log level to print during fitting, lower levels go to debug log
@@ -1291,7 +1292,9 @@ extern "C" {
     // available samplers:
     LLAMA_API struct llama_sampler * llama_sampler_init_greedy(void);
-    LLAMA_API struct llama_sampler * llama_sampler_init_dist  (uint32_t seed);
+    /// seed == LLAMA_DEFAULT_SEED to use a random seed.
+    LLAMA_API struct llama_sampler * llama_sampler_init_dist(uint32_t seed);
     /// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
     /// Setting k <= 0 makes this a noop

package/src/llama.cpp/src/CMakeLists.txt CHANGED Viewed

@@ -62,6 +62,7 @@ add_library(llama
             models/ernie4-5.cpp
             models/exaone.cpp
             models/exaone4.cpp
+            models/exaone-moe.cpp
             models/falcon-h1.cpp
             models/falcon.cpp
             models/gemma-embedding.cpp

package/src/llama.cpp/src/llama-arch.cpp CHANGED Viewed

@@ -81,6 +81,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_NEMOTRON_H_MOE,   "nemotron_h_moe"   },
     { LLM_ARCH_EXAONE,           "exaone"           },
     { LLM_ARCH_EXAONE4,          "exaone4"          },
+    { LLM_ARCH_EXAONE_MOE,       "exaone-moe"       },
     { LLM_ARCH_RWKV6,            "rwkv6"            },
     { LLM_ARCH_RWKV6QWEN2,       "rwkv6qwen2"       },
     { LLM_ARCH_RWKV7,            "rwkv7"            },
@@ -950,6 +951,8 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
                 LLM_TENSOR_ATTN_K_NORM,
                 LLM_TENSOR_ATTN_V,
                 LLM_TENSOR_ATTN_OUT,
+                LLM_TENSOR_ATTN_QKV,
+                LLM_TENSOR_ATTN_GATE,
                 LLM_TENSOR_FFN_NORM,
                 LLM_TENSOR_FFN_GATE_INP,
                 LLM_TENSOR_FFN_GATE_EXPS,
@@ -1726,6 +1729,38 @@ static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
                 LLM_TENSOR_FFN_UP,
                 LLM_TENSOR_FFN_POST_NORM,
             };
+        case LLM_ARCH_EXAONE_MOE:
+            return {
+                LLM_TENSOR_TOKEN_EMBD,
+                LLM_TENSOR_OUTPUT_NORM,
+                LLM_TENSOR_OUTPUT,
+                LLM_TENSOR_ROPE_FREQS,
+                LLM_TENSOR_ATTN_NORM,
+                LLM_TENSOR_ATTN_Q,
+                LLM_TENSOR_ATTN_Q_NORM,
+                LLM_TENSOR_ATTN_K,
+                LLM_TENSOR_ATTN_K_NORM,
+                LLM_TENSOR_ATTN_V,
+                LLM_TENSOR_ATTN_OUT,
+                LLM_TENSOR_FFN_NORM,
+                LLM_TENSOR_FFN_GATE,
+                LLM_TENSOR_FFN_DOWN,
+                LLM_TENSOR_FFN_UP,
+                LLM_TENSOR_FFN_GATE_INP,
+                LLM_TENSOR_FFN_GATE_EXPS,
+                LLM_TENSOR_FFN_DOWN_EXPS,
+                LLM_TENSOR_FFN_UP_EXPS,
+                LLM_TENSOR_FFN_GATE_SHEXP,
+                LLM_TENSOR_FFN_UP_SHEXP,
+                LLM_TENSOR_FFN_DOWN_SHEXP,
+                LLM_TENSOR_FFN_EXP_PROBS_B,
+                LLM_TENSOR_NEXTN_EH_PROJ,
+                LLM_TENSOR_NEXTN_EMBED_TOKENS,
+                LLM_TENSOR_NEXTN_ENORM,
+                LLM_TENSOR_NEXTN_HNORM,
+                LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,
+                LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,
+            };
         case LLM_ARCH_RWKV6:
             return {
                 LLM_TENSOR_TOKEN_EMBD,

package/src/llama.cpp/src/llama-arch.h CHANGED Viewed

@@ -85,6 +85,7 @@ enum llm_arch {
     LLM_ARCH_NEMOTRON_H_MOE,
     LLM_ARCH_EXAONE,
     LLM_ARCH_EXAONE4,
+    LLM_ARCH_EXAONE_MOE,
     LLM_ARCH_RWKV6,
     LLM_ARCH_RWKV6QWEN2,
     LLM_ARCH_RWKV7,

package/src/llama.cpp/src/llama-chat.cpp CHANGED Viewed

@@ -57,6 +57,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
     { "minicpm",           LLM_CHAT_TEMPLATE_MINICPM           },
     { "exaone3",           LLM_CHAT_TEMPLATE_EXAONE_3          },
     { "exaone4",           LLM_CHAT_TEMPLATE_EXAONE_4          },
+    { "exaone-moe",        LLM_CHAT_TEMPLATE_EXAONE_MOE        },
     { "rwkv-world",        LLM_CHAT_TEMPLATE_RWKV_WORLD        },
     { "granite",           LLM_CHAT_TEMPLATE_GRANITE           },
     { "gigachat",          LLM_CHAT_TEMPLATE_GIGACHAT          },
@@ -137,6 +138,9 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
     } else if (tmpl_contains("[gMASK]<sop>")) {
         return LLM_CHAT_TEMPLATE_CHATGLM_4;
     } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
+        if (tmpl_contains("<|tool_declare|>")) {
+            return LLM_CHAT_TEMPLATE_EXAONE_MOE;
+        }
         return tmpl_contains("</s>") ? LLM_CHAT_TEMPLATE_FALCON_3 : LLM_CHAT_TEMPLATE_GLMEDGE;
     } else if (tmpl_contains("<|{{ item['role'] }}|>") && tmpl_contains("<|begin_of_image|>")) {
         return LLM_CHAT_TEMPLATE_GLMEDGE;
@@ -576,6 +580,22 @@ int32_t llm_chat_apply_template(
         if (add_ass) {
             ss << "[|assistant|]";
         }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_MOE) {
+        for (auto message : chat) {
+            std::string role(message->role);
+            if (role == "system") {
+                ss << "<|system|>\n" << trim(message->content) << "<|endofturn|>\n";
+            } else if (role == "user") {
+                ss << "<|user|>\n" << trim(message->content) << "<|endofturn|>\n";
+            } else if (role == "assistant") {
+                ss << "<|assistant|>\n" << trim(message->content) << "<|endofturn|>\n";
+            } else if (role == "tool") {
+                ss << "<|tool|>\n" << trim(message->content) << "<|endofturn|>\n";
+            }
+        }
+        if (add_ass) {
+            ss << "<|assistant|>\n";
+        }
     } else if (tmpl == LLM_CHAT_TEMPLATE_RWKV_WORLD) {
         // this template requires the model to have "\n\n" as EOT token
         for (size_t i = 0; i < chat.size(); i++) {

package/src/llama.cpp/src/llama-chat.h CHANGED Viewed

@@ -36,6 +36,7 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_MINICPM,
     LLM_CHAT_TEMPLATE_EXAONE_3,
     LLM_CHAT_TEMPLATE_EXAONE_4,
+    LLM_CHAT_TEMPLATE_EXAONE_MOE,
     LLM_CHAT_TEMPLATE_RWKV_WORLD,
     LLM_CHAT_TEMPLATE_GRANITE,
     LLM_CHAT_TEMPLATE_GIGACHAT,

package/src/llama.cpp/src/llama-graph.cpp CHANGED Viewed

@@ -96,11 +96,9 @@ void llm_graph_input_pos_bucket::set_input(const llama_ubatch * ubatch) {
         int32_t * data = (int32_t *) pos_bucket->data;
-        for (int h = 0; h < 1; ++h) {
-            for (int j = 0; j < n_tokens; ++j) {
-                for (int i = 0; i < n_tokens; ++i) {
-                    data[h*(n_tokens*n_tokens) + j*n_tokens + i] = llama_relative_position_bucket(ubatch->pos[i], ubatch->pos[j], hparams.n_rel_attn_bkts, true);
-                }
+        for (int j = 0; j < n_tokens; ++j) {
+            for (int i = 0; i < n_tokens; ++i) {
+                data[j*n_tokens + i] = llama_relative_position_bucket(ubatch->pos[i], ubatch->pos[j], hparams.n_rel_attn_bkts, true);
             }
         }
     }
@@ -323,34 +321,32 @@ void llm_graph_input_attn_no_cache::set_input(const llama_ubatch * ubatch) {
     const int64_t n_tokens = ubatch->n_tokens;
     const auto fill_mask = [&](float * data, int n_swa, llama_swa_type swa_type) {
-        for (int h = 0; h < 1; ++h) {
-            for (int i1 = 0; i1 < n_tokens; ++i1) {
-                const llama_seq_id s1 = ubatch->seq_id[i1][0];
-                const llama_pos    p1 = ubatch->pos[i1];
+        for (int i1 = 0; i1 < n_tokens; ++i1) {
+            const llama_seq_id s1 = ubatch->seq_id[i1][0];
+            const llama_pos    p1 = ubatch->pos[i1];
-                const uint64_t idst = h*(n_kv*n_tokens) + i1*n_kv;
+            const uint64_t idst = i1*n_kv;
-                for (int i0 = 0; i0 < n_tokens; ++i0) {
-                    const llama_seq_id s0 = ubatch->seq_id[i0][0];
-                    const llama_pos p0    = ubatch->pos[i0];
-                    // mask different sequences
-                    if (s0 != s1) {
-                        continue;
-                    }
+            for (int i0 = 0; i0 < n_tokens; ++i0) {
+                const llama_seq_id s0 = ubatch->seq_id[i0][0];
+                const llama_pos p0    = ubatch->pos[i0];
-                    // mask future tokens
-                    if (cparams.causal_attn && p0 > p1) {
-                        continue;
-                    }
+                // mask different sequences
+                if (s0 != s1) {
+                    continue;
+                }
-                    // apply SWA if any
-                    if (llama_hparams::is_masked_swa(n_swa, swa_type, p0, p1)) {
-                        continue;
-                    }
+                // mask future tokens
+                if (cparams.causal_attn && p0 > p1) {
+                    continue;
+                }
-                    data[idst + i0] = hparams.use_alibi ? -std::abs(p0 - p1) : 0.0f;
+                // apply SWA if any
+                if (llama_hparams::is_masked_swa(n_swa, swa_type, p0, p1)) {
+                    continue;
                 }
+                data[idst + i0] = hparams.use_alibi ? -std::abs(p0 - p1) : 0.0f;
             }
         }
     };
@@ -454,27 +450,19 @@ void llm_graph_input_attn_cross::set_input(const llama_ubatch * ubatch) {
     float * data = (float *) cross_kq_mask->data;
-    for (int h = 0; h < 1; ++h) {
-        for (int i = 0; i < n_tokens; ++i) {
-            for (int j = 0; j < n_enc; ++j) {
-                float f = -INFINITY;
+    for (int i = 0; i < n_tokens; ++i) {
+        for (int j = 0; j < n_enc; ++j) {
+            float f = -INFINITY;
-                for (int s = 0; s < ubatch->n_seq_id[i]; ++s) {
-                    const llama_seq_id seq_id = ubatch->seq_id[i][s];
+            for (int s = 0; s < ubatch->n_seq_id[i]; ++s) {
+                const llama_seq_id seq_id = ubatch->seq_id[i][s];
-                    if (cross->seq_ids_enc[j].find(seq_id) != cross->seq_ids_enc[j].end()) {
-                        f = 0.0f;
-                    }
+                if (cross->seq_ids_enc[j].find(seq_id) != cross->seq_ids_enc[j].end()) {
+                    f = 0.0f;
                 }
-                data[h*(n_enc*n_tokens) + i*n_enc + j] = f;
             }
-        }
-        for (int i = n_tokens; i < n_tokens; ++i) {
-            for (int j = 0; j < n_enc; ++j) {
-                data[h*(n_enc*n_tokens) + i*n_enc + j] = -INFINITY;
-            }
+            data[i*n_enc + j] = f;
         }
     }
 }