npm - @novastera-oss/llamarn - Versions diffs - 0.2.9 → 0.3.0 - Mend

@novastera-oss/llamarn 0.2.9 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (247) hide show

package/cpp/llama.cpp/src/llama-model.h CHANGED Viewed

@@ -32,16 +32,21 @@ enum llm_type {
     LLM_TYPE_190M,
     LLM_TYPE_220M,
     LLM_TYPE_250M,
+    LLM_TYPE_256M,
     LLM_TYPE_270M,
     LLM_TYPE_335M,
+    LLM_TYPE_350M,
     LLM_TYPE_410M,
     LLM_TYPE_450M,
     LLM_TYPE_475M,
+    LLM_TYPE_700M,
     LLM_TYPE_770M,
     LLM_TYPE_780M,
+    LLM_TYPE_0_3B,
     LLM_TYPE_0_5B,
     LLM_TYPE_0_6B,
     LLM_TYPE_1B,
+    LLM_TYPE_1_2B,
     LLM_TYPE_1_3B,
     LLM_TYPE_1_4B,
     LLM_TYPE_1_5B,
@@ -93,6 +98,7 @@ enum llm_type {
     LLM_TYPE_57B_A14B,
     LLM_TYPE_17B_16E, // llama4 Scout
     LLM_TYPE_17B_128E, // llama4 Maverick
+    LLM_TYPE_A13B,
     LLM_TYPE_30B_A3B,
     LLM_TYPE_235B_A22B,
     LLM_TYPE_E2B,
@@ -152,6 +158,12 @@ struct llama_layer_convnext {
     struct ggml_tensor * gamma = nullptr;
 };
+struct llama_layer_shortconv {
+    struct ggml_tensor * in_proj  = nullptr;
+    struct ggml_tensor * conv     = nullptr;
+    struct ggml_tensor * out_proj = nullptr;
+};
 struct llama_layer {
     // normalization
     struct ggml_tensor * attn_norm       = nullptr;
@@ -171,6 +183,10 @@ struct llama_layer {
     struct ggml_tensor * ffn_sub_norm    = nullptr;
     struct ggml_tensor * attn_norm_cross = nullptr;
     struct ggml_tensor * attn_norm_enc   = nullptr;
+    struct ggml_tensor * ssm_norm        = nullptr;
+    struct ggml_tensor * ssm_dt_norm     = nullptr;
+    struct ggml_tensor * ssm_b_norm      = nullptr;
+    struct ggml_tensor * ssm_c_norm      = nullptr;
     // attention
     struct ggml_tensor * wq        = nullptr;
@@ -334,6 +350,8 @@ struct llama_layer {
     struct llama_layer_posnet posnet;
     struct llama_layer_convnext convnext;
+    struct llama_layer_shortconv shortconv;
 };
 struct llama_model {

package/cpp/llama.cpp/src/llama-quant.cpp CHANGED Viewed

@@ -844,6 +844,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
         // do not quantize Mamba's small yet 2D weights
         // NOTE: can't use LLM_TN here because the layer number is not known
         quantize &= name.find("ssm_conv1d.weight") == std::string::npos;
+        quantize &= name.find("shortconv.conv.weight") == std::string::npos;
         // do not quantize RWKV's small yet 2D weights
         quantize &= name.find("time_mix_first.weight") == std::string::npos;

package/cpp/llama.cpp/src/llama-vocab.cpp CHANGED Viewed

@@ -351,6 +351,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
                 break;
             case LLAMA_VOCAB_PRE_TYPE_STABLELM2:
             case LLAMA_VOCAB_PRE_TYPE_QWEN2:
+            case LLAMA_VOCAB_PRE_TYPE_HUNYUAN:
                 regex_exprs = {
                     // original regex from tokenizer.json
                     // "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
@@ -1522,7 +1523,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     tokenizer_pre == "llama-v3" ||
                     tokenizer_pre == "llama-bpe"||
                     tokenizer_pre == "falcon3"  ||
-                    tokenizer_pre == "pixtral") {
+                    tokenizer_pre == "falcon-h1" ||
+                    tokenizer_pre == "pixtral"  ||
+                    tokenizer_pre == "midm-2.0" ||
+                    tokenizer_pre == "lfm2") {
                 pre_type = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
                 ignore_merges = true;
                 add_bos = true;
@@ -1554,7 +1558,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     tokenizer_pre == "jina-de" ||
                     tokenizer_pre == "gigachat"   ||
                     tokenizer_pre == "jina-v2-es" ||
-                    tokenizer_pre == "jina-v2-de") {
+                    tokenizer_pre == "jina-v2-de" ||
+                    tokenizer_pre == "a.x-4.0") {
                 pre_type = LLAMA_VOCAB_PRE_TYPE_GPT2;
             } else if (
                     tokenizer_pre == "jina-v1-en" ||
@@ -1656,6 +1661,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 tokenizer_pre == "seed-coder") {
                 pre_type = LLAMA_VOCAB_PRE_TYPE_SEED_CODER;
                 clean_spaces = false;
+            } else if (
+                tokenizer_pre == "hunyuan") {
+                pre_type = LLAMA_VOCAB_PRE_TYPE_HUNYUAN;
+                clean_spaces = false;
             } else {
                 throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
             }
@@ -1839,6 +1848,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                         || t.first == "<EOT>"
                         || t.first == "_<EOT>"
                         || t.first == "<｜end▁of▁sentence｜>" // DeepSeek
+                        || t.first == "<end_of_utterance>" // smoldocling
                    ) {
                     special_eot_id = t.second;
                     if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
@@ -1998,6 +2008,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<EOT>"
                     || t.first == "_<EOT>"
                     || t.first == "<|end_of_text|>"
+                    || t.first == "<end_of_utterance>" // smoldocling
                ) {
                 special_eog_ids.insert(t.second);
                 if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {

package/cpp/llama.cpp/src/llama-vocab.h CHANGED Viewed

@@ -6,6 +6,47 @@
 #include <vector>
 #include <memory>
+// pre-tokenization types
+enum llama_vocab_pre_type {
+    LLAMA_VOCAB_PRE_TYPE_DEFAULT        = 0,
+    LLAMA_VOCAB_PRE_TYPE_LLAMA3         = 1,
+    LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM   = 2,
+    LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER = 3,
+    LLAMA_VOCAB_PRE_TYPE_FALCON         = 4,
+    LLAMA_VOCAB_PRE_TYPE_MPT            = 5,
+    LLAMA_VOCAB_PRE_TYPE_STARCODER      = 6,
+    LLAMA_VOCAB_PRE_TYPE_GPT2           = 7,
+    LLAMA_VOCAB_PRE_TYPE_REFACT         = 8,
+    LLAMA_VOCAB_PRE_TYPE_COMMAND_R      = 9,
+    LLAMA_VOCAB_PRE_TYPE_STABLELM2      = 10,
+    LLAMA_VOCAB_PRE_TYPE_QWEN2          = 11,
+    LLAMA_VOCAB_PRE_TYPE_OLMO           = 12,
+    LLAMA_VOCAB_PRE_TYPE_DBRX           = 13,
+    LLAMA_VOCAB_PRE_TYPE_SMAUG          = 14,
+    LLAMA_VOCAB_PRE_TYPE_PORO           = 15,
+    LLAMA_VOCAB_PRE_TYPE_CHATGLM3       = 16,
+    LLAMA_VOCAB_PRE_TYPE_CHATGLM4       = 17,
+    LLAMA_VOCAB_PRE_TYPE_VIKING         = 18,
+    LLAMA_VOCAB_PRE_TYPE_JAIS           = 19,
+    LLAMA_VOCAB_PRE_TYPE_TEKKEN         = 20,
+    LLAMA_VOCAB_PRE_TYPE_SMOLLM         = 21,
+    LLAMA_VOCAB_PRE_TYPE_CODESHELL      = 22,
+    LLAMA_VOCAB_PRE_TYPE_BLOOM          = 23,
+    LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH   = 24,
+    LLAMA_VOCAB_PRE_TYPE_EXAONE         = 25,
+    LLAMA_VOCAB_PRE_TYPE_CHAMELEON      = 26,
+    LLAMA_VOCAB_PRE_TYPE_MINERVA        = 27,
+    LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM  = 28,
+    LLAMA_VOCAB_PRE_TYPE_GPT4O          = 29,
+    LLAMA_VOCAB_PRE_TYPE_SUPERBPE       = 30,
+    LLAMA_VOCAB_PRE_TYPE_TRILLION       = 31,
+    LLAMA_VOCAB_PRE_TYPE_BAILINGMOE     = 32,
+    LLAMA_VOCAB_PRE_TYPE_LLAMA4         = 33,
+    LLAMA_VOCAB_PRE_TYPE_PIXTRAL        = 34,
+    LLAMA_VOCAB_PRE_TYPE_SEED_CODER     = 35,
+    LLAMA_VOCAB_PRE_TYPE_HUNYUAN        = 36,
+};
 struct LLM_KV;
 struct llama_model_loader;

package/ios/include/chat.h CHANGED Viewed

@@ -7,6 +7,7 @@
 #include <chrono>
 #include <string>
 #include <vector>
+#include <map>
 struct common_chat_templates;
@@ -125,6 +126,7 @@ struct common_chat_templates_inputs {
     common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
     bool enable_thinking = true;
     std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
+    std::map<std::string, std::string> chat_template_kwargs;
 };
 struct common_chat_params {

package/ios/include/common.h CHANGED Viewed

@@ -8,6 +8,7 @@
 #include <string>
 #include <string_view>
 #include <vector>
+#include <map>
 #include <sstream>
 #ifdef _WIN32
@@ -369,6 +370,7 @@ struct common_params {
     std::string hostname      = "127.0.0.1";
     std::string public_path   = "";                                                                         // NOLINT
+    std::string api_prefix    = "";                                                                         // NOLINT
     std::string chat_template = "";                                                                         // NOLINT
     bool use_jinja = false;                                                                                 // NOLINT
     bool enable_chat_template = true;
@@ -381,6 +383,8 @@ struct common_params {
     std::string ssl_file_key  = "";                                                                         // NOLINT
     std::string ssl_file_cert = "";                                                                         // NOLINT
+    std::map<std::string, std::string> default_template_kwargs;
     // "advanced" endpoints are disabled by default for better security
     bool webui            = true;
     bool endpoint_slots   = false;

package/ios/include/llama.h CHANGED Viewed

@@ -79,46 +79,6 @@ extern "C" {
         LLAMA_VOCAB_TYPE_RWKV = 5, // RWKV tokenizer based on greedy tokenization
     };
-    // pre-tokenization types
-    enum llama_vocab_pre_type {
-        LLAMA_VOCAB_PRE_TYPE_DEFAULT        = 0,
-        LLAMA_VOCAB_PRE_TYPE_LLAMA3         = 1,
-        LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM   = 2,
-        LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER = 3,
-        LLAMA_VOCAB_PRE_TYPE_FALCON         = 4,
-        LLAMA_VOCAB_PRE_TYPE_MPT            = 5,
-        LLAMA_VOCAB_PRE_TYPE_STARCODER      = 6,
-        LLAMA_VOCAB_PRE_TYPE_GPT2           = 7,
-        LLAMA_VOCAB_PRE_TYPE_REFACT         = 8,
-        LLAMA_VOCAB_PRE_TYPE_COMMAND_R      = 9,
-        LLAMA_VOCAB_PRE_TYPE_STABLELM2      = 10,
-        LLAMA_VOCAB_PRE_TYPE_QWEN2          = 11,
-        LLAMA_VOCAB_PRE_TYPE_OLMO           = 12,
-        LLAMA_VOCAB_PRE_TYPE_DBRX           = 13,
-        LLAMA_VOCAB_PRE_TYPE_SMAUG          = 14,
-        LLAMA_VOCAB_PRE_TYPE_PORO           = 15,
-        LLAMA_VOCAB_PRE_TYPE_CHATGLM3       = 16,
-        LLAMA_VOCAB_PRE_TYPE_CHATGLM4       = 17,
-        LLAMA_VOCAB_PRE_TYPE_VIKING         = 18,
-        LLAMA_VOCAB_PRE_TYPE_JAIS           = 19,
-        LLAMA_VOCAB_PRE_TYPE_TEKKEN         = 20,
-        LLAMA_VOCAB_PRE_TYPE_SMOLLM         = 21,
-        LLAMA_VOCAB_PRE_TYPE_CODESHELL      = 22,
-        LLAMA_VOCAB_PRE_TYPE_BLOOM          = 23,
-        LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH   = 24,
-        LLAMA_VOCAB_PRE_TYPE_EXAONE         = 25,
-        LLAMA_VOCAB_PRE_TYPE_CHAMELEON      = 26,
-        LLAMA_VOCAB_PRE_TYPE_MINERVA        = 27,
-        LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM  = 28,
-        LLAMA_VOCAB_PRE_TYPE_GPT4O          = 29,
-        LLAMA_VOCAB_PRE_TYPE_SUPERBPE       = 30,
-        LLAMA_VOCAB_PRE_TYPE_TRILLION       = 31,
-        LLAMA_VOCAB_PRE_TYPE_BAILINGMOE     = 32,
-        LLAMA_VOCAB_PRE_TYPE_LLAMA4         = 33,
-        LLAMA_VOCAB_PRE_TYPE_PIXTRAL        = 34,
-        LLAMA_VOCAB_PRE_TYPE_SEED_CODER     = 35,
-    };
     enum llama_rope_type {
         LLAMA_ROPE_TYPE_NONE   = -1,
         LLAMA_ROPE_TYPE_NORM   = 0,

package/ios/libs/llama.xcframework/Info.plist CHANGED Viewed

@@ -10,7 +10,7 @@
 			<key>DebugSymbolsPath</key>
 			<string>dSYMs</string>
 			<key>LibraryIdentifier</key>
-			<string>ios-arm64_x86_64-simulator</string>
+			<string>xros-arm64_x86_64-simulator</string>
 			<key>LibraryPath</key>
 			<string>llama.framework</string>
 			<key>SupportedArchitectures</key>
@@ -19,7 +19,7 @@
 				<string>x86_64</string>
 			</array>
 			<key>SupportedPlatform</key>
-			<string>ios</string>
+			<string>xros</string>
 			<key>SupportedPlatformVariant</key>
 			<string>simulator</string>
 		</dict>
@@ -29,18 +29,15 @@
 			<key>DebugSymbolsPath</key>
 			<string>dSYMs</string>
 			<key>LibraryIdentifier</key>
-			<string>xros-arm64_x86_64-simulator</string>
+			<string>ios-arm64</string>
 			<key>LibraryPath</key>
 			<string>llama.framework</string>
 			<key>SupportedArchitectures</key>
 			<array>
 				<string>arm64</string>
-				<string>x86_64</string>
 			</array>
 			<key>SupportedPlatform</key>
-			<string>xros</string>
-			<key>SupportedPlatformVariant</key>
-			<string>simulator</string>
+			<string>ios</string>
 		</dict>
 		<dict>
 			<key>BinaryPath</key>
@@ -48,7 +45,7 @@
 			<key>DebugSymbolsPath</key>
 			<string>dSYMs</string>
 			<key>LibraryIdentifier</key>
-			<string>ios-arm64</string>
+			<string>tvos-arm64</string>
 			<key>LibraryPath</key>
 			<string>llama.framework</string>
 			<key>SupportedArchitectures</key>
@@ -56,7 +53,7 @@
 				<string>arm64</string>
 			</array>
 			<key>SupportedPlatform</key>
-			<string>ios</string>
+			<string>tvos</string>
 		</dict>
 		<dict>
 			<key>BinaryPath</key>
@@ -64,7 +61,7 @@
 			<key>DebugSymbolsPath</key>
 			<string>dSYMs</string>
 			<key>LibraryIdentifier</key>
-			<string>tvos-arm64_x86_64-simulator</string>
+			<string>ios-arm64_x86_64-simulator</string>
 			<key>LibraryPath</key>
 			<string>llama.framework</string>
 			<key>SupportedArchitectures</key>
@@ -73,42 +70,42 @@
 				<string>x86_64</string>
 			</array>
 			<key>SupportedPlatform</key>
-			<string>tvos</string>
+			<string>ios</string>
 			<key>SupportedPlatformVariant</key>
 			<string>simulator</string>
 		</dict>
 		<dict>
 			<key>BinaryPath</key>
-			<string>llama.framework/llama</string>
+			<string>llama.framework/Versions/A/llama</string>
 			<key>DebugSymbolsPath</key>
 			<string>dSYMs</string>
 			<key>LibraryIdentifier</key>
-			<string>xros-arm64</string>
+			<string>macos-arm64_x86_64</string>
 			<key>LibraryPath</key>
 			<string>llama.framework</string>
 			<key>SupportedArchitectures</key>
 			<array>
 				<string>arm64</string>
+				<string>x86_64</string>
 			</array>
 			<key>SupportedPlatform</key>
-			<string>xros</string>
+			<string>macos</string>
 		</dict>
 		<dict>
 			<key>BinaryPath</key>
-			<string>llama.framework/Versions/A/llama</string>
+			<string>llama.framework/llama</string>
 			<key>DebugSymbolsPath</key>
 			<string>dSYMs</string>
 			<key>LibraryIdentifier</key>
-			<string>macos-arm64_x86_64</string>
+			<string>xros-arm64</string>
 			<key>LibraryPath</key>
 			<string>llama.framework</string>
 			<key>SupportedArchitectures</key>
 			<array>
 				<string>arm64</string>
-				<string>x86_64</string>
 			</array>
 			<key>SupportedPlatform</key>
-			<string>macos</string>
+			<string>xros</string>
 		</dict>
 		<dict>
 			<key>BinaryPath</key>
@@ -116,15 +113,18 @@
 			<key>DebugSymbolsPath</key>
 			<string>dSYMs</string>
 			<key>LibraryIdentifier</key>
-			<string>tvos-arm64</string>
+			<string>tvos-arm64_x86_64-simulator</string>
 			<key>LibraryPath</key>
 			<string>llama.framework</string>
 			<key>SupportedArchitectures</key>
 			<array>
 				<string>arm64</string>
+				<string>x86_64</string>
 			</array>
 			<key>SupportedPlatform</key>
 			<string>tvos</string>
+			<key>SupportedPlatformVariant</key>
+			<string>simulator</string>
 		</dict>
 	</array>
 	<key>CFBundlePackageType</key>

package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama CHANGED Viewed

Binary file