@fugood/llama.node 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +12 -12
- package/src/llama.cpp/CMakeLists.txt +0 -1
- package/src/llama.cpp/common/arg.cpp +17 -0
- package/src/llama.cpp/common/chat.cpp +37 -20
- package/src/llama.cpp/common/chat.h +2 -0
- package/src/llama.cpp/common/common.h +4 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +7 -2
- package/src/llama.cpp/ggml/include/ggml-backend.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-cpu.h +1 -0
- package/src/llama.cpp/ggml/include/ggml.h +181 -10
- package/src/llama.cpp/ggml/src/CMakeLists.txt +0 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +6 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +38 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1297 -211
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +7 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +33 -9
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +103 -9
- package/src/llama.cpp/include/llama.h +1 -0
- package/src/llama.cpp/src/llama-arch.cpp +108 -2
- package/src/llama.cpp/src/llama-arch.h +7 -0
- package/src/llama.cpp/src/llama-batch.cpp +27 -1
- package/src/llama.cpp/src/llama-batch.h +8 -1
- package/src/llama.cpp/src/llama-chat.cpp +15 -0
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +95 -81
- package/src/llama.cpp/src/llama-graph.h +43 -16
- package/src/llama.cpp/src/llama-hparams.cpp +2 -1
- package/src/llama.cpp/src/llama-hparams.h +1 -0
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +28 -18
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.h +4 -2
- package/src/llama.cpp/src/llama-kv-cache-unified.cpp +214 -65
- package/src/llama.cpp/src/llama-kv-cache-unified.h +62 -24
- package/src/llama.cpp/src/llama-kv-cells.h +62 -10
- package/src/llama.cpp/src/llama-memory-hybrid.cpp +9 -4
- package/src/llama.cpp/src/llama-memory-hybrid.h +3 -1
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +34 -16
- package/src/llama.cpp/src/llama-memory.cpp +17 -0
- package/src/llama.cpp/src/llama-memory.h +3 -0
- package/src/llama.cpp/src/llama-model.cpp +1374 -210
- package/src/llama.cpp/src/llama-model.h +3 -0
- package/src/llama.cpp/src/llama-vocab.cpp +8 -1
- package/src/llama.cpp/ggml/include/ggml-kompute.h +0 -50
package/src/llama.cpp/src/llama-model.h

@@ -39,6 +39,7 @@ enum llm_type {
     LLM_TYPE_475M,
     LLM_TYPE_770M,
     LLM_TYPE_780M,
+    LLM_TYPE_0_3B,
     LLM_TYPE_0_5B,
     LLM_TYPE_0_6B,
     LLM_TYPE_1B,
@@ -93,6 +94,7 @@ enum llm_type {
     LLM_TYPE_57B_A14B,
     LLM_TYPE_17B_16E, // llama4 Scout
     LLM_TYPE_17B_128E, // llama4 Maverick
+    LLM_TYPE_A13B,
     LLM_TYPE_30B_A3B,
     LLM_TYPE_235B_A22B,
     LLM_TYPE_E2B,
@@ -171,6 +173,7 @@ struct llama_layer {
     struct ggml_tensor * ffn_sub_norm = nullptr;
     struct ggml_tensor * attn_norm_cross = nullptr;
     struct ggml_tensor * attn_norm_enc = nullptr;
+    struct ggml_tensor * ssm_norm = nullptr;

     // attention
     struct ggml_tensor * wq = nullptr;
package/src/llama.cpp/src/llama-vocab.cpp

@@ -351,6 +351,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
                break;
            case LLAMA_VOCAB_PRE_TYPE_STABLELM2:
            case LLAMA_VOCAB_PRE_TYPE_QWEN2:
+           case LLAMA_VOCAB_PRE_TYPE_HUNYUAN:
                regex_exprs = {
                    // original regex from tokenizer.json
                    // "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
@@ -1522,6 +1523,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                tokenizer_pre == "llama-v3" ||
                tokenizer_pre == "llama-bpe"||
                tokenizer_pre == "falcon3" ||
+               tokenizer_pre == "falcon-h1" ||
                tokenizer_pre == "pixtral") {
            pre_type = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
            ignore_merges = true;
@@ -1554,7 +1556,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                tokenizer_pre == "jina-de" ||
                tokenizer_pre == "gigachat" ||
                tokenizer_pre == "jina-v2-es" ||
-               tokenizer_pre == "jina-v2-de") {
+               tokenizer_pre == "jina-v2-de" ||
+               tokenizer_pre == "a.x-4.0") {
            pre_type = LLAMA_VOCAB_PRE_TYPE_GPT2;
        } else if (
                tokenizer_pre == "jina-v1-en" ||
@@ -1656,6 +1659,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                tokenizer_pre == "seed-coder") {
            pre_type = LLAMA_VOCAB_PRE_TYPE_SEED_CODER;
            clean_spaces = false;
+       } else if (
+               tokenizer_pre == "hunyuan") {
+           pre_type = LLAMA_VOCAB_PRE_TYPE_HUNYUAN;
+           clean_spaces = false;
        } else {
            throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
        }
package/src/llama.cpp/ggml/include/ggml-kompute.h (file removed)

@@ -1,50 +0,0 @@
-#pragma once
-
-#include "ggml.h"
-#include "ggml-backend.h"
-
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define GGML_KOMPUTE_MAX_DEVICES 16
-
-struct ggml_vk_device {
-    int index;
-    int type; // same as VkPhysicalDeviceType
-    size_t heapSize;
-    const char * name;
-    const char * vendor;
-    int subgroupSize;
-    uint64_t bufferAlignment;
-    uint64_t maxAlloc;
-};
-
-struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count);
-bool ggml_vk_get_device(struct ggml_vk_device * device, size_t memoryRequired, const char * name);
-bool ggml_vk_has_vulkan(void);
-bool ggml_vk_has_device(void);
-struct ggml_vk_device ggml_vk_current_device(void);
-
-//
-// backend API
-//
-
-// forward declaration
-typedef struct ggml_backend * ggml_backend_t;
-
-GGML_BACKEND_API ggml_backend_t ggml_backend_kompute_init(int device);
-
-GGML_BACKEND_API bool ggml_backend_is_kompute(ggml_backend_t backend);
-
-GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
-
-GGML_BACKEND_API ggml_backend_reg_t ggml_backend_kompute_reg(void);
-
-#ifdef __cplusplus
-}
-#endif