@fugood/llama.node 1.0.1 → 1.0.3

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (45)
  1. package/package.json +14 -14
  2. package/scripts/llama.cpp.patch +12 -12
  3. package/src/llama.cpp/CMakeLists.txt +0 -1
  4. package/src/llama.cpp/common/arg.cpp +17 -0
  5. package/src/llama.cpp/common/chat.cpp +37 -20
  6. package/src/llama.cpp/common/chat.h +2 -0
  7. package/src/llama.cpp/common/common.h +4 -0
  8. package/src/llama.cpp/ggml/CMakeLists.txt +7 -2
  9. package/src/llama.cpp/ggml/include/ggml-backend.h +1 -1
  10. package/src/llama.cpp/ggml/include/ggml-cpu.h +1 -0
  11. package/src/llama.cpp/ggml/include/ggml.h +181 -10
  12. package/src/llama.cpp/ggml/src/CMakeLists.txt +0 -1
  13. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +6 -1
  14. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +38 -1
  15. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +1 -0
  16. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1297 -211
  17. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +7 -0
  18. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +1 -1
  19. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +33 -9
  20. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +103 -9
  21. package/src/llama.cpp/include/llama.h +1 -0
  22. package/src/llama.cpp/src/llama-arch.cpp +108 -2
  23. package/src/llama.cpp/src/llama-arch.h +7 -0
  24. package/src/llama.cpp/src/llama-batch.cpp +27 -1
  25. package/src/llama.cpp/src/llama-batch.h +8 -1
  26. package/src/llama.cpp/src/llama-chat.cpp +15 -0
  27. package/src/llama.cpp/src/llama-chat.h +1 -0
  28. package/src/llama.cpp/src/llama-graph.cpp +95 -81
  29. package/src/llama.cpp/src/llama-graph.h +43 -16
  30. package/src/llama.cpp/src/llama-hparams.cpp +2 -1
  31. package/src/llama.cpp/src/llama-hparams.h +1 -0
  32. package/src/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +28 -18
  33. package/src/llama.cpp/src/llama-kv-cache-unified-iswa.h +4 -2
  34. package/src/llama.cpp/src/llama-kv-cache-unified.cpp +214 -65
  35. package/src/llama.cpp/src/llama-kv-cache-unified.h +62 -24
  36. package/src/llama.cpp/src/llama-kv-cells.h +62 -10
  37. package/src/llama.cpp/src/llama-memory-hybrid.cpp +9 -4
  38. package/src/llama.cpp/src/llama-memory-hybrid.h +3 -1
  39. package/src/llama.cpp/src/llama-memory-recurrent.cpp +34 -16
  40. package/src/llama.cpp/src/llama-memory.cpp +17 -0
  41. package/src/llama.cpp/src/llama-memory.h +3 -0
  42. package/src/llama.cpp/src/llama-model.cpp +1374 -210
  43. package/src/llama.cpp/src/llama-model.h +3 -0
  44. package/src/llama.cpp/src/llama-vocab.cpp +8 -1
  45. package/src/llama.cpp/ggml/include/ggml-kompute.h +0 -50
package/src/llama.cpp/src/llama-model.h
@@ -39,6 +39,7 @@ enum llm_type {
     LLM_TYPE_475M,
     LLM_TYPE_770M,
     LLM_TYPE_780M,
+    LLM_TYPE_0_3B,
     LLM_TYPE_0_5B,
     LLM_TYPE_0_6B,
     LLM_TYPE_1B,
@@ -93,6 +94,7 @@ enum llm_type {
     LLM_TYPE_57B_A14B,
     LLM_TYPE_17B_16E, // llama4 Scout
     LLM_TYPE_17B_128E, // llama4 Maverick
+    LLM_TYPE_A13B,
     LLM_TYPE_30B_A3B,
     LLM_TYPE_235B_A22B,
     LLM_TYPE_E2B,
@@ -171,6 +173,7 @@ struct llama_layer {
     struct ggml_tensor * ffn_sub_norm = nullptr;
     struct ggml_tensor * attn_norm_cross = nullptr;
     struct ggml_tensor * attn_norm_enc = nullptr;
+    struct ggml_tensor * ssm_norm = nullptr;
 
     // attention
     struct ggml_tensor * wq = nullptr;
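
The new ssm_norm tensor sits alongside the other per-layer norms and backs the hybrid attention/SSM architectures this release picks up (Falcon-H1 also appears in the tokenizer hunks below). As rough orientation: a learned SSM-output norm of this kind is typically applied between the SSM scan and the output projection. The sketch below shows that shape in ggml terms; the function name, shapes, and the simplified (ungated) norm are illustrative assumptions, not the vendored graph code, which may gate the SSM output first.

#include "ggml.h"

// Hedged sketch: where a per-layer ssm_norm weight typically enters a
// Mamba-2 style block. Names and shapes are illustrative only.
static struct ggml_tensor * apply_ssm_norm(
        struct ggml_context * ctx,
        struct ggml_tensor  * ssm_out,  // [d_inner, n_tokens] SSM scan output
        struct ggml_tensor  * ssm_norm, // [d_inner] learned RMS-norm weight
        struct ggml_tensor  * out_proj, // [d_inner, d_model] projection weight
        float                 eps) {
    struct ggml_tensor * cur = ggml_rms_norm(ctx, ssm_out, eps); // normalize per token
    cur = ggml_mul(ctx, cur, ssm_norm);      // scale by the learned weight (broadcast)
    return ggml_mul_mat(ctx, out_proj, cur); // project back to the model dimension
}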
package/src/llama.cpp/src/llama-vocab.cpp
@@ -351,6 +351,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
                 break;
             case LLAMA_VOCAB_PRE_TYPE_STABLELM2:
             case LLAMA_VOCAB_PRE_TYPE_QWEN2:
+            case LLAMA_VOCAB_PRE_TYPE_HUNYUAN:
                 regex_exprs = {
                     // original regex from tokenizer.json
                     // "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
@@ -1522,6 +1523,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 tokenizer_pre == "llama-v3"  ||
                 tokenizer_pre == "llama-bpe" ||
                 tokenizer_pre == "falcon3"   ||
+                tokenizer_pre == "falcon-h1" ||
                 tokenizer_pre == "pixtral") {
             pre_type = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
             ignore_merges = true;
@@ -1554,7 +1556,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 tokenizer_pre == "jina-de"    ||
                 tokenizer_pre == "gigachat"   ||
                 tokenizer_pre == "jina-v2-es" ||
-                tokenizer_pre == "jina-v2-de") {
+                tokenizer_pre == "jina-v2-de" ||
+                tokenizer_pre == "a.x-4.0") {
             pre_type = LLAMA_VOCAB_PRE_TYPE_GPT2;
         } else if (
                 tokenizer_pre == "jina-v1-en" ||
@@ -1656,6 +1659,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 tokenizer_pre == "seed-coder") {
             pre_type = LLAMA_VOCAB_PRE_TYPE_SEED_CODER;
             clean_spaces = false;
+        } else if (
+                tokenizer_pre == "hunyuan") {
+            pre_type = LLAMA_VOCAB_PRE_TYPE_HUNYUAN;
+            clean_spaces = false;
         } else {
             throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
         }
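
Read together, the llama-vocab.cpp hunks follow the file's two-stage dispatch: llama_vocab::impl::load() maps the GGUF metadata string (tokenizer.ggml.pre) to a pre-type enum, and llm_tokenizer_bpe then selects its split regexes off that enum, which is why the new Hunyuan support lands in both places (it reuses the Qwen2 regex set, per the case fall-through above). A compressed sketch of the pattern, using simplified illustrative names rather than the library's actual types:

#include <map>
#include <stdexcept>
#include <string>
#include <vector>

// Illustrative two-stage dispatch: metadata string -> enum -> regex set.
enum class pre_type { gpt2, qwen2, hunyuan };

static pre_type pre_type_from_string(const std::string & tokenizer_pre) {
    static const std::map<std::string, pre_type> table = {
        { "gpt-2",   pre_type::gpt2    },
        { "qwen2",   pre_type::qwen2   },
        { "hunyuan", pre_type::hunyuan }, // newly recognized in this release
    };
    const auto it = table.find(tokenizer_pre);
    if (it == table.end()) {
        // mirrors the unknown-pre-tokenizer error in the hunk above
        throw std::runtime_error("unknown pre-tokenizer type: '" + tokenizer_pre + "'");
    }
    return it->second;
}

static std::vector<std::string> split_regexes(pre_type t) {
    switch (t) {
        case pre_type::qwen2:
        case pre_type::hunyuan: // fall through: Hunyuan reuses the Qwen2 split regex
            return { R"((?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+)" /* ... */ };
        default:
            return { R"('s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+)" };
    }
}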
package/src/llama.cpp/ggml/include/ggml-kompute.h (deleted)
@@ -1,50 +0,0 @@
-#pragma once
-
-#include "ggml.h"
-#include "ggml-backend.h"
-
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define GGML_KOMPUTE_MAX_DEVICES 16
-
-struct ggml_vk_device {
-    int index;
-    int type; // same as VkPhysicalDeviceType
-    size_t heapSize;
-    const char * name;
-    const char * vendor;
-    int subgroupSize;
-    uint64_t bufferAlignment;
-    uint64_t maxAlloc;
-};
-
-struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count);
-bool ggml_vk_get_device(struct ggml_vk_device * device, size_t memoryRequired, const char * name);
-bool ggml_vk_has_vulkan(void);
-bool ggml_vk_has_device(void);
-struct ggml_vk_device ggml_vk_current_device(void);
-
-//
-// backend API
-//
-
-// forward declaration
-typedef struct ggml_backend * ggml_backend_t;
-
-GGML_BACKEND_API ggml_backend_t ggml_backend_kompute_init(int device);
-
-GGML_BACKEND_API bool ggml_backend_is_kompute(ggml_backend_t backend);
-
-GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
-
-GGML_BACKEND_API ggml_backend_reg_t ggml_backend_kompute_reg(void);
-
-#ifdef __cplusplus
-}
-#endif
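
With ggml-kompute.h removed, the vendored tree no longer exposes a Kompute-specific device-query API. A downstream consumer that was calling ggml_vk_available_devices() can enumerate devices through the generic registry in ggml-backend.h instead; a hedged sketch, assuming a ggml build recent enough to have the device-registry API:

#include <cstdio>
#include "ggml-backend.h"

// List all registered backend devices (CPU, GPU, ...) through the
// generic registry -- the portable alternative to the removed
// Kompute-specific ggml_vk_available_devices().
int main(void) {
    const size_t n = ggml_backend_dev_count();
    for (size_t i = 0; i < n; ++i) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
        size_t free = 0, total = 0;
        ggml_backend_dev_memory(dev, &free, &total); // per-device memory info
        std::printf("%zu: %s (%s) - %zu of %zu MiB free\n",
                    i,
                    ggml_backend_dev_name(dev),
                    ggml_backend_dev_description(dev),
                    free  / (1024u * 1024u),
                    total / (1024u * 1024u));
    }
    return 0;
}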