@fugood/llama.node 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/package.json +14 -14
  2. package/src/llama.cpp/CMakeLists.txt +0 -1
  3. package/src/llama.cpp/common/CMakeLists.txt +4 -5
  4. package/src/llama.cpp/common/arg.cpp +44 -0
  5. package/src/llama.cpp/common/common.cpp +22 -6
  6. package/src/llama.cpp/common/common.h +15 -1
  7. package/src/llama.cpp/ggml/CMakeLists.txt +10 -2
  8. package/src/llama.cpp/ggml/include/ggml-webgpu.h +19 -0
  9. package/src/llama.cpp/ggml/include/ggml.h +104 -10
  10. package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -1
  11. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +6 -1
  12. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +12 -1
  13. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +343 -1094
  14. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +749 -163
  15. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +5 -0
  16. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +1 -1
  17. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +12 -9
  18. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +88 -9
  19. package/src/llama.cpp/include/llama.h +13 -47
  20. package/src/llama.cpp/src/llama-arch.cpp +298 -3
  21. package/src/llama.cpp/src/llama-arch.h +22 -1
  22. package/src/llama.cpp/src/llama-batch.cpp +103 -71
  23. package/src/llama.cpp/src/llama-batch.h +31 -18
  24. package/src/llama.cpp/src/llama-chat.cpp +59 -1
  25. package/src/llama.cpp/src/llama-chat.h +3 -0
  26. package/src/llama.cpp/src/llama-context.cpp +134 -95
  27. package/src/llama.cpp/src/llama-context.h +13 -16
  28. package/src/llama.cpp/src/llama-cparams.h +3 -2
  29. package/src/llama.cpp/src/llama-graph.cpp +279 -180
  30. package/src/llama.cpp/src/llama-graph.h +183 -122
  31. package/src/llama.cpp/src/llama-hparams.cpp +47 -1
  32. package/src/llama.cpp/src/llama-hparams.h +12 -1
  33. package/src/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +38 -22
  34. package/src/llama.cpp/src/llama-kv-cache-unified-iswa.h +7 -2
  35. package/src/llama.cpp/src/llama-kv-cache-unified.cpp +849 -304
  36. package/src/llama.cpp/src/llama-kv-cache-unified.h +143 -47
  37. package/src/llama.cpp/src/llama-kv-cells.h +62 -10
  38. package/src/llama.cpp/src/llama-memory-hybrid.cpp +10 -4
  39. package/src/llama.cpp/src/llama-memory-hybrid.h +3 -1
  40. package/src/llama.cpp/src/llama-memory-recurrent.cpp +21 -11
  41. package/src/llama.cpp/src/llama-memory.cpp +17 -0
  42. package/src/llama.cpp/src/llama-memory.h +3 -0
  43. package/src/llama.cpp/src/llama-model.cpp +3373 -743
  44. package/src/llama.cpp/src/llama-model.h +20 -4
  45. package/src/llama.cpp/src/llama-quant.cpp +2 -2
  46. package/src/llama.cpp/src/llama-vocab.cpp +376 -10
  47. package/src/llama.cpp/src/llama-vocab.h +43 -0
  48. package/src/llama.cpp/src/unicode.cpp +207 -0
  49. package/src/llama.cpp/src/unicode.h +2 -0
  50. package/src/llama.cpp/ggml/include/ggml-kompute.h +0 -50
@@ -557,6 +557,178 @@ static std::vector<size_t> unicode_regex_split_stl(const std::string & text, con
557
557
  return bpe_offsets;
558
558
  }
559
559
 
560
+ // K2 system regex patterns (from tokenization_kimi.py):
561
+ // [\p{Han}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+(?i:'s|'t|'re|'ve|'m|'ll|'d)?|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*(?i:'s|'t|'re|'ve|'m|'ll|'d)?|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+
562
+ static std::vector<size_t> unicode_regex_split_custom_kimi_k2(const std::string & text, const std::vector<size_t> & offsets) {
563
+ std::vector<size_t> bpe_offsets;
564
+ bpe_offsets.reserve(offsets.size());
565
+
566
+ const auto cpts = unicode_cpts_from_utf8(text);
567
+
568
+ size_t start = 0;
569
+ for (auto offset : offsets) {
570
+ const size_t offset_ini = start;
571
+ const size_t offset_end = start + offset;
572
+ assert(offset_end <= cpts.size());
573
+ start = offset_end;
574
+
575
+ static const uint32_t OUT_OF_RANGE = 0xFFFFFFFF;
576
+ auto _get_cpt = [&] (const size_t pos) -> uint32_t {
577
+ return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE;
578
+ };
579
+
580
+ auto _get_flags = [&] (const size_t pos) -> unicode_cpt_flags {
581
+ return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags_from_cpt(cpts[pos]) : unicode_cpt_flags{};
582
+ };
583
+
584
+ size_t _prev_end = offset_ini;
585
+ auto _add_token = [&] (const size_t end) -> size_t {
586
+ assert(_prev_end <= end && end <= offset_end);
587
+ size_t len = end - _prev_end;
588
+ if (len > 0) {
589
+ bpe_offsets.push_back(len);
590
+ }
591
+ _prev_end = end;
592
+ return len;
593
+ };
594
+
595
+ for (size_t pos = offset_ini; pos < offset_end; /*pos++*/ ) {
596
+ const uint32_t cpt = _get_cpt(pos);
597
+ const auto flags = _get_flags(pos);
598
+
599
+ // Pattern 1: [\p{Han}]+ (Chinese characters)
600
+ if (unicode_cpt_is_han(cpt)) {
601
+ while (unicode_cpt_is_han(_get_cpt(pos))) {
602
+ pos++;
603
+ }
604
+ _add_token(pos);
605
+ continue;
606
+ }
607
+
608
+ // Pattern 2 & 3: Letter words excluding Han characters with optional contractions
609
+ // [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+(?:'s|'t|'re|'ve|'m|'ll|'d)?
610
+ // [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*(?:'s|'t|'re|'ve|'m|'ll|'d)?
611
+ // Check if current char is a letter OR if current char could be a leading char and next char is a letter
612
+ bool is_letter_pattern = (flags.is_letter && !unicode_cpt_is_han(cpt)) ||
613
+ (!(cpt == '\r' || cpt == '\n' || flags.is_letter || flags.is_number) &&
614
+ _get_flags(pos + 1).is_letter && !unicode_cpt_is_han(_get_cpt(pos + 1)));
615
+
616
+ if (is_letter_pattern) {
617
+ // Handle optional leading non-letter/non-number character
618
+ bool has_leading_char = false;
619
+ if (!(cpt == '\r' || cpt == '\n' || flags.is_letter || flags.is_number)) {
620
+ has_leading_char = true;
621
+ pos++;
622
+ }
623
+
624
+ // Match letter sequence (excluding Han characters)
625
+ bool has_letters = false;
626
+ while (_get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos))) {
627
+ has_letters = true;
628
+ pos++;
629
+ }
630
+
631
+ // Only proceed if we found letters (after potentially skipping leading char)
632
+ if (has_letters || (!has_leading_char && _get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos)))) {
633
+ if (!has_letters) pos++; // consume the first letter if we didn't already
634
+
635
+ // Continue consuming letters
636
+ while (_get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos))) {
637
+ pos++;
638
+ }
639
+
640
+ // Check for optional contractions (?:'s|'t|'re|'ve|'m|'ll|'d)
641
+ if (_get_cpt(pos) == '\'' && pos + 1 < offset_end) {
642
+ uint32_t cpt_next = unicode_tolower(_get_cpt(pos + 1));
643
+ if (cpt_next == 's' || cpt_next == 't' || cpt_next == 'm' || cpt_next == 'd') {
644
+ pos += 2;
645
+ } else if (pos + 2 < offset_end) {
646
+ uint32_t cpt_next_next = unicode_tolower(_get_cpt(pos + 2));
647
+ if ((cpt_next == 'r' && cpt_next_next == 'e') ||
648
+ (cpt_next == 'v' && cpt_next_next == 'e') ||
649
+ (cpt_next == 'l' && cpt_next_next == 'l')) {
650
+ pos += 3;
651
+ }
652
+ }
653
+ }
654
+
655
+ _add_token(pos);
656
+ continue;
657
+ } else if (has_leading_char) {
658
+ // We consumed a leading char but found no letters, backtrack
659
+ pos--;
660
+ }
661
+ }
662
+
663
+ // Pattern 4: \p{N}{1,3} (numbers 1-3 digits)
664
+ if (flags.is_number) {
665
+ size_t ini = pos;
666
+ while (_get_flags(pos).is_number) {
667
+ if (++pos - ini >= 3) {
668
+ _add_token(pos);
669
+ ini = pos;
670
+ }
671
+ }
672
+ _add_token(pos);
673
+ continue;
674
+ }
675
+
676
+ // Pattern 5: ?[^\s\p{L}\p{N}]+[\r\n]* (optional space + non-word chars + optional newlines)
677
+ auto flags2 = (cpt == ' ' ? _get_flags(pos + 1) : flags);
678
+ if (!(flags2.is_whitespace || flags2.is_letter || flags2.is_number) && flags2.as_uint()) {
679
+ pos += (cpt == ' ');
680
+ while (!(flags2.is_whitespace || flags2.is_letter || flags2.is_number) && flags2.as_uint()) {
681
+ flags2 = _get_flags(++pos);
682
+ }
683
+ // Match optional [\r\n]*
684
+ uint32_t cpt2 = _get_cpt(pos);
685
+ while (cpt2 == '\r' || cpt2 == '\n') {
686
+ cpt2 = _get_cpt(++pos);
687
+ }
688
+ _add_token(pos);
689
+ continue;
690
+ }
691
+
692
+ // Count whitespace characters
693
+ size_t num_whitespaces = 0;
694
+ size_t last_end_r_or_n = 0;
695
+ while (_get_flags(pos + num_whitespaces).is_whitespace) {
696
+ uint32_t cpt2 = _get_cpt(pos + num_whitespaces);
697
+ if (cpt2 == '\r' || cpt2 == '\n') {
698
+ last_end_r_or_n = pos + num_whitespaces + 1;
699
+ }
700
+ num_whitespaces++;
701
+ }
702
+
703
+ // Pattern 6: \s*[\r\n]+ (whitespace with newlines)
704
+ if (last_end_r_or_n > 0) {
705
+ pos = last_end_r_or_n;
706
+ _add_token(pos);
707
+ continue;
708
+ }
709
+
710
+ // Pattern 7: \s+(?!\S) (trailing whitespace)
711
+ if (num_whitespaces > 1 && _get_cpt(pos + num_whitespaces) != OUT_OF_RANGE) {
712
+ pos += num_whitespaces - 1;
713
+ _add_token(pos);
714
+ continue;
715
+ }
716
+
717
+ // Pattern 8: \s+ (general whitespace)
718
+ if (num_whitespaces > 0) {
719
+ pos += num_whitespaces;
720
+ _add_token(pos);
721
+ continue;
722
+ }
723
+
724
+ // No matches - consume single character
725
+ _add_token(++pos);
726
+ }
727
+ }
728
+
729
+ return bpe_offsets;
730
+ }
731
+
560
732
  static std::vector<size_t> unicode_regex_split_custom(const std::string & text, const std::string & regex_expr, const std::vector<size_t> & offsets) {
561
733
  std::vector<size_t> bpe_offsets;
562
734
 
@@ -567,6 +739,9 @@ static std::vector<size_t> unicode_regex_split_custom(const std::string & text,
567
739
  regex_expr == "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+") {
568
740
 
569
741
  bpe_offsets = unicode_regex_split_custom_llama3(text, offsets);
742
+ } else if (regex_expr == "\\p{Han}+") {
743
+ // K2's first pattern - handle all K2 patterns together
744
+ bpe_offsets = unicode_regex_split_custom_kimi_k2(text, offsets);
570
745
  }
571
746
 
572
747
  return bpe_offsets;
@@ -672,6 +847,38 @@ uint32_t unicode_tolower(uint32_t cpt) {
672
847
  return cpt; // Return the original code point if no lowercase mapping is found
673
848
  }
674
849
 
850
// Reports whether `cpt` falls inside one of the CJK ideograph blocks listed in the table below.
//
// cpt     - Unicode code point to classify
// returns - true for a code point inside any listed block, false otherwise
//
// NOTE(review): the check works on whole blocks of ideographs; other code points that carry the Han
// script property (e.g. radicals, U+3005) are presumably out of scope here - confirm against the
// reference tokenizer.
bool unicode_cpt_is_han(uint32_t cpt) {
    struct cpt_range {
        uint32_t first;
        uint32_t last; // inclusive
    };

    static constexpr cpt_range k_han_ranges[] = {
        { 0x3400,  0x4DBF  }, // CJK Extension A
        { 0x4E00,  0x9FFF  }, // CJK Unified Ideographs (most common)
        { 0xF900,  0xFAFF  }, // CJK Compatibility Ideographs
        { 0x20000, 0x2A6DF }, // CJK Extension B
        { 0x2A700, 0x2B73F }, // CJK Extension C
        { 0x2B740, 0x2B81F }, // CJK Extension D
        { 0x2B820, 0x2CEAF }, // CJK Extension E
        { 0x2CEB0, 0x2EBEF }, // CJK Extension F
        { 0x2EBF0, 0x2EE5F }, // CJK Extension I
        { 0x2F800, 0x2FA1F }, // CJK Compatibility Ideographs Supplement
        { 0x30000, 0x3134F }, // CJK Extension G
        { 0x31350, 0x323AF }, // CJK Extension H
    };

    for (const auto & range : k_han_ranges) {
        if (cpt >= range.first && cpt <= range.last) {
            return true;
        }
    }

    return false;
}
881
+
675
882
  std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs) {
676
883
  // unicode categories
677
884
  static const std::map<std::string, int> k_ucat_enum = {
@@ -63,4 +63,6 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8);
63
63
 
64
64
  uint32_t unicode_tolower(uint32_t cpt);
65
65
 
66
+ bool unicode_cpt_is_han(uint32_t cpt);
67
+
66
68
  std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs);
@@ -1,50 +0,0 @@
1
- #pragma once
2
-
3
- #include "ggml.h"
4
- #include "ggml-backend.h"
5
-
6
- #include <stdbool.h>
7
- #include <stddef.h>
8
- #include <stdint.h>
9
-
10
- #ifdef __cplusplus
11
- extern "C" {
12
- #endif
13
-
14
- #define GGML_KOMPUTE_MAX_DEVICES 16
15
-
16
- struct ggml_vk_device { // device record used by the ggml_vk_* query functions declared after this struct
17
- int index; // presumably the position of the device in the enumeration - TODO confirm
18
- int type; // same as VkPhysicalDeviceType
19
- size_t heapSize; // NOTE(review): units are not stated here, presumably bytes - confirm
20
- const char * name; // NOTE(review): ownership/lifetime of the string is not visible here - confirm
21
- const char * vendor; // NOTE(review): ownership/lifetime of the string is not visible here - confirm
22
- int subgroupSize;
23
- uint64_t bufferAlignment; // NOTE(review): presumably in bytes - confirm
24
- uint64_t maxAlloc; // NOTE(review): presumably the largest single allocation, in bytes - confirm
25
- };
26
-
27
- struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count);
28
- bool ggml_vk_get_device(struct ggml_vk_device * device, size_t memoryRequired, const char * name);
29
- bool ggml_vk_has_vulkan(void);
30
- bool ggml_vk_has_device(void);
31
- struct ggml_vk_device ggml_vk_current_device(void);
32
-
33
- //
34
- // backend API
35
- //
36
-
37
- // forward declaration
38
- typedef struct ggml_backend * ggml_backend_t;
39
-
40
- GGML_BACKEND_API ggml_backend_t ggml_backend_kompute_init(int device);
41
-
42
- GGML_BACKEND_API bool ggml_backend_is_kompute(ggml_backend_t backend);
43
-
44
- GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
45
-
46
- GGML_BACKEND_API ggml_backend_reg_t ggml_backend_kompute_reg(void);
47
-
48
- #ifdef __cplusplus
49
- }
50
- #endif