@fugood/llama.node 1.0.3 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/lib/binding.ts +1 -0
  2. package/package.json +14 -14
  3. package/src/LlamaCompletionWorker.cpp +24 -4
  4. package/src/LlamaCompletionWorker.h +7 -1
  5. package/src/LlamaContext.cpp +2 -1
  6. package/src/llama.cpp/common/CMakeLists.txt +4 -5
  7. package/src/llama.cpp/common/arg.cpp +37 -0
  8. package/src/llama.cpp/common/common.cpp +22 -6
  9. package/src/llama.cpp/common/common.h +14 -1
  10. package/src/llama.cpp/ggml/CMakeLists.txt +3 -0
  11. package/src/llama.cpp/ggml/include/ggml-webgpu.h +19 -0
  12. package/src/llama.cpp/ggml/include/ggml.h +13 -0
  13. package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -0
  14. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +343 -1094
  15. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +23 -8
  16. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +3 -0
  17. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +39 -0
  18. package/src/llama.cpp/include/llama.h +13 -48
  19. package/src/llama.cpp/src/llama-arch.cpp +222 -15
  20. package/src/llama.cpp/src/llama-arch.h +16 -1
  21. package/src/llama.cpp/src/llama-batch.cpp +76 -70
  22. package/src/llama.cpp/src/llama-batch.h +24 -18
  23. package/src/llama.cpp/src/llama-chat.cpp +44 -1
  24. package/src/llama.cpp/src/llama-chat.h +2 -0
  25. package/src/llama.cpp/src/llama-context.cpp +134 -95
  26. package/src/llama.cpp/src/llama-context.h +13 -16
  27. package/src/llama.cpp/src/llama-cparams.h +3 -2
  28. package/src/llama.cpp/src/llama-graph.cpp +239 -154
  29. package/src/llama.cpp/src/llama-graph.h +162 -126
  30. package/src/llama.cpp/src/llama-hparams.cpp +45 -0
  31. package/src/llama.cpp/src/llama-hparams.h +11 -1
  32. package/src/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +11 -5
  33. package/src/llama.cpp/src/llama-kv-cache-unified-iswa.h +3 -0
  34. package/src/llama.cpp/src/llama-kv-cache-unified.cpp +698 -302
  35. package/src/llama.cpp/src/llama-kv-cache-unified.h +89 -31
  36. package/src/llama.cpp/src/llama-memory-hybrid.cpp +1 -0
  37. package/src/llama.cpp/src/llama-memory-recurrent.cpp +6 -9
  38. package/src/llama.cpp/src/llama-model.cpp +2309 -665
  39. package/src/llama.cpp/src/llama-model.h +18 -4
  40. package/src/llama.cpp/src/llama-quant.cpp +2 -2
  41. package/src/llama.cpp/src/llama-vocab.cpp +368 -9
  42. package/src/llama.cpp/src/llama-vocab.h +43 -0
  43. package/src/llama.cpp/src/unicode.cpp +207 -0
  44. package/src/llama.cpp/src/unicode.h +2 -0
package/src/llama.cpp/src/unicode.cpp
@@ -557,6 +557,178 @@ static std::vector<size_t> unicode_regex_split_stl(const std::string & text, con
     return bpe_offsets;
 }
 
+// K2 system regex patterns (from tokenization_kimi.py):
+// [\p{Han}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+(?i:'s|'t|'re|'ve|'m|'ll|'d)?|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*(?i:'s|'t|'re|'ve|'m|'ll|'d)?|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+
+static std::vector<size_t> unicode_regex_split_custom_kimi_k2(const std::string & text, const std::vector<size_t> & offsets) {
+    std::vector<size_t> bpe_offsets;
+    bpe_offsets.reserve(offsets.size());
+
+    const auto cpts = unicode_cpts_from_utf8(text);
+
+    size_t start = 0;
+    for (auto offset : offsets) {
+        const size_t offset_ini = start;
+        const size_t offset_end = start + offset;
+        assert(offset_end <= cpts.size());
+        start = offset_end;
+
+        static const uint32_t OUT_OF_RANGE = 0xFFFFFFFF;
+        auto _get_cpt = [&] (const size_t pos) -> uint32_t {
+            return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE;
+        };
+
+        auto _get_flags = [&] (const size_t pos) -> unicode_cpt_flags {
+            return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags_from_cpt(cpts[pos]) : unicode_cpt_flags{};
+        };
+
+        size_t _prev_end = offset_ini;
+        auto _add_token = [&] (const size_t end) -> size_t {
+            assert(_prev_end <= end && end <= offset_end);
+            size_t len = end - _prev_end;
+            if (len > 0) {
+                bpe_offsets.push_back(len);
+            }
+            _prev_end = end;
+            return len;
+        };
+
+        for (size_t pos = offset_ini; pos < offset_end; /*pos++*/ ) {
+            const uint32_t cpt = _get_cpt(pos);
+            const auto flags = _get_flags(pos);
+
+            // Pattern 1: [\p{Han}]+ (Chinese characters)
+            if (unicode_cpt_is_han(cpt)) {
+                while (unicode_cpt_is_han(_get_cpt(pos))) {
+                    pos++;
+                }
+                _add_token(pos);
+                continue;
+            }
+
+            // Pattern 2 & 3: Letter words excluding Han characters with optional contractions
+            // [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+(?:'s|'t|'re|'ve|'m|'ll|'d)?
+            // [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*(?:'s|'t|'re|'ve|'m|'ll|'d)?
+            // Check if current char is a letter OR if current char could be a leading char and next char is a letter
+            bool is_letter_pattern = (flags.is_letter && !unicode_cpt_is_han(cpt)) ||
+                                     (!(cpt == '\r' || cpt == '\n' || flags.is_letter || flags.is_number) &&
+                                      _get_flags(pos + 1).is_letter && !unicode_cpt_is_han(_get_cpt(pos + 1)));
+
+            if (is_letter_pattern) {
+                // Handle optional leading non-letter/non-number character
+                bool has_leading_char = false;
+                if (!(cpt == '\r' || cpt == '\n' || flags.is_letter || flags.is_number)) {
+                    has_leading_char = true;
+                    pos++;
+                }
+
+                // Match letter sequence (excluding Han characters)
+                bool has_letters = false;
+                while (_get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos))) {
+                    has_letters = true;
+                    pos++;
+                }
+
+                // Only proceed if we found letters (after potentially skipping leading char)
+                if (has_letters || (!has_leading_char && _get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos)))) {
+                    if (!has_letters) pos++; // consume the first letter if we didn't already
+
+                    // Continue consuming letters
+                    while (_get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos))) {
+                        pos++;
+                    }
+
+                    // Check for optional contractions (?:'s|'t|'re|'ve|'m|'ll|'d)
+                    if (_get_cpt(pos) == '\'' && pos + 1 < offset_end) {
+                        uint32_t cpt_next = unicode_tolower(_get_cpt(pos + 1));
+                        if (cpt_next == 's' || cpt_next == 't' || cpt_next == 'm' || cpt_next == 'd') {
+                            pos += 2;
+                        } else if (pos + 2 < offset_end) {
+                            uint32_t cpt_next_next = unicode_tolower(_get_cpt(pos + 2));
+                            if ((cpt_next == 'r' && cpt_next_next == 'e') ||
+                                (cpt_next == 'v' && cpt_next_next == 'e') ||
+                                (cpt_next == 'l' && cpt_next_next == 'l')) {
+                                pos += 3;
+                            }
+                        }
+                    }
+
+                    _add_token(pos);
+                    continue;
+                } else if (has_leading_char) {
+                    // We consumed a leading char but found no letters, backtrack
+                    pos--;
+                }
+            }
+
+            // Pattern 4: \p{N}{1,3} (numbers 1-3 digits)
+            if (flags.is_number) {
+                size_t ini = pos;
+                while (_get_flags(pos).is_number) {
+                    if (++pos - ini >= 3) {
+                        _add_token(pos);
+                        ini = pos;
+                    }
+                }
+                _add_token(pos);
+                continue;
+            }
+
+            // Pattern 5:  ?[^\s\p{L}\p{N}]+[\r\n]* (optional space + non-word chars + optional newlines)
+            auto flags2 = (cpt == ' ' ? _get_flags(pos + 1) : flags);
+            if (!(flags2.is_whitespace || flags2.is_letter || flags2.is_number) && flags2.as_uint()) {
+                pos += (cpt == ' ');
+                while (!(flags2.is_whitespace || flags2.is_letter || flags2.is_number) && flags2.as_uint()) {
+                    flags2 = _get_flags(++pos);
+                }
+                // Match optional [\r\n]*
+                uint32_t cpt2 = _get_cpt(pos);
+                while (cpt2 == '\r' || cpt2 == '\n') {
+                    cpt2 = _get_cpt(++pos);
+                }
+                _add_token(pos);
+                continue;
+            }
+
+            // Count whitespace characters
+            size_t num_whitespaces = 0;
+            size_t last_end_r_or_n = 0;
+            while (_get_flags(pos + num_whitespaces).is_whitespace) {
+                uint32_t cpt2 = _get_cpt(pos + num_whitespaces);
+                if (cpt2 == '\r' || cpt2 == '\n') {
+                    last_end_r_or_n = pos + num_whitespaces + 1;
+                }
+                num_whitespaces++;
+            }
+
+            // Pattern 6: \s*[\r\n]+ (whitespace with newlines)
+            if (last_end_r_or_n > 0) {
+                pos = last_end_r_or_n;
+                _add_token(pos);
+                continue;
+            }
+
+            // Pattern 7: \s+(?!\S) (trailing whitespace)
+            if (num_whitespaces > 1 && _get_cpt(pos + num_whitespaces) != OUT_OF_RANGE) {
+                pos += num_whitespaces - 1;
+                _add_token(pos);
+                continue;
+            }
+
+            // Pattern 8: \s+ (general whitespace)
+            if (num_whitespaces > 0) {
+                pos += num_whitespaces;
+                _add_token(pos);
+                continue;
+            }
+
+            // No matches - consume single character
+            _add_token(++pos);
+        }
+    }
+
+    return bpe_offsets;
+}
+
 static std::vector<size_t> unicode_regex_split_custom(const std::string & text, const std::string & regex_expr, const std::vector<size_t> & offsets) {
     std::vector<size_t> bpe_offsets;
 
@@ -567,6 +739,9 @@ static std::vector<size_t> unicode_regex_split_custom(const std::string & text,
             regex_expr == "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+") {
 
         bpe_offsets = unicode_regex_split_custom_llama3(text, offsets);
+    } else if (regex_expr == "\\p{Han}+") {
+        // K2's first pattern - handle all K2 patterns together
+        bpe_offsets = unicode_regex_split_custom_kimi_k2(text, offsets);
     }
 
     return bpe_offsets;
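
The dispatcher above routes the single expression "\p{Han}+" to the new K2 splitter, which then applies all of the K2 patterns itself; the Kimi-K2 pre-tokenizer wiring presumably lands in package/src/llama.cpp/src/llama-vocab.cpp (file 41 above). Below is a minimal sketch of exercising that path through the public unicode_regex_split helper; the file name, include path, and build wiring are assumptions, and the expected pieces are hand-traced from the branches above rather than captured from the package.

// sketch.cpp - illustrative only; include path and build wiring are assumptions
#include <iostream>
#include <string>
#include <vector>

#include "unicode.h" // declares unicode_regex_split (see the unicode.h hunk below)

int main() {
    const std::string text = "你好 world 123";

    // "\\p{Han}+" is the trigger that unicode_regex_split_custom maps to
    // unicode_regex_split_custom_kimi_k2, which handles all K2 patterns at once.
    const std::vector<std::string> regex_exprs = { "\\p{Han}+" };

    for (const auto & piece : unicode_regex_split(text, regex_exprs)) {
        std::cout << "[" << piece << "]\n";
    }
    // Hand-traced expectation: [你好] [ world] [ ] [123]
    //   "你好"   -> Pattern 1: [\p{Han}]+
    //   " world" -> Pattern 2/3: optional leading non-letter, then a letter run
    //   " "      -> Pattern 8: \s+ (a digit run does not absorb a leading space)
    //   "123"    -> Pattern 4: \p{N}{1,3}
    return 0;
}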
@@ -672,6 +847,38 @@ uint32_t unicode_tolower(uint32_t cpt) {
     return cpt; // Return the original code point if no lowercase mapping is found
 }
 
+bool unicode_cpt_is_han(uint32_t cpt) {
+    // Han character ranges (Chinese/CJK characters)
+    // CJK Unified Ideographs (most common)
+    if (cpt >= 0x4E00 && cpt <= 0x9FFF) return true;
+
+    // CJK Extension A
+    if (cpt >= 0x3400 && cpt <= 0x4DBF) return true;
+
+    // CJK Extension B
+    if (cpt >= 0x20000 && cpt <= 0x2A6DF) return true;
+
+    // CJK Extension C
+    if (cpt >= 0x2A700 && cpt <= 0x2B73F) return true;
+
+    // CJK Extension D
+    if (cpt >= 0x2B740 && cpt <= 0x2B81F) return true;
+
+    // CJK Extension E
+    if (cpt >= 0x2B820 && cpt <= 0x2CEAF) return true;
+
+    // CJK Extension F
+    if (cpt >= 0x2CEB0 && cpt <= 0x2EBEF) return true;
+
+    // CJK Compatibility Ideographs
+    if (cpt >= 0xF900 && cpt <= 0xFAFF) return true;
+
+    // CJK Compatibility Ideographs Supplement
+    if (cpt >= 0x2F800 && cpt <= 0x2FA1F) return true;
+
+    return false;
+}
+
 std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs) {
     // unicode categories
     static const std::map<std::string, int> k_ucat_enum = {
package/src/llama.cpp/src/unicode.h
@@ -63,4 +63,6 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8);
 
 uint32_t unicode_tolower(uint32_t cpt);
 
+bool unicode_cpt_is_han(uint32_t cpt);
+
 std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs);
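
The unicode_cpt_is_han helper exported here is a plain range check over the CJK blocks listed in the unicode.cpp hunk, so it can be sanity-checked in isolation. A minimal sketch, assuming the same internal header is available:

// han_check.cpp - illustrative only
#include <cassert>
#include <cstdint>

#include "unicode.h" // bool unicode_cpt_is_han(uint32_t cpt);

int main() {
    assert( unicode_cpt_is_han(0x4E2D));  // 中: CJK Unified Ideographs (0x4E00-0x9FFF)
    assert( unicode_cpt_is_han(0x20000)); // first code point of CJK Extension B
    assert( unicode_cpt_is_han(0xF900));  // CJK Compatibility Ideographs
    assert(!unicode_cpt_is_han(0x0041));  // 'A' (Latin)
    assert(!unicode_cpt_is_han(0x3042));  // あ (Hiragana) falls outside every listed range
    return 0;
}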