@fugood/llama.node 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +14 -14
- package/src/llama.cpp/CMakeLists.txt +0 -1
- package/src/llama.cpp/common/CMakeLists.txt +4 -5
- package/src/llama.cpp/common/arg.cpp +44 -0
- package/src/llama.cpp/common/common.cpp +22 -6
- package/src/llama.cpp/common/common.h +15 -1
- package/src/llama.cpp/ggml/CMakeLists.txt +10 -2
- package/src/llama.cpp/ggml/include/ggml-webgpu.h +19 -0
- package/src/llama.cpp/ggml/include/ggml.h +104 -10
- package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +6 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +12 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +343 -1094
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +749 -163
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +5 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +12 -9
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +88 -9
- package/src/llama.cpp/include/llama.h +13 -47
- package/src/llama.cpp/src/llama-arch.cpp +298 -3
- package/src/llama.cpp/src/llama-arch.h +22 -1
- package/src/llama.cpp/src/llama-batch.cpp +103 -71
- package/src/llama.cpp/src/llama-batch.h +31 -18
- package/src/llama.cpp/src/llama-chat.cpp +59 -1
- package/src/llama.cpp/src/llama-chat.h +3 -0
- package/src/llama.cpp/src/llama-context.cpp +134 -95
- package/src/llama.cpp/src/llama-context.h +13 -16
- package/src/llama.cpp/src/llama-cparams.h +3 -2
- package/src/llama.cpp/src/llama-graph.cpp +279 -180
- package/src/llama.cpp/src/llama-graph.h +183 -122
- package/src/llama.cpp/src/llama-hparams.cpp +47 -1
- package/src/llama.cpp/src/llama-hparams.h +12 -1
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +38 -22
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.h +7 -2
- package/src/llama.cpp/src/llama-kv-cache-unified.cpp +849 -304
- package/src/llama.cpp/src/llama-kv-cache-unified.h +143 -47
- package/src/llama.cpp/src/llama-kv-cells.h +62 -10
- package/src/llama.cpp/src/llama-memory-hybrid.cpp +10 -4
- package/src/llama.cpp/src/llama-memory-hybrid.h +3 -1
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +21 -11
- package/src/llama.cpp/src/llama-memory.cpp +17 -0
- package/src/llama.cpp/src/llama-memory.h +3 -0
- package/src/llama.cpp/src/llama-model.cpp +3373 -743
- package/src/llama.cpp/src/llama-model.h +20 -4
- package/src/llama.cpp/src/llama-quant.cpp +2 -2
- package/src/llama.cpp/src/llama-vocab.cpp +376 -10
- package/src/llama.cpp/src/llama-vocab.h +43 -0
- package/src/llama.cpp/src/unicode.cpp +207 -0
- package/src/llama.cpp/src/unicode.h +2 -0
- package/src/llama.cpp/ggml/include/ggml-kompute.h +0 -50
|
@@ -557,6 +557,178 @@ static std::vector<size_t> unicode_regex_split_stl(const std::string & text, con
|
|
|
557
557
|
return bpe_offsets;
|
|
558
558
|
}
|
|
559
559
|
|
|
560
|
+
// K2 system regex patterns (from tokenization_kimi.py):
|
|
561
|
+
// [\p{Han}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+(?i:'s|'t|'re|'ve|'m|'ll|'d)?|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*(?i:'s|'t|'re|'ve|'m|'ll|'d)?|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+
|
|
562
|
+
static std::vector<size_t> unicode_regex_split_custom_kimi_k2(const std::string & text, const std::vector<size_t> & offsets) {
|
|
563
|
+
std::vector<size_t> bpe_offsets;
|
|
564
|
+
bpe_offsets.reserve(offsets.size());
|
|
565
|
+
|
|
566
|
+
const auto cpts = unicode_cpts_from_utf8(text);
|
|
567
|
+
|
|
568
|
+
size_t start = 0;
|
|
569
|
+
for (auto offset : offsets) {
|
|
570
|
+
const size_t offset_ini = start;
|
|
571
|
+
const size_t offset_end = start + offset;
|
|
572
|
+
assert(offset_end <= cpts.size());
|
|
573
|
+
start = offset_end;
|
|
574
|
+
|
|
575
|
+
static const uint32_t OUT_OF_RANGE = 0xFFFFFFFF;
|
|
576
|
+
auto _get_cpt = [&] (const size_t pos) -> uint32_t {
|
|
577
|
+
return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE;
|
|
578
|
+
};
|
|
579
|
+
|
|
580
|
+
auto _get_flags = [&] (const size_t pos) -> unicode_cpt_flags {
|
|
581
|
+
return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags_from_cpt(cpts[pos]) : unicode_cpt_flags{};
|
|
582
|
+
};
|
|
583
|
+
|
|
584
|
+
size_t _prev_end = offset_ini;
|
|
585
|
+
auto _add_token = [&] (const size_t end) -> size_t {
|
|
586
|
+
assert(_prev_end <= end && end <= offset_end);
|
|
587
|
+
size_t len = end - _prev_end;
|
|
588
|
+
if (len > 0) {
|
|
589
|
+
bpe_offsets.push_back(len);
|
|
590
|
+
}
|
|
591
|
+
_prev_end = end;
|
|
592
|
+
return len;
|
|
593
|
+
};
|
|
594
|
+
|
|
595
|
+
for (size_t pos = offset_ini; pos < offset_end; /*pos++*/ ) {
|
|
596
|
+
const uint32_t cpt = _get_cpt(pos);
|
|
597
|
+
const auto flags = _get_flags(pos);
|
|
598
|
+
|
|
599
|
+
// Pattern 1: [\p{Han}]+ (Chinese characters)
|
|
600
|
+
if (unicode_cpt_is_han(cpt)) {
|
|
601
|
+
while (unicode_cpt_is_han(_get_cpt(pos))) {
|
|
602
|
+
pos++;
|
|
603
|
+
}
|
|
604
|
+
_add_token(pos);
|
|
605
|
+
continue;
|
|
606
|
+
}
|
|
607
|
+
|
|
608
|
+
// Pattern 2 & 3: Letter words excluding Han characters with optional contractions
|
|
609
|
+
// [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+(?:'s|'t|'re|'ve|'m|'ll|'d)?
|
|
610
|
+
// [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*(?:'s|'t|'re|'ve|'m|'ll|'d)?
|
|
611
|
+
// Check if current char is a letter OR if current char could be a leading char and next char is a letter
|
|
612
|
+
bool is_letter_pattern = (flags.is_letter && !unicode_cpt_is_han(cpt)) ||
|
|
613
|
+
(!(cpt == '\r' || cpt == '\n' || flags.is_letter || flags.is_number) &&
|
|
614
|
+
_get_flags(pos + 1).is_letter && !unicode_cpt_is_han(_get_cpt(pos + 1)));
|
|
615
|
+
|
|
616
|
+
if (is_letter_pattern) {
|
|
617
|
+
// Handle optional leading non-letter/non-number character
|
|
618
|
+
bool has_leading_char = false;
|
|
619
|
+
if (!(cpt == '\r' || cpt == '\n' || flags.is_letter || flags.is_number)) {
|
|
620
|
+
has_leading_char = true;
|
|
621
|
+
pos++;
|
|
622
|
+
}
|
|
623
|
+
|
|
624
|
+
// Match letter sequence (excluding Han characters)
|
|
625
|
+
bool has_letters = false;
|
|
626
|
+
while (_get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos))) {
|
|
627
|
+
has_letters = true;
|
|
628
|
+
pos++;
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
// Only proceed if we found letters (after potentially skipping leading char)
|
|
632
|
+
if (has_letters || (!has_leading_char && _get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos)))) {
|
|
633
|
+
if (!has_letters) pos++; // consume the first letter if we didn't already
|
|
634
|
+
|
|
635
|
+
// Continue consuming letters
|
|
636
|
+
while (_get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos))) {
|
|
637
|
+
pos++;
|
|
638
|
+
}
|
|
639
|
+
|
|
640
|
+
// Check for optional contractions (?:'s|'t|'re|'ve|'m|'ll|'d)
|
|
641
|
+
if (_get_cpt(pos) == '\'' && pos + 1 < offset_end) {
|
|
642
|
+
uint32_t cpt_next = unicode_tolower(_get_cpt(pos + 1));
|
|
643
|
+
if (cpt_next == 's' || cpt_next == 't' || cpt_next == 'm' || cpt_next == 'd') {
|
|
644
|
+
pos += 2;
|
|
645
|
+
} else if (pos + 2 < offset_end) {
|
|
646
|
+
uint32_t cpt_next_next = unicode_tolower(_get_cpt(pos + 2));
|
|
647
|
+
if ((cpt_next == 'r' && cpt_next_next == 'e') ||
|
|
648
|
+
(cpt_next == 'v' && cpt_next_next == 'e') ||
|
|
649
|
+
(cpt_next == 'l' && cpt_next_next == 'l')) {
|
|
650
|
+
pos += 3;
|
|
651
|
+
}
|
|
652
|
+
}
|
|
653
|
+
}
|
|
654
|
+
|
|
655
|
+
_add_token(pos);
|
|
656
|
+
continue;
|
|
657
|
+
} else if (has_leading_char) {
|
|
658
|
+
// We consumed a leading char but found no letters, backtrack
|
|
659
|
+
pos--;
|
|
660
|
+
}
|
|
661
|
+
}
|
|
662
|
+
|
|
663
|
+
// Pattern 4: \p{N}{1,3} (numbers 1-3 digits)
|
|
664
|
+
if (flags.is_number) {
|
|
665
|
+
size_t ini = pos;
|
|
666
|
+
while (_get_flags(pos).is_number) {
|
|
667
|
+
if (++pos - ini >= 3) {
|
|
668
|
+
_add_token(pos);
|
|
669
|
+
ini = pos;
|
|
670
|
+
}
|
|
671
|
+
}
|
|
672
|
+
_add_token(pos);
|
|
673
|
+
continue;
|
|
674
|
+
}
|
|
675
|
+
|
|
676
|
+
// Pattern 5: ?[^\s\p{L}\p{N}]+[\r\n]* (optional space + non-word chars + optional newlines)
|
|
677
|
+
auto flags2 = (cpt == ' ' ? _get_flags(pos + 1) : flags);
|
|
678
|
+
if (!(flags2.is_whitespace || flags2.is_letter || flags2.is_number) && flags2.as_uint()) {
|
|
679
|
+
pos += (cpt == ' ');
|
|
680
|
+
while (!(flags2.is_whitespace || flags2.is_letter || flags2.is_number) && flags2.as_uint()) {
|
|
681
|
+
flags2 = _get_flags(++pos);
|
|
682
|
+
}
|
|
683
|
+
// Match optional [\r\n]*
|
|
684
|
+
uint32_t cpt2 = _get_cpt(pos);
|
|
685
|
+
while (cpt2 == '\r' || cpt2 == '\n') {
|
|
686
|
+
cpt2 = _get_cpt(++pos);
|
|
687
|
+
}
|
|
688
|
+
_add_token(pos);
|
|
689
|
+
continue;
|
|
690
|
+
}
|
|
691
|
+
|
|
692
|
+
// Count whitespace characters
|
|
693
|
+
size_t num_whitespaces = 0;
|
|
694
|
+
size_t last_end_r_or_n = 0;
|
|
695
|
+
while (_get_flags(pos + num_whitespaces).is_whitespace) {
|
|
696
|
+
uint32_t cpt2 = _get_cpt(pos + num_whitespaces);
|
|
697
|
+
if (cpt2 == '\r' || cpt2 == '\n') {
|
|
698
|
+
last_end_r_or_n = pos + num_whitespaces + 1;
|
|
699
|
+
}
|
|
700
|
+
num_whitespaces++;
|
|
701
|
+
}
|
|
702
|
+
|
|
703
|
+
// Pattern 6: \s*[\r\n]+ (whitespace with newlines)
|
|
704
|
+
if (last_end_r_or_n > 0) {
|
|
705
|
+
pos = last_end_r_or_n;
|
|
706
|
+
_add_token(pos);
|
|
707
|
+
continue;
|
|
708
|
+
}
|
|
709
|
+
|
|
710
|
+
// Pattern 7: \s+(?!\S) (trailing whitespace)
|
|
711
|
+
if (num_whitespaces > 1 && _get_cpt(pos + num_whitespaces) != OUT_OF_RANGE) {
|
|
712
|
+
pos += num_whitespaces - 1;
|
|
713
|
+
_add_token(pos);
|
|
714
|
+
continue;
|
|
715
|
+
}
|
|
716
|
+
|
|
717
|
+
// Pattern 8: \s+ (general whitespace)
|
|
718
|
+
if (num_whitespaces > 0) {
|
|
719
|
+
pos += num_whitespaces;
|
|
720
|
+
_add_token(pos);
|
|
721
|
+
continue;
|
|
722
|
+
}
|
|
723
|
+
|
|
724
|
+
// No matches - consume single character
|
|
725
|
+
_add_token(++pos);
|
|
726
|
+
}
|
|
727
|
+
}
|
|
728
|
+
|
|
729
|
+
return bpe_offsets;
|
|
730
|
+
}
|
|
731
|
+
|
|
560
732
|
static std::vector<size_t> unicode_regex_split_custom(const std::string & text, const std::string & regex_expr, const std::vector<size_t> & offsets) {
|
|
561
733
|
std::vector<size_t> bpe_offsets;
|
|
562
734
|
|
|
@@ -567,6 +739,9 @@ static std::vector<size_t> unicode_regex_split_custom(const std::string & text,
|
|
|
567
739
|
regex_expr == "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+") {
|
|
568
740
|
|
|
569
741
|
bpe_offsets = unicode_regex_split_custom_llama3(text, offsets);
|
|
742
|
+
} else if (regex_expr == "\\p{Han}+") {
|
|
743
|
+
// K2's first pattern - handle all K2 patterns together
|
|
744
|
+
bpe_offsets = unicode_regex_split_custom_kimi_k2(text, offsets);
|
|
570
745
|
}
|
|
571
746
|
|
|
572
747
|
return bpe_offsets;
|
|
@@ -672,6 +847,38 @@ uint32_t unicode_tolower(uint32_t cpt) {
|
|
|
672
847
|
return cpt; // Return the original code point if no lowercase mapping is found
|
|
673
848
|
}
|
|
674
849
|
|
|
850
|
+
// Returns true when `cpt` belongs to the Han script (the set matched by \p{Han}).
//
// Besides the unified ideograph blocks this also covers the iteration marks, the ideographic
// zero, the Hangzhou numerals and the CJK/Kangxi radicals, all of which carry Script=Han in
// the Unicode Character Database. Without them, text such as "人々" would be split in two
// because U+3005 would be treated as an ordinary (non-Han) letter.
bool unicode_cpt_is_han(uint32_t cpt) {
    struct cpt_range {
        uint32_t first;
        uint32_t last; // inclusive
    };

    // must stay sorted by `first` and non-overlapping: the lookup below stops at the first
    // range that starts after `cpt`
    static constexpr cpt_range k_han_ranges[] = {
        { 0x2E80,  0x2E99  }, // CJK Radicals Supplement
        { 0x2E9B,  0x2EF3  }, // CJK Radicals Supplement (cont.)
        { 0x2F00,  0x2FD5  }, // Kangxi Radicals
        { 0x3005,  0x3005  }, // ideographic iteration mark
        { 0x3007,  0x3007  }, // ideographic number zero
        { 0x3021,  0x3029  }, // Hangzhou numerals one..nine
        { 0x3038,  0x303B  }, // Hangzhou numerals ten..thirty, vertical iteration mark
        { 0x3400,  0x4DBF  }, // CJK Extension A
        { 0x4E00,  0x9FFF  }, // CJK Unified Ideographs (most common)
        { 0xF900,  0xFAFF  }, // CJK Compatibility Ideographs
        { 0x16FE2, 0x16FE3 }, // old Chinese hook mark / iteration mark
        { 0x16FF0, 0x16FF1 }, // Vietnamese alternate reading marks
        { 0x20000, 0x2A6DF }, // CJK Extension B
        { 0x2A700, 0x2B73F }, // CJK Extension C
        { 0x2B740, 0x2B81F }, // CJK Extension D
        { 0x2B820, 0x2CEAF }, // CJK Extension E
        { 0x2CEB0, 0x2EBEF }, // CJK Extension F
        { 0x2EBF0, 0x2EE5D }, // CJK Extension I
        { 0x2F800, 0x2FA1F }, // CJK Compatibility Ideographs Supplement
        { 0x30000, 0x3134A }, // CJK Extension G
        { 0x31350, 0x323AF }, // CJK Extension H
    };

    for (const auto & range : k_han_ranges) {
        if (cpt < range.first) {
            // ranges are sorted, so no later range can contain cpt
            return false;
        }
        if (cpt <= range.last) {
            return true;
        }
    }

    return false;
}
|
|
881
|
+
|
|
675
882
|
std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs) {
|
|
676
883
|
// unicode categories
|
|
677
884
|
static const std::map<std::string, int> k_ucat_enum = {
|
|
@@ -63,4 +63,6 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8);
|
|
|
63
63
|
|
|
64
64
|
uint32_t unicode_tolower(uint32_t cpt);
|
|
65
65
|
|
|
66
|
+
bool unicode_cpt_is_han(uint32_t cpt);
|
|
67
|
+
|
|
66
68
|
std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs);
|
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
#pragma once
|
|
2
|
-
|
|
3
|
-
#include "ggml.h"
|
|
4
|
-
#include "ggml-backend.h"
|
|
5
|
-
|
|
6
|
-
#include <stdbool.h>
|
|
7
|
-
#include <stddef.h>
|
|
8
|
-
#include <stdint.h>
|
|
9
|
-
|
|
10
|
-
#ifdef __cplusplus
|
|
11
|
-
extern "C" {
|
|
12
|
-
#endif
|
|
13
|
-
|
|
14
|
-
#define GGML_KOMPUTE_MAX_DEVICES 16
|
|
15
|
-
|
|
16
|
-
struct ggml_vk_device {
|
|
17
|
-
int index;
|
|
18
|
-
int type; // same as VkPhysicalDeviceType
|
|
19
|
-
size_t heapSize;
|
|
20
|
-
const char * name;
|
|
21
|
-
const char * vendor;
|
|
22
|
-
int subgroupSize;
|
|
23
|
-
uint64_t bufferAlignment;
|
|
24
|
-
uint64_t maxAlloc;
|
|
25
|
-
};
|
|
26
|
-
|
|
27
|
-
struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count);
|
|
28
|
-
bool ggml_vk_get_device(struct ggml_vk_device * device, size_t memoryRequired, const char * name);
|
|
29
|
-
bool ggml_vk_has_vulkan(void);
|
|
30
|
-
bool ggml_vk_has_device(void);
|
|
31
|
-
struct ggml_vk_device ggml_vk_current_device(void);
|
|
32
|
-
|
|
33
|
-
//
|
|
34
|
-
// backend API
|
|
35
|
-
//
|
|
36
|
-
|
|
37
|
-
// forward declaration
|
|
38
|
-
typedef struct ggml_backend * ggml_backend_t;
|
|
39
|
-
|
|
40
|
-
GGML_BACKEND_API ggml_backend_t ggml_backend_kompute_init(int device);
|
|
41
|
-
|
|
42
|
-
GGML_BACKEND_API bool ggml_backend_is_kompute(ggml_backend_t backend);
|
|
43
|
-
|
|
44
|
-
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
|
|
45
|
-
|
|
46
|
-
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_kompute_reg(void);
|
|
47
|
-
|
|
48
|
-
#ifdef __cplusplus
|
|
49
|
-
}
|
|
50
|
-
#endif
|