cui-llama.rn 1.3.6 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -1
- package/android/src/main/CMakeLists.txt +25 -26
- package/android/src/main/java/com/rnllama/LlamaContext.java +31 -9
- package/android/src/main/java/com/rnllama/RNLlama.java +98 -0
- package/android/src/main/jni-utils.h +94 -0
- package/android/src/main/jni.cpp +132 -62
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +15 -0
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +15 -0
- package/cpp/common.cpp +1982 -1982
- package/cpp/common.h +665 -664
- package/cpp/ggml-cpu.c +14122 -14122
- package/cpp/ggml-cpu.cpp +627 -627
- package/cpp/ggml-metal-impl.h +288 -0
- package/cpp/ggml-opt.cpp +854 -0
- package/cpp/ggml-opt.h +216 -0
- package/cpp/llama-mmap.cpp +589 -589
- package/cpp/llama.cpp +12547 -12544
- package/cpp/rn-llama.hpp +117 -116
- package/cpp/sgemm.h +14 -14
- package/ios/RNLlama.mm +47 -0
- package/ios/RNLlamaContext.h +3 -1
- package/ios/RNLlamaContext.mm +71 -14
- package/jest/mock.js +15 -3
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/index.js +33 -37
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/index.js +31 -35
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +26 -6
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +21 -36
- package/lib/typescript/index.d.ts.map +1 -1
- package/llama-rn.podspec +4 -18
- package/package.json +2 -3
- package/src/NativeRNLlama.ts +32 -13
- package/src/index.ts +52 -47
- package/cpp/llama.cpp.rej +0 -23
package/lib/typescript/index.d.ts
CHANGED
@@ -8,43 +8,19 @@ export type TokenData = {
     token: string;
     completion_probabilities?: Array<NativeCompletionTokenProb>;
 };
-export declare enum
-
-
-
-
-
-
-
-
-    LM_GGML_TYPE_Q2_K = 10,
-    LM_GGML_TYPE_Q3_K = 11,
-    LM_GGML_TYPE_Q4_K = 12,
-    LM_GGML_TYPE_Q5_K = 13,
-    LM_GGML_TYPE_Q6_K = 14,
-    LM_GGML_TYPE_Q8_K = 15,
-    LM_GGML_TYPE_IQ2_XXS = 16,
-    LM_GGML_TYPE_IQ2_XS = 17,
-    LM_GGML_TYPE_IQ3_XXS = 18,
-    LM_GGML_TYPE_IQ1_S = 19,
-    LM_GGML_TYPE_IQ4_NL = 20,
-    LM_GGML_TYPE_IQ3_S = 21,
-    LM_GGML_TYPE_IQ2_S = 22,
-    LM_GGML_TYPE_IQ4_XS = 23,
-    LM_GGML_TYPE_I8 = 24,
-    LM_GGML_TYPE_I16 = 25,
-    LM_GGML_TYPE_I32 = 26,
-    LM_GGML_TYPE_I64 = 27,
-    LM_GGML_TYPE_F64 = 28,
-    LM_GGML_TYPE_IQ1_M = 29,
-    LM_GGML_TYPE_BF16 = 30,
-    LM_GGML_TYPE_TQ1_0 = 34,
-    LM_GGML_TYPE_TQ2_0 = 35,
-    LM_GGML_TYPE_COUNT = 39
+export declare enum CACHE_TYPE {
+    F16 = "f16",
+    F32 = "f32",
+    Q8_0 = "q8_0",
+    Q4_0 = "q4_0",
+    Q4_1 = "q4_1",
+    IQ4_NL = "iq4_nl",
+    Q5_0 = "q5_0",
+    Q5_1 = "q5_1"
 }
 export type ContextParams = Omit<NativeContextParams, 'cache_type_k' | 'cache_type_v' | 'pooling_type'> & {
-    cache_type_k?:
-    cache_type_v?:
+    cache_type_k?: CACHE_TYPE;
+    cache_type_v?: CACHE_TYPE;
     pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank';
 };
 export type EmbeddingParams = NativeEmbeddingParams;
@@ -88,11 +64,20 @@ export declare class LlamaContext {
     detokenize(tokens: number[]): Promise<string>;
     embedding(text: string, params?: EmbeddingParams): Promise<NativeEmbeddingResult>;
     bench(pp: number, tg: number, pl: number, nr: number): Promise<BenchResult>;
+    applyLoraAdapters(loraList: Array<{
+        path: string;
+        scaled?: number;
+    }>): Promise<void>;
+    removeLoraAdapters(): Promise<void>;
+    getLoadedLoraAdapters(): Promise<Array<{
+        path: string;
+        scaled?: number;
+    }>>;
     release(): Promise<void>;
 }
 export declare function getCpuFeatures(): Promise<NativeCPUFeatures>;
 export declare function setContextLimit(limit: number): Promise<void>;
 export declare function loadLlamaModelInfo(model: string): Promise<Object>;
-export declare function initLlama({ model, is_model_asset: isModelAsset, pooling_type: poolingType, lora, ...rest }: ContextParams, onProgress?: (progress: number) => void): Promise<LlamaContext>;
+export declare function initLlama({ model, is_model_asset: isModelAsset, pooling_type: poolingType, lora, lora_list: loraList, ...rest }: ContextParams, onProgress?: (progress: number) => void): Promise<LlamaContext>;
 export declare function releaseAllLlama(): Promise<void>;
 //# sourceMappingURL=index.d.ts.map

package/lib/typescript/index.d.ts.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,mBAAmB,EACnB,kBAAkB,EAClB,sBAAsB,EACtB,yBAAyB,EACzB,sBAAsB,EACtB,oBAAoB,EACpB,qBAAqB,EACrB,uBAAuB,EACvB,iBAAiB,EACjB,qBAAqB,EACrB,6BAA6B,EAC7B,6BAA6B,EAC9B,MAAM,iBAAiB,CAAA;AACxB,OAAO,KAAK,
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,mBAAmB,EACnB,kBAAkB,EAClB,sBAAsB,EACtB,yBAAyB,EACzB,sBAAsB,EACtB,oBAAoB,EACpB,qBAAqB,EACrB,uBAAuB,EACvB,iBAAiB,EACjB,qBAAqB,EACrB,6BAA6B,EAC7B,6BAA6B,EAC9B,MAAM,iBAAiB,CAAA;AACxB,OAAO,KAAK,EACV,+BAA+B,EAC/B,iCAAiC,EAClC,MAAM,WAAW,CAAA;AAClB,OAAO,EAAE,sBAAsB,EAAE,0BAA0B,EAAE,MAAM,WAAW,CAAA;AAC9E,OAAO,KAAK,EAAE,kBAAkB,EAAE,2BAA2B,EAAE,MAAM,QAAQ,CAAA;AAG7E,YAAY,EACV,mBAAmB,EACnB,kBAAkB,EAClB,sBAAsB,EACtB,yBAAyB,EACzB,sBAAsB,EACtB,oBAAoB,EACpB,qBAAqB,EACrB,uBAAuB,EACvB,qBAAqB,EACrB,6BAA6B,EAC7B,6BAA6B,EAC7B,kBAAkB,EAClB,2BAA2B,EAC3B,+BAA+B,EAC/B,iCAAiC,GAClC,CAAA;AAED,OAAO,EAAE,sBAAsB,EAAE,0BAA0B,EAAE,CAAA;AAc7D,MAAM,MAAM,SAAS,GAAG;IACtB,KAAK,EAAE,MAAM,CAAA;IACb,wBAAwB,CAAC,EAAE,KAAK,CAAC,yBAAyB,CAAC,CAAA;CAC5D,CAAA;AAOD,oBAAY,UAAU;IACpB,GAAG,QAAQ;IACX,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,IAAI,SAAS;IACb,IAAI,SAAS;IACb,MAAM,WAAW;IACjB,IAAI,SAAS;IACb,IAAI,SAAS;CACd;AAGD,MAAM,MAAM,aAAa,GAAG,IAAI,CAC9B,mBAAmB,EACnB,cAAc,GAAG,cAAc,GAAG,cAAc,CACjD,GAAG;IACF,YAAY,CAAC,EAAE,UAAU,CAAA;IACzB,YAAY,CAAC,EAAE,UAAU,CAAA;IACzB,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,KAAK,GAAG,MAAM,GAAG,MAAM,CAAA;CACzD,CAAA;AAED,MAAM,MAAM,eAAe,GAAG,qBAAqB,CAAA;AAEnD,MAAM,MAAM,gBAAgB,GAAG,IAAI,CACjC,sBAAsB,EACtB,yBAAyB,GAAG,QAAQ,CACrC,GAAG;IACF,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,QAAQ,CAAC,EAAE,2BAA2B,EAAE,CAAA;IACxC,YAAY,CAAC,EAAE,MAAM,CAAA;CACtB,CAAA;AAED,MAAM,MAAM,WAAW,GAAG;IACxB,SAAS,EAAE,MAAM,CAAA;IACjB,SAAS,EAAE,MAAM,CAAA;IACjB,YAAY,EAAE,MAAM,CAAA;IACpB,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;CACd,CAAA;AAED,qBAAa,YAAY;IACvB,EAAE,EAAE,MAAM,CAAA;IAEV,GAAG,EAAE,OAAO,CAAQ;IAEpB,WAAW,EAAE,MAAM,CAAK;IAExB,KAAK,EAAE;QACL,uBAAuB,CAAC,EAAE,OAAO,CAAA;KAClC,CAAK;gBAEM,EAAE,SAAS,EAAE,GAAG,EAAE,WAAW,EAAE,KAAK,EAAE,EAAE,kBAAkB;IAOtE;;OAEG;IACG,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,uBAAuB,CAAC;IAMrE;;OAEG;IACG,WAAW,CACf,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE;QAAE,SAAS,EAAE,MAAM,CAAA;KAAE,GAC9B,OAAO,CAAC,MAAM,CAAC;IAIZ,gBAAgB,CACpB,QAAQ,EAAE,2BAA2B,EAAE,EACvC,QAAQ,CAAC,EAAE,MAAM,GAChB,OAAO,CAAC,MAAM,CAAC;IAOZ,UAAU,CACd,MAAM,EAAE,gBAAgB,EACxB,QAAQ,CAAC,EAAE,CAAC,IAAI,EAAE,SAAS,KAAK,IAAI,GACnC,OAAO,CAAC,sBAAsB,CAAC;IAqClC,cAAc,IAAI,OAAO,CAAC,IAAI,CAAC;IAI/B,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC;IAI1D,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,oBAAoB;IAIhD,UAAU,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAI7C,SAAS,CACP,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,eAAe,GACvB,OAAO,CAAC,qBAAqB,CAAC;IAI3B,KAAK,CACT,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,GACT,OAAO,CAAC,WAAW,CAAC;IAejB,iBAAiB,CACrB,QAAQ,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,GACjD,OAAO,CAAC,IAAI,CAAC;IAUV,kBAAkB,IAAI,OAAO,CAAC,IAAI,CAAC;IAInC,qBAAqB,IAAI,OAAO,CACpC,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,CACzC;IAIK,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B;AAED,wBAAsB,cAAc,IAAK,OAAO,CAAC,iBAAiB,CAAC,CAElE;AAED,wBAAsB,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAElE;AAYD,wBAAsB,kBAAkB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAIvE;AAWD,wBAAsB,SAAS,CAC7B,EACE,KAAK,EACL,cAAc,EAAE,YAAY,EAC5B,YAAY,EAAE,WAAW,EACzB,IAAI,EACJ,SAAS,EAAE,QAAQ,EACnB,GAAG,IAAI,EACR,EAAE,aAAa,EAChB,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,GACtC,OAAO,CAAC,YAAY,CAAC,CA+CvB;AAED,wBAAsB,eAAe,IAAI,OAAO,CAAC,IAAI,CAAC,CAErD"}
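A minimal usage sketch of the 1.4.0 typings above: the CACHE_TYPE enum replaces the removed numeric LM_GGML_TYPE enum for KV cache settings, and initLlama now accepts lora_list. The model and adapter paths below are placeholders, not part of the package.

import { initLlama, CACHE_TYPE } from 'cui-llama.rn'

async function loadQuantizedKVContext() {
  // Quantized KV cache types are expressed with CACHE_TYPE members,
  // which are string-valued ('q8_0', 'f16', ...) and so still cross the
  // bridge as plain strings in NativeContextParams.
  const context = await initLlama(
    {
      model: '/data/local/models/example.gguf', // placeholder path
      n_ctx: 4096,
      cache_type_k: CACHE_TYPE.Q8_0,
      cache_type_v: CACHE_TYPE.Q8_0,
      // New in 1.4.0: multiple LoRA adapters can be passed at init time.
      lora_list: [{ path: '/data/local/lora/example.gguf', scaled: 1.0 }],
    },
    (progress: number) => console.log('model load progress:', progress),
  )
  return context
}
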
package/llama-rn.podspec
CHANGED
@@ -2,8 +2,7 @@ require "json"
 
 package = JSON.parse(File.read(File.join(__dir__, "package.json")))
 base_ld_flags = "-framework Accelerate -framework Foundation -framework Metal -framework MetalKit"
-base_compiler_flags = "-fno-objc-arc -DLM_GGML_USE_ACCELERATE -Wno-shorten-64-to-32"
-folly_compiler_flags = "-DFOLLY_NO_CONFIG -DFOLLY_MOBILE=1 -DFOLLY_USE_LIBCPP=1 -Wno-comma"
+base_compiler_flags = "-fno-objc-arc -DLM_GGML_USE_CPU -DLM_GGML_USE_ACCELERATE -Wno-shorten-64-to-32"
 
 if ENV["RNLLAMA_DISABLE_METAL"] != "1" then
   base_compiler_flags += " -DLM_GGML_USE_METAL" # -DLM_GGML_METAL_NDEBUG
@@ -21,7 +20,7 @@ Pod::Spec.new do |s|
   s.license = package["license"]
   s.authors = package["author"]
 
-  s.platforms = { :ios => "
+  s.platforms = { :ios => "13.0", :tvos => "13.0" }
   s.source = { :git => "https://github.com/mybigday/llama.rn.git", :tag => "#{s.version}" }
 
   s.source_files = "ios/**/*.{h,m,mm}", "cpp/**/*.{h,cpp,hpp,c,m,mm}"
@@ -33,24 +32,11 @@ Pod::Spec.new do |s|
   s.pod_target_xcconfig = {
     "OTHER_LDFLAGS" => base_ld_flags,
     "OTHER_CFLAGS" => base_optimizer_flags,
-    "OTHER_CPLUSPLUSFLAGS" => base_optimizer_flags
+    "OTHER_CPLUSPLUSFLAGS" => base_optimizer_flags + " -std=c++17"
   }
 
   # Don't install the dependencies when we run `pod install` in the old architecture.
   if ENV['RCT_NEW_ARCH_ENABLED'] == '1' then
-    s
-    new_arch_cpp_flags = "-DFOLLY_NO_CONFIG -DFOLLY_MOBILE=1 -DFOLLY_USE_LIBCPP=1"
-    s.pod_target_xcconfig = {
-      "CLANG_CXX_LANGUAGE_STANDARD" => "c++17",
-      "HEADER_SEARCH_PATHS" => "\"$(PODS_ROOT)/boost\"",
-      "OTHER_LDFLAGS" => "-framework Accelerate",
-      "OTHER_CFLAGS" => base_optimizer_flags,
-      "OTHER_CPLUSPLUSFLAGS" => new_arch_cpp_flags + " " + base_optimizer_flags
-    }
-    s.dependency "React-Codegen"
-    s.dependency "RCT-Folly"
-    s.dependency "RCTRequired"
-    s.dependency "RCTTypeSafety"
-    s.dependency "ReactCommon/turbomodule/core"
+    install_modules_dependencies(s)
   end
 end

package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "cui-llama.rn",
-  "version": "1.3.6",
+  "version": "1.4.0",
   "description": "Fork of llama.rn for ChatterUI",
   "main": "lib/commonjs/index",
   "module": "lib/module/index",
@@ -13,8 +13,7 @@
     "jest",
     "ios",
     "android",
-    "cpp
-    "cpp/amx/*.*",
+    "cpp",
     "*.podspec",
     "!lib/typescript/example",
     "!ios/build",

package/src/NativeRNLlama.ts
CHANGED
@@ -12,6 +12,7 @@ export type NativeContextParams = {
 
   n_ctx?: number
   n_batch?: number
+  n_ubatch?: number
 
   n_threads?: number
   n_gpu_layers?: number
@@ -24,18 +25,28 @@ export type NativeContextParams = {
   /**
    * KV cache data type for the K (Experimental in llama.cpp)
    */
-  cache_type_k?:
+  cache_type_k?: string
   /**
    * KV cache data type for the V (Experimental in llama.cpp)
    */
-  cache_type_v?:
+  cache_type_v?: string
 
   use_mlock?: boolean
   use_mmap?: boolean
   vocab_only?: boolean
 
-
+  /**
+   * Single LoRA adapter path
+   */
+  lora?: string
+  /**
+   * Single LoRA adapter scale
+   */
   lora_scaled?: number
+  /**
+   * LoRA adapter list
+   */
+  lora_list?: Array<{ path: string; scaled?: number }>
 
   rope_freq_base?: number
   rope_freq_scale?: number
@@ -115,10 +126,6 @@ export type NativeCompletionParams = {
    * Repeat alpha presence penalty. Default: `0.0`, which is disabled.
    */
   penalty_present?: number
-  /**
-   * Penalize newline tokens when applying the repeat penalty. Default: `false`
-   */
-  // penalize_nl?: boolean
   /**
    * Enable Mirostat sampling, controlling perplexity during text generation. Default: `0`, where `0` is disabled, `1` is Mirostat, and `2` is Mirostat 2.0.
    */
@@ -243,8 +250,16 @@ export interface Spec extends TurboModule {
   setContextLimit(limit: number): Promise<void>
 
   modelInfo(path: string, skip?: string[]): Promise<Object>
-  initContext(
+  initContext(
+    contextId: number,
+    params: NativeContextParams,
+  ): Promise<NativeLlamaContext>
 
+  getFormattedChat(
+    contextId: number,
+    messages: NativeLlamaChatMessage[],
+    chatTemplate?: string,
+  ): Promise<string>
   loadSession(
     contextId: number,
     filepath: string,
@@ -262,11 +277,6 @@ export interface Spec extends TurboModule {
   tokenizeAsync(contextId: number, text: string): Promise<NativeTokenizeResult>
   tokenizeSync(contextId: number, text: string): NativeTokenizeResult
   getCpuFeatures() : Promise<NativeCPUFeatures>
-  getFormattedChat(
-    contextId: number,
-    messages: NativeLlamaChatMessage[],
-    chatTemplate?: string,
-  ): Promise<string>
   detokenize(contextId: number, tokens: number[]): Promise<string>
   embedding(
     contextId: number,
@@ -281,6 +291,15 @@ export interface Spec extends TurboModule {
     nr: number,
   ): Promise<string>
 
+  applyLoraAdapters(
+    contextId: number,
+    loraAdapters: Array<{ path: string; scaled?: number }>,
+  ): Promise<void>
+  removeLoraAdapters(contextId: number): Promise<void>
+  getLoadedLoraAdapters(
+    contextId: number,
+  ): Promise<Array<{ path: string; scaled?: number }>>
+
   releaseContext(contextId: number): Promise<void>
 
   releaseAllContexts(): Promise<void>

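For illustration only, a sketch of how the revised Spec surface might be driven directly. It assumes the module's default export is the registered TurboModule (the same object index.ts calls as RNLlama) and that NativeLlamaChatMessage carries { role, content }; the paths are placeholders.

import RNLlama from './NativeRNLlama'

async function nativeSmokeTest(contextId: number) {
  // initContext now takes an explicit (contextId, params) pair, and the
  // params accept n_ubatch and lora_list in addition to the single lora path.
  await RNLlama.initContext(contextId, {
    model: '/data/local/models/example.gguf', // placeholder path
    n_batch: 512,
    n_ubatch: 512,
    cache_type_k: 'q8_0', // plain strings at the native boundary
    cache_type_v: 'q8_0',
    lora_list: [{ path: '/data/local/lora/example.gguf', scaled: 1.0 }],
  })

  // getFormattedChat keeps the same shape; only its position in the Spec moved.
  const prompt = await RNLlama.getFormattedChat(contextId, [
    { role: 'user', content: 'Hello!' },
  ])

  // LoRA adapters can now be applied, listed, and removed after init.
  await RNLlama.applyLoraAdapters(contextId, [
    { path: '/data/local/lora/other.gguf' },
  ])
  const loaded = await RNLlama.getLoadedLoraAdapters(contextId)
  await RNLlama.removeLoraAdapters(contextId)

  return { prompt, loaded }
}
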
package/src/index.ts
CHANGED
@@ -15,7 +15,10 @@ import type {
   NativeCompletionTokenProbItem,
   NativeCompletionResultTimings,
 } from './NativeRNLlama'
-import type {
+import type {
+  SchemaGrammarConverterPropOrder,
+  SchemaGrammarConverterBuiltinRule,
+} from './grammar'
 import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar'
 import type { RNLlamaMessagePart, RNLlamaOAICompatibleMessage } from './chat'
 import { formatChat } from './chat'
@@ -62,56 +65,24 @@ type TokenNativeEvent = {
   tokenResult: TokenData
 }
 
-export enum
-
-
-
-
-
-
-
-
-
-  LM_GGML_TYPE_Q8_1 = 9,
-  LM_GGML_TYPE_Q2_K = 10,
-  LM_GGML_TYPE_Q3_K = 11,
-  LM_GGML_TYPE_Q4_K = 12,
-  LM_GGML_TYPE_Q5_K = 13,
-  LM_GGML_TYPE_Q6_K = 14,
-  LM_GGML_TYPE_Q8_K = 15,
-  LM_GGML_TYPE_IQ2_XXS = 16,
-  LM_GGML_TYPE_IQ2_XS = 17,
-  LM_GGML_TYPE_IQ3_XXS = 18,
-  LM_GGML_TYPE_IQ1_S = 19,
-  LM_GGML_TYPE_IQ4_NL = 20,
-  LM_GGML_TYPE_IQ3_S = 21,
-  LM_GGML_TYPE_IQ2_S = 22,
-  LM_GGML_TYPE_IQ4_XS = 23,
-  LM_GGML_TYPE_I8 = 24,
-  LM_GGML_TYPE_I16 = 25,
-  LM_GGML_TYPE_I32 = 26,
-  LM_GGML_TYPE_I64 = 27,
-  LM_GGML_TYPE_F64 = 28,
-  LM_GGML_TYPE_IQ1_M = 29,
-  LM_GGML_TYPE_BF16 = 30,
-  // LM_GGML_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files
-  // LM_GGML_TYPE_Q4_0_4_8 = 32,
-  // LM_GGML_TYPE_Q4_0_8_8 = 33,
-  LM_GGML_TYPE_TQ1_0 = 34,
-  LM_GGML_TYPE_TQ2_0 = 35,
-  // LM_GGML_TYPE_IQ4_NL_4_4 = 36,
-  // LM_GGML_TYPE_IQ4_NL_4_8 = 37,
-  // LM_GGML_TYPE_IQ4_NL_8_8 = 38,
-  LM_GGML_TYPE_COUNT = 39,
-};
+export enum CACHE_TYPE {
+  F16 = 'f16',
+  F32 = 'f32',
+  Q8_0 = 'q8_0',
+  Q4_0 = 'q4_0',
+  Q4_1 = 'q4_1',
+  IQ4_NL = 'iq4_nl',
+  Q5_0 = 'q5_0',
+  Q5_1 = 'q5_1'
+}
 
 
 export type ContextParams = Omit<
   NativeContextParams,
-  'cache_type_k' | 'cache_type_v' |
+  'cache_type_k' | 'cache_type_v' | 'pooling_type'
 > & {
-  cache_type_k?:
-  cache_type_v?:
+  cache_type_k?: CACHE_TYPE
+  cache_type_v?: CACHE_TYPE
   pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank'
 }
 
@@ -190,7 +161,10 @@ export class LlamaContext {
     let finalPrompt = params.prompt
     if (params.messages) {
       // messages always win
-      finalPrompt = await this.getFormattedChat(
+      finalPrompt = await this.getFormattedChat(
+        params.messages,
+        params.chatTemplate,
+      )
     }
 
     let tokenListener: any =
@@ -263,6 +237,28 @@ export class LlamaContext {
     }
   }
 
+  async applyLoraAdapters(
+    loraList: Array<{ path: string; scaled?: number }>
+  ): Promise<void> {
+    let loraAdapters: Array<{ path: string; scaled?: number }> = []
+    if (loraList)
+      loraAdapters = loraList.map((l) => ({
+        path: l.path.replace(/file:\/\//, ''),
+        scaled: l.scaled,
+      }))
+    return RNLlama.applyLoraAdapters(this.id, loraAdapters)
+  }
+
+  async removeLoraAdapters(): Promise<void> {
+    return RNLlama.removeLoraAdapters(this.id)
+  }
+
+  async getLoadedLoraAdapters(): Promise<
+    Array<{ path: string; scaled?: number }>
+  > {
+    return RNLlama.getLoadedLoraAdapters(this.id)
+  }
+
   async release(): Promise<void> {
     return RNLlama.releaseContext(this.id)
   }
@@ -307,6 +303,7 @@ export async function initLlama(
     is_model_asset: isModelAsset,
     pooling_type: poolingType,
     lora,
+    lora_list: loraList,
     ...rest
   }: ContextParams,
   onProgress?: (progress: number) => void,
@@ -317,6 +314,13 @@ export async function initLlama(
   let loraPath = lora
   if (loraPath?.startsWith('file://')) loraPath = loraPath.slice(7)
 
+  let loraAdapters: Array<{ path: string; scaled?: number }> = []
+  if (loraList)
+    loraAdapters = loraList.map((l) => ({
+      path: l.path.replace(/file:\/\//, ''),
+      scaled: l.scaled,
+    }))
+
   const contextId = contextIdCounter + contextIdRandom()
   contextIdCounter += 1
 
@@ -342,6 +346,7 @@ export async function initLlama(
     use_progress_callback: !!onProgress,
     pooling_type: poolType,
     lora: loraPath,
+    lora_list: loraAdapters,
     ...rest,
   }).catch((err: any) => {
     removeProgressListener?.remove()

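A sketch of the runtime LoRA workflow that the LlamaContext additions above enable; the model and adapter paths are placeholders, and file:// prefixes are stripped before they reach the native side, as in the implementation above.

import { initLlama } from 'cui-llama.rn'

async function swapAdapters() {
  const context = await initLlama({
    model: 'file:///data/local/models/base.gguf', // placeholder path
    lora_list: [{ path: 'file:///data/local/lora/style-a.gguf', scaled: 0.8 }],
  })

  // Replace the adapters later without recreating the context.
  await context.removeLoraAdapters()
  await context.applyLoraAdapters([
    { path: 'file:///data/local/lora/style-b.gguf', scaled: 1.0 },
  ])
  console.log(await context.getLoadedLoraAdapters())

  await context.release()
}
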
package/cpp/llama.cpp.rej
DELETED
@@ -1,23 +0,0 @@
---- llama.cpp.orig 2024-11-02 12:42:13
-+++ llama.cpp 2024-11-02 13:00:37
-@@ -1941,16 +1952,16 @@
-
-         if (prefetch > 0) {
-             // advise the kernel to preload the mapped memory
--            if (posix_madvise(addr, std::min(file->size, prefetch), POSIX_MADV_WILLNEED)) {
--                LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed: %s\n",
-+            if (madvise(addr, std::min(file->size, prefetch), MADV_WILLNEED)) {
-+                fprintf(stderr, "warning: madvise(.., MADV_WILLNEED) failed: %s\n",
-                         strerror(errno));
-             }
-         }
-         if (numa) {
-             // advise the kernel not to use readahead
-             // (because the next page might not belong on the same node)
--            if (posix_madvise(addr, file->size, POSIX_MADV_RANDOM)) {
--                LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_RANDOM) failed: %s\n",
-+            if (madvise(addr, file->size, MADV_RANDOM)) {
-+                fprintf(stderr, "warning: madvise(.., MADV_RANDOM) failed: %s\n",
-                         strerror(errno));
-             }
-         }