cui-llama.rn 1.3.6 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +22 -1
  2. package/android/src/main/CMakeLists.txt +25 -26
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +31 -9
  4. package/android/src/main/java/com/rnllama/RNLlama.java +98 -0
  5. package/android/src/main/jni-utils.h +94 -0
  6. package/android/src/main/jni.cpp +132 -62
  7. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +15 -0
  8. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +15 -0
  9. package/cpp/common.cpp +1982 -1982
  10. package/cpp/common.h +665 -664
  11. package/cpp/ggml-cpu.c +14122 -14122
  12. package/cpp/ggml-cpu.cpp +627 -627
  13. package/cpp/ggml-metal-impl.h +288 -0
  14. package/cpp/ggml-opt.cpp +854 -0
  15. package/cpp/ggml-opt.h +216 -0
  16. package/cpp/llama-mmap.cpp +589 -589
  17. package/cpp/llama.cpp +12547 -12544
  18. package/cpp/rn-llama.hpp +117 -116
  19. package/cpp/sgemm.h +14 -14
  20. package/ios/RNLlama.mm +47 -0
  21. package/ios/RNLlamaContext.h +3 -1
  22. package/ios/RNLlamaContext.mm +71 -14
  23. package/jest/mock.js +15 -3
  24. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  25. package/lib/commonjs/index.js +33 -37
  26. package/lib/commonjs/index.js.map +1 -1
  27. package/lib/module/NativeRNLlama.js.map +1 -1
  28. package/lib/module/index.js +31 -35
  29. package/lib/module/index.js.map +1 -1
  30. package/lib/typescript/NativeRNLlama.d.ts +26 -6
  31. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  32. package/lib/typescript/index.d.ts +21 -36
  33. package/lib/typescript/index.d.ts.map +1 -1
  34. package/llama-rn.podspec +4 -18
  35. package/package.json +2 -3
  36. package/src/NativeRNLlama.ts +32 -13
  37. package/src/index.ts +52 -47
  38. package/cpp/llama.cpp.rej +0 -23
@@ -8,43 +8,19 @@ export type TokenData = {
8
8
  token: string;
9
9
  completion_probabilities?: Array<NativeCompletionTokenProb>;
10
10
  };
11
- export declare enum GGML_TYPE {
12
- LM_GGML_TYPE_F32 = 0,
13
- LM_GGML_TYPE_F16 = 1,
14
- LM_GGML_TYPE_Q4_0 = 2,
15
- LM_GGML_TYPE_Q4_1 = 3,
16
- LM_GGML_TYPE_Q5_0 = 6,
17
- LM_GGML_TYPE_Q5_1 = 7,
18
- LM_GGML_TYPE_Q8_0 = 8,
19
- LM_GGML_TYPE_Q8_1 = 9,
20
- LM_GGML_TYPE_Q2_K = 10,
21
- LM_GGML_TYPE_Q3_K = 11,
22
- LM_GGML_TYPE_Q4_K = 12,
23
- LM_GGML_TYPE_Q5_K = 13,
24
- LM_GGML_TYPE_Q6_K = 14,
25
- LM_GGML_TYPE_Q8_K = 15,
26
- LM_GGML_TYPE_IQ2_XXS = 16,
27
- LM_GGML_TYPE_IQ2_XS = 17,
28
- LM_GGML_TYPE_IQ3_XXS = 18,
29
- LM_GGML_TYPE_IQ1_S = 19,
30
- LM_GGML_TYPE_IQ4_NL = 20,
31
- LM_GGML_TYPE_IQ3_S = 21,
32
- LM_GGML_TYPE_IQ2_S = 22,
33
- LM_GGML_TYPE_IQ4_XS = 23,
34
- LM_GGML_TYPE_I8 = 24,
35
- LM_GGML_TYPE_I16 = 25,
36
- LM_GGML_TYPE_I32 = 26,
37
- LM_GGML_TYPE_I64 = 27,
38
- LM_GGML_TYPE_F64 = 28,
39
- LM_GGML_TYPE_IQ1_M = 29,
40
- LM_GGML_TYPE_BF16 = 30,
41
- LM_GGML_TYPE_TQ1_0 = 34,
42
- LM_GGML_TYPE_TQ2_0 = 35,
43
- LM_GGML_TYPE_COUNT = 39
11
+ export declare enum CACHE_TYPE {
12
+ F16 = "f16",
13
+ F32 = "f32",
14
+ Q8_0 = "q8_0",
15
+ Q4_0 = "q4_0",
16
+ Q4_1 = "q4_1",
17
+ IQ4_NL = "iq4_nl",
18
+ Q5_0 = "q5_0",
19
+ Q5_1 = "q5_1"
44
20
  }
45
21
  export type ContextParams = Omit<NativeContextParams, 'cache_type_k' | 'cache_type_v' | 'pooling_type'> & {
46
- cache_type_k?: GGML_TYPE;
47
- cache_type_v?: GGML_TYPE;
22
+ cache_type_k?: CACHE_TYPE;
23
+ cache_type_v?: CACHE_TYPE;
48
24
  pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank';
49
25
  };
50
26
  export type EmbeddingParams = NativeEmbeddingParams;
@@ -88,11 +64,20 @@ export declare class LlamaContext {
88
64
  detokenize(tokens: number[]): Promise<string>;
89
65
  embedding(text: string, params?: EmbeddingParams): Promise<NativeEmbeddingResult>;
90
66
  bench(pp: number, tg: number, pl: number, nr: number): Promise<BenchResult>;
67
+ applyLoraAdapters(loraList: Array<{
68
+ path: string;
69
+ scaled?: number;
70
+ }>): Promise<void>;
71
+ removeLoraAdapters(): Promise<void>;
72
+ getLoadedLoraAdapters(): Promise<Array<{
73
+ path: string;
74
+ scaled?: number;
75
+ }>>;
91
76
  release(): Promise<void>;
92
77
  }
93
78
  export declare function getCpuFeatures(): Promise<NativeCPUFeatures>;
94
79
  export declare function setContextLimit(limit: number): Promise<void>;
95
80
  export declare function loadLlamaModelInfo(model: string): Promise<Object>;
96
- export declare function initLlama({ model, is_model_asset: isModelAsset, pooling_type: poolingType, lora, ...rest }: ContextParams, onProgress?: (progress: number) => void): Promise<LlamaContext>;
81
+ export declare function initLlama({ model, is_model_asset: isModelAsset, pooling_type: poolingType, lora, lora_list: loraList, ...rest }: ContextParams, onProgress?: (progress: number) => void): Promise<LlamaContext>;
97
82
  export declare function releaseAllLlama(): Promise<void>;
98
83
  //# sourceMappingURL=index.d.ts.map
package/lib/typescript/index.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,mBAAmB,EACnB,kBAAkB,EAClB,sBAAsB,EACtB,yBAAyB,EACzB,sBAAsB,EACtB,oBAAoB,EACpB,qBAAqB,EACrB,uBAAuB,EACvB,iBAAiB,EACjB,qBAAqB,EACrB,6BAA6B,EAC7B,6BAA6B,EAC9B,MAAM,iBAAiB,CAAA;AACxB,OAAO,KAAK,EAAE,+BAA+B,EAAE,iCAAiC,EAAE,MAAM,WAAW,CAAA;AACnG,OAAO,EAAE,sBAAsB,EAAE,0BAA0B,EAAE,MAAM,WAAW,CAAA;AAC9E,OAAO,KAAK,EAAE,kBAAkB,EAAE,2BAA2B,EAAE,MAAM,QAAQ,CAAA;AAG7E,YAAY,EACV,mBAAmB,EACnB,kBAAkB,EAClB,sBAAsB,EACtB,yBAAyB,EACzB,sBAAsB,EACtB,oBAAoB,EACpB,qBAAqB,EACrB,uBAAuB,EACvB,qBAAqB,EACrB,6BAA6B,EAC7B,6BAA6B,EAC7B,kBAAkB,EAClB,2BAA2B,EAC3B,+BAA+B,EAC/B,iCAAiC,GAClC,CAAA;AAED,OAAO,EAAE,sBAAsB,EAAE,0BAA0B,EAAE,CAAA;AAc7D,MAAM,MAAM,SAAS,GAAG;IACtB,KAAK,EAAE,MAAM,CAAA;IACb,wBAAwB,CAAC,EAAE,KAAK,CAAC,yBAAyB,CAAC,CAAA;CAC5D,CAAA;AAOD,oBAAY,SAAS;IACnB,gBAAgB,IAAQ;IACxB,gBAAgB,IAAQ;IACxB,iBAAiB,IAAO;IACxB,iBAAiB,IAAO;IAGxB,iBAAiB,IAAO;IACxB,iBAAiB,IAAO;IACxB,iBAAiB,IAAO;IACxB,iBAAiB,IAAO;IACxB,iBAAiB,KAAQ;IACzB,iBAAiB,KAAQ;IACzB,iBAAiB,KAAQ;IACzB,iBAAiB,KAAQ;IACzB,iBAAiB,KAAQ;IACzB,iBAAiB,KAAQ;IACzB,oBAAoB,KAAK;IACzB,mBAAmB,KAAM;IACzB,oBAAoB,KAAK;IACzB,kBAAkB,KAAO;IACzB,mBAAmB,KAAM;IACzB,kBAAkB,KAAO;IACzB,kBAAkB,KAAO;IACzB,mBAAmB,KAAM;IACzB,eAAe,KAAU;IACzB,gBAAgB,KAAS;IACzB,gBAAgB,KAAS;IACzB,gBAAgB,KAAS;IACzB,gBAAgB,KAAS;IACzB,kBAAkB,KAAO;IACzB,iBAAiB,KAAQ;IAIzB,kBAAkB,KAAO;IACzB,kBAAkB,KAAO;IAIzB,kBAAkB,KAAO;CAC1B;AAGD,MAAM,MAAM,aAAa,GAAG,IAAI,CAC9B,mBAAmB,EACnB,cAAc,GAAG,cAAc,GAAI,cAAc,CAClD,GAAG;IACF,YAAY,CAAC,EAAE,SAAS,CAAA;IACxB,YAAY,CAAC,EAAE,SAAS,CAAA;IACxB,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,KAAK,GAAG,MAAM,GAAG,MAAM,CAAA;CACzD,CAAA;AAED,MAAM,MAAM,eAAe,GAAG,qBAAqB,CAAA;AAEnD,MAAM,MAAM,gBAAgB,GAAG,IAAI,CACjC,sBAAsB,EACtB,yBAAyB,GAAG,QAAQ,CACrC,GAAG;IACF,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,QAAQ,CAAC,EAAE,2BAA2B,EAAE,CAAA;IACxC,YAAY,CAAC,EAAE,MAAM,CAAA;CACtB,CAAA;AAED,MAAM,MAAM,WAAW,GAAG;IACxB,SAAS,EAAE,MAAM,CAAA;IACjB,SAAS,EAAE,MAAM,CAAA;IACjB,YAAY,EAAE,MAAM,CAAA;
IACpB,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;CACd,CAAA;AAED,qBAAa,YAAY;IACvB,EAAE,EAAE,MAAM,CAAA;IAEV,GAAG,EAAE,OAAO,CAAQ;IAEpB,WAAW,EAAE,MAAM,CAAK;IAExB,KAAK,EAAE;QACL,uBAAuB,CAAC,EAAE,OAAO,CAAA;KAClC,CAAK;gBAEM,EAAE,SAAS,EAAE,GAAG,EAAE,WAAW,EAAE,KAAK,EAAE,EAAE,kBAAkB;IAOtE;;OAEG;IACG,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,uBAAuB,CAAC;IAMrE;;OAEG;IACG,WAAW,CACf,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE;QAAE,SAAS,EAAE,MAAM,CAAA;KAAE,GAC9B,OAAO,CAAC,MAAM,CAAC;IAIZ,gBAAgB,CACpB,QAAQ,EAAE,2BAA2B,EAAE,EACvC,QAAQ,CAAC,EAAE,MAAM,GAChB,OAAO,CAAC,MAAM,CAAC;IAOZ,UAAU,CACd,MAAM,EAAE,gBAAgB,EACxB,QAAQ,CAAC,EAAE,CAAC,IAAI,EAAE,SAAS,KAAK,IAAI,GACnC,OAAO,CAAC,sBAAsB,CAAC;IAkClC,cAAc,IAAI,OAAO,CAAC,IAAI,CAAC;IAI/B,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC;IAI1D,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,oBAAoB;IAIhD,UAAU,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAI7C,SAAS,CACP,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,eAAe,GACvB,OAAO,CAAC,qBAAqB,CAAC;IAI3B,KAAK,CACT,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,GACT,OAAO,CAAC,WAAW,CAAC;IAejB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B;AAED,wBAAsB,cAAc,IAAK,OAAO,CAAC,iBAAiB,CAAC,CAElE;AAED,wBAAsB,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAElE;AAYD,wBAAsB,kBAAkB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAIvE;AAWD,wBAAsB,SAAS,CAC7B,EACE,KAAK,EACL,cAAc,EAAE,YAAY,EAC5B,YAAY,EAAE,WAAW,EACzB,IAAI,EACJ,GAAG,IAAI,EACR,EAAE,aAAa,EAChB,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,GACtC,OAAO,CAAC,YAAY,CAAC,CAuCvB;AAED,wBAAsB,eAAe,IAAI,OAAO,CAAC,IAAI,CAAC,CAErD"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,mBAAmB,EACnB,kBAAkB,EAClB,sBAAsB,EACtB,yBAAyB,EACzB,sBAAsB,EACtB,oBAAoB,EACpB,qBAAqB,EACrB,uBAAuB,EACvB,iBAAiB,EACjB,qBAAqB,EACrB,6BAA6B,EAC7B,6BAA6B,EAC9B,MAAM,iBAAiB,CAAA;AACxB,OAAO,KAAK,EACV,+BAA+B,EAC/B,iCAAiC,EAClC,MAAM,WAAW,CAAA;AAClB,OAAO,EAAE,sBAAsB,EAAE,0BAA0B,EAAE,MAAM,WAAW,CAAA;AAC9E,OAAO,KAAK,EAAE,kBAAkB,EAAE,2BAA2B,EAAE,MAAM,QAAQ,CAAA;AAG7E,YAAY,EACV,mBAAmB,EACnB,kBAAkB,EAClB,sBAAsB,EACtB,yBAAyB,EACzB,sBAAsB,EACtB,oBAAoB,EACpB,qBAAqB,EACrB,uBAAuB,EACvB,qBAAqB,EACrB,6BAA6B,EAC7B,6BAA6B,EAC7B,kBAAkB,EAClB,2BAA2B,EAC3B,+BAA+B,EAC/B,iCAAiC,GAClC,CAAA;AAED,OAAO,EAAE,sBAAsB,EAAE,0BAA0B,EAAE,CAAA;AAc7D,MAAM,MAAM,SAAS,GAAG;IACtB,KAAK,EAAE,MAAM,CAAA;IACb,wBAAwB,CAAC,EAAE,KAAK,CAAC,yBAAyB,CAAC,CAAA;CAC5D,CAAA;AAOD,oBAAY,UAAU;IACpB,GAAG,QAAQ;IACX,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,IAAI,SAAS;IACb,IAAI,SAAS;IACb,MAAM,WAAW;IACjB,IAAI,SAAS;IACb,IAAI,SAAS;CACd;AAGD,MAAM,MAAM,aAAa,GAAG,IAAI,CAC9B,mBAAmB,EACnB,cAAc,GAAG,cAAc,GAAG,cAAc,CACjD,GAAG;IACF,YAAY,CAAC,EAAE,UAAU,CAAA;IACzB,YAAY,CAAC,EAAE,UAAU,CAAA;IACzB,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,KAAK,GAAG,MAAM,GAAG,MAAM,CAAA;CACzD,CAAA;AAED,MAAM,MAAM,eAAe,GAAG,qBAAqB,CAAA;AAEnD,MAAM,MAAM,gBAAgB,GAAG,IAAI,CACjC,sBAAsB,EACtB,yBAAyB,GAAG,QAAQ,CACrC,GAAG;IACF,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,QAAQ,CAAC,EAAE,2BAA2B,EAAE,CAAA;IACxC,YAAY,CAAC,EAAE,MAAM,CAAA;CACtB,CAAA;AAED,MAAM,MAAM,WAAW,GAAG;IACxB,SAAS,EAAE,MAAM,CAAA;IACjB,SAAS,EAAE,MAAM,CAAA;IACjB,YAAY,EAAE,MAAM,CAAA;IACpB,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;CACd,CAAA;AAED,qBAAa,YAAY;IACvB,EAAE,EAAE,MAAM,CAAA;IAEV,GAAG,EAAE,OAAO,CAAQ;IAEpB,WAAW,EAAE,MAAM,CAAK;IAExB,KAAK,EAAE;QACL,uBAAuB,CAAC,EAAE,OAAO,CAAA;KAClC,CAAK;gBAEM,EAAE,SAAS,EAAE,GAAG,EAAE,WAAW,EAAE,KAAK,EAAE,EAAE,kBAAkB;IAOtE;;OAEG;IACG,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,uBAAuB,CAAC;IAMrE;;OAEG;IACG,WAAW,CACf,QAAQ,EAAE,MAAM,EAChB,OAAO,CAA
C,EAAE;QAAE,SAAS,EAAE,MAAM,CAAA;KAAE,GAC9B,OAAO,CAAC,MAAM,CAAC;IAIZ,gBAAgB,CACpB,QAAQ,EAAE,2BAA2B,EAAE,EACvC,QAAQ,CAAC,EAAE,MAAM,GAChB,OAAO,CAAC,MAAM,CAAC;IAOZ,UAAU,CACd,MAAM,EAAE,gBAAgB,EACxB,QAAQ,CAAC,EAAE,CAAC,IAAI,EAAE,SAAS,KAAK,IAAI,GACnC,OAAO,CAAC,sBAAsB,CAAC;IAqClC,cAAc,IAAI,OAAO,CAAC,IAAI,CAAC;IAI/B,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC;IAI1D,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,oBAAoB;IAIhD,UAAU,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAI7C,SAAS,CACP,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,eAAe,GACvB,OAAO,CAAC,qBAAqB,CAAC;IAI3B,KAAK,CACT,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,GACT,OAAO,CAAC,WAAW,CAAC;IAejB,iBAAiB,CACrB,QAAQ,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,GACjD,OAAO,CAAC,IAAI,CAAC;IAUV,kBAAkB,IAAI,OAAO,CAAC,IAAI,CAAC;IAInC,qBAAqB,IAAI,OAAO,CACpC,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,CACzC;IAIK,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B;AAED,wBAAsB,cAAc,IAAK,OAAO,CAAC,iBAAiB,CAAC,CAElE;AAED,wBAAsB,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAElE;AAYD,wBAAsB,kBAAkB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAIvE;AAWD,wBAAsB,SAAS,CAC7B,EACE,KAAK,EACL,cAAc,EAAE,YAAY,EAC5B,YAAY,EAAE,WAAW,EACzB,IAAI,EACJ,SAAS,EAAE,QAAQ,EACnB,GAAG,IAAI,EACR,EAAE,aAAa,EAChB,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,GACtC,OAAO,CAAC,YAAY,CAAC,CA+CvB;AAED,wBAAsB,eAAe,IAAI,OAAO,CAAC,IAAI,CAAC,CAErD"}
package/llama-rn.podspec CHANGED
@@ -2,8 +2,7 @@ require "json"
2
2
 
3
3
  package = JSON.parse(File.read(File.join(__dir__, "package.json")))
4
4
  base_ld_flags = "-framework Accelerate -framework Foundation -framework Metal -framework MetalKit"
5
- base_compiler_flags = "-fno-objc-arc -DLM_GGML_USE_ACCELERATE -Wno-shorten-64-to-32"
6
- folly_compiler_flags = "-DFOLLY_NO_CONFIG -DFOLLY_MOBILE=1 -DFOLLY_USE_LIBCPP=1 -Wno-comma"
5
+ base_compiler_flags = "-fno-objc-arc -DLM_GGML_USE_CPU -DLM_GGML_USE_ACCELERATE -Wno-shorten-64-to-32"
7
6
 
8
7
  if ENV["RNLLAMA_DISABLE_METAL"] != "1" then
9
8
  base_compiler_flags += " -DLM_GGML_USE_METAL" # -DLM_GGML_METAL_NDEBUG
@@ -21,7 +20,7 @@ Pod::Spec.new do |s|
21
20
  s.license = package["license"]
22
21
  s.authors = package["author"]
23
22
 
24
- s.platforms = { :ios => "11.0", :tvos => "11.0" }
23
+ s.platforms = { :ios => "13.0", :tvos => "13.0" }
25
24
  s.source = { :git => "https://github.com/mybigday/llama.rn.git", :tag => "#{s.version}" }
26
25
 
27
26
  s.source_files = "ios/**/*.{h,m,mm}", "cpp/**/*.{h,cpp,hpp,c,m,mm}"
@@ -33,24 +32,11 @@ Pod::Spec.new do |s|
33
32
  s.pod_target_xcconfig = {
34
33
  "OTHER_LDFLAGS" => base_ld_flags,
35
34
  "OTHER_CFLAGS" => base_optimizer_flags,
36
- "OTHER_CPLUSPLUSFLAGS" => base_optimizer_flags
35
+ "OTHER_CPLUSPLUSFLAGS" => base_optimizer_flags + " -std=c++17"
37
36
  }
38
37
 
39
38
  # Don't install the dependencies when we run `pod install` in the old architecture.
40
39
  if ENV['RCT_NEW_ARCH_ENABLED'] == '1' then
41
- s.compiler_flags = base_compiler_flags + " " + folly_compiler_flags + " -DRCT_NEW_ARCH_ENABLED=1"
42
- new_arch_cpp_flags = "-DFOLLY_NO_CONFIG -DFOLLY_MOBILE=1 -DFOLLY_USE_LIBCPP=1"
43
- s.pod_target_xcconfig = {
44
- "CLANG_CXX_LANGUAGE_STANDARD" => "c++17",
45
- "HEADER_SEARCH_PATHS" => "\"$(PODS_ROOT)/boost\"",
46
- "OTHER_LDFLAGS" => "-framework Accelerate",
47
- "OTHER_CFLAGS" => base_optimizer_flags,
48
- "OTHER_CPLUSPLUSFLAGS" => new_arch_cpp_flags + " " + base_optimizer_flags
49
- }
50
- s.dependency "React-Codegen"
51
- s.dependency "RCT-Folly"
52
- s.dependency "RCTRequired"
53
- s.dependency "RCTTypeSafety"
54
- s.dependency "ReactCommon/turbomodule/core"
40
+ install_modules_dependencies(s)
55
41
  end
56
42
  end
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cui-llama.rn",
3
- "version": "1.3.6",
3
+ "version": "1.4.0",
4
4
  "description": "Fork of llama.rn for ChatterUI",
5
5
  "main": "lib/commonjs/index",
6
6
  "module": "lib/module/index",
@@ -13,8 +13,7 @@
13
13
  "jest",
14
14
  "ios",
15
15
  "android",
16
- "cpp/*.*",
17
- "cpp/amx/*.*",
16
+ "cpp",
18
17
  "*.podspec",
19
18
  "!lib/typescript/example",
20
19
  "!ios/build",
package/src/NativeRNLlama.ts CHANGED
@@ -12,6 +12,7 @@ export type NativeContextParams = {
12
12
 
13
13
  n_ctx?: number
14
14
  n_batch?: number
15
+ n_ubatch?: number
15
16
 
16
17
  n_threads?: number
17
18
  n_gpu_layers?: number
@@ -24,18 +25,28 @@ export type NativeContextParams = {
24
25
  /**
25
26
  * KV cache data type for the K (Experimental in llama.cpp)
26
27
  */
27
- cache_type_k?: number
28
+ cache_type_k?: string
28
29
  /**
29
30
  * KV cache data type for the V (Experimental in llama.cpp)
30
31
  */
31
- cache_type_v?: number
32
+ cache_type_v?: string
32
33
 
33
34
  use_mlock?: boolean
34
35
  use_mmap?: boolean
35
36
  vocab_only?: boolean
36
37
 
37
- lora?: string // lora_adaptor
38
+ /**
39
+ * Single LoRA adapter path
40
+ */
41
+ lora?: string
42
+ /**
43
+ * Single LoRA adapter scale
44
+ */
38
45
  lora_scaled?: number
46
+ /**
47
+ * LoRA adapter list
48
+ */
49
+ lora_list?: Array<{ path: string; scaled?: number }>
39
50
 
40
51
  rope_freq_base?: number
41
52
  rope_freq_scale?: number
@@ -115,10 +126,6 @@ export type NativeCompletionParams = {
115
126
  * Repeat alpha presence penalty. Default: `0.0`, which is disabled.
116
127
  */
117
128
  penalty_present?: number
118
- /**
119
- * Penalize newline tokens when applying the repeat penalty. Default: `false`
120
- */
121
- // penalize_nl?: boolean
122
129
  /**
123
130
  * Enable Mirostat sampling, controlling perplexity during text generation. Default: `0`, where `0` is disabled, `1` is Mirostat, and `2` is Mirostat 2.0.
124
131
  */
@@ -243,8 +250,16 @@ export interface Spec extends TurboModule {
243
250
  setContextLimit(limit: number): Promise<void>
244
251
 
245
252
  modelInfo(path: string, skip?: string[]): Promise<Object>
246
- initContext(contextId: number, params: NativeContextParams): Promise<NativeLlamaContext>
253
+ initContext(
254
+ contextId: number,
255
+ params: NativeContextParams,
256
+ ): Promise<NativeLlamaContext>
247
257
 
258
+ getFormattedChat(
259
+ contextId: number,
260
+ messages: NativeLlamaChatMessage[],
261
+ chatTemplate?: string,
262
+ ): Promise<string>
248
263
  loadSession(
249
264
  contextId: number,
250
265
  filepath: string,
@@ -262,11 +277,6 @@ export interface Spec extends TurboModule {
262
277
  tokenizeAsync(contextId: number, text: string): Promise<NativeTokenizeResult>
263
278
  tokenizeSync(contextId: number, text: string): NativeTokenizeResult
264
279
  getCpuFeatures() : Promise<NativeCPUFeatures>
265
- getFormattedChat(
266
- contextId: number,
267
- messages: NativeLlamaChatMessage[],
268
- chatTemplate?: string,
269
- ): Promise<string>
270
280
  detokenize(contextId: number, tokens: number[]): Promise<string>
271
281
  embedding(
272
282
  contextId: number,
@@ -281,6 +291,15 @@ export interface Spec extends TurboModule {
281
291
  nr: number,
282
292
  ): Promise<string>
283
293
 
294
+ applyLoraAdapters(
295
+ contextId: number,
296
+ loraAdapters: Array<{ path: string; scaled?: number }>,
297
+ ): Promise<void>
298
+ removeLoraAdapters(contextId: number): Promise<void>
299
+ getLoadedLoraAdapters(
300
+ contextId: number,
301
+ ): Promise<Array<{ path: string; scaled?: number }>>
302
+
284
303
  releaseContext(contextId: number): Promise<void>
285
304
 
286
305
  releaseAllContexts(): Promise<void>
package/src/index.ts CHANGED
@@ -15,7 +15,10 @@ import type {
15
15
  NativeCompletionTokenProbItem,
16
16
  NativeCompletionResultTimings,
17
17
  } from './NativeRNLlama'
18
- import type { SchemaGrammarConverterPropOrder, SchemaGrammarConverterBuiltinRule } from './grammar'
18
+ import type {
19
+ SchemaGrammarConverterPropOrder,
20
+ SchemaGrammarConverterBuiltinRule,
21
+ } from './grammar'
19
22
  import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar'
20
23
  import type { RNLlamaMessagePart, RNLlamaOAICompatibleMessage } from './chat'
21
24
  import { formatChat } from './chat'
@@ -62,56 +65,24 @@ type TokenNativeEvent = {
62
65
  tokenResult: TokenData
63
66
  }
64
67
 
65
- export enum GGML_TYPE {
66
- LM_GGML_TYPE_F32 = 0,
67
- LM_GGML_TYPE_F16 = 1,
68
- LM_GGML_TYPE_Q4_0 = 2,
69
- LM_GGML_TYPE_Q4_1 = 3,
70
- // LM_GGML_TYPE_Q4_2 = 4, support has been removed
71
- // LM_GGML_TYPE_Q4_3 = 5, support has been removed
72
- LM_GGML_TYPE_Q5_0 = 6,
73
- LM_GGML_TYPE_Q5_1 = 7,
74
- LM_GGML_TYPE_Q8_0 = 8,
75
- LM_GGML_TYPE_Q8_1 = 9,
76
- LM_GGML_TYPE_Q2_K = 10,
77
- LM_GGML_TYPE_Q3_K = 11,
78
- LM_GGML_TYPE_Q4_K = 12,
79
- LM_GGML_TYPE_Q5_K = 13,
80
- LM_GGML_TYPE_Q6_K = 14,
81
- LM_GGML_TYPE_Q8_K = 15,
82
- LM_GGML_TYPE_IQ2_XXS = 16,
83
- LM_GGML_TYPE_IQ2_XS = 17,
84
- LM_GGML_TYPE_IQ3_XXS = 18,
85
- LM_GGML_TYPE_IQ1_S = 19,
86
- LM_GGML_TYPE_IQ4_NL = 20,
87
- LM_GGML_TYPE_IQ3_S = 21,
88
- LM_GGML_TYPE_IQ2_S = 22,
89
- LM_GGML_TYPE_IQ4_XS = 23,
90
- LM_GGML_TYPE_I8 = 24,
91
- LM_GGML_TYPE_I16 = 25,
92
- LM_GGML_TYPE_I32 = 26,
93
- LM_GGML_TYPE_I64 = 27,
94
- LM_GGML_TYPE_F64 = 28,
95
- LM_GGML_TYPE_IQ1_M = 29,
96
- LM_GGML_TYPE_BF16 = 30,
97
- // LM_GGML_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files
98
- // LM_GGML_TYPE_Q4_0_4_8 = 32,
99
- // LM_GGML_TYPE_Q4_0_8_8 = 33,
100
- LM_GGML_TYPE_TQ1_0 = 34,
101
- LM_GGML_TYPE_TQ2_0 = 35,
102
- // LM_GGML_TYPE_IQ4_NL_4_4 = 36,
103
- // LM_GGML_TYPE_IQ4_NL_4_8 = 37,
104
- // LM_GGML_TYPE_IQ4_NL_8_8 = 38,
105
- LM_GGML_TYPE_COUNT = 39,
106
- };
68
+ export enum CACHE_TYPE {
69
+ F16 = 'f16',
70
+ F32 = 'f32',
71
+ Q8_0 = 'q8_0',
72
+ Q4_0 = 'q4_0',
73
+ Q4_1 = 'q4_1',
74
+ IQ4_NL = 'iq4_nl',
75
+ Q5_0 = 'q5_0',
76
+ Q5_1 = 'q5_1'
77
+ }
107
78
 
108
79
 
109
80
  export type ContextParams = Omit<
110
81
  NativeContextParams,
111
- 'cache_type_k' | 'cache_type_v' | 'pooling_type'
82
+ 'cache_type_k' | 'cache_type_v' | 'pooling_type'
112
83
  > & {
113
- cache_type_k?: GGML_TYPE
114
- cache_type_v?: GGML_TYPE
84
+ cache_type_k?: CACHE_TYPE
85
+ cache_type_v?: CACHE_TYPE
115
86
  pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank'
116
87
  }
117
88
 
@@ -190,7 +161,10 @@ export class LlamaContext {
190
161
  let finalPrompt = params.prompt
191
162
  if (params.messages) {
192
163
  // messages always win
193
- finalPrompt = await this.getFormattedChat(params.messages, params.chatTemplate)
164
+ finalPrompt = await this.getFormattedChat(
165
+ params.messages,
166
+ params.chatTemplate,
167
+ )
194
168
  }
195
169
 
196
170
  let tokenListener: any =
@@ -263,6 +237,28 @@ export class LlamaContext {
263
237
  }
264
238
  }
265
239
 
240
+ async applyLoraAdapters(
241
+ loraList: Array<{ path: string; scaled?: number }>
242
+ ): Promise<void> {
243
+ let loraAdapters: Array<{ path: string; scaled?: number }> = []
244
+ if (loraList)
245
+ loraAdapters = loraList.map((l) => ({
246
+ path: l.path.replace(/file:\/\//, ''),
247
+ scaled: l.scaled,
248
+ }))
249
+ return RNLlama.applyLoraAdapters(this.id, loraAdapters)
250
+ }
251
+
252
+ async removeLoraAdapters(): Promise<void> {
253
+ return RNLlama.removeLoraAdapters(this.id)
254
+ }
255
+
256
+ async getLoadedLoraAdapters(): Promise<
257
+ Array<{ path: string; scaled?: number }>
258
+ > {
259
+ return RNLlama.getLoadedLoraAdapters(this.id)
260
+ }
261
+
266
262
  async release(): Promise<void> {
267
263
  return RNLlama.releaseContext(this.id)
268
264
  }
@@ -307,6 +303,7 @@ export async function initLlama(
307
303
  is_model_asset: isModelAsset,
308
304
  pooling_type: poolingType,
309
305
  lora,
306
+ lora_list: loraList,
310
307
  ...rest
311
308
  }: ContextParams,
312
309
  onProgress?: (progress: number) => void,
@@ -317,6 +314,13 @@ export async function initLlama(
317
314
  let loraPath = lora
318
315
  if (loraPath?.startsWith('file://')) loraPath = loraPath.slice(7)
319
316
 
317
+ let loraAdapters: Array<{ path: string; scaled?: number }> = []
318
+ if (loraList)
319
+ loraAdapters = loraList.map((l) => ({
320
+ path: l.path.replace(/file:\/\//, ''),
321
+ scaled: l.scaled,
322
+ }))
323
+
320
324
  const contextId = contextIdCounter + contextIdRandom()
321
325
  contextIdCounter += 1
322
326
 
@@ -342,6 +346,7 @@ export async function initLlama(
342
346
  use_progress_callback: !!onProgress,
343
347
  pooling_type: poolType,
344
348
  lora: loraPath,
349
+ lora_list: loraAdapters,
345
350
  ...rest,
346
351
  }).catch((err: any) => {
347
352
  removeProgressListener?.remove()
package/cpp/llama.cpp.rej DELETED
@@ -1,23 +0,0 @@
1
- --- llama.cpp.orig 2024-11-02 12:42:13
2
- +++ llama.cpp 2024-11-02 13:00:37
3
- @@ -1941,16 +1952,16 @@
4
-
5
- if (prefetch > 0) {
6
- // advise the kernel to preload the mapped memory
7
- - if (posix_madvise(addr, std::min(file->size, prefetch), POSIX_MADV_WILLNEED)) {
8
- - LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed: %s\n",
9
- + if (madvise(addr, std::min(file->size, prefetch), MADV_WILLNEED)) {
10
- + fprintf(stderr, "warning: madvise(.., MADV_WILLNEED) failed: %s\n",
11
- strerror(errno));
12
- }
13
- }
14
- if (numa) {
15
- // advise the kernel not to use readahead
16
- // (because the next page might not belong on the same node)
17
- - if (posix_madvise(addr, file->size, POSIX_MADV_RANDOM)) {
18
- - LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_RANDOM) failed: %s\n",
19
- + if (madvise(addr, file->size, MADV_RANDOM)) {
20
- + fprintf(stderr, "warning: madvise(.., MADV_RANDOM) failed: %s\n",
21
- strerror(errno));
22
- }
23
- }