cui-llama.rn 1.2.4 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/README.md +3 -4
  2. package/android/src/main/CMakeLists.txt +21 -5
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +115 -30
  4. package/android/src/main/java/com/rnllama/RNLlama.java +40 -7
  5. package/android/src/main/jni.cpp +222 -36
  6. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +9 -4
  7. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +9 -4
  8. package/cpp/common.cpp +1682 -2122
  9. package/cpp/common.h +600 -594
  10. package/cpp/ggml-aarch64.c +129 -3209
  11. package/cpp/ggml-aarch64.h +19 -39
  12. package/cpp/ggml-alloc.c +1040 -1040
  13. package/cpp/ggml-alloc.h +76 -76
  14. package/cpp/ggml-backend-impl.h +216 -227
  15. package/cpp/ggml-backend-reg.cpp +195 -0
  16. package/cpp/ggml-backend.cpp +1997 -2625
  17. package/cpp/ggml-backend.h +328 -326
  18. package/cpp/ggml-common.h +1853 -1853
  19. package/cpp/ggml-cpp.h +38 -0
  20. package/cpp/ggml-cpu-aarch64.c +3560 -0
  21. package/cpp/ggml-cpu-aarch64.h +30 -0
  22. package/cpp/ggml-cpu-impl.h +371 -614
  23. package/cpp/ggml-cpu-quants.c +10822 -0
  24. package/cpp/ggml-cpu-quants.h +63 -0
  25. package/cpp/ggml-cpu.c +13975 -0
  26. package/cpp/ggml-cpu.cpp +663 -0
  27. package/cpp/ggml-cpu.h +177 -0
  28. package/cpp/ggml-impl.h +550 -209
  29. package/cpp/ggml-metal.h +66 -66
  30. package/cpp/ggml-metal.m +4294 -3819
  31. package/cpp/ggml-quants.c +5247 -15752
  32. package/cpp/ggml-quants.h +100 -147
  33. package/cpp/ggml-threading.cpp +12 -0
  34. package/cpp/ggml-threading.h +12 -0
  35. package/cpp/ggml.c +8180 -23464
  36. package/cpp/ggml.h +2411 -2562
  37. package/cpp/llama-grammar.cpp +1138 -1138
  38. package/cpp/llama-grammar.h +144 -144
  39. package/cpp/llama-impl.h +181 -181
  40. package/cpp/llama-sampling.cpp +2348 -2194
  41. package/cpp/llama-sampling.h +48 -30
  42. package/cpp/llama-vocab.cpp +1984 -1968
  43. package/cpp/llama-vocab.h +170 -165
  44. package/cpp/llama.cpp +22132 -21969
  45. package/cpp/llama.h +1253 -1253
  46. package/cpp/log.cpp +401 -401
  47. package/cpp/log.h +121 -121
  48. package/cpp/rn-llama.hpp +83 -19
  49. package/cpp/sampling.cpp +466 -458
  50. package/cpp/sgemm.cpp +1884 -1219
  51. package/ios/RNLlama.mm +43 -20
  52. package/ios/RNLlamaContext.h +9 -3
  53. package/ios/RNLlamaContext.mm +133 -33
  54. package/jest/mock.js +0 -1
  55. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  56. package/lib/commonjs/index.js +52 -15
  57. package/lib/commonjs/index.js.map +1 -1
  58. package/lib/module/NativeRNLlama.js.map +1 -1
  59. package/lib/module/index.js +51 -15
  60. package/lib/module/index.js.map +1 -1
  61. package/lib/typescript/NativeRNLlama.d.ts +29 -6
  62. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  63. package/lib/typescript/index.d.ts +12 -5
  64. package/lib/typescript/index.d.ts.map +1 -1
  65. package/package.json +1 -1
  66. package/src/NativeRNLlama.ts +41 -7
  67. package/src/index.ts +82 -27
  68. package/cpp/json-schema-to-grammar.cpp +0 -1045
  69. package/cpp/json-schema-to-grammar.h +0 -8
  70. package/cpp/json.hpp +0 -24766
@@ -3,8 +3,8 @@ import RNLlama from './NativeRNLlama';
3
3
  import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar';
4
4
  import { formatChat } from './chat';
5
5
  export { SchemaGrammarConverter, convertJsonSchemaToGrammar };
6
+ const EVENT_ON_INIT_CONTEXT_PROGRESS = '@RNLlama_onInitContextProgress';
6
7
  const EVENT_ON_TOKEN = '@RNLlama_onToken';
7
- const EVENT_ON_MODEL_PROGRESS = '@RNLlama_onModelProgress';
8
8
  let EventEmitter;
9
9
  if (Platform.OS === 'ios') {
10
10
  // @ts-ignore
@@ -45,16 +45,18 @@ export class LlamaContext {
45
45
  async saveSession(filepath, options) {
46
46
  return RNLlama.saveSession(this.id, filepath, (options === null || options === void 0 ? void 0 : options.tokenSize) || -1);
47
47
  }
48
- async getFormattedChat(messages) {
48
+ async getFormattedChat(messages, template) {
49
49
  var _this$model;
50
50
  const chat = formatChat(messages);
51
- return RNLlama.getFormattedChat(this.id, chat, (_this$model = this.model) !== null && _this$model !== void 0 && _this$model.isChatTemplateSupported ? undefined : 'chatml');
51
+ let tmpl = (_this$model = this.model) !== null && _this$model !== void 0 && _this$model.isChatTemplateSupported ? undefined : 'chatml';
52
+ if (template) tmpl = template; // Force replace if provided
53
+ return RNLlama.getFormattedChat(this.id, chat, tmpl);
52
54
  }
53
55
  async completion(params, callback) {
54
56
  let finalPrompt = params.prompt;
55
57
  if (params.messages) {
56
58
  // messages always win
57
- finalPrompt = await this.getFormattedChat(params.messages);
59
+ finalPrompt = await this.getFormattedChat(params.messages, params.chatTemplate);
58
60
  }
59
61
  let tokenListener = callback && EventEmitter.addListener(EVENT_ON_TOKEN, evt => {
60
62
  const {
@@ -94,8 +96,8 @@ export class LlamaContext {
94
96
  detokenize(tokens) {
95
97
  return RNLlama.detokenize(this.id, tokens);
96
98
  }
97
- embedding(text) {
98
- return RNLlama.embedding(this.id, text);
99
+ embedding(text, params) {
100
+ return RNLlama.embedding(this.id, text, params || {});
99
101
  }
100
102
  async bench(pp, tg, pl, nr) {
101
103
  const result = await RNLlama.bench(this.id, pp, tg, pl, nr);
@@ -120,30 +122,64 @@ export async function getCpuFeatures() {
120
122
  export async function setContextLimit(limit) {
121
123
  return RNLlama.setContextLimit(limit);
122
124
  }
123
- export async function initLlama(_ref2, progressCallback) {
125
+ let contextIdCounter = 0;
126
+ const contextIdRandom = () => process.env.NODE_ENV === 'test' ? 0 : Math.floor(Math.random() * 100000);
127
+ const modelInfoSkip = [
128
+ // Large fields
129
+ 'tokenizer.ggml.tokens', 'tokenizer.ggml.token_type', 'tokenizer.ggml.merges'];
130
+ export async function loadLlamaModelInfo(model) {
131
+ let path = model;
132
+ if (path.startsWith('file://')) path = path.slice(7);
133
+ return RNLlama.modelInfo(path, modelInfoSkip);
134
+ }
135
+ const poolTypeMap = {
136
+ // -1 is unspecified as undefined
137
+ none: 0,
138
+ mean: 1,
139
+ cls: 2,
140
+ last: 3,
141
+ rank: 4
142
+ };
143
+ export async function initLlama(_ref2, onProgress) {
144
+ var _loraPath, _removeProgressListen2;
124
145
  let {
125
146
  model,
126
147
  is_model_asset: isModelAsset,
148
+ pooling_type: poolingType,
149
+ lora,
127
150
  ...rest
128
151
  } = _ref2;
129
152
  let path = model;
130
153
  if (path.startsWith('file://')) path = path.slice(7);
131
- const modelProgressListener = EventEmitter.addListener(EVENT_ON_MODEL_PROGRESS, event => {
132
- if (event.progress && progressCallback) progressCallback(event.progress);
133
- if (event.progress === 100) {
134
- modelProgressListener.remove();
135
- }
136
- });
154
+ let loraPath = lora;
155
+ if ((_loraPath = loraPath) !== null && _loraPath !== void 0 && _loraPath.startsWith('file://')) loraPath = loraPath.slice(7);
156
+ const contextId = contextIdCounter + contextIdRandom();
157
+ contextIdCounter += 1;
158
+ let removeProgressListener = null;
159
+ if (onProgress) {
160
+ removeProgressListener = EventEmitter.addListener(EVENT_ON_INIT_CONTEXT_PROGRESS, evt => {
161
+ if (evt.contextId !== contextId) return;
162
+ onProgress(evt.progress);
163
+ });
164
+ }
165
+ const poolType = poolTypeMap[poolingType];
137
166
  const {
138
- contextId,
139
167
  gpu,
140
168
  reasonNoGPU,
141
169
  model: modelDetails
142
- } = await RNLlama.initContext({
170
+ } = await RNLlama.initContext(contextId, {
143
171
  model: path,
144
172
  is_model_asset: !!isModelAsset,
173
+ use_progress_callback: !!onProgress,
174
+ pooling_type: poolType,
175
+ lora: loraPath,
145
176
  ...rest
177
+ }).catch(err => {
178
+ var _removeProgressListen;
179
+ (_removeProgressListen = removeProgressListener) === null || _removeProgressListen === void 0 ? void 0 : _removeProgressListen.remove();
180
+ throw err;
146
181
  });
182
+ (_removeProgressListen2 = removeProgressListener) === null || _removeProgressListen2 === void 0 ? void 0 : _removeProgressListen2.remove();
147
183
  return new LlamaContext({
148
184
  contextId,
149
185
  gpu,
@@ -1 +1 @@
1
- {"version":3,"names":["NativeEventEmitter","DeviceEventEmitter","Platform","RNLlama","SchemaGrammarConverter","convertJsonSchemaToGrammar","formatChat","EVENT_ON_TOKEN","EVENT_ON_MODEL_PROGRESS","EventEmitter","OS","LlamaContext","gpu","reasonNoGPU","model","constructor","_ref","contextId","id","loadSession","filepath","path","startsWith","slice","saveSession","options","tokenSize","getFormattedChat","messages","_this$model","chat","isChatTemplateSupported","undefined","completion","params","callback","finalPrompt","prompt","tokenListener","addListener","evt","tokenResult","Error","promise","emit_partial_completion","then","completionResult","_tokenListener","remove","catch","err","_tokenListener2","stopCompletion","tokenizeAsync","text","tokenizeSync","detokenize","tokens","embedding","bench","pp","tg","pl","nr","result","modelDesc","modelSize","modelNParams","ppAvg","ppStd","tgAvg","tgStd","JSON","parse","release","releaseContext","getCpuFeatures","setContextLimit","limit","initLlama","_ref2","progressCallback","is_model_asset","isModelAsset","rest","modelProgressListener","event","progress","modelDetails","initContext","releaseAllLlama","releaseAllContexts"],"sourceRoot":"..\\..\\src","sources":["index.ts"],"mappings":"AAAA,SAASA,kBAAkB,EAAEC,kBAAkB,EAAEC,QAAQ,QAAQ,cAAc;AAE/E,OAAOC,OAAO,MAAM,iBAAiB;AAYrC,SAASC,sBAAsB,EAAEC,0BAA0B,QAAQ,WAAW;AAE9E,SAASC,UAAU,QAAQ,QAAQ;AAEnC,SAASF,sBAAsB,EAAEC,0BAA0B;AAE3D,MAAME,cAAc,GAAG,kBAAkB;AAEzC,MAAMC,uBAAuB,GAAG,0BAA0B;AAE1D,IAAIC,YAA2D;AAC/D,IAAIP,QAAQ,CAACQ,EAAE,KAAK,KAAK,EAAE;EACzB;EACAD,YAAY,GAAG,IAAIT,kBAAkB,CAACG,OAAO,CAAC;AAChD;AACA,IAAID,QAAQ,CAACQ,EAAE,KAAK,SAAS,EAAE;EAC7BD,YAAY,GAAGR,kBAAkB;AACnC;AAgCA,OAAO,MAAMU,YAAY,CAAC;EAGxBC,GAAG,GAAY,KAAK;EAEpBC,WAAW,GAAW,EAAE;EAExBC,KAAK,GAED,CAAC,CAAC;EAENC,WAAWA,CAAAC,IAAA,EAA6D;IAAA,IAA5D;MAAEC,SAAS;MAAEL,GAAG;MAAEC,WAAW;MAAEC;IAA0B,CAAC,GAAAE,IAAA;IACpE,IAAI,CAACE,EAAE,GAAGD,SAAS;IACnB,IAAI,CAACL,GAAG,GAAGA,GAAG;IACd,IAAI,CAACC,WAAW,GAAGA,WAAW;IAC9B,IAAI,CAACC,KAAK,GAAGA,KAAK;EACpB;;EAEA;AACF;AACA;EACE,MAAMK,WAAWA,CAACC,QAAgB,EAAoC;IACpE,IAAIC,IAAI,GAAGD,QAAQ;IACnB,IAAIC,IAAI,CAACC,UAAU,CAAC,SAAS,CAAC,EAAED,IAAI,GAAGA,IAAI,CAACE,KAAK,CAAC,CAAC,CAAC;IACpD,OAAOpB,OAAO,CAACgB,WAAW,CAAC,IAAI,CAACD,EAAE,EAAEG,IAAI,CAAC;EAC3C;;EAEA;AACF;AACA;EACE,MAAMG,WAAWA,CACfJ,QAAgB,EAChBK,OAA+B,EACd;IACjB,OAAOtB,OAAO,CAACqB,WAAW,CAAC,IAAI,CAACN,EAAE,EAAEE,QAAQ,EAAE,CAAAK,OAAO,aAAPA,OAAO,uBAAPA,OAAO,CAAEC,SAAS,KAAI,CAAC,CAAC,CAAC;EACzE;EAEA,MAAMC,gBAAgBA,CACpBC,QAAuC,EACtB;IAAA,IAAAC,WAAA;IACjB,MAAMC,IAAI,GAAGxB,UAAU,CAACsB,QAAQ,CAAC;IACjC,OAAOzB,OAAO,CAACwB,gBAAgB,CAC7B,IAAI,CAACT,EAAE,EACPY,IAAI,EACJ,CAAAD,WAAA,OAAI,CAACf,KAAK,cAAAe,WAAA,eAAVA,WAAA,CAAYE,uBAAuB,GAAGC,SAAS,GAAG,QACpD,CAAC;EACH;EAEA,MAAMC,UAAUA,CACdC,MAAwB,EACxBC,QAAoC,EACH;IAEjC,IAAIC,WAAW,GAAGF,MAAM,CAACG,MAAM;IAC/B,IAAIH,MAAM,CAACN,QAAQ,EAAE;MAAE;MACrBQ,WAAW,GAAG,MAAM,IAAI,CAACT,gBAAgB,CAACO,MAAM,CAACN,QAAQ,CAAC;IAC5D;IAEA,IAAIU,aAAkB,GACpBH,QAAQ,IACR1B,YAAY,CAAC8B,WAAW,CAAChC,cAAc,EAAGiC,GAAqB,IAAK;MAClE,MAAM;QAAEvB,SAAS;QAAEwB;MAAY,CAAC,GAAGD,GAAG;MACtC,IAAIvB,SAAS,KAAK,IAAI,CAACC,EAAE,EAAE;MAC3BiB,QAAQ,CAACM,WAAW,CAAC;IACvB,CAAC,CAAC;IAEJ,IAAI,CAACL,WAAW,EAAE,MAAM,IAAIM,KAAK,CAAC,oBAAoB,CAAC;IACvD,MAAMC,OAAO,GAAGxC,OAAO,CAAC8B,UAAU,CAAC,IAAI,CAACf,EAAE,EAAE;MAC1C,GAAGgB,MAAM;MACTG,MAAM,EAAED,WAAW;MACnBQ,uBAAuB,EAAE,CAAC,CAACT;IAC7B,CAAC,CAAC;IACF,OAAOQ,OAAO,CACXE,IAAI,CAAEC,gBAAgB,IAAK;MAAA,IAAAC,cAAA;MAC1B,CAAAA,cAAA,GAAAT,aAAa,cAAAS,cAAA,uBAAbA,cAAA,CAAeC,MAAM,CAAC,CAAC;MACvBV,aAAa,GAAG,IAAI;MACpB,OAAOQ,gBAAgB;IACzB,CAAC,CAAC,CACDG,KAAK,CAAEC,GAAQ,IAAK;MAAA,IAAAC,eAAA;MACnB,CAAAA,eAAA,GAAAb,aAAa,cAAAa,eAAA,uBAAbA,eAAA,CAAeH,MAAM,CAAC,CAAC;MACvBV,aAAa,GAAG,IAAI;MACpB,MAAMY,GAAG;IACX,CAAC,CAAC;EACN;EAEAE,cAAcA,CAAA,EAAkB;IAC9B,OAAOjD,OAAO,CAACiD,cAAc,CAAC,IAAI,CAAClC,EAAE,CAAC;EACxC;EAEAmC,aAAaA,CAACC,IAAY,EAAiC;IACzD,OAAOnD,OAAO,CAACkD,aAAa,CAAC,IAAI,CAACnC,EAAE,EAAEoC,IAAI,CAAC;EAC7C;EAEAC,YAAYA,CAACD,IAAY,EAAwB;IAC/C,OAAOnD,OAAO,CAACoD,YAAY,CAAC,IAAI,CAACrC,EAAE,EAAEoC,IAAI,CAAC;EAC5C;EAEAE,UAAUA,CAACC,MAAgB,EAAmB;IAC5C,OAAOtD,OAAO,CAACqD,UAAU,CAAC,IAAI,CAACtC,EAAE,EAAEuC,MAAM,CAAC;EAC5C;EAEAC,SAASA,CAACJ,IAAY,EAAkC;IACtD,OAAOnD,OAAO,CAACuD,SAAS,CAAC,IAAI,CAACxC,EAAE,EAAEoC,IAAI,CAAC;EACzC;EAEA,MAAMK,KAAKA,CACTC,EAAU,EACVC,EAAU,EACVC,EAAU,EACVC,EAAU,EACY;IACtB,MAAMC,MAAM,GAAG,MAAM7D,OAAO,CAACwD,KAAK,CAAC,IAAI,CAACzC,EAAE,EAAE0C,EAAE,EAAEC,EAAE,EAAEC,EAAE,EAAEC,EAAE,CAAC;IAC3D,MAAM,CAACE,SAAS,EAAEC,SAAS,EAAEC,YAAY,EAAEC,KAAK,EAAEC,KAAK,EAAEC,KAAK,EAAEC,KAAK,CAAC,GACpEC,IAAI,CAACC,KAAK,CAACT,MAAM,CAAC;IACpB,OAAO;MACLC,SAAS;MACTC,SAAS;MACTC,YAAY;MACZC,KAAK;MACLC,KAAK;MACLC,KAAK;MACLC;IACF,CAAC;EACH;EAEA,MAAMG,OAAOA,CAAA,EAAkB;IAC7B,OAAOvE,OAAO,CAACwE,cAAc,CAAC,IAAI,CAACzD,EAAE,CAAC;EACxC;AACF;AAEA,OAAO,eAAe0D,cAAcA,CAAA,EAAgC;EAClE,OAAOzE,OAAO,CAACyE,cAAc,CAAC,CAAC;AACjC;AAEA,OAAO,eAAeC,eAAeA,CAACC,KAAa,EAAiB;EAClE,OAAO3E,OAAO,CAAC0E,eAAe,CAACC,KAAK,CAAC;AACvC;AAEA,OAAO,eAAeC,SAASA,CAAAC,KAAA,EAK7BC,gBAA6C,EACtB;EAAA,IANO;IAC5BnE,KAAK;IACLoE,cAAc,EAAEC,YAAY;IAC5B,GAAGC;EACU,CAAC,GAAAJ,KAAA;EAGhB,IAAI3D,IAAI,GAAGP,KAAK;EAChB,IAAIO,IAAI,CAACC,UAAU,CAAC,SAAS,CAAC,EAAED,IAAI,GAAGA,IAAI,CAACE,KAAK,CAAC,CAAC,CAAC;EAEpD,MAAM8D,qBAAqB,GAAG5E,YAAY,CAAC8B,WAAW,CAAC/B,uBAAuB,EAAG8E,KAAK,IAAK;IACzF,IAAGA,KAAK,CAACC,QAAQ,IAAIN,gBAAgB,EACnCA,gBAAgB,CAACK,KAAK,CAACC,QAAQ,CAAC;IAClC,IAAGD,KAAK,CAACC,QAAQ,KAAK,GAAG,EAAE;MACzBF,qBAAqB,CAACrC,MAAM,CAAC,CAAC;IAChC;EACF,CAAC,CAAC;EAEF,MAAM;IACJ/B,SAAS;IACTL,GAAG;IACHC,WAAW;IACXC,KAAK,EAAE0E;EACT,CAAC,GAAG,MAAMrF,OAAO,CAACsF,WAAW,CAAC;IAC5B3E,KAAK,EAAEO,IAAI;IACX6D,cAAc,EAAE,CAAC,CAACC,YAAY;IAC9B,GAAGC;EACL,CAAC,CAAC;EAEF,OAAO,IAAIzE,YAAY,CAAC;IAAEM,SAAS;IAAEL,GAAG;IAAEC,WAAW;IAAEC,KAAK,EAAE0E;EAAa,CAAC,CAAC;AAC/E;AAEA,OAAO,eAAeE,eAAeA,CAAA,EAAkB;EACrD,OAAOvF,OAAO,CAACwF,kBAAkB,CAAC,CAAC;AACrC"}
1
+ {"version":3,"names":["NativeEventEmitter","DeviceEventEmitter","Platform","RNLlama","SchemaGrammarConverter","convertJsonSchemaToGrammar","formatChat","EVENT_ON_INIT_CONTEXT_PROGRESS","EVENT_ON_TOKEN","EventEmitter","OS","LlamaContext","gpu","reasonNoGPU","model","constructor","_ref","contextId","id","loadSession","filepath","path","startsWith","slice","saveSession","options","tokenSize","getFormattedChat","messages","template","_this$model","chat","tmpl","isChatTemplateSupported","undefined","completion","params","callback","finalPrompt","prompt","chatTemplate","tokenListener","addListener","evt","tokenResult","Error","promise","emit_partial_completion","then","completionResult","_tokenListener","remove","catch","err","_tokenListener2","stopCompletion","tokenizeAsync","text","tokenizeSync","detokenize","tokens","embedding","bench","pp","tg","pl","nr","result","modelDesc","modelSize","modelNParams","ppAvg","ppStd","tgAvg","tgStd","JSON","parse","release","releaseContext","getCpuFeatures","setContextLimit","limit","contextIdCounter","contextIdRandom","process","env","NODE_ENV","Math","floor","random","modelInfoSkip","loadLlamaModelInfo","modelInfo","poolTypeMap","none","mean","cls","last","rank","initLlama","_ref2","onProgress","_loraPath","_removeProgressListen2","is_model_asset","isModelAsset","pooling_type","poolingType","lora","rest","loraPath","removeProgressListener","progress","poolType","modelDetails","initContext","use_progress_callback","_removeProgressListen","releaseAllLlama","releaseAllContexts"],"sourceRoot":"..\\..\\src","sources":["index.ts"],"mappings":"AAAA,SAASA,kBAAkB,EAAEC,kBAAkB,EAAEC,QAAQ,QAAQ,cAAc;AAE/E,OAAOC,OAAO,MAAM,iBAAiB;AAarC,SAASC,sBAAsB,EAAEC,0BAA0B,QAAQ,WAAW;AAE9E,SAASC,UAAU,QAAQ,QAAQ;AAEnC,SAASF,sBAAsB,EAAEC,0BAA0B;AAE3D,MAAME,8BAA8B,GAAG,gCAAgC;AACvE,MAAMC,cAAc,GAAG,kBAAkB;AAEzC,IAAIC,YAA2D;AAC/D,IAAIP,QAAQ,CAACQ,EAAE,KAAK,KAAK,EAAE;EACzB;EACAD,YAAY,GAAG,IAAIT,kBAAkB,CAACG,OAAO,CAAC;AAChD;AACA,IAAID,QAAQ,CAACQ,EAAE,KAAK,SAAS,EAAE;EAC7BD,YAAY,GAAGR,kBAAkB;AACnC;AA0CA,OAAO,MAAMU,YAAY,CAAC;EAGxBC,GAAG,GAAY,KAAK;EAEpBC,WAAW,GAAW,EAAE;EAExBC,KAAK,GAED,CAAC,CAAC;EAENC,WAAWA,CAAAC,IAAA,EAA6D;IAAA,IAA5D;MAAEC,SAAS;MAAEL,GAAG;MAAEC,WAAW;MAAEC;IAA0B,CAAC,GAAAE,IAAA;IACpE,IAAI,CAACE,EAAE,GAAGD,SAAS;IACnB,IAAI,CAACL,GAAG,GAAGA,GAAG;IACd,IAAI,CAACC,WAAW,GAAGA,WAAW;IAC9B,IAAI,CAACC,KAAK,GAAGA,KAAK;EACpB;;EAEA;AACF;AACA;EACE,MAAMK,WAAWA,CAACC,QAAgB,EAAoC;IACpE,IAAIC,IAAI,GAAGD,QAAQ;IACnB,IAAIC,IAAI,CAACC,UAAU,CAAC,SAAS,CAAC,EAAED,IAAI,GAAGA,IAAI,CAACE,KAAK,CAAC,CAAC,CAAC;IACpD,OAAOpB,OAAO,CAACgB,WAAW,CAAC,IAAI,CAACD,EAAE,EAAEG,IAAI,CAAC;EAC3C;;EAEA;AACF;AACA;EACE,MAAMG,WAAWA,CACfJ,QAAgB,EAChBK,OAA+B,EACd;IACjB,OAAOtB,OAAO,CAACqB,WAAW,CAAC,IAAI,CAACN,EAAE,EAAEE,QAAQ,EAAE,CAAAK,OAAO,aAAPA,OAAO,uBAAPA,OAAO,CAAEC,SAAS,KAAI,CAAC,CAAC,CAAC;EACzE;EAEA,MAAMC,gBAAgBA,CACpBC,QAAuC,EACvCC,QAAiB,EACA;IAAA,IAAAC,WAAA;IACjB,MAAMC,IAAI,GAAGzB,UAAU,CAACsB,QAAQ,CAAC;IACjC,IAAII,IAAI,GAAG,CAAAF,WAAA,OAAI,CAAChB,KAAK,cAAAgB,WAAA,eAAVA,WAAA,CAAYG,uBAAuB,GAAGC,SAAS,GAAG,QAAQ;IACrE,IAAIL,QAAQ,EAAEG,IAAI,GAAGH,QAAQ,EAAC;IAC9B,OAAO1B,OAAO,CAACwB,gBAAgB,CAAC,IAAI,CAACT,EAAE,EAAEa,IAAI,EAAEC,IAAI,CAAC;EACtD;EAEA,MAAMG,UAAUA,CACdC,MAAwB,EACxBC,QAAoC,EACH;IACjC,IAAIC,WAAW,GAAGF,MAAM,CAACG,MAAM;IAC/B,IAAIH,MAAM,CAACR,QAAQ,EAAE;MACnB;MACAU,WAAW,GAAG,MAAM,IAAI,CAACX,gBAAgB,CAACS,MAAM,CAACR,QAAQ,EAAEQ,MAAM,CAACI,YAAY,CAAC;IACjF;IAEA,IAAIC,aAAkB,GACpBJ,QAAQ,IACR5B,YAAY,CAACiC,WAAW,CAAClC,cAAc,EAAGmC,GAAqB,IAAK;MAClE,MAAM;QAAE1B,SAAS;QAAE2B;MAAY,CAAC,GAAGD,GAAG;MACtC,IAAI1B,SAAS,KAAK,IAAI,CAACC,EAAE,EAAE;MAC3BmB,QAAQ,CAACO,WAAW,CAAC;IACvB,CAAC,CAAC;IAEJ,IAAI,CAACN,WAAW,EAAE,MAAM,IAAIO,KAAK,CAAC,oBAAoB,CAAC;IACvD,MAAMC,OAAO,GAAG3C,OAAO,CAACgC,UAAU,CAAC,IAAI,CAACjB,EAAE,EAAE;MAC1C,GAAGkB,MAAM;MACTG,MAAM,EAAED,WAAW;MACnBS,uBAAuB,EAAE,CAAC,CAACV;IAC7B,CAAC,CAAC;IACF,OAAOS,OAAO,CACXE,IAAI,CAAEC,gBAAgB,IAAK;MAAA,IAAAC,cAAA;MAC1B,CAAAA,cAAA,GAAAT,aAAa,cAAAS,cAAA,uBAAbA,cAAA,CAAeC,MAAM,CAAC,CAAC;MACvBV,aAAa,GAAG,IAAI;MACpB,OAAOQ,gBAAgB;IACzB,CAAC,CAAC,CACDG,KAAK,CAAEC,GAAQ,IAAK;MAAA,IAAAC,eAAA;MACnB,CAAAA,eAAA,GAAAb,aAAa,cAAAa,eAAA,uBAAbA,eAAA,CAAeH,MAAM,CAAC,CAAC;MACvBV,aAAa,GAAG,IAAI;MACpB,MAAMY,GAAG;IACX,CAAC,CAAC;EACN;EAEAE,cAAcA,CAAA,EAAkB;IAC9B,OAAOpD,OAAO,CAACoD,cAAc,CAAC,IAAI,CAACrC,EAAE,CAAC;EACxC;EAEAsC,aAAaA,CAACC,IAAY,EAAiC;IACzD,OAAOtD,OAAO,CAACqD,aAAa,CAAC,IAAI,CAACtC,EAAE,EAAEuC,IAAI,CAAC;EAC7C;EAEAC,YAAYA,CAACD,IAAY,EAAwB;IAC/C,OAAOtD,OAAO,CAACuD,YAAY,CAAC,IAAI,CAACxC,EAAE,EAAEuC,IAAI,CAAC;EAC5C;EAEAE,UAAUA,CAACC,MAAgB,EAAmB;IAC5C,OAAOzD,OAAO,CAACwD,UAAU,CAAC,IAAI,CAACzC,EAAE,EAAE0C,MAAM,CAAC;EAC5C;EAEAC,SAASA,CACPJ,IAAY,EACZrB,MAAwB,EACQ;IAChC,OAAOjC,OAAO,CAAC0D,SAAS,CAAC,IAAI,CAAC3C,EAAE,EAAEuC,IAAI,EAAErB,MAAM,IAAI,CAAC,CAAC,CAAC;EACvD;EAEA,MAAM0B,KAAKA,CACTC,EAAU,EACVC,EAAU,EACVC,EAAU,EACVC,EAAU,EACY;IACtB,MAAMC,MAAM,GAAG,MAAMhE,OAAO,CAAC2D,KAAK,CAAC,IAAI,CAAC5C,EAAE,EAAE6C,EAAE,EAAEC,EAAE,EAAEC,EAAE,EAAEC,EAAE,CAAC;IAC3D,MAAM,CAACE,SAAS,EAAEC,SAAS,EAAEC,YAAY,EAAEC,KAAK,EAAEC,KAAK,EAAEC,KAAK,EAAEC,KAAK,CAAC,GACpEC,IAAI,CAACC,KAAK,CAACT,MAAM,CAAC;IACpB,OAAO;MACLC,SAAS;MACTC,SAAS;MACTC,YAAY;MACZC,KAAK;MACLC,KAAK;MACLC,KAAK;MACLC;IACF,CAAC;EACH;EAEA,MAAMG,OAAOA,CAAA,EAAkB;IAC7B,OAAO1E,OAAO,CAAC2E,cAAc,CAAC,IAAI,CAAC5D,EAAE,CAAC;EACxC;AACF;AAEA,OAAO,eAAe6D,cAAcA,CAAA,EAAgC;EAClE,OAAO5E,OAAO,CAAC4E,cAAc,CAAC,CAAC;AACjC;AAEA,OAAO,eAAeC,eAAeA,CAACC,KAAa,EAAiB;EAClE,OAAO9E,OAAO,CAAC6E,eAAe,CAACC,KAAK,CAAC;AACvC;AAEA,IAAIC,gBAAgB,GAAG,CAAC;AACxB,MAAMC,eAAe,GAAGA,CAAA,KACtBC,OAAO,CAACC,GAAG,CAACC,QAAQ,KAAK,MAAM,GAAG,CAAC,GAAGC,IAAI,CAACC,KAAK,CAACD,IAAI,CAACE,MAAM,CAAC,CAAC,GAAG,MAAM,CAAC;AAE1E,MAAMC,aAAa,GAAG;AACpB;AACA,uBAAuB,EACvB,2BAA2B,EAC3B,uBAAuB,CACxB;AACD,OAAO,eAAeC,kBAAkBA,CAAC7E,KAAa,EAAmB;EACvE,IAAIO,IAAI,GAAGP,KAAK;EAChB,IAAIO,IAAI,CAACC,UAAU,CAAC,SAAS,CAAC,EAAED,IAAI,GAAGA,IAAI,CAACE,KAAK,CAAC,CAAC,CAAC;EACpD,OAAOpB,OAAO,CAACyF,SAAS,CAACvE,IAAI,EAAEqE,aAAa,CAAC;AAC/C;AAEA,MAAMG,WAAW,GAAG;EAClB;EACAC,IAAI,EAAE,CAAC;EACPC,IAAI,EAAE,CAAC;EACPC,GAAG,EAAE,CAAC;EACNC,IAAI,EAAE,CAAC;EACPC,IAAI,EAAE;AACR,CAAC;AAED,OAAO,eAAeC,SAASA,CAAAC,KAAA,EAQ7BC,UAAuC,EAChB;EAAA,IAAAC,SAAA,EAAAC,sBAAA;EAAA,IARvB;IACEzF,KAAK;IACL0F,cAAc,EAAEC,YAAY;IAC5BC,YAAY,EAAEC,WAAW;IACzBC,IAAI;IACJ,GAAGC;EACU,CAAC,GAAAT,KAAA;EAGhB,IAAI/E,IAAI,GAAGP,KAAK;EAChB,IAAIO,IAAI,CAACC,UAAU,CAAC,SAAS,CAAC,EAAED,IAAI,GAAGA,IAAI,CAACE,KAAK,CAAC,CAAC,CAAC;EAEpD,IAAIuF,QAAQ,GAAGF,IAAI;EACnB,KAAAN,SAAA,GAAIQ,QAAQ,cAAAR,SAAA,eAARA,SAAA,CAAUhF,UAAU,CAAC,SAAS,CAAC,EAAEwF,QAAQ,GAAGA,QAAQ,CAACvF,KAAK,CAAC,CAAC,CAAC;EAEjE,MAAMN,SAAS,GAAGiE,gBAAgB,GAAGC,eAAe,CAAC,CAAC;EACtDD,gBAAgB,IAAI,CAAC;EAErB,IAAI6B,sBAA2B,GAAG,IAAI;EACtC,IAAIV,UAAU,EAAE;IACdU,sBAAsB,GAAGtG,YAAY,CAACiC,WAAW,CAC/CnC,8BAA8B,EAC7BoC,GAA4C,IAAK;MAChD,IAAIA,GAAG,CAAC1B,SAAS,KAAKA,SAAS,EAAE;MACjCoF,UAAU,CAAC1D,GAAG,CAACqE,QAAQ,CAAC;IAC1B,CACF,CAAC;EACH;EAEA,MAAMC,QAAQ,GAAGpB,WAAW,CAACc,WAAW,CAA6B;EACrE,MAAM;IACJ/F,GAAG;IACHC,WAAW;IACXC,KAAK,EAAEoG;EACT,CAAC,GAAG,MAAM/G,OAAO,CAACgH,WAAW,CAAClG,SAAS,EAAE;IACvCH,KAAK,EAAEO,IAAI;IACXmF,cAAc,EAAE,CAAC,CAACC,YAAY;IAC9BW,qBAAqB,EAAE,CAAC,CAACf,UAAU;IACnCK,YAAY,EAAEO,QAAQ;IACtBL,IAAI,EAAEE,QAAQ;IACd,GAAGD;EACL,CAAC,CAAC,CAACzD,KAAK,CAAEC,GAAQ,IAAK;IAAA,IAAAgE,qBAAA;IACrB,CAAAA,qBAAA,GAAAN,sBAAsB,cAAAM,qBAAA,uBAAtBA,qBAAA,CAAwBlE,MAAM,CAAC,CAAC;IAChC,MAAME,GAAG;EACX,CAAC,CAAC;EACF,CAAAkD,sBAAA,GAAAQ,sBAAsB,cAAAR,sBAAA,uBAAtBA,sBAAA,CAAwBpD,MAAM,CAAC,CAAC;EAChC,OAAO,IAAIxC,YAAY,CAAC;IAAEM,SAAS;IAAEL,GAAG;IAAEC,WAAW;IAAEC,KAAK,EAAEoG;EAAa,CAAC,CAAC;AAC/E;AAEA,OAAO,eAAeI,eAAeA,CAAA,EAAkB;EACrD,OAAOnH,OAAO,CAACoH,kBAAkB,CAAC,CAAC;AACrC"}
@@ -1,12 +1,27 @@
1
1
  import type { TurboModule } from 'react-native';
2
+ export type NativeEmbeddingParams = {
3
+ embd_normalize?: number;
4
+ };
2
5
  export type NativeContextParams = {
3
6
  model: string;
4
7
  is_model_asset?: boolean;
5
- embedding?: boolean;
8
+ use_progress_callback?: boolean;
6
9
  n_ctx?: number;
7
10
  n_batch?: number;
8
11
  n_threads?: number;
9
12
  n_gpu_layers?: number;
13
+ /**
14
+ * Enable flash attention, only recommended in GPU device (Experimental in llama.cpp)
15
+ */
16
+ flash_attn?: boolean;
17
+ /**
18
+ * KV cache data type for the K (Experimental in llama.cpp)
19
+ */
20
+ cache_type_k?: string;
21
+ /**
22
+ * KV cache data type for the V (Experimental in llama.cpp)
23
+ */
24
+ cache_type_v?: string;
10
25
  use_mlock?: boolean;
11
26
  use_mmap?: boolean;
12
27
  vocab_only?: boolean;
@@ -14,6 +29,9 @@ export type NativeContextParams = {
14
29
  lora_scaled?: number;
15
30
  rope_freq_base?: number;
16
31
  rope_freq_scale?: number;
32
+ pooling_type?: number;
33
+ embedding?: boolean;
34
+ embd_normalize?: number;
17
35
  };
18
36
  export type NativeCompletionParams = {
19
37
  prompt: string;
@@ -25,9 +43,8 @@ export type NativeCompletionParams = {
25
43
  top_k?: number;
26
44
  top_p?: number;
27
45
  min_p?: number;
28
- xtc_t?: number;
29
- xtc_p?: number;
30
- tfs_z?: number;
46
+ xtc_threshold?: number;
47
+ xtc_probability?: number;
31
48
  typical_p?: number;
32
49
  temperature?: number;
33
50
  penalty_last_n?: number;
@@ -39,6 +56,11 @@ export type NativeCompletionParams = {
39
56
  mirostat_eta?: number;
40
57
  penalize_nl?: boolean;
41
58
  seed?: number;
59
+ dry_multiplier?: number;
60
+ dry_base?: number;
61
+ dry_allowed_length?: number;
62
+ dry_penalty_last_n?: number;
63
+ dry_sequence_breakers?: Array<string>;
42
64
  ignore_eos?: boolean;
43
65
  logit_bias?: Array<Array<number>>;
44
66
  emit_partial_completion: boolean;
@@ -101,7 +123,8 @@ export type NativeCPUFeatures = {
101
123
  };
102
124
  export interface Spec extends TurboModule {
103
125
  setContextLimit(limit: number): Promise<void>;
104
- initContext(params: NativeContextParams): Promise<NativeLlamaContext>;
126
+ modelInfo(path: string, skip?: string[]): Promise<Object>;
127
+ initContext(contextId: number, params: NativeContextParams): Promise<NativeLlamaContext>;
105
128
  loadSession(contextId: number, filepath: string): Promise<NativeSessionLoadResult>;
106
129
  saveSession(contextId: number, filepath: string, size: number): Promise<number>;
107
130
  completion(contextId: number, params: NativeCompletionParams): Promise<NativeCompletionResult>;
@@ -111,7 +134,7 @@ export interface Spec extends TurboModule {
111
134
  getCpuFeatures(): Promise<NativeCPUFeatures>;
112
135
  getFormattedChat(contextId: number, messages: NativeLlamaChatMessage[], chatTemplate?: string): Promise<string>;
113
136
  detokenize(contextId: number, tokens: number[]): Promise<string>;
114
- embedding(contextId: number, text: string): Promise<NativeEmbeddingResult>;
137
+ embedding(contextId: number, text: string, params: NativeEmbeddingParams): Promise<NativeEmbeddingResult>;
115
138
  bench(contextId: number, pp: number, tg: number, pl: number, nr: number): Promise<string>;
116
139
  releaseContext(contextId: number): Promise<void>;
117
140
  releaseAllContexts(): Promise<void>;
@@ -1 +1 @@
1
- {"version":3,"file":"NativeRNLlama.d.ts","sourceRoot":"","sources":["../../src/NativeRNLlama.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,cAAc,CAAA;AAG/C,MAAM,MAAM,mBAAmB,GAAG;IAChC,KAAK,EAAE,MAAM,CAAA;IACb,cAAc,CAAC,EAAE,OAAO,CAAA;IAExB,SAAS,CAAC,EAAE,OAAO,CAAA;IAEnB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,OAAO,CAAC,EAAE,MAAM,CAAA;IAEhB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,YAAY,CAAC,EAAE,MAAM,CAAA;IAErB,SAAS,CAAC,EAAE,OAAO,CAAA;IACnB,QAAQ,CAAC,EAAE,OAAO,CAAA;IAClB,UAAU,CAAC,EAAE,OAAO,CAAA;IAEpB,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,WAAW,CAAC,EAAE,MAAM,CAAA;IAEpB,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,eAAe,CAAC,EAAE,MAAM,CAAA;CACzB,CAAA;AAED,MAAM,MAAM,sBAAsB,GAAG;IACnC,MAAM,EAAE,MAAM,CAAA;IACd,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,IAAI,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAA;IAEpB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,WAAW,CAAC,EAAE,OAAO,CAAA;IACrB,IAAI,CAAC,EAAE,MAAM,CAAA;IAEb,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB,UAAU,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAA;IAEjC,uBAAuB,EAAE,OAAO,CAAA;CACjC,CAAA;AAED,MAAM,MAAM,6BAA6B,GAAG;IAC1C,OAAO,EAAE,MAAM,CAAA;IACf,IAAI,EAAE,MAAM,CAAA;CACb,CAAA;AAED,MAAM,MAAM,yBAAyB,GAAG;IACtC,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,EAAE,KAAK,CAAC,6BAA6B,CAAC,CAAA;CAC5C,CAAA;AAED,MAAM,MAAM,6BAA6B,GAAG;IAC1C,QAAQ,EAAE,MAAM,CAAA;IAChB,SAAS,EAAE,MAAM,CAAA;IACjB,mBAAmB,EAAE,MAAM,CAAA;IAC3B,iBAAiB,EAAE,MAAM,CAAA;IACzB,WAAW,EAAE,MAAM,CAAA;IACnB,YAAY,EAAE,MAAM,CAAA;IACpB,sBAAsB,EAAE,MAAM,CAAA;IAC9B,oBAAoB,EAAE,MAAM,CAAA;CAC7B,CAAA;AAED,MAAM,MAAM,sBAAsB,GAAG;IACnC,IAAI,EAAE,MAAM,CAAA;IAEZ,gBAAgB,EAAE,MAAM,CAAA;IACxB,gBAAgB,EAAE,MAAM,CAAA;IACxB,SAAS,EAAE,OAAO,CAAA;IAClB,WAAW,EAAE,OAAO,CAAA;IACpB,YAAY,EAAE,MAAM,CAAA;IACpB,aAAa,EAAE,MAAM,CAAA;IACrB,aAAa,EAAE,MAAM,CAAA;IACrB,aAAa,EAAE,MAAM,CAAA;IACrB,OAAO,EAAE,6BAA6B,CAAA;IAEtC,wBAAwB,CAAC,EAAE,KAAK,CAAC,yBAAyB,CAAC,CAAA;CAC5D,CAAA;AAED,MAAM,MAAM,oBAAoB,GAAG;IACjC,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC,CAAA;CACtB,CAAA;AAED,MAAM,MAAM,qBAAqB,GAAG;IAClC,SAAS,EAAE,KAAK,CAAC,MAAM,CAAC,CAAA;CACzB,CAAA;AAED,MAAM,MAAM,kBAAkB,GAAG;IAC/B,SAAS,EAAE,MAAM,CAAA;IACjB,GAAG,EAAE,OAAO,CAAA;IACZ,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,MAAM,CAAA;CACd,CAAA;AAED,MAAM,MAAM,uBAAuB,GAAG;IACpC,aAAa,EAAE,MAAM,CAAA;IACrB,MAAM,EAAE,MAAM,CAAA;CACf,CAAA;AAED,MAAM,MAAM,sBAAsB,GAAG;IACnC,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,MAAM,CAAA;CAChB,CAAA;AAED,MAAM,MAAM,iBAAiB,GAAG;IAC9B,KAAK,EAAE,OAAO,CAAA;IACd,IAAI,EAAE,OAAO,CAAA;IACb,OAAO,EAAE,OAAO,CAAA;CACjB,CAAA;AAED,MAAM,WAAW,IAAK,SAAQ,WAAW;IACvC,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAC7C,WAAW,CAAC,MAAM,EAAE,mBAAmB,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAA;IAErE,WAAW,CACT,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,uBAAuB,CAAC,CAAA;IACnC,WAAW,CACT,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,EAChB,IAAI,EAAE,MAAM,GACX,OAAO,CAAC,MAAM,CAAC,CAAA;IAClB,UAAU,CACR,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,sBAAsB,GAC7B,OAAO,CAAC,sBAAsB,CAAC,CAAA;IAClC,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAChD,aAAa,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAAA;IAC7E,YAAY,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,oBAAoB,CAAA;IACnE,cAAc,IAAK,OAAO,CAAC,iBAAiB,CAAC,CAAA;IAC7C,gBAAgB,CACd,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,sBAAsB,EAAE,EAClC,YAAY,CAAC,EAAE,MAAM,GACpB,OAAO,CAAC,MAAM,CAAC,CAAA;IAClB,UAAU,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAA;IAChE,SAAS,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAAA;IAC1E,KAAK,CACH,SAAS,EAAE,MAAM,EACjB,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,GACT,OAAO,CAAC,MAAM,CAAC,CAAA;IAElB,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAEhD,kBAAkB,IAAI,OAAO,CAAC,IAAI,CAAC,CAAA;CACpC;;AAED,wBAA+D"}
1
+ {"version":3,"file":"NativeRNLlama.d.ts","sourceRoot":"","sources":["../../src/NativeRNLlama.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,cAAc,CAAA;AAG/C,MAAM,MAAM,qBAAqB,GAAG;IAClC,cAAc,CAAC,EAAE,MAAM,CAAA;CACxB,CAAA;AAED,MAAM,MAAM,mBAAmB,GAAG;IAChC,KAAK,EAAE,MAAM,CAAA;IACb,cAAc,CAAC,EAAE,OAAO,CAAA;IACxB,qBAAqB,CAAC,EAAE,OAAO,CAAA;IAE/B,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,OAAO,CAAC,EAAE,MAAM,CAAA;IAEhB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,YAAY,CAAC,EAAE,MAAM,CAAA;IAErB;;OAEG;IACH,UAAU,CAAC,EAAE,OAAO,CAAA;IAEpB;;OAEG;IACH,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB;;OAEG;IACH,YAAY,CAAC,EAAE,MAAM,CAAA;IAErB,SAAS,CAAC,EAAE,OAAO,CAAA;IACnB,QAAQ,CAAC,EAAE,OAAO,CAAA;IAClB,UAAU,CAAC,EAAE,OAAO,CAAA;IAEpB,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,WAAW,CAAC,EAAE,MAAM,CAAA;IAEpB,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,eAAe,CAAC,EAAE,MAAM,CAAA;IAExB,YAAY,CAAC,EAAE,MAAM,CAAA;IAGrB,SAAS,CAAC,EAAE,OAAO,CAAA;IACnB,cAAc,CAAC,EAAE,MAAM,CAAA;CACxB,CAAA;AAED,MAAM,MAAM,sBAAsB,GAAG;IACnC,MAAM,EAAE,MAAM,CAAA;IACd,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,IAAI,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAA;IAEpB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,aAAa,CAAC,EAAE,MAAM,CAAA;IACtB,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,WAAW,CAAC,EAAE,OAAO,CAAA;IACrB,IAAI,CAAC,EAAE,MAAM,CAAA;IAEb,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,kBAAkB,CAAC,EAAE,MAAM,CAAA;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAA;IAC3B,qBAAqB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAA;IAErC,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB,UAAU,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAA;IAEjC,uBAAuB,EAAE,OAAO,CAAA;CACjC,CAAA;AAED,MAAM,MAAM,6BAA6B,GAAG;IAC1C,OAAO,EAAE,MAAM,CAAA;IACf,IAAI,EAAE,MAAM,CAAA;CACb,CAAA;AAED,MAAM,MAAM,yBAAyB,GAAG;IACtC,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,EAAE,KAAK,CAAC,6BAA6B,CAAC,CAAA;CAC5C,CAAA;AAED,MAAM,MAAM,6BAA6B,GAAG;IAC1C,QAAQ,EAAE,MAAM,CAAA;IAChB,SAAS,EAAE,MAAM,CAAA;IACjB,mBAAmB,EAAE,MAAM,CAAA;IAC3B,iBAAiB,EAAE,MAAM,CAAA;IACzB,WAAW,EAAE,MAAM,CAAA;IACnB,YAAY,EAAE,MAAM,CAAA;IACpB,sBAAsB,EAAE,MAAM,CAAA;IAC9B,oBAAoB,EAAE,MAAM,CAAA;CAC7B,CAAA;AAED,MAAM,MAAM,sBAAsB,GAAG;IACnC,IAAI,EAAE,MAAM,CAAA;IAEZ,gBAAgB,EAAE,MAAM,CAAA;IACxB,gBAAgB,EAAE,MAAM,CAAA;IACxB,SAAS,EAAE,OAAO,CAAA;IAClB,WAAW,EAAE,OAAO,CAAA;IACpB,YAAY,EAAE,MAAM,CAAA;IACpB,aAAa,EAAE,MAAM,CAAA;IACrB,aAAa,EAAE,MAAM,CAAA;IACrB,aAAa,EAAE,MAAM,CAAA;IACrB,OAAO,EAAE,6BAA6B,CAAA;IAEtC,wBAAwB,CAAC,EAAE,KAAK,CAAC,yBAAyB,CAAC,CAAA;CAC5D,CAAA;AAED,MAAM,MAAM,oBAAoB,GAAG;IACjC,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC,CAAA;CACtB,CAAA;AAED,MAAM,MAAM,qBAAqB,GAAG;IAClC,SAAS,EAAE,KAAK,CAAC,MAAM,CAAC,CAAA;CACzB,CAAA;AAED,MAAM,MAAM,kBAAkB,GAAG;IAC/B,SAAS,EAAE,MAAM,CAAA;IACjB,GAAG,EAAE,OAAO,CAAA;IACZ,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,MAAM,CAAA;CACd,CAAA;AAED,MAAM,MAAM,uBAAuB,GAAG;IACpC,aAAa,EAAE,MAAM,CAAA;IACrB,MAAM,EAAE,MAAM,CAAA;CACf,CAAA;AAED,MAAM,MAAM,sBAAsB,GAAG;IACnC,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,MAAM,CAAA;CAChB,CAAA;AAED,MAAM,MAAM,iBAAiB,GAAG;IAC9B,KAAK,EAAE,OAAO,CAAA;IACd,IAAI,EAAE,OAAO,CAAA;IACb,OAAO,EAAE,OAAO,CAAA;CACjB,CAAA;AAED,MAAM,WAAW,IAAK,SAAQ,WAAW;IACvC,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAE7C,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAA;IACzD,WAAW,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,mBAAmB,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAA;IAExF,WAAW,CACT,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,uBAAuB,CAAC,CAAA;IACnC,WAAW,CACT,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,EAChB,IAAI,EAAE,MAAM,GACX,OAAO,CAAC,MAAM,CAAC,CAAA;IAClB,UAAU,CACR,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,sBAAsB,GAC7B,OAAO,CAAC,sBAAsB,CAAC,CAAA;IAClC,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAChD,aAAa,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAAA;IAC7E,YAAY,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,oBAAoB,CAAA;IACnE,cAAc,IAAK,OAAO,CAAC,iBAAiB,CAAC,CAAA;IAC7C,gBAAgB,CACd,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,sBAAsB,EAAE,EAClC,YAAY,CAAC,EAAE,MAAM,GACpB,OAAO,CAAC,MAAM,CAAC,CAAA;IAClB,UAAU,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAA;IAChE,SAAS,CACP,SAAS,EAAE,MAAM,EACjB,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC,CAAA;IACjC,KAAK,CACH,SAAS,EAAE,MAAM,EACjB,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,GACT,OAAO,CAAC,MAAM,CAAC,CAAA;IAElB,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAEhD,kBAAkB,IAAI,OAAO,CAAC,IAAI,CAAC,CAAA;CACpC;;AAED,wBAA+D"}
@@ -1,4 +1,4 @@
1
- import type { NativeContextParams, NativeLlamaContext, NativeCompletionParams, NativeCompletionTokenProb, NativeCompletionResult, NativeTokenizeResult, NativeEmbeddingResult, NativeSessionLoadResult, NativeCPUFeatures } from './NativeRNLlama';
1
+ import type { NativeContextParams, NativeLlamaContext, NativeCompletionParams, NativeCompletionTokenProb, NativeCompletionResult, NativeTokenizeResult, NativeEmbeddingResult, NativeSessionLoadResult, NativeCPUFeatures, NativeEmbeddingParams } from './NativeRNLlama';
2
2
  import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar';
3
3
  import type { RNLlamaOAICompatibleMessage } from './chat';
4
4
  export { SchemaGrammarConverter, convertJsonSchemaToGrammar };
@@ -6,10 +6,16 @@ export type TokenData = {
6
6
  token: string;
7
7
  completion_probabilities?: Array<NativeCompletionTokenProb>;
8
8
  };
9
- export type ContextParams = NativeContextParams;
9
+ export type ContextParams = Omit<NativeContextParams, 'cache_type_k' | 'cache_type_v' | 'pooling_type'> & {
10
+ cache_type_k?: 'f16' | 'f32' | 'q8_0' | 'q4_0' | 'q4_1' | 'iq4_nl' | 'q5_0' | 'q5_1';
11
+ cache_type_v?: 'f16' | 'f32' | 'q8_0' | 'q4_0' | 'q4_1' | 'iq4_nl' | 'q5_0' | 'q5_1';
12
+ pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank';
13
+ };
14
+ export type EmbeddingParams = NativeEmbeddingParams;
10
15
  export type CompletionParams = Omit<NativeCompletionParams, 'emit_partial_completion' | 'prompt'> & {
11
16
  prompt?: string;
12
17
  messages?: RNLlamaOAICompatibleMessage[];
18
+ chatTemplate?: string;
13
19
  };
14
20
  export type BenchResult = {
15
21
  modelDesc: string;
@@ -38,18 +44,19 @@ export declare class LlamaContext {
38
44
  saveSession(filepath: string, options?: {
39
45
  tokenSize: number;
40
46
  }): Promise<number>;
41
- getFormattedChat(messages: RNLlamaOAICompatibleMessage[]): Promise<string>;
47
+ getFormattedChat(messages: RNLlamaOAICompatibleMessage[], template?: string): Promise<string>;
42
48
  completion(params: CompletionParams, callback?: (data: TokenData) => void): Promise<NativeCompletionResult>;
43
49
  stopCompletion(): Promise<void>;
44
50
  tokenizeAsync(text: string): Promise<NativeTokenizeResult>;
45
51
  tokenizeSync(text: string): NativeTokenizeResult;
46
52
  detokenize(tokens: number[]): Promise<string>;
47
- embedding(text: string): Promise<NativeEmbeddingResult>;
53
+ embedding(text: string, params?: EmbeddingParams): Promise<NativeEmbeddingResult>;
48
54
  bench(pp: number, tg: number, pl: number, nr: number): Promise<BenchResult>;
49
55
  release(): Promise<void>;
50
56
  }
51
57
  export declare function getCpuFeatures(): Promise<NativeCPUFeatures>;
52
58
  export declare function setContextLimit(limit: number): Promise<void>;
53
- export declare function initLlama({ model, is_model_asset: isModelAsset, ...rest }: ContextParams, progressCallback?: (progress: number) => void): Promise<LlamaContext>;
59
+ export declare function loadLlamaModelInfo(model: string): Promise<Object>;
60
+ export declare function initLlama({ model, is_model_asset: isModelAsset, pooling_type: poolingType, lora, ...rest }: ContextParams, onProgress?: (progress: number) => void): Promise<LlamaContext>;
54
61
  export declare function releaseAllLlama(): Promise<void>;
55
62
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,mBAAmB,EACnB,kBAAkB,EAClB,sBAAsB,EACtB,yBAAyB,EACzB,sBAAsB,EACtB,oBAAoB,EACpB,qBAAqB,EACrB,uBAAuB,EACvB,iBAAiB,EAClB,MAAM,iBAAiB,CAAA;AACxB,OAAO,EAAE,sBAAsB,EAAE,0BAA0B,EAAE,MAAM,WAAW,CAAA;AAC9E,OAAO,KAAK,EAAE,2BAA2B,EAAE,MAAM,QAAQ,CAAA;AAGzD,OAAO,EAAE,sBAAsB,EAAE,0BAA0B,EAAE,CAAA;AAe7D,MAAM,MAAM,SAAS,GAAG;IACtB,KAAK,EAAE,MAAM,CAAA;IACb,wBAAwB,CAAC,EAAE,KAAK,CAAC,yBAAyB,CAAC,CAAA;CAC5D,CAAA;AAOD,MAAM,MAAM,aAAa,GAAG,mBAAmB,CAAA;AAE/C,MAAM,MAAM,gBAAgB,GAAG,IAAI,CACjC,sBAAsB,EACtB,yBAAyB,GAAG,QAAQ,CACrC,GAAG;IACF,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,QAAQ,CAAC,EAAE,2BAA2B,EAAE,CAAA;CACzC,CAAA;AAED,MAAM,MAAM,WAAW,GAAG;IACxB,SAAS,EAAE,MAAM,CAAA;IACjB,SAAS,EAAE,MAAM,CAAA;IACjB,YAAY,EAAE,MAAM,CAAA;IACpB,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;CACd,CAAA;AAED,qBAAa,YAAY;IACvB,EAAE,EAAE,MAAM,CAAA;IAEV,GAAG,EAAE,OAAO,CAAQ;IAEpB,WAAW,EAAE,MAAM,CAAK;IAExB,KAAK,EAAE;QACL,uBAAuB,CAAC,EAAE,OAAO,CAAA;KAClC,CAAK;gBAEM,EAAE,SAAS,EAAE,GAAG,EAAE,WAAW,EAAE,KAAK,EAAE,EAAE,kBAAkB;IAOtE;;OAEG;IACG,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,uBAAuB,CAAC;IAMrE;;OAEG;IACG,WAAW,CACf,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE;QAAE,SAAS,EAAE,MAAM,CAAA;KAAE,GAC9B,OAAO,CAAC,MAAM,CAAC;IAIZ,gBAAgB,CACpB,QAAQ,EAAE,2BAA2B,EAAE,GACtC,OAAO,CAAC,MAAM,CAAC;IASZ,UAAU,CACd,MAAM,EAAE,gBAAgB,EACxB,QAAQ,CAAC,EAAE,CAAC,IAAI,EAAE,SAAS,KAAK,IAAI,GACnC,OAAO,CAAC,sBAAsB,CAAC;IAkClC,cAAc,IAAI,OAAO,CAAC,IAAI,CAAC;IAI/B,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC;IAI1D,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,oBAAoB;IAIhD,UAAU,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAI7C,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,qBAAqB,CAAC;IAIjD,KAAK,CACT,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,GACT,OAAO,CAAC,WAAW,CAAC;IAejB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B;AAED,wBAAsB,cAAc,IAAK,OAAO,CAAC,iBAAiB,CAAC,CAElE;AAED,wBAAsB,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAElE;AAED,wBAAsB,SAAS,CAAC,EAC5B,KAAK,EACL,cAAc,EAAE,YAAY,EAC5B,GAAG,IAAI,EACR,EAAE,aAAa,EAChB,gBAAgB,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,GAC5C,OAAO,CAAC,YAAY,CAAC,CAwBvB;AAED,wBAAsB,eAAe,IAAI,OAAO,CAAC,IAAI,CAAC,CAErD"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,mBAAmB,EACnB,kBAAkB,EAClB,sBAAsB,EACtB,yBAAyB,EACzB,sBAAsB,EACtB,oBAAoB,EACpB,qBAAqB,EACrB,uBAAuB,EACvB,iBAAiB,EACjB,qBAAqB,EACtB,MAAM,iBAAiB,CAAA;AACxB,OAAO,EAAE,sBAAsB,EAAE,0BAA0B,EAAE,MAAM,WAAW,CAAA;AAC9E,OAAO,KAAK,EAAE,2BAA2B,EAAE,MAAM,QAAQ,CAAA;AAGzD,OAAO,EAAE,sBAAsB,EAAE,0BAA0B,EAAE,CAAA;AAc7D,MAAM,MAAM,SAAS,GAAG;IACtB,KAAK,EAAE,MAAM,CAAA;IACb,wBAAwB,CAAC,EAAE,KAAK,CAAC,yBAAyB,CAAC,CAAA;CAC5D,CAAA;AAOD,MAAM,MAAM,aAAa,GAAG,IAAI,CAC9B,mBAAmB,EACnB,cAAc,GAAG,cAAc,GAAI,cAAc,CAClD,GAAG;IACF,YAAY,CAAC,EAAE,KAAK,GAAG,KAAK,GAAG,MAAM,GAAG,MAAM,GAAG,MAAM,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,CAAA;IACpF,YAAY,CAAC,EAAE,KAAK,GAAG,KAAK,GAAG,MAAM,GAAG,MAAM,GAAG,MAAM,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,CAAA;IACpF,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,KAAK,GAAG,MAAM,GAAG,MAAM,CAAA;CACzD,CAAA;AAED,MAAM,MAAM,eAAe,GAAG,qBAAqB,CAAA;AAEnD,MAAM,MAAM,gBAAgB,GAAG,IAAI,CACjC,sBAAsB,EACtB,yBAAyB,GAAG,QAAQ,CACrC,GAAG;IACF,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,QAAQ,CAAC,EAAE,2BAA2B,EAAE,CAAA;IACxC,YAAY,CAAC,EAAE,MAAM,CAAA;CACtB,CAAA;AAED,MAAM,MAAM,WAAW,GAAG;IACxB,SAAS,EAAE,MAAM,CAAA;IACjB,SAAS,EAAE,MAAM,CAAA;IACjB,YAAY,EAAE,MAAM,CAAA;IACpB,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;CACd,CAAA;AAED,qBAAa,YAAY;IACvB,EAAE,EAAE,MAAM,CAAA;IAEV,GAAG,EAAE,OAAO,CAAQ;IAEpB,WAAW,EAAE,MAAM,CAAK;IAExB,KAAK,EAAE;QACL,uBAAuB,CAAC,EAAE,OAAO,CAAA;KAClC,CAAK;gBAEM,EAAE,SAAS,EAAE,GAAG,EAAE,WAAW,EAAE,KAAK,EAAE,EAAE,kBAAkB;IAOtE;;OAEG;IACG,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,uBAAuB,CAAC;IAMrE;;OAEG;IACG,WAAW,CACf,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE;QAAE,SAAS,EAAE,MAAM,CAAA;KAAE,GAC9B,OAAO,CAAC,MAAM,CAAC;IAIZ,gBAAgB,CACpB,QAAQ,EAAE,2BAA2B,EAAE,EACvC,QAAQ,CAAC,EAAE,MAAM,GAChB,OAAO,CAAC,MAAM,CAAC;IAOZ,UAAU,CACd,MAAM,EAAE,gBAAgB,EACxB,QAAQ,CAAC,EAAE,CAAC,IAAI,EAAE,SAAS,KAAK,IAAI,GACnC,OAAO,CAAC,sBAAsB,CAAC;IAkClC,cAAc,IAAI,OAAO,CAAC,IAAI,CAAC;IAI/B,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC;IAI1D,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,oBAAoB;IAIhD,UAAU,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAI7C,SAAS,CACP,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,eAAe,GACvB,OAAO,CAAC,qBAAqB,CAAC;IAI3B,KAAK,CACT,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,GACT,OAAO,CAAC,WAAW,CAAC;IAejB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B;AAED,wBAAsB,cAAc,IAAK,OAAO,CAAC,iBAAiB,CAAC,CAElE;AAED,wBAAsB,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAElE;AAYD,wBAAsB,kBAAkB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAIvE;AAWD,wBAAsB,SAAS,CAC7B,EACE,KAAK,EACL,cAAc,EAAE,YAAY,EAC5B,YAAY,EAAE,WAAW,EACzB,IAAI,EACJ,GAAG,IAAI,EACR,EAAE,aAAa,EAChB,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,GACtC,OAAO,CAAC,YAAY,CAAC,CAuCvB;AAED,wBAAsB,eAAe,IAAI,OAAO,CAAC,IAAI,CAAC,CAErD"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cui-llama.rn",
3
- "version": "1.2.4",
3
+ "version": "1.3.0",
4
4
  "description": "Fork of llama.rn for ChatterUI",
5
5
  "main": "lib/commonjs/index",
6
6
  "module": "lib/module/index",
@@ -1,11 +1,14 @@
1
1
  import type { TurboModule } from 'react-native'
2
2
  import { TurboModuleRegistry } from 'react-native'
3
3
 
4
+ export type NativeEmbeddingParams = {
5
+ embd_normalize?: number
6
+ }
7
+
4
8
  export type NativeContextParams = {
5
9
  model: string
6
10
  is_model_asset?: boolean
7
-
8
- embedding?: boolean
11
+ use_progress_callback?: boolean
9
12
 
10
13
  n_ctx?: number
11
14
  n_batch?: number
@@ -13,6 +16,20 @@ export type NativeContextParams = {
13
16
  n_threads?: number
14
17
  n_gpu_layers?: number
15
18
 
19
+ /**
20
+ * Enable flash attention, only recommended in GPU device (Experimental in llama.cpp)
21
+ */
22
+ flash_attn?: boolean
23
+
24
+ /**
25
+ * KV cache data type for the K (Experimental in llama.cpp)
26
+ */
27
+ cache_type_k?: string
28
+ /**
29
+ * KV cache data type for the V (Experimental in llama.cpp)
30
+ */
31
+ cache_type_v?: string
32
+
16
33
  use_mlock?: boolean
17
34
  use_mmap?: boolean
18
35
  vocab_only?: boolean
@@ -22,6 +39,12 @@ export type NativeContextParams = {
22
39
 
23
40
  rope_freq_base?: number
24
41
  rope_freq_scale?: number
42
+
43
+ pooling_type?: number
44
+
45
+ // Embedding params
46
+ embedding?: boolean
47
+ embd_normalize?: number
25
48
  }
26
49
 
27
50
  export type NativeCompletionParams = {
@@ -35,9 +58,8 @@ export type NativeCompletionParams = {
35
58
  top_k?: number
36
59
  top_p?: number
37
60
  min_p?: number
38
- xtc_t?: number
39
- xtc_p?: number
40
- tfs_z?: number
61
+ xtc_threshold?: number
62
+ xtc_probability?: number
41
63
  typical_p?: number
42
64
  temperature?: number // -> temp
43
65
  penalty_last_n?: number
@@ -50,6 +72,12 @@ export type NativeCompletionParams = {
50
72
  penalize_nl?: boolean
51
73
  seed?: number
52
74
 
75
+ dry_multiplier?: number
76
+ dry_base?: number
77
+ dry_allowed_length?: number
78
+ dry_penalty_last_n?: number
79
+ dry_sequence_breakers?: Array<string>
80
+
53
81
  ignore_eos?: boolean
54
82
  logit_bias?: Array<Array<number>>
55
83
 
@@ -126,7 +154,9 @@ export type NativeCPUFeatures = {
126
154
 
127
155
  export interface Spec extends TurboModule {
128
156
  setContextLimit(limit: number): Promise<void>
129
- initContext(params: NativeContextParams): Promise<NativeLlamaContext>
157
+
158
+ modelInfo(path: string, skip?: string[]): Promise<Object>
159
+ initContext(contextId: number, params: NativeContextParams): Promise<NativeLlamaContext>
130
160
 
131
161
  loadSession(
132
162
  contextId: number,
@@ -151,7 +181,11 @@ export interface Spec extends TurboModule {
151
181
  chatTemplate?: string,
152
182
  ): Promise<string>
153
183
  detokenize(contextId: number, tokens: number[]): Promise<string>
154
- embedding(contextId: number, text: string): Promise<NativeEmbeddingResult>
184
+ embedding(
185
+ contextId: number,
186
+ text: string,
187
+ params: NativeEmbeddingParams,
188
+ ): Promise<NativeEmbeddingResult>
155
189
  bench(
156
190
  contextId: number,
157
191
  pp: number,
package/src/index.ts CHANGED
@@ -11,6 +11,7 @@ import type {
11
11
  NativeEmbeddingResult,
12
12
  NativeSessionLoadResult,
13
13
  NativeCPUFeatures,
14
+ NativeEmbeddingParams,
14
15
  } from './NativeRNLlama'
15
16
  import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar'
16
17
  import type { RNLlamaOAICompatibleMessage } from './chat'
@@ -18,10 +19,9 @@ import { formatChat } from './chat'
18
19
 
19
20
  export { SchemaGrammarConverter, convertJsonSchemaToGrammar }
20
21
 
22
+ const EVENT_ON_INIT_CONTEXT_PROGRESS = '@RNLlama_onInitContextProgress'
21
23
  const EVENT_ON_TOKEN = '@RNLlama_onToken'
22
24
 
23
- const EVENT_ON_MODEL_PROGRESS = '@RNLlama_onModelProgress'
24
-
25
25
  let EventEmitter: NativeEventEmitter | DeviceEventEmitterStatic
26
26
  if (Platform.OS === 'ios') {
27
27
  // @ts-ignore
@@ -41,7 +41,16 @@ type TokenNativeEvent = {
41
41
  tokenResult: TokenData
42
42
  }
43
43
 
44
- export type ContextParams = NativeContextParams
44
+ export type ContextParams = Omit<
45
+ NativeContextParams,
46
+ 'cache_type_k' | 'cache_type_v' | 'pooling_type'
47
+ > & {
48
+ cache_type_k?: 'f16' | 'f32' | 'q8_0' | 'q4_0' | 'q4_1' | 'iq4_nl' | 'q5_0' | 'q5_1'
49
+ cache_type_v?: 'f16' | 'f32' | 'q8_0' | 'q4_0' | 'q4_1' | 'iq4_nl' | 'q5_0' | 'q5_1'
50
+ pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank'
51
+ }
52
+
53
+ export type EmbeddingParams = NativeEmbeddingParams
45
54
 
46
55
  export type CompletionParams = Omit<
47
56
  NativeCompletionParams,
@@ -49,6 +58,7 @@ export type CompletionParams = Omit<
49
58
  > & {
50
59
  prompt?: string
51
60
  messages?: RNLlamaOAICompatibleMessage[]
61
+ chatTemplate?: string
52
62
  }
53
63
 
54
64
  export type BenchResult = {
@@ -100,23 +110,22 @@ export class LlamaContext {
100
110
 
101
111
  async getFormattedChat(
102
112
  messages: RNLlamaOAICompatibleMessage[],
113
+ template?: string,
103
114
  ): Promise<string> {
104
115
  const chat = formatChat(messages)
105
- return RNLlama.getFormattedChat(
106
- this.id,
107
- chat,
108
- this.model?.isChatTemplateSupported ? undefined : 'chatml',
109
- )
116
+ let tmpl = this.model?.isChatTemplateSupported ? undefined : 'chatml'
117
+ if (template) tmpl = template // Force replace if provided
118
+ return RNLlama.getFormattedChat(this.id, chat, tmpl)
110
119
  }
111
120
 
112
121
  async completion(
113
122
  params: CompletionParams,
114
123
  callback?: (data: TokenData) => void,
115
124
  ): Promise<NativeCompletionResult> {
116
-
117
125
  let finalPrompt = params.prompt
118
- if (params.messages) { // messages always win
119
- finalPrompt = await this.getFormattedChat(params.messages)
126
+ if (params.messages) {
127
+ // messages always win
128
+ finalPrompt = await this.getFormattedChat(params.messages, params.chatTemplate)
120
129
  }
121
130
 
122
131
  let tokenListener: any =
@@ -162,8 +171,11 @@ export class LlamaContext {
162
171
  return RNLlama.detokenize(this.id, tokens)
163
172
  }
164
173
 
165
- embedding(text: string): Promise<NativeEmbeddingResult> {
166
- return RNLlama.embedding(this.id, text)
174
+ embedding(
175
+ text: string,
176
+ params?: EmbeddingParams,
177
+ ): Promise<NativeEmbeddingResult> {
178
+ return RNLlama.embedding(this.id, text, params || {})
167
179
  }
168
180
 
169
181
  async bench(
@@ -199,35 +211,78 @@ export async function setContextLimit(limit: number): Promise<void> {
199
211
  return RNLlama.setContextLimit(limit)
200
212
  }
201
213
 
202
- export async function initLlama({
214
+ let contextIdCounter = 0
215
+ const contextIdRandom = () =>
216
+ process.env.NODE_ENV === 'test' ? 0 : Math.floor(Math.random() * 100000)
217
+
218
+ const modelInfoSkip = [
219
+ // Large fields
220
+ 'tokenizer.ggml.tokens',
221
+ 'tokenizer.ggml.token_type',
222
+ 'tokenizer.ggml.merges',
223
+ ]
224
+ export async function loadLlamaModelInfo(model: string): Promise<Object> {
225
+ let path = model
226
+ if (path.startsWith('file://')) path = path.slice(7)
227
+ return RNLlama.modelInfo(path, modelInfoSkip)
228
+ }
229
+
230
+ const poolTypeMap = {
231
+ // -1 is unspecified as undefined
232
+ none: 0,
233
+ mean: 1,
234
+ cls: 2,
235
+ last: 3,
236
+ rank: 4,
237
+ }
238
+
239
+ export async function initLlama(
240
+ {
203
241
  model,
204
242
  is_model_asset: isModelAsset,
243
+ pooling_type: poolingType,
244
+ lora,
205
245
  ...rest
206
- }: ContextParams,
207
- progressCallback?: (progress: number) => void
246
+ }: ContextParams,
247
+ onProgress?: (progress: number) => void,
208
248
  ): Promise<LlamaContext> {
209
249
  let path = model
210
250
  if (path.startsWith('file://')) path = path.slice(7)
211
-
212
- const modelProgressListener = EventEmitter.addListener(EVENT_ON_MODEL_PROGRESS, (event) => {
213
- if(event.progress && progressCallback)
214
- progressCallback(event.progress)
215
- if(event.progress === 100) {
216
- modelProgressListener.remove()
217
- }
218
- })
219
251
 
252
+ let loraPath = lora
253
+ if (loraPath?.startsWith('file://')) loraPath = loraPath.slice(7)
254
+
255
+ const contextId = contextIdCounter + contextIdRandom()
256
+ contextIdCounter += 1
257
+
258
+ let removeProgressListener: any = null
259
+ if (onProgress) {
260
+ removeProgressListener = EventEmitter.addListener(
261
+ EVENT_ON_INIT_CONTEXT_PROGRESS,
262
+ (evt: { contextId: number; progress: number }) => {
263
+ if (evt.contextId !== contextId) return
264
+ onProgress(evt.progress)
265
+ },
266
+ )
267
+ }
268
+
269
+ const poolType = poolTypeMap[poolingType as keyof typeof poolTypeMap]
220
270
  const {
221
- contextId,
222
271
  gpu,
223
272
  reasonNoGPU,
224
273
  model: modelDetails,
225
- } = await RNLlama.initContext({
274
+ } = await RNLlama.initContext(contextId, {
226
275
  model: path,
227
276
  is_model_asset: !!isModelAsset,
277
+ use_progress_callback: !!onProgress,
278
+ pooling_type: poolType,
279
+ lora: loraPath,
228
280
  ...rest,
281
+ }).catch((err: any) => {
282
+ removeProgressListener?.remove()
283
+ throw err
229
284
  })
230
-
285
+ removeProgressListener?.remove()
231
286
  return new LlamaContext({ contextId, gpu, reasonNoGPU, model: modelDetails })
232
287
  }
233
288