cui-llama.rn 1.2.6 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/README.md +3 -2
  2. package/android/src/main/CMakeLists.txt +20 -5
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +115 -27
  4. package/android/src/main/java/com/rnllama/RNLlama.java +40 -7
  5. package/android/src/main/jni.cpp +222 -34
  6. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +9 -4
  7. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +9 -4
  8. package/cpp/common.cpp +1682 -2114
  9. package/cpp/common.h +600 -613
  10. package/cpp/ggml-aarch64.c +129 -3478
  11. package/cpp/ggml-aarch64.h +19 -39
  12. package/cpp/ggml-alloc.c +1040 -1040
  13. package/cpp/ggml-alloc.h +76 -76
  14. package/cpp/ggml-backend-impl.h +216 -216
  15. package/cpp/ggml-backend-reg.cpp +195 -0
  16. package/cpp/ggml-backend.cpp +1997 -2661
  17. package/cpp/ggml-backend.h +328 -314
  18. package/cpp/ggml-common.h +1853 -1853
  19. package/cpp/ggml-cpp.h +38 -38
  20. package/cpp/ggml-cpu-aarch64.c +3560 -0
  21. package/cpp/ggml-cpu-aarch64.h +30 -0
  22. package/cpp/ggml-cpu-impl.h +371 -614
  23. package/cpp/ggml-cpu-quants.c +10822 -0
  24. package/cpp/ggml-cpu-quants.h +63 -0
  25. package/cpp/ggml-cpu.c +13975 -13720
  26. package/cpp/ggml-cpu.cpp +663 -0
  27. package/cpp/ggml-cpu.h +177 -150
  28. package/cpp/ggml-impl.h +550 -296
  29. package/cpp/ggml-metal.h +66 -66
  30. package/cpp/ggml-metal.m +4294 -3933
  31. package/cpp/ggml-quants.c +5247 -15739
  32. package/cpp/ggml-quants.h +100 -147
  33. package/cpp/ggml-threading.cpp +12 -0
  34. package/cpp/ggml-threading.h +12 -0
  35. package/cpp/ggml.c +8180 -8390
  36. package/cpp/ggml.h +2411 -2441
  37. package/cpp/llama-grammar.cpp +1138 -1138
  38. package/cpp/llama-grammar.h +144 -144
  39. package/cpp/llama-impl.h +181 -181
  40. package/cpp/llama-sampling.cpp +2348 -2345
  41. package/cpp/llama-sampling.h +48 -48
  42. package/cpp/llama-vocab.cpp +1984 -1984
  43. package/cpp/llama-vocab.h +170 -170
  44. package/cpp/llama.cpp +22132 -22046
  45. package/cpp/llama.h +1253 -1255
  46. package/cpp/log.cpp +401 -401
  47. package/cpp/log.h +121 -121
  48. package/cpp/rn-llama.hpp +83 -19
  49. package/cpp/sampling.cpp +466 -466
  50. package/cpp/sgemm.cpp +1884 -1276
  51. package/ios/RNLlama.mm +43 -20
  52. package/ios/RNLlamaContext.h +9 -3
  53. package/ios/RNLlamaContext.mm +133 -33
  54. package/jest/mock.js +0 -1
  55. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  56. package/lib/commonjs/index.js +52 -15
  57. package/lib/commonjs/index.js.map +1 -1
  58. package/lib/module/NativeRNLlama.js.map +1 -1
  59. package/lib/module/index.js +51 -15
  60. package/lib/module/index.js.map +1 -1
  61. package/lib/typescript/NativeRNLlama.d.ts +29 -5
  62. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  63. package/lib/typescript/index.d.ts +12 -5
  64. package/lib/typescript/index.d.ts.map +1 -1
  65. package/package.json +1 -1
  66. package/src/NativeRNLlama.ts +41 -6
  67. package/src/index.ts +82 -27
  68. package/cpp/json-schema-to-grammar.cpp +0 -1045
  69. package/cpp/json-schema-to-grammar.h +0 -8
  70. package/cpp/json.hpp +0 -24766
@@ -3,8 +3,8 @@ import RNLlama from './NativeRNLlama';
3
3
  import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar';
4
4
  import { formatChat } from './chat';
5
5
  export { SchemaGrammarConverter, convertJsonSchemaToGrammar };
6
+ const EVENT_ON_INIT_CONTEXT_PROGRESS = '@RNLlama_onInitContextProgress';
6
7
  const EVENT_ON_TOKEN = '@RNLlama_onToken';
7
- const EVENT_ON_MODEL_PROGRESS = '@RNLlama_onModelProgress';
8
8
  let EventEmitter;
9
9
  if (Platform.OS === 'ios') {
10
10
  // @ts-ignore
@@ -45,16 +45,18 @@ export class LlamaContext {
45
45
  async saveSession(filepath, options) {
46
46
  return RNLlama.saveSession(this.id, filepath, (options === null || options === void 0 ? void 0 : options.tokenSize) || -1);
47
47
  }
48
- async getFormattedChat(messages) {
48
+ async getFormattedChat(messages, template) {
49
49
  var _this$model;
50
50
  const chat = formatChat(messages);
51
- return RNLlama.getFormattedChat(this.id, chat, (_this$model = this.model) !== null && _this$model !== void 0 && _this$model.isChatTemplateSupported ? undefined : 'chatml');
51
+ let tmpl = (_this$model = this.model) !== null && _this$model !== void 0 && _this$model.isChatTemplateSupported ? undefined : 'chatml';
52
+ if (template) tmpl = template; // Force replace if provided
53
+ return RNLlama.getFormattedChat(this.id, chat, tmpl);
52
54
  }
53
55
  async completion(params, callback) {
54
56
  let finalPrompt = params.prompt;
55
57
  if (params.messages) {
56
58
  // messages always win
57
- finalPrompt = await this.getFormattedChat(params.messages);
59
+ finalPrompt = await this.getFormattedChat(params.messages, params.chatTemplate);
58
60
  }
59
61
  let tokenListener = callback && EventEmitter.addListener(EVENT_ON_TOKEN, evt => {
60
62
  const {
@@ -94,8 +96,8 @@ export class LlamaContext {
94
96
  detokenize(tokens) {
95
97
  return RNLlama.detokenize(this.id, tokens);
96
98
  }
97
- embedding(text) {
98
- return RNLlama.embedding(this.id, text);
99
+ embedding(text, params) {
100
+ return RNLlama.embedding(this.id, text, params || {});
99
101
  }
100
102
  async bench(pp, tg, pl, nr) {
101
103
  const result = await RNLlama.bench(this.id, pp, tg, pl, nr);
@@ -120,30 +122,64 @@ export async function getCpuFeatures() {
120
122
  export async function setContextLimit(limit) {
121
123
  return RNLlama.setContextLimit(limit);
122
124
  }
123
- export async function initLlama(_ref2, progressCallback) {
125
+ let contextIdCounter = 0;
126
+ const contextIdRandom = () => process.env.NODE_ENV === 'test' ? 0 : Math.floor(Math.random() * 100000);
127
+ const modelInfoSkip = [
128
+ // Large fields
129
+ 'tokenizer.ggml.tokens', 'tokenizer.ggml.token_type', 'tokenizer.ggml.merges'];
130
+ export async function loadLlamaModelInfo(model) {
131
+ let path = model;
132
+ if (path.startsWith('file://')) path = path.slice(7);
133
+ return RNLlama.modelInfo(path, modelInfoSkip);
134
+ }
135
+ const poolTypeMap = {
136
+ // -1 is unspecified as undefined
137
+ none: 0,
138
+ mean: 1,
139
+ cls: 2,
140
+ last: 3,
141
+ rank: 4
142
+ };
143
+ export async function initLlama(_ref2, onProgress) {
144
+ var _loraPath, _removeProgressListen2;
124
145
  let {
125
146
  model,
126
147
  is_model_asset: isModelAsset,
148
+ pooling_type: poolingType,
149
+ lora,
127
150
  ...rest
128
151
  } = _ref2;
129
152
  let path = model;
130
153
  if (path.startsWith('file://')) path = path.slice(7);
131
- const modelProgressListener = EventEmitter.addListener(EVENT_ON_MODEL_PROGRESS, event => {
132
- if (event.progress && progressCallback) progressCallback(event.progress);
133
- if (event.progress === 100) {
134
- modelProgressListener.remove();
135
- }
136
- });
154
+ let loraPath = lora;
155
+ if ((_loraPath = loraPath) !== null && _loraPath !== void 0 && _loraPath.startsWith('file://')) loraPath = loraPath.slice(7);
156
+ const contextId = contextIdCounter + contextIdRandom();
157
+ contextIdCounter += 1;
158
+ let removeProgressListener = null;
159
+ if (onProgress) {
160
+ removeProgressListener = EventEmitter.addListener(EVENT_ON_INIT_CONTEXT_PROGRESS, evt => {
161
+ if (evt.contextId !== contextId) return;
162
+ onProgress(evt.progress);
163
+ });
164
+ }
165
+ const poolType = poolTypeMap[poolingType];
137
166
  const {
138
- contextId,
139
167
  gpu,
140
168
  reasonNoGPU,
141
169
  model: modelDetails
142
- } = await RNLlama.initContext({
170
+ } = await RNLlama.initContext(contextId, {
143
171
  model: path,
144
172
  is_model_asset: !!isModelAsset,
173
+ use_progress_callback: !!onProgress,
174
+ pooling_type: poolType,
175
+ lora: loraPath,
145
176
  ...rest
177
+ }).catch(err => {
178
+ var _removeProgressListen;
179
+ (_removeProgressListen = removeProgressListener) === null || _removeProgressListen === void 0 ? void 0 : _removeProgressListen.remove();
180
+ throw err;
146
181
  });
182
+ (_removeProgressListen2 = removeProgressListener) === null || _removeProgressListen2 === void 0 ? void 0 : _removeProgressListen2.remove();
147
183
  return new LlamaContext({
148
184
  contextId,
149
185
  gpu,
@@ -1 +1 @@
1
- {"version":3,"names":["NativeEventEmitter","DeviceEventEmitter","Platform","RNLlama","SchemaGrammarConverter","convertJsonSchemaToGrammar","formatChat","EVENT_ON_TOKEN","EVENT_ON_MODEL_PROGRESS","EventEmitter","OS","LlamaContext","gpu","reasonNoGPU","model","constructor","_ref","contextId","id","loadSession","filepath","path","startsWith","slice","saveSession","options","tokenSize","getFormattedChat","messages","_this$model","chat","isChatTemplateSupported","undefined","completion","params","callback","finalPrompt","prompt","tokenListener","addListener","evt","tokenResult","Error","promise","emit_partial_completion","then","completionResult","_tokenListener","remove","catch","err","_tokenListener2","stopCompletion","tokenizeAsync","text","tokenizeSync","detokenize","tokens","embedding","bench","pp","tg","pl","nr","result","modelDesc","modelSize","modelNParams","ppAvg","ppStd","tgAvg","tgStd","JSON","parse","release","releaseContext","getCpuFeatures","setContextLimit","limit","initLlama","_ref2","progressCallback","is_model_asset","isModelAsset","rest","modelProgressListener","event","progress","modelDetails","initContext","releaseAllLlama","releaseAllContexts"],"sourceRoot":"..\\..\\src","sources":["index.ts"],"mappings":"AAAA,SAASA,kBAAkB,EAAEC,kBAAkB,EAAEC,QAAQ,QAAQ,cAAc;AAE/E,OAAOC,OAAO,MAAM,iBAAiB;AAYrC,SAASC,sBAAsB,EAAEC,0BAA0B,QAAQ,WAAW;AAE9E,SAASC,UAAU,QAAQ,QAAQ;AAEnC,SAASF,sBAAsB,EAAEC,0BAA0B;AAE3D,MAAME,cAAc,GAAG,kBAAkB;AAEzC,MAAMC,uBAAuB,GAAG,0BAA0B;AAE1D,IAAIC,YAA2D;AAC/D,IAAIP,QAAQ,CAACQ,EAAE,KAAK,KAAK,EAAE;EACzB;EACAD,YAAY,GAAG,IAAIT,kBAAkB,CAACG,OAAO,CAAC;AAChD;AACA,IAAID,QAAQ,CAACQ,EAAE,KAAK,SAAS,EAAE;EAC7BD,YAAY,GAAGR,kBAAkB;AACnC;AAgCA,OAAO,MAAMU,YAAY,CAAC;EAGxBC,GAAG,GAAY,KAAK;EAEpBC,WAAW,GAAW,EAAE;EAExBC,KAAK,GAED,CAAC,CAAC;EAENC,WAAWA,CAAAC,IAAA,EAA6D;IAAA,IAA5D;MAAEC,SAAS;MAAEL,GAAG;MAAEC,WAAW;MAAEC;IAA0B,CAAC,GAAAE,IAAA;IACpE,IAAI,CAACE,EAAE,GAAGD,SAAS;IACnB,IAAI,CAACL,GAAG,GAAGA,GAAG;IACd,IAAI,CAACC,WAAW,GAAGA,WAAW;IAC9B,IAAI,CAACC,KAAK,GAAGA,KAAK;EACpB;;EAEA;AACF;AACA;EACE,MAAMK,WAAWA,CAACC,QAAgB,EAAoC;IACpE,IAAIC,IAAI,GAAGD,QAAQ;IACnB,IAAIC,IAAI,CAACC,UAAU,CAAC,SAAS,CAAC,EAAED,IAAI,GAAGA,IAAI,CAACE,KAAK,CAAC,CAAC,CAAC;IACpD,OAAOpB,OAAO,CAACgB,WAAW,CAAC,IAAI,CAACD,EAAE,EAAEG,IAAI,CAAC;EAC3C;;EAEA;AACF;AACA;EACE,MAAMG,WAAWA,CACfJ,QAAgB,EAChBK,OAA+B,EACd;IACjB,OAAOtB,OAAO,CAACqB,WAAW,CAAC,IAAI,CAACN,EAAE,EAAEE,QAAQ,EAAE,CAAAK,OAAO,aAAPA,OAAO,uBAAPA,OAAO,CAAEC,SAAS,KAAI,CAAC,CAAC,CAAC;EACzE;EAEA,MAAMC,gBAAgBA,CACpBC,QAAuC,EACtB;IAAA,IAAAC,WAAA;IACjB,MAAMC,IAAI,GAAGxB,UAAU,CAACsB,QAAQ,CAAC;IACjC,OAAOzB,OAAO,CAACwB,gBAAgB,CAC7B,IAAI,CAACT,EAAE,EACPY,IAAI,EACJ,CAAAD,WAAA,OAAI,CAACf,KAAK,cAAAe,WAAA,eAAVA,WAAA,CAAYE,uBAAuB,GAAGC,SAAS,GAAG,QACpD,CAAC;EACH;EAEA,MAAMC,UAAUA,CACdC,MAAwB,EACxBC,QAAoC,EACH;IAEjC,IAAIC,WAAW,GAAGF,MAAM,CAACG,MAAM;IAC/B,IAAIH,MAAM,CAACN,QAAQ,EAAE;MAAE;MACrBQ,WAAW,GAAG,MAAM,IAAI,CAACT,gBAAgB,CAACO,MAAM,CAACN,QAAQ,CAAC;IAC5D;IAEA,IAAIU,aAAkB,GACpBH,QAAQ,IACR1B,YAAY,CAAC8B,WAAW,CAAChC,cAAc,EAAGiC,GAAqB,IAAK;MAClE,MAAM;QAAEvB,SAAS;QAAEwB;MAAY,CAAC,GAAGD,GAAG;MACtC,IAAIvB,SAAS,KAAK,IAAI,CAACC,EAAE,EAAE;MAC3BiB,QAAQ,CAACM,WAAW,CAAC;IACvB,CAAC,CAAC;IAEJ,IAAI,CAACL,WAAW,EAAE,MAAM,IAAIM,KAAK,CAAC,oBAAoB,CAAC;IACvD,MAAMC,OAAO,GAAGxC,OAAO,CAAC8B,UAAU,CAAC,IAAI,CAACf,EAAE,EAAE;MAC1C,GAAGgB,MAAM;MACTG,MAAM,EAAED,WAAW;MACnBQ,uBAAuB,EAAE,CAAC,CAACT;IAC7B,CAAC,CAAC;IACF,OAAOQ,OAAO,CACXE,IAAI,CAAEC,gBAAgB,IAAK;MAAA,IAAAC,cAAA;MAC1B,CAAAA,cAAA,GAAAT,aAAa,cAAAS,cAAA,uBAAbA,cAAA,CAAeC,MAAM,CAAC,CAAC;MACvBV,aAAa,GAAG,IAAI;MACpB,OAAOQ,gBAAgB;IACzB,CAAC,CAAC,CACDG,KAAK,CAAEC,GAAQ,IAAK;MAAA,IAAAC,eAAA;MACnB,CAAAA,eAAA,GAAAb,aAAa,cAAAa,eAAA,uBAAbA,eAAA,CAAeH,MAAM,CAAC,CAAC;MACvBV,aAAa,GAAG,IAAI;MACpB,MAAMY,GAAG;IACX,CAAC,CAAC;EACN;EAEAE,cAAcA,CAAA,EAAkB;IAC9B,OAAOjD,OAAO,CAACiD,cAAc,CAAC,IAAI,CAAClC,EAAE,CAAC;EACxC;EAEAmC,aAAaA,CAACC,IAAY,EAAiC;IACzD,OAAOnD,OAAO,CAACkD,aAAa,CAAC,IAAI,CAACnC,EAAE,EAAEoC,IAAI,CAAC;EAC7C;EAEAC,YAAYA,CAACD,IAAY,EAAwB;IAC/C,OAAOnD,OAAO,CAACoD,YAAY,CAAC,IAAI,CAACrC,EAAE,EAAEoC,IAAI,CAAC;EAC5C;EAEAE,UAAUA,CAACC,MAAgB,EAAmB;IAC5C,OAAOtD,OAAO,CAACqD,UAAU,CAAC,IAAI,CAACtC,EAAE,EAAEuC,MAAM,CAAC;EAC5C;EAEAC,SAASA,CAACJ,IAAY,EAAkC;IACtD,OAAOnD,OAAO,CAACuD,SAAS,CAAC,IAAI,CAACxC,EAAE,EAAEoC,IAAI,CAAC;EACzC;EAEA,MAAMK,KAAKA,CACTC,EAAU,EACVC,EAAU,EACVC,EAAU,EACVC,EAAU,EACY;IACtB,MAAMC,MAAM,GAAG,MAAM7D,OAAO,CAACwD,KAAK,CAAC,IAAI,CAACzC,EAAE,EAAE0C,EAAE,EAAEC,EAAE,EAAEC,EAAE,EAAEC,EAAE,CAAC;IAC3D,MAAM,CAACE,SAAS,EAAEC,SAAS,EAAEC,YAAY,EAAEC,KAAK,EAAEC,KAAK,EAAEC,KAAK,EAAEC,KAAK,CAAC,GACpEC,IAAI,CAACC,KAAK,CAACT,MAAM,CAAC;IACpB,OAAO;MACLC,SAAS;MACTC,SAAS;MACTC,YAAY;MACZC,KAAK;MACLC,KAAK;MACLC,KAAK;MACLC;IACF,CAAC;EACH;EAEA,MAAMG,OAAOA,CAAA,EAAkB;IAC7B,OAAOvE,OAAO,CAACwE,cAAc,CAAC,IAAI,CAACzD,EAAE,CAAC;EACxC;AACF;AAEA,OAAO,eAAe0D,cAAcA,CAAA,EAAgC;EAClE,OAAOzE,OAAO,CAACyE,cAAc,CAAC,CAAC;AACjC;AAEA,OAAO,eAAeC,eAAeA,CAACC,KAAa,EAAiB;EAClE,OAAO3E,OAAO,CAAC0E,eAAe,CAACC,KAAK,CAAC;AACvC;AAEA,OAAO,eAAeC,SAASA,CAAAC,KAAA,EAK7BC,gBAA6C,EACtB;EAAA,IANO;IAC5BnE,KAAK;IACLoE,cAAc,EAAEC,YAAY;IAC5B,GAAGC;EACU,CAAC,GAAAJ,KAAA;EAGhB,IAAI3D,IAAI,GAAGP,KAAK;EAChB,IAAIO,IAAI,CAACC,UAAU,CAAC,SAAS,CAAC,EAAED,IAAI,GAAGA,IAAI,CAACE,KAAK,CAAC,CAAC,CAAC;EAEpD,MAAM8D,qBAAqB,GAAG5E,YAAY,CAAC8B,WAAW,CAAC/B,uBAAuB,EAAG8E,KAAK,IAAK;IACzF,IAAGA,KAAK,CAACC,QAAQ,IAAIN,gBAAgB,EACnCA,gBAAgB,CAACK,KAAK,CAACC,QAAQ,CAAC;IAClC,IAAGD,KAAK,CAACC,QAAQ,KAAK,GAAG,EAAE;MACzBF,qBAAqB,CAACrC,MAAM,CAAC,CAAC;IAChC;EACF,CAAC,CAAC;EAEF,MAAM;IACJ/B,SAAS;IACTL,GAAG;IACHC,WAAW;IACXC,KAAK,EAAE0E;EACT,CAAC,GAAG,MAAMrF,OAAO,CAACsF,WAAW,CAAC;IAC5B3E,KAAK,EAAEO,IAAI;IACX6D,cAAc,EAAE,CAAC,CAACC,YAAY;IAC9B,GAAGC;EACL,CAAC,CAAC;EAEF,OAAO,IAAIzE,YAAY,CAAC;IAAEM,SAAS;IAAEL,GAAG;IAAEC,WAAW;IAAEC,KAAK,EAAE0E;EAAa,CAAC,CAAC;AAC/E;AAEA,OAAO,eAAeE,eAAeA,CAAA,EAAkB;EACrD,OAAOvF,OAAO,CAACwF,kBAAkB,CAAC,CAAC;AACrC"}
1
+ {"version":3,"names":["NativeEventEmitter","DeviceEventEmitter","Platform","RNLlama","SchemaGrammarConverter","convertJsonSchemaToGrammar","formatChat","EVENT_ON_INIT_CONTEXT_PROGRESS","EVENT_ON_TOKEN","EventEmitter","OS","LlamaContext","gpu","reasonNoGPU","model","constructor","_ref","contextId","id","loadSession","filepath","path","startsWith","slice","saveSession","options","tokenSize","getFormattedChat","messages","template","_this$model","chat","tmpl","isChatTemplateSupported","undefined","completion","params","callback","finalPrompt","prompt","chatTemplate","tokenListener","addListener","evt","tokenResult","Error","promise","emit_partial_completion","then","completionResult","_tokenListener","remove","catch","err","_tokenListener2","stopCompletion","tokenizeAsync","text","tokenizeSync","detokenize","tokens","embedding","bench","pp","tg","pl","nr","result","modelDesc","modelSize","modelNParams","ppAvg","ppStd","tgAvg","tgStd","JSON","parse","release","releaseContext","getCpuFeatures","setContextLimit","limit","contextIdCounter","contextIdRandom","process","env","NODE_ENV","Math","floor","random","modelInfoSkip","loadLlamaModelInfo","modelInfo","poolTypeMap","none","mean","cls","last","rank","initLlama","_ref2","onProgress","_loraPath","_removeProgressListen2","is_model_asset","isModelAsset","pooling_type","poolingType","lora","rest","loraPath","removeProgressListener","progress","poolType","modelDetails","initContext","use_progress_callback","_removeProgressListen","releaseAllLlama","releaseAllContexts"],"sourceRoot":"..\\..\\src","sources":["index.ts"],"mappings":"AAAA,SAASA,kBAAkB,EAAEC,kBAAkB,EAAEC,QAAQ,QAAQ,cAAc;AAE/E,OAAOC,OAAO,MAAM,iBAAiB;AAarC,SAASC,sBAAsB,EAAEC,0BAA0B,QAAQ,WAAW;AAE9E,SAASC,UAAU,QAAQ,QAAQ;AAEnC,SAASF,sBAAsB,EAAEC,0BAA0B;AAE3D,MAAME,8BAA8B,GAAG,gCAAgC;AACvE,MAAMC,cAAc,GAAG,kBAAkB;AAEzC,IAAIC,YAA2D;AAC/D,IAAIP,QAAQ,CAACQ,EAAE,KAAK,KAAK,EAAE;EACzB;EACAD,YAAY,GAAG,IAAIT,kBAAkB,CAACG,OAAO,CAAC;AAChD;AACA,IAAID,QAAQ,CAACQ,EAAE,KAAK,SAAS,EAAE;EAC7BD,YAAY,GAAGR,kBAAkB;AACnC;AA0CA,OAAO,MAAMU,YAAY,CAAC;EAGxBC,GAAG,GAAY,KAAK;EAEpBC,WAAW,GAAW,EAAE;EAExBC,KAAK,GAED,CAAC,CAAC;EAENC,WAAWA,CAAAC,IAAA,EAA6D;IAAA,IAA5D;MAAEC,SAAS;MAAEL,GAAG;MAAEC,WAAW;MAAEC;IAA0B,CAAC,GAAAE,IAAA;IACpE,IAAI,CAACE,EAAE,GAAGD,SAAS;IACnB,IAAI,CAACL,GAAG,GAAGA,GAAG;IACd,IAAI,CAACC,WAAW,GAAGA,WAAW;IAC9B,IAAI,CAACC,KAAK,GAAGA,KAAK;EACpB;;EAEA;AACF;AACA;EACE,MAAMK,WAAWA,CAACC,QAAgB,EAAoC;IACpE,IAAIC,IAAI,GAAGD,QAAQ;IACnB,IAAIC,IAAI,CAACC,UAAU,CAAC,SAAS,CAAC,EAAED,IAAI,GAAGA,IAAI,CAACE,KAAK,CAAC,CAAC,CAAC;IACpD,OAAOpB,OAAO,CAACgB,WAAW,CAAC,IAAI,CAACD,EAAE,EAAEG,IAAI,CAAC;EAC3C;;EAEA;AACF;AACA;EACE,MAAMG,WAAWA,CACfJ,QAAgB,EAChBK,OAA+B,EACd;IACjB,OAAOtB,OAAO,CAACqB,WAAW,CAAC,IAAI,CAACN,EAAE,EAAEE,QAAQ,EAAE,CAAAK,OAAO,aAAPA,OAAO,uBAAPA,OAAO,CAAEC,SAAS,KAAI,CAAC,CAAC,CAAC;EACzE;EAEA,MAAMC,gBAAgBA,CACpBC,QAAuC,EACvCC,QAAiB,EACA;IAAA,IAAAC,WAAA;IACjB,MAAMC,IAAI,GAAGzB,UAAU,CAACsB,QAAQ,CAAC;IACjC,IAAII,IAAI,GAAG,CAAAF,WAAA,OAAI,CAAChB,KAAK,cAAAgB,WAAA,eAAVA,WAAA,CAAYG,uBAAuB,GAAGC,SAAS,GAAG,QAAQ;IACrE,IAAIL,QAAQ,EAAEG,IAAI,GAAGH,QAAQ,EAAC;IAC9B,OAAO1B,OAAO,CAACwB,gBAAgB,CAAC,IAAI,CAACT,EAAE,EAAEa,IAAI,EAAEC,IAAI,CAAC;EACtD;EAEA,MAAMG,UAAUA,CACdC,MAAwB,EACxBC,QAAoC,EACH;IACjC,IAAIC,WAAW,GAAGF,MAAM,CAACG,MAAM;IAC/B,IAAIH,MAAM,CAACR,QAAQ,EAAE;MACnB;MACAU,WAAW,GAAG,MAAM,IAAI,CAACX,gBAAgB,CAACS,MAAM,CAACR,QAAQ,EAAEQ,MAAM,CAACI,YAAY,CAAC;IACjF;IAEA,IAAIC,aAAkB,GACpBJ,QAAQ,IACR5B,YAAY,CAACiC,WAAW,CAAClC,cAAc,EAAGmC,GAAqB,IAAK;MAClE,MAAM;QAAE1B,SAAS;QAAE2B;MAAY,CAAC,GAAGD,GAAG;MACtC,IAAI1B,SAAS,KAAK,IAAI,CAACC,EAAE,EAAE;MAC3BmB,QAAQ,CAACO,WAAW,CAAC;IACvB,CAAC,CAAC;IAEJ,IAAI,CAACN,WAAW,EAAE,MAAM,IAAIO,KAAK,CAAC,oBAAoB,CAAC;IACvD,MAAMC,OAAO,GAAG3C,OAAO,CAACgC,UAAU,CAAC,IAAI,CAACjB,EAAE,EAAE;MAC1C,GAAGkB,MAAM;MACTG,MAAM,EAAED,WAAW;MACnBS,uBAAuB,EAAE,CAAC,CAACV;IAC7B,CAAC,CAAC;IACF,OAAOS,OAAO,CACXE,IAAI,CAAEC,gBAAgB,IAAK;MAAA,IAAAC,cAAA;MAC1B,CAAAA,cAAA,GAAAT,aAAa,cAAAS,cAAA,uBAAbA,cAAA,CAAeC,MAAM,CAAC,CAAC;MACvBV,aAAa,GAAG,IAAI;MACpB,OAAOQ,gBAAgB;IACzB,CAAC,CAAC,CACDG,KAAK,CAAEC,GAAQ,IAAK;MAAA,IAAAC,eAAA;MACnB,CAAAA,eAAA,GAAAb,aAAa,cAAAa,eAAA,uBAAbA,eAAA,CAAeH,MAAM,CAAC,CAAC;MACvBV,aAAa,GAAG,IAAI;MACpB,MAAMY,GAAG;IACX,CAAC,CAAC;EACN;EAEAE,cAAcA,CAAA,EAAkB;IAC9B,OAAOpD,OAAO,CAACoD,cAAc,CAAC,IAAI,CAACrC,EAAE,CAAC;EACxC;EAEAsC,aAAaA,CAACC,IAAY,EAAiC;IACzD,OAAOtD,OAAO,CAACqD,aAAa,CAAC,IAAI,CAACtC,EAAE,EAAEuC,IAAI,CAAC;EAC7C;EAEAC,YAAYA,CAACD,IAAY,EAAwB;IAC/C,OAAOtD,OAAO,CAACuD,YAAY,CAAC,IAAI,CAACxC,EAAE,EAAEuC,IAAI,CAAC;EAC5C;EAEAE,UAAUA,CAACC,MAAgB,EAAmB;IAC5C,OAAOzD,OAAO,CAACwD,UAAU,CAAC,IAAI,CAACzC,EAAE,EAAE0C,MAAM,CAAC;EAC5C;EAEAC,SAASA,CACPJ,IAAY,EACZrB,MAAwB,EACQ;IAChC,OAAOjC,OAAO,CAAC0D,SAAS,CAAC,IAAI,CAAC3C,EAAE,EAAEuC,IAAI,EAAErB,MAAM,IAAI,CAAC,CAAC,CAAC;EACvD;EAEA,MAAM0B,KAAKA,CACTC,EAAU,EACVC,EAAU,EACVC,EAAU,EACVC,EAAU,EACY;IACtB,MAAMC,MAAM,GAAG,MAAMhE,OAAO,CAAC2D,KAAK,CAAC,IAAI,CAAC5C,EAAE,EAAE6C,EAAE,EAAEC,EAAE,EAAEC,EAAE,EAAEC,EAAE,CAAC;IAC3D,MAAM,CAACE,SAAS,EAAEC,SAAS,EAAEC,YAAY,EAAEC,KAAK,EAAEC,KAAK,EAAEC,KAAK,EAAEC,KAAK,CAAC,GACpEC,IAAI,CAACC,KAAK,CAACT,MAAM,CAAC;IACpB,OAAO;MACLC,SAAS;MACTC,SAAS;MACTC,YAAY;MACZC,KAAK;MACLC,KAAK;MACLC,KAAK;MACLC;IACF,CAAC;EACH;EAEA,MAAMG,OAAOA,CAAA,EAAkB;IAC7B,OAAO1E,OAAO,CAAC2E,cAAc,CAAC,IAAI,CAAC5D,EAAE,CAAC;EACxC;AACF;AAEA,OAAO,eAAe6D,cAAcA,CAAA,EAAgC;EAClE,OAAO5E,OAAO,CAAC4E,cAAc,CAAC,CAAC;AACjC;AAEA,OAAO,eAAeC,eAAeA,CAACC,KAAa,EAAiB;EAClE,OAAO9E,OAAO,CAAC6E,eAAe,CAACC,KAAK,CAAC;AACvC;AAEA,IAAIC,gBAAgB,GAAG,CAAC;AACxB,MAAMC,eAAe,GAAGA,CAAA,KACtBC,OAAO,CAACC,GAAG,CAACC,QAAQ,KAAK,MAAM,GAAG,CAAC,GAAGC,IAAI,CAACC,KAAK,CAACD,IAAI,CAACE,MAAM,CAAC,CAAC,GAAG,MAAM,CAAC;AAE1E,MAAMC,aAAa,GAAG;AACpB;AACA,uBAAuB,EACvB,2BAA2B,EAC3B,uBAAuB,CACxB;AACD,OAAO,eAAeC,kBAAkBA,CAAC7E,KAAa,EAAmB;EACvE,IAAIO,IAAI,GAAGP,KAAK;EAChB,IAAIO,IAAI,CAACC,UAAU,CAAC,SAAS,CAAC,EAAED,IAAI,GAAGA,IAAI,CAACE,KAAK,CAAC,CAAC,CAAC;EACpD,OAAOpB,OAAO,CAACyF,SAAS,CAACvE,IAAI,EAAEqE,aAAa,CAAC;AAC/C;AAEA,MAAMG,WAAW,GAAG;EAClB;EACAC,IAAI,EAAE,CAAC;EACPC,IAAI,EAAE,CAAC;EACPC,GAAG,EAAE,CAAC;EACNC,IAAI,EAAE,CAAC;EACPC,IAAI,EAAE;AACR,CAAC;AAED,OAAO,eAAeC,SAASA,CAAAC,KAAA,EAQ7BC,UAAuC,EAChB;EAAA,IAAAC,SAAA,EAAAC,sBAAA;EAAA,IARvB;IACEzF,KAAK;IACL0F,cAAc,EAAEC,YAAY;IAC5BC,YAAY,EAAEC,WAAW;IACzBC,IAAI;IACJ,GAAGC;EACU,CAAC,GAAAT,KAAA;EAGhB,IAAI/E,IAAI,GAAGP,KAAK;EAChB,IAAIO,IAAI,CAACC,UAAU,CAAC,SAAS,CAAC,EAAED,IAAI,GAAGA,IAAI,CAACE,KAAK,CAAC,CAAC,CAAC;EAEpD,IAAIuF,QAAQ,GAAGF,IAAI;EACnB,KAAAN,SAAA,GAAIQ,QAAQ,cAAAR,SAAA,eAARA,SAAA,CAAUhF,UAAU,CAAC,SAAS,CAAC,EAAEwF,QAAQ,GAAGA,QAAQ,CAACvF,KAAK,CAAC,CAAC,CAAC;EAEjE,MAAMN,SAAS,GAAGiE,gBAAgB,GAAGC,eAAe,CAAC,CAAC;EACtDD,gBAAgB,IAAI,CAAC;EAErB,IAAI6B,sBAA2B,GAAG,IAAI;EACtC,IAAIV,UAAU,EAAE;IACdU,sBAAsB,GAAGtG,YAAY,CAACiC,WAAW,CAC/CnC,8BAA8B,EAC7BoC,GAA4C,IAAK;MAChD,IAAIA,GAAG,CAAC1B,SAAS,KAAKA,SAAS,EAAE;MACjCoF,UAAU,CAAC1D,GAAG,CAACqE,QAAQ,CAAC;IAC1B,CACF,CAAC;EACH;EAEA,MAAMC,QAAQ,GAAGpB,WAAW,CAACc,WAAW,CAA6B;EACrE,MAAM;IACJ/F,GAAG;IACHC,WAAW;IACXC,KAAK,EAAEoG;EACT,CAAC,GAAG,MAAM/G,OAAO,CAACgH,WAAW,CAAClG,SAAS,EAAE;IACvCH,KAAK,EAAEO,IAAI;IACXmF,cAAc,EAAE,CAAC,CAACC,YAAY;IAC9BW,qBAAqB,EAAE,CAAC,CAACf,UAAU;IACnCK,YAAY,EAAEO,QAAQ;IACtBL,IAAI,EAAEE,QAAQ;IACd,GAAGD;EACL,CAAC,CAAC,CAACzD,KAAK,CAAEC,GAAQ,IAAK;IAAA,IAAAgE,qBAAA;IACrB,CAAAA,qBAAA,GAAAN,sBAAsB,cAAAM,qBAAA,uBAAtBA,qBAAA,CAAwBlE,MAAM,CAAC,CAAC;IAChC,MAAME,GAAG;EACX,CAAC,CAAC;EACF,CAAAkD,sBAAA,GAAAQ,sBAAsB,cAAAR,sBAAA,uBAAtBA,sBAAA,CAAwBpD,MAAM,CAAC,CAAC;EAChC,OAAO,IAAIxC,YAAY,CAAC;IAAEM,SAAS;IAAEL,GAAG;IAAEC,WAAW;IAAEC,KAAK,EAAEoG;EAAa,CAAC,CAAC;AAC/E;AAEA,OAAO,eAAeI,eAAeA,CAAA,EAAkB;EACrD,OAAOnH,OAAO,CAACoH,kBAAkB,CAAC,CAAC;AACrC"}
@@ -1,12 +1,27 @@
1
1
  import type { TurboModule } from 'react-native';
2
+ export type NativeEmbeddingParams = {
3
+ embd_normalize?: number;
4
+ };
2
5
  export type NativeContextParams = {
3
6
  model: string;
4
7
  is_model_asset?: boolean;
5
- embedding?: boolean;
8
+ use_progress_callback?: boolean;
6
9
  n_ctx?: number;
7
10
  n_batch?: number;
8
11
  n_threads?: number;
9
12
  n_gpu_layers?: number;
13
+ /**
14
+ * Enable flash attention, only recommended in GPU device (Experimental in llama.cpp)
15
+ */
16
+ flash_attn?: boolean;
17
+ /**
18
+ * KV cache data type for the K (Experimental in llama.cpp)
19
+ */
20
+ cache_type_k?: string;
21
+ /**
22
+ * KV cache data type for the V (Experimental in llama.cpp)
23
+ */
24
+ cache_type_v?: string;
10
25
  use_mlock?: boolean;
11
26
  use_mmap?: boolean;
12
27
  vocab_only?: boolean;
@@ -14,6 +29,9 @@ export type NativeContextParams = {
14
29
  lora_scaled?: number;
15
30
  rope_freq_base?: number;
16
31
  rope_freq_scale?: number;
32
+ pooling_type?: number;
33
+ embedding?: boolean;
34
+ embd_normalize?: number;
17
35
  };
18
36
  export type NativeCompletionParams = {
19
37
  prompt: string;
@@ -25,8 +43,8 @@ export type NativeCompletionParams = {
25
43
  top_k?: number;
26
44
  top_p?: number;
27
45
  min_p?: number;
28
- xtc_t?: number;
29
- xtc_p?: number;
46
+ xtc_threshold?: number;
47
+ xtc_probability?: number;
30
48
  typical_p?: number;
31
49
  temperature?: number;
32
50
  penalty_last_n?: number;
@@ -38,6 +56,11 @@ export type NativeCompletionParams = {
38
56
  mirostat_eta?: number;
39
57
  penalize_nl?: boolean;
40
58
  seed?: number;
59
+ dry_multiplier?: number;
60
+ dry_base?: number;
61
+ dry_allowed_length?: number;
62
+ dry_penalty_last_n?: number;
63
+ dry_sequence_breakers?: Array<string>;
41
64
  ignore_eos?: boolean;
42
65
  logit_bias?: Array<Array<number>>;
43
66
  emit_partial_completion: boolean;
@@ -100,7 +123,8 @@ export type NativeCPUFeatures = {
100
123
  };
101
124
  export interface Spec extends TurboModule {
102
125
  setContextLimit(limit: number): Promise<void>;
103
- initContext(params: NativeContextParams): Promise<NativeLlamaContext>;
126
+ modelInfo(path: string, skip?: string[]): Promise<Object>;
127
+ initContext(contextId: number, params: NativeContextParams): Promise<NativeLlamaContext>;
104
128
  loadSession(contextId: number, filepath: string): Promise<NativeSessionLoadResult>;
105
129
  saveSession(contextId: number, filepath: string, size: number): Promise<number>;
106
130
  completion(contextId: number, params: NativeCompletionParams): Promise<NativeCompletionResult>;
@@ -110,7 +134,7 @@ export interface Spec extends TurboModule {
110
134
  getCpuFeatures(): Promise<NativeCPUFeatures>;
111
135
  getFormattedChat(contextId: number, messages: NativeLlamaChatMessage[], chatTemplate?: string): Promise<string>;
112
136
  detokenize(contextId: number, tokens: number[]): Promise<string>;
113
- embedding(contextId: number, text: string): Promise<NativeEmbeddingResult>;
137
+ embedding(contextId: number, text: string, params: NativeEmbeddingParams): Promise<NativeEmbeddingResult>;
114
138
  bench(contextId: number, pp: number, tg: number, pl: number, nr: number): Promise<string>;
115
139
  releaseContext(contextId: number): Promise<void>;
116
140
  releaseAllContexts(): Promise<void>;
@@ -1 +1 @@
1
- {"version":3,"file":"NativeRNLlama.d.ts","sourceRoot":"","sources":["../../src/NativeRNLlama.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,cAAc,CAAA;AAG/C,MAAM,MAAM,mBAAmB,GAAG;IAChC,KAAK,EAAE,MAAM,CAAA;IACb,cAAc,CAAC,EAAE,OAAO,CAAA;IAExB,SAAS,CAAC,EAAE,OAAO,CAAA;IAEnB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,OAAO,CAAC,EAAE,MAAM,CAAA;IAEhB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,YAAY,CAAC,EAAE,MAAM,CAAA;IAErB,SAAS,CAAC,EAAE,OAAO,CAAA;IACnB,QAAQ,CAAC,EAAE,OAAO,CAAA;IAClB,UAAU,CAAC,EAAE,OAAO,CAAA;IAEpB,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,WAAW,CAAC,EAAE,MAAM,CAAA;IAEpB,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,eAAe,CAAC,EAAE,MAAM,CAAA;CACzB,CAAA;AAED,MAAM,MAAM,sBAAsB,GAAG;IACnC,MAAM,EAAE,MAAM,CAAA;IACd,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,IAAI,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAA;IAEpB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,WAAW,CAAC,EAAE,OAAO,CAAA;IACrB,IAAI,CAAC,EAAE,MAAM,CAAA;IAEb,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB,UAAU,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAA;IAEjC,uBAAuB,EAAE,OAAO,CAAA;CACjC,CAAA;AAED,MAAM,MAAM,6BAA6B,GAAG;IAC1C,OAAO,EAAE,MAAM,CAAA;IACf,IAAI,EAAE,MAAM,CAAA;CACb,CAAA;AAED,MAAM,MAAM,yBAAyB,GAAG;IACtC,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,EAAE,KAAK,CAAC,6BAA6B,CAAC,CAAA;CAC5C,CAAA;AAED,MAAM,MAAM,6BAA6B,GAAG;IAC1C,QAAQ,EAAE,MAAM,CAAA;IAChB,SAAS,EAAE,MAAM,CAAA;IACjB,mBAAmB,EAAE,MAAM,CAAA;IAC3B,iBAAiB,EAAE,MAAM,CAAA;IACzB,WAAW,EAAE,MAAM,CAAA;IACnB,YAAY,EAAE,MAAM,CAAA;IACpB,sBAAsB,EAAE,MAAM,CAAA;IAC9B,oBAAoB,EAAE,MAAM,CAAA;CAC7B,CAAA;AAED,MAAM,MAAM,sBAAsB,GAAG;IACnC,IAAI,EAAE,MAAM,CAAA;IAEZ,gBAAgB,EAAE,MAAM,CAAA;IACxB,gBAAgB,EAAE,MAAM,CAAA;IACxB,SAAS,EAAE,OAAO,CAAA;IAClB,WAAW,EAAE,OAAO,CAAA;IACpB,YAAY,EAAE,MAAM,CAAA;IACpB,aAAa,EAAE,MAAM,CAAA;IACrB,aAAa,EAAE,MAAM,CAAA;IACrB,aAAa,EAAE,MAAM,CAAA;IACrB,OAAO,EAAE,6BAA6B,CAAA;IAEtC,wBAAwB,CAAC,EAAE,KAAK,CAAC,yBAAyB,CAAC,CAAA;CAC5D,CAAA;AAED,MAAM,MAAM,oBAAoB,GAAG;IACjC,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC,CAAA;CACtB,CAAA;AAED,MAAM,MAAM,qBAAqB,GAAG;IAClC,SAAS,EAAE,KAAK,CAAC,MAAM,CAAC,CAAA;CACzB,CAAA;AAED,MAAM,MAAM,kBAAkB,GAAG;IAC/B,SAAS,EAAE,MAAM,CAAA;IACjB,GAAG,EAAE,OAAO,CAAA;IACZ,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,MAAM,CAAA;CACd,CAAA;AAED,MAAM,MAAM,uBAAuB,GAAG;IACpC,aAAa,EAAE,MAAM,CAAA;IACrB,MAAM,EAAE,MAAM,CAAA;CACf,CAAA;AAED,MAAM,MAAM,sBAAsB,GAAG;IACnC,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,MAAM,CAAA;CAChB,CAAA;AAED,MAAM,MAAM,iBAAiB,GAAG;IAC9B,KAAK,EAAE,OAAO,CAAA;IACd,IAAI,EAAE,OAAO,CAAA;IACb,OAAO,EAAE,OAAO,CAAA;CACjB,CAAA;AAED,MAAM,WAAW,IAAK,SAAQ,WAAW;IACvC,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAC7C,WAAW,CAAC,MAAM,EAAE,mBAAmB,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAA;IAErE,WAAW,CACT,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,uBAAuB,CAAC,CAAA;IACnC,WAAW,CACT,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,EAChB,IAAI,EAAE,MAAM,GACX,OAAO,CAAC,MAAM,CAAC,CAAA;IAClB,UAAU,CACR,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,sBAAsB,GAC7B,OAAO,CAAC,sBAAsB,CAAC,CAAA;IAClC,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAChD,aAAa,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAAA;IAC7E,YAAY,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,oBAAoB,CAAA;IACnE,cAAc,IAAK,OAAO,CAAC,iBAAiB,CAAC,CAAA;IAC7C,gBAAgB,CACd,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,sBAAsB,EAAE,EAClC,YAAY,CAAC,EAAE,MAAM,GACpB,OAAO,CAAC,MAAM,CAAC,CAAA;IAClB,UAAU,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAA;IAChE,SAAS,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAAA;IAC1E,KAAK,CACH,SAAS,EAAE,MAAM,EACjB,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,GACT,OAAO,CAAC,MAAM,CAAC,CAAA;IAElB,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAEhD,kBAAkB,IAAI,OAAO,CAAC,IAAI,CAAC,CAAA;CACpC;;AAED,wBAA+D"}
1
+ {"version":3,"file":"NativeRNLlama.d.ts","sourceRoot":"","sources":["../../src/NativeRNLlama.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,cAAc,CAAA;AAG/C,MAAM,MAAM,qBAAqB,GAAG;IAClC,cAAc,CAAC,EAAE,MAAM,CAAA;CACxB,CAAA;AAED,MAAM,MAAM,mBAAmB,GAAG;IAChC,KAAK,EAAE,MAAM,CAAA;IACb,cAAc,CAAC,EAAE,OAAO,CAAA;IACxB,qBAAqB,CAAC,EAAE,OAAO,CAAA;IAE/B,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,OAAO,CAAC,EAAE,MAAM,CAAA;IAEhB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,YAAY,CAAC,EAAE,MAAM,CAAA;IAErB;;OAEG;IACH,UAAU,CAAC,EAAE,OAAO,CAAA;IAEpB;;OAEG;IACH,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB;;OAEG;IACH,YAAY,CAAC,EAAE,MAAM,CAAA;IAErB,SAAS,CAAC,EAAE,OAAO,CAAA;IACnB,QAAQ,CAAC,EAAE,OAAO,CAAA;IAClB,UAAU,CAAC,EAAE,OAAO,CAAA;IAEpB,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,WAAW,CAAC,EAAE,MAAM,CAAA;IAEpB,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,eAAe,CAAC,EAAE,MAAM,CAAA;IAExB,YAAY,CAAC,EAAE,MAAM,CAAA;IAGrB,SAAS,CAAC,EAAE,OAAO,CAAA;IACnB,cAAc,CAAC,EAAE,MAAM,CAAA;CACxB,CAAA;AAED,MAAM,MAAM,sBAAsB,GAAG;IACnC,MAAM,EAAE,MAAM,CAAA;IACd,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,IAAI,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAA;IAEpB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,aAAa,CAAC,EAAE,MAAM,CAAA;IACtB,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,WAAW,CAAC,EAAE,OAAO,CAAA;IACrB,IAAI,CAAC,EAAE,MAAM,CAAA;IAEb,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,kBAAkB,CAAC,EAAE,MAAM,CAAA;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAA;IAC3B,qBAAqB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAA;IAErC,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB,UAAU,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAA;IAEjC,uBAAuB,EAAE,OAAO,CAAA;CACjC,CAAA;AAED,MAAM,MAAM,6BAA6B,GAAG;IAC1C,OAAO,EAAE,MAAM,CAAA;IACf,IAAI,EAAE,MAAM,CAAA;CACb,CAAA;AAED,MAAM,MAAM,yBAAyB,GAAG;IACtC,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,EAAE,KAAK,CAAC,6BAA6B,CAAC,CAAA;CAC5C,CAAA;AAED,MAAM,MAAM,6BAA6B,GAAG;IAC1C,QAAQ,EAAE,MAAM,CAAA;IAChB,SAAS,EAAE,MAAM,CAAA;IACjB,mBAAmB,EAAE,MAAM,CAAA;IAC3B,iBAAiB,EAAE,MAAM,CAAA;IACzB,WAAW,EAAE,MAAM,CAAA;IACnB,YAAY,EAAE,MAAM,CAAA;IACpB,sBAAsB,EAAE,MAAM,CAAA;IAC9B,oBAAoB,EAAE,MAAM,CAAA;CAC7B,CAAA;AAED,MAAM,MAAM,sBAAsB,GAAG;IACnC,IAAI,EAAE,MAAM,CAAA;IAEZ,gBAAgB,EAAE,MAAM,CAAA;IACxB,gBAAgB,EAAE,MAAM,CAAA;IACxB,SAAS,EAAE,OAAO,CAAA;IAClB,WAAW,EAAE,OAAO,CAAA;IACpB,YAAY,EAAE,MAAM,CAAA;IACpB,aAAa,EAAE,MAAM,CAAA;IACrB,aAAa,EAAE,MAAM,CAAA;IACrB,aAAa,EAAE,MAAM,CAAA;IACrB,OAAO,EAAE,6BAA6B,CAAA;IAEtC,wBAAwB,CAAC,EAAE,KAAK,CAAC,yBAAyB,CAAC,CAAA;CAC5D,CAAA;AAED,MAAM,MAAM,oBAAoB,GAAG;IACjC,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC,CAAA;CACtB,CAAA;AAED,MAAM,MAAM,qBAAqB,GAAG;IAClC,SAAS,EAAE,KAAK,CAAC,MAAM,CAAC,CAAA;CACzB,CAAA;AAED,MAAM,MAAM,kBAAkB,GAAG;IAC/B,SAAS,EAAE,MAAM,CAAA;IACjB,GAAG,EAAE,OAAO,CAAA;IACZ,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,MAAM,CAAA;CACd,CAAA;AAED,MAAM,MAAM,uBAAuB,GAAG;IACpC,aAAa,EAAE,MAAM,CAAA;IACrB,MAAM,EAAE,MAAM,CAAA;CACf,CAAA;AAED,MAAM,MAAM,sBAAsB,GAAG;IACnC,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,MAAM,CAAA;CAChB,CAAA;AAED,MAAM,MAAM,iBAAiB,GAAG;IAC9B,KAAK,EAAE,OAAO,CAAA;IACd,IAAI,EAAE,OAAO,CAAA;IACb,OAAO,EAAE,OAAO,CAAA;CACjB,CAAA;AAED,MAAM,WAAW,IAAK,SAAQ,WAAW;IACvC,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAE7C,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAA;IACzD,WAAW,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,mBAAmB,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAA;IAExF,WAAW,CACT,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,uBAAuB,CAAC,CAAA;IACnC,WAAW,CACT,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,EAChB,IAAI,EAAE,MAAM,GACX,OAAO,CAAC,MAAM,CAAC,CAAA;IAClB,UAAU,CACR,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,sBAAsB,GAC7B,OAAO,CAAC,sBAAsB,CAAC,CAAA;IAClC,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAChD,aAAa,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAAA;IAC7E,YAAY,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,oBAAoB,CAAA;IACnE,cAAc,IAAK,OAAO,CAAC,iBAAiB,CAAC,CAAA;IAC7C,gBAAgB,CACd,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,sBAAsB,EAAE,EAClC,YAAY,CAAC,EAAE,MAAM,GACpB,OAAO,CAAC,MAAM,CAAC,CAAA;IAClB,UAAU,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAA;IAChE,SAAS,CACP,SAAS,EAAE,MAAM,EACjB,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC,CAAA;IACjC,KAAK,CACH,SAAS,EAAE,MAAM,EACjB,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,GACT,OAAO,CAAC,MAAM,CAAC,CAAA;IAElB,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAEhD,kBAAkB,IAAI,OAAO,CAAC,IAAI,CAAC,CAAA;CACpC;;AAED,wBAA+D"}
@@ -1,4 +1,4 @@
1
- import type { NativeContextParams, NativeLlamaContext, NativeCompletionParams, NativeCompletionTokenProb, NativeCompletionResult, NativeTokenizeResult, NativeEmbeddingResult, NativeSessionLoadResult, NativeCPUFeatures } from './NativeRNLlama';
1
+ import type { NativeContextParams, NativeLlamaContext, NativeCompletionParams, NativeCompletionTokenProb, NativeCompletionResult, NativeTokenizeResult, NativeEmbeddingResult, NativeSessionLoadResult, NativeCPUFeatures, NativeEmbeddingParams } from './NativeRNLlama';
2
2
  import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar';
3
3
  import type { RNLlamaOAICompatibleMessage } from './chat';
4
4
  export { SchemaGrammarConverter, convertJsonSchemaToGrammar };
@@ -6,10 +6,16 @@ export type TokenData = {
6
6
  token: string;
7
7
  completion_probabilities?: Array<NativeCompletionTokenProb>;
8
8
  };
9
- export type ContextParams = NativeContextParams;
9
+ export type ContextParams = Omit<NativeContextParams, 'cache_type_k' | 'cache_type_v' | 'pooling_type'> & {
10
+ cache_type_k?: 'f16' | 'f32' | 'q8_0' | 'q4_0' | 'q4_1' | 'iq4_nl' | 'q5_0' | 'q5_1';
11
+ cache_type_v?: 'f16' | 'f32' | 'q8_0' | 'q4_0' | 'q4_1' | 'iq4_nl' | 'q5_0' | 'q5_1';
12
+ pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank';
13
+ };
14
+ export type EmbeddingParams = NativeEmbeddingParams;
10
15
  export type CompletionParams = Omit<NativeCompletionParams, 'emit_partial_completion' | 'prompt'> & {
11
16
  prompt?: string;
12
17
  messages?: RNLlamaOAICompatibleMessage[];
18
+ chatTemplate?: string;
13
19
  };
14
20
  export type BenchResult = {
15
21
  modelDesc: string;
@@ -38,18 +44,19 @@ export declare class LlamaContext {
38
44
  saveSession(filepath: string, options?: {
39
45
  tokenSize: number;
40
46
  }): Promise<number>;
41
- getFormattedChat(messages: RNLlamaOAICompatibleMessage[]): Promise<string>;
47
+ getFormattedChat(messages: RNLlamaOAICompatibleMessage[], template?: string): Promise<string>;
42
48
  completion(params: CompletionParams, callback?: (data: TokenData) => void): Promise<NativeCompletionResult>;
43
49
  stopCompletion(): Promise<void>;
44
50
  tokenizeAsync(text: string): Promise<NativeTokenizeResult>;
45
51
  tokenizeSync(text: string): NativeTokenizeResult;
46
52
  detokenize(tokens: number[]): Promise<string>;
47
- embedding(text: string): Promise<NativeEmbeddingResult>;
53
+ embedding(text: string, params?: EmbeddingParams): Promise<NativeEmbeddingResult>;
48
54
  bench(pp: number, tg: number, pl: number, nr: number): Promise<BenchResult>;
49
55
  release(): Promise<void>;
50
56
  }
51
57
  export declare function getCpuFeatures(): Promise<NativeCPUFeatures>;
52
58
  export declare function setContextLimit(limit: number): Promise<void>;
53
- export declare function initLlama({ model, is_model_asset: isModelAsset, ...rest }: ContextParams, progressCallback?: (progress: number) => void): Promise<LlamaContext>;
59
+ export declare function loadLlamaModelInfo(model: string): Promise<Object>;
60
+ export declare function initLlama({ model, is_model_asset: isModelAsset, pooling_type: poolingType, lora, ...rest }: ContextParams, onProgress?: (progress: number) => void): Promise<LlamaContext>;
54
61
  export declare function releaseAllLlama(): Promise<void>;
55
62
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,mBAAmB,EACnB,kBAAkB,EAClB,sBAAsB,EACtB,yBAAyB,EACzB,sBAAsB,EACtB,oBAAoB,EACpB,qBAAqB,EACrB,uBAAuB,EACvB,iBAAiB,EAClB,MAAM,iBAAiB,CAAA;AACxB,OAAO,EAAE,sBAAsB,EAAE,0BAA0B,EAAE,MAAM,WAAW,CAAA;AAC9E,OAAO,KAAK,EAAE,2BAA2B,EAAE,MAAM,QAAQ,CAAA;AAGzD,OAAO,EAAE,sBAAsB,EAAE,0BAA0B,EAAE,CAAA;AAe7D,MAAM,MAAM,SAAS,GAAG;IACtB,KAAK,EAAE,MAAM,CAAA;IACb,wBAAwB,CAAC,EAAE,KAAK,CAAC,yBAAyB,CAAC,CAAA;CAC5D,CAAA;AAOD,MAAM,MAAM,aAAa,GAAG,mBAAmB,CAAA;AAE/C,MAAM,MAAM,gBAAgB,GAAG,IAAI,CACjC,sBAAsB,EACtB,yBAAyB,GAAG,QAAQ,CACrC,GAAG;IACF,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,QAAQ,CAAC,EAAE,2BAA2B,EAAE,CAAA;CACzC,CAAA;AAED,MAAM,MAAM,WAAW,GAAG;IACxB,SAAS,EAAE,MAAM,CAAA;IACjB,SAAS,EAAE,MAAM,CAAA;IACjB,YAAY,EAAE,MAAM,CAAA;IACpB,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;CACd,CAAA;AAED,qBAAa,YAAY;IACvB,EAAE,EAAE,MAAM,CAAA;IAEV,GAAG,EAAE,OAAO,CAAQ;IAEpB,WAAW,EAAE,MAAM,CAAK;IAExB,KAAK,EAAE;QACL,uBAAuB,CAAC,EAAE,OAAO,CAAA;KAClC,CAAK;gBAEM,EAAE,SAAS,EAAE,GAAG,EAAE,WAAW,EAAE,KAAK,EAAE,EAAE,kBAAkB;IAOtE;;OAEG;IACG,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,uBAAuB,CAAC;IAMrE;;OAEG;IACG,WAAW,CACf,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE;QAAE,SAAS,EAAE,MAAM,CAAA;KAAE,GAC9B,OAAO,CAAC,MAAM,CAAC;IAIZ,gBAAgB,CACpB,QAAQ,EAAE,2BAA2B,EAAE,GACtC,OAAO,CAAC,MAAM,CAAC;IASZ,UAAU,CACd,MAAM,EAAE,gBAAgB,EACxB,QAAQ,CAAC,EAAE,CAAC,IAAI,EAAE,SAAS,KAAK,IAAI,GACnC,OAAO,CAAC,sBAAsB,CAAC;IAkClC,cAAc,IAAI,OAAO,CAAC,IAAI,CAAC;IAI/B,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC;IAI1D,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,oBAAoB;IAIhD,UAAU,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAI7C,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,qBAAqB,CAAC;IAIjD,KAAK,CACT,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,GACT,OAAO,CAAC,WAAW,CAAC;IAejB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B;AAED,wBAAsB,cAAc,IAAK,OAAO,CAAC,iBAAiB,CAAC,CAElE;AAED,wBAAsB,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAElE;AAED,wBAAsB,SAAS,CAAC,EAC5B,KAAK,EACL,cAAc,EAAE,YAAY,EAC5B,GAAG,IAAI,EACR,EAAE,aAAa,EAChB,gBAAgB,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,GAC5C,OAAO,CAAC,YAAY,CAAC,CAwBvB;AAED,wBAAsB,eAAe,IAAI,OAAO,CAAC,IAAI,CAAC,CAErD"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,mBAAmB,EACnB,kBAAkB,EAClB,sBAAsB,EACtB,yBAAyB,EACzB,sBAAsB,EACtB,oBAAoB,EACpB,qBAAqB,EACrB,uBAAuB,EACvB,iBAAiB,EACjB,qBAAqB,EACtB,MAAM,iBAAiB,CAAA;AACxB,OAAO,EAAE,sBAAsB,EAAE,0BAA0B,EAAE,MAAM,WAAW,CAAA;AAC9E,OAAO,KAAK,EAAE,2BAA2B,EAAE,MAAM,QAAQ,CAAA;AAGzD,OAAO,EAAE,sBAAsB,EAAE,0BAA0B,EAAE,CAAA;AAc7D,MAAM,MAAM,SAAS,GAAG;IACtB,KAAK,EAAE,MAAM,CAAA;IACb,wBAAwB,CAAC,EAAE,KAAK,CAAC,yBAAyB,CAAC,CAAA;CAC5D,CAAA;AAOD,MAAM,MAAM,aAAa,GAAG,IAAI,CAC9B,mBAAmB,EACnB,cAAc,GAAG,cAAc,GAAI,cAAc,CAClD,GAAG;IACF,YAAY,CAAC,EAAE,KAAK,GAAG,KAAK,GAAG,MAAM,GAAG,MAAM,GAAG,MAAM,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,CAAA;IACpF,YAAY,CAAC,EAAE,KAAK,GAAG,KAAK,GAAG,MAAM,GAAG,MAAM,GAAG,MAAM,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,CAAA;IACpF,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,KAAK,GAAG,MAAM,GAAG,MAAM,CAAA;CACzD,CAAA;AAED,MAAM,MAAM,eAAe,GAAG,qBAAqB,CAAA;AAEnD,MAAM,MAAM,gBAAgB,GAAG,IAAI,CACjC,sBAAsB,EACtB,yBAAyB,GAAG,QAAQ,CACrC,GAAG;IACF,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,QAAQ,CAAC,EAAE,2BAA2B,EAAE,CAAA;IACxC,YAAY,CAAC,EAAE,MAAM,CAAA;CACtB,CAAA;AAED,MAAM,MAAM,WAAW,GAAG;IACxB,SAAS,EAAE,MAAM,CAAA;IACjB,SAAS,EAAE,MAAM,CAAA;IACjB,YAAY,EAAE,MAAM,CAAA;IACpB,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;CACd,CAAA;AAED,qBAAa,YAAY;IACvB,EAAE,EAAE,MAAM,CAAA;IAEV,GAAG,EAAE,OAAO,CAAQ;IAEpB,WAAW,EAAE,MAAM,CAAK;IAExB,KAAK,EAAE;QACL,uBAAuB,CAAC,EAAE,OAAO,CAAA;KAClC,CAAK;gBAEM,EAAE,SAAS,EAAE,GAAG,EAAE,WAAW,EAAE,KAAK,EAAE,EAAE,kBAAkB;IAOtE;;OAEG;IACG,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,uBAAuB,CAAC;IAMrE;;OAEG;IACG,WAAW,CACf,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE;QAAE,SAAS,EAAE,MAAM,CAAA;KAAE,GAC9B,OAAO,CAAC,MAAM,CAAC;IAIZ,gBAAgB,CACpB,QAAQ,EAAE,2BAA2B,EAAE,EACvC,QAAQ,CAAC,EAAE,MAAM,GAChB,OAAO,CAAC,MAAM,CAAC;IAOZ,UAAU,CACd,MAAM,EAAE,gBAAgB,EACxB,QAAQ,CAAC,EAAE,CAAC,IAAI,EAAE,SAAS,KAAK,IAAI,GACnC,OAAO,CAAC,sBAAsB,CAAC;IAkClC,cAAc,IAAI,OAAO,CAAC,IAAI,CAAC;IAI/B,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC;IAI1D,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,oBAAoB;IAIhD,UAAU,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAI7C,SAAS,CACP,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,eAAe,GACvB,OAAO,CAAC,qBAAqB,CAAC;IAI3B,KAAK,CACT,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,EACV,EAAE,EAAE,MAAM,GACT,OAAO,CAAC,WAAW,CAAC;IAejB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B;AAED,wBAAsB,cAAc,IAAK,OAAO,CAAC,iBAAiB,CAAC,CAElE;AAED,wBAAsB,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAElE;AAYD,wBAAsB,kBAAkB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAIvE;AAWD,wBAAsB,SAAS,CAC7B,EACE,KAAK,EACL,cAAc,EAAE,YAAY,EAC5B,YAAY,EAAE,WAAW,EACzB,IAAI,EACJ,GAAG,IAAI,EACR,EAAE,aAAa,EAChB,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,GACtC,OAAO,CAAC,YAAY,CAAC,CAuCvB;AAED,wBAAsB,eAAe,IAAI,OAAO,CAAC,IAAI,CAAC,CAErD"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cui-llama.rn",
3
- "version": "1.2.6",
3
+ "version": "1.3.0",
4
4
  "description": "Fork of llama.rn for ChatterUI",
5
5
  "main": "lib/commonjs/index",
6
6
  "module": "lib/module/index",
@@ -1,11 +1,14 @@
1
1
  import type { TurboModule } from 'react-native'
2
2
  import { TurboModuleRegistry } from 'react-native'
3
3
 
4
+ export type NativeEmbeddingParams = {
5
+ embd_normalize?: number
6
+ }
7
+
4
8
  export type NativeContextParams = {
5
9
  model: string
6
10
  is_model_asset?: boolean
7
-
8
- embedding?: boolean
11
+ use_progress_callback?: boolean
9
12
 
10
13
  n_ctx?: number
11
14
  n_batch?: number
@@ -13,6 +16,20 @@ export type NativeContextParams = {
13
16
  n_threads?: number
14
17
  n_gpu_layers?: number
15
18
 
19
+ /**
20
+ * Enable flash attention, only recommended in GPU device (Experimental in llama.cpp)
21
+ */
22
+ flash_attn?: boolean
23
+
24
+ /**
25
+ * KV cache data type for the K (Experimental in llama.cpp)
26
+ */
27
+ cache_type_k?: string
28
+ /**
29
+ * KV cache data type for the V (Experimental in llama.cpp)
30
+ */
31
+ cache_type_v?: string
32
+
16
33
  use_mlock?: boolean
17
34
  use_mmap?: boolean
18
35
  vocab_only?: boolean
@@ -22,6 +39,12 @@ export type NativeContextParams = {
22
39
 
23
40
  rope_freq_base?: number
24
41
  rope_freq_scale?: number
42
+
43
+ pooling_type?: number
44
+
45
+ // Embedding params
46
+ embedding?: boolean
47
+ embd_normalize?: number
25
48
  }
26
49
 
27
50
  export type NativeCompletionParams = {
@@ -35,8 +58,8 @@ export type NativeCompletionParams = {
35
58
  top_k?: number
36
59
  top_p?: number
37
60
  min_p?: number
38
- xtc_t?: number
39
- xtc_p?: number
61
+ xtc_threshold?: number
62
+ xtc_probability?: number
40
63
  typical_p?: number
41
64
  temperature?: number // -> temp
42
65
  penalty_last_n?: number
@@ -49,6 +72,12 @@ export type NativeCompletionParams = {
49
72
  penalize_nl?: boolean
50
73
  seed?: number
51
74
 
75
+ dry_multiplier?: number
76
+ dry_base?: number
77
+ dry_allowed_length?: number
78
+ dry_penalty_last_n?: number
79
+ dry_sequence_breakers?: Array<string>
80
+
52
81
  ignore_eos?: boolean
53
82
  logit_bias?: Array<Array<number>>
54
83
 
@@ -125,7 +154,9 @@ export type NativeCPUFeatures = {
125
154
 
126
155
  export interface Spec extends TurboModule {
127
156
  setContextLimit(limit: number): Promise<void>
128
- initContext(params: NativeContextParams): Promise<NativeLlamaContext>
157
+
158
+ modelInfo(path: string, skip?: string[]): Promise<Object>
159
+ initContext(contextId: number, params: NativeContextParams): Promise<NativeLlamaContext>
129
160
 
130
161
  loadSession(
131
162
  contextId: number,
@@ -150,7 +181,11 @@ export interface Spec extends TurboModule {
150
181
  chatTemplate?: string,
151
182
  ): Promise<string>
152
183
  detokenize(contextId: number, tokens: number[]): Promise<string>
153
- embedding(contextId: number, text: string): Promise<NativeEmbeddingResult>
184
+ embedding(
185
+ contextId: number,
186
+ text: string,
187
+ params: NativeEmbeddingParams,
188
+ ): Promise<NativeEmbeddingResult>
154
189
  bench(
155
190
  contextId: number,
156
191
  pp: number,
package/src/index.ts CHANGED
@@ -11,6 +11,7 @@ import type {
11
11
  NativeEmbeddingResult,
12
12
  NativeSessionLoadResult,
13
13
  NativeCPUFeatures,
14
+ NativeEmbeddingParams,
14
15
  } from './NativeRNLlama'
15
16
  import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar'
16
17
  import type { RNLlamaOAICompatibleMessage } from './chat'
@@ -18,10 +19,9 @@ import { formatChat } from './chat'
18
19
 
19
20
  export { SchemaGrammarConverter, convertJsonSchemaToGrammar }
20
21
 
22
+ const EVENT_ON_INIT_CONTEXT_PROGRESS = '@RNLlama_onInitContextProgress'
21
23
  const EVENT_ON_TOKEN = '@RNLlama_onToken'
22
24
 
23
- const EVENT_ON_MODEL_PROGRESS = '@RNLlama_onModelProgress'
24
-
25
25
  let EventEmitter: NativeEventEmitter | DeviceEventEmitterStatic
26
26
  if (Platform.OS === 'ios') {
27
27
  // @ts-ignore
@@ -41,7 +41,16 @@ type TokenNativeEvent = {
41
41
  tokenResult: TokenData
42
42
  }
43
43
 
44
- export type ContextParams = NativeContextParams
44
+ export type ContextParams = Omit<
45
+ NativeContextParams,
46
+ 'cache_type_k' | 'cache_type_v' | 'pooling_type'
47
+ > & {
48
+ cache_type_k?: 'f16' | 'f32' | 'q8_0' | 'q4_0' | 'q4_1' | 'iq4_nl' | 'q5_0' | 'q5_1'
49
+ cache_type_v?: 'f16' | 'f32' | 'q8_0' | 'q4_0' | 'q4_1' | 'iq4_nl' | 'q5_0' | 'q5_1'
50
+ pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank'
51
+ }
52
+
53
+ export type EmbeddingParams = NativeEmbeddingParams
45
54
 
46
55
  export type CompletionParams = Omit<
47
56
  NativeCompletionParams,
@@ -49,6 +58,7 @@ export type CompletionParams = Omit<
49
58
  > & {
50
59
  prompt?: string
51
60
  messages?: RNLlamaOAICompatibleMessage[]
61
+ chatTemplate?: string
52
62
  }
53
63
 
54
64
  export type BenchResult = {
@@ -100,23 +110,22 @@ export class LlamaContext {
100
110
 
101
111
  async getFormattedChat(
102
112
  messages: RNLlamaOAICompatibleMessage[],
113
+ template?: string,
103
114
  ): Promise<string> {
104
115
  const chat = formatChat(messages)
105
- return RNLlama.getFormattedChat(
106
- this.id,
107
- chat,
108
- this.model?.isChatTemplateSupported ? undefined : 'chatml',
109
- )
116
+ let tmpl = this.model?.isChatTemplateSupported ? undefined : 'chatml'
117
+ if (template) tmpl = template // Force replace if provided
118
+ return RNLlama.getFormattedChat(this.id, chat, tmpl)
110
119
  }
111
120
 
112
121
  async completion(
113
122
  params: CompletionParams,
114
123
  callback?: (data: TokenData) => void,
115
124
  ): Promise<NativeCompletionResult> {
116
-
117
125
  let finalPrompt = params.prompt
118
- if (params.messages) { // messages always win
119
- finalPrompt = await this.getFormattedChat(params.messages)
126
+ if (params.messages) {
127
+ // messages always win
128
+ finalPrompt = await this.getFormattedChat(params.messages, params.chatTemplate)
120
129
  }
121
130
 
122
131
  let tokenListener: any =
@@ -162,8 +171,11 @@ export class LlamaContext {
162
171
  return RNLlama.detokenize(this.id, tokens)
163
172
  }
164
173
 
165
- embedding(text: string): Promise<NativeEmbeddingResult> {
166
- return RNLlama.embedding(this.id, text)
174
+ embedding(
175
+ text: string,
176
+ params?: EmbeddingParams,
177
+ ): Promise<NativeEmbeddingResult> {
178
+ return RNLlama.embedding(this.id, text, params || {})
167
179
  }
168
180
 
169
181
  async bench(
@@ -199,35 +211,78 @@ export async function setContextLimit(limit: number): Promise<void> {
199
211
  return RNLlama.setContextLimit(limit)
200
212
  }
201
213
 
202
- export async function initLlama({
214
+ let contextIdCounter = 0
215
+ const contextIdRandom = () =>
216
+ process.env.NODE_ENV === 'test' ? 0 : Math.floor(Math.random() * 100000)
217
+
218
+ const modelInfoSkip = [
219
+ // Large fields
220
+ 'tokenizer.ggml.tokens',
221
+ 'tokenizer.ggml.token_type',
222
+ 'tokenizer.ggml.merges',
223
+ ]
224
+ export async function loadLlamaModelInfo(model: string): Promise<Object> {
225
+ let path = model
226
+ if (path.startsWith('file://')) path = path.slice(7)
227
+ return RNLlama.modelInfo(path, modelInfoSkip)
228
+ }
229
+
230
+ const poolTypeMap = {
231
+ // -1 is unspecified as undefined
232
+ none: 0,
233
+ mean: 1,
234
+ cls: 2,
235
+ last: 3,
236
+ rank: 4,
237
+ }
238
+
239
+ export async function initLlama(
240
+ {
203
241
  model,
204
242
  is_model_asset: isModelAsset,
243
+ pooling_type: poolingType,
244
+ lora,
205
245
  ...rest
206
- }: ContextParams,
207
- progressCallback?: (progress: number) => void
246
+ }: ContextParams,
247
+ onProgress?: (progress: number) => void,
208
248
  ): Promise<LlamaContext> {
209
249
  let path = model
210
250
  if (path.startsWith('file://')) path = path.slice(7)
211
-
212
- const modelProgressListener = EventEmitter.addListener(EVENT_ON_MODEL_PROGRESS, (event) => {
213
- if(event.progress && progressCallback)
214
- progressCallback(event.progress)
215
- if(event.progress === 100) {
216
- modelProgressListener.remove()
217
- }
218
- })
219
251
 
252
+ let loraPath = lora
253
+ if (loraPath?.startsWith('file://')) loraPath = loraPath.slice(7)
254
+
255
+ const contextId = contextIdCounter + contextIdRandom()
256
+ contextIdCounter += 1
257
+
258
+ let removeProgressListener: any = null
259
+ if (onProgress) {
260
+ removeProgressListener = EventEmitter.addListener(
261
+ EVENT_ON_INIT_CONTEXT_PROGRESS,
262
+ (evt: { contextId: number; progress: number }) => {
263
+ if (evt.contextId !== contextId) return
264
+ onProgress(evt.progress)
265
+ },
266
+ )
267
+ }
268
+
269
+ const poolType = poolTypeMap[poolingType as keyof typeof poolTypeMap]
220
270
  const {
221
- contextId,
222
271
  gpu,
223
272
  reasonNoGPU,
224
273
  model: modelDetails,
225
- } = await RNLlama.initContext({
274
+ } = await RNLlama.initContext(contextId, {
226
275
  model: path,
227
276
  is_model_asset: !!isModelAsset,
277
+ use_progress_callback: !!onProgress,
278
+ pooling_type: poolType,
279
+ lora: loraPath,
228
280
  ...rest,
281
+ }).catch((err: any) => {
282
+ removeProgressListener?.remove()
283
+ throw err
229
284
  })
230
-
285
+ removeProgressListener?.remove()
231
286
  return new LlamaContext({ contextId, gpu, reasonNoGPU, model: modelDetails })
232
287
  }
233
288