cui-llama.rn 1.2.6 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -2
- package/android/src/main/CMakeLists.txt +20 -5
- package/android/src/main/java/com/rnllama/LlamaContext.java +115 -27
- package/android/src/main/java/com/rnllama/RNLlama.java +40 -7
- package/android/src/main/jni.cpp +222 -34
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +9 -4
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +9 -4
- package/cpp/common.cpp +1682 -2114
- package/cpp/common.h +600 -613
- package/cpp/ggml-aarch64.c +129 -3478
- package/cpp/ggml-aarch64.h +19 -39
- package/cpp/ggml-alloc.c +1040 -1040
- package/cpp/ggml-alloc.h +76 -76
- package/cpp/ggml-backend-impl.h +216 -216
- package/cpp/ggml-backend-reg.cpp +195 -0
- package/cpp/ggml-backend.cpp +1997 -2661
- package/cpp/ggml-backend.h +328 -314
- package/cpp/ggml-common.h +1853 -1853
- package/cpp/ggml-cpp.h +38 -38
- package/cpp/ggml-cpu-aarch64.c +3560 -0
- package/cpp/ggml-cpu-aarch64.h +30 -0
- package/cpp/ggml-cpu-impl.h +371 -614
- package/cpp/ggml-cpu-quants.c +10822 -0
- package/cpp/ggml-cpu-quants.h +63 -0
- package/cpp/ggml-cpu.c +13975 -13720
- package/cpp/ggml-cpu.cpp +663 -0
- package/cpp/ggml-cpu.h +177 -150
- package/cpp/ggml-impl.h +550 -296
- package/cpp/ggml-metal.h +66 -66
- package/cpp/ggml-metal.m +4294 -3933
- package/cpp/ggml-quants.c +5247 -15739
- package/cpp/ggml-quants.h +100 -147
- package/cpp/ggml-threading.cpp +12 -0
- package/cpp/ggml-threading.h +12 -0
- package/cpp/ggml.c +8180 -8390
- package/cpp/ggml.h +2411 -2441
- package/cpp/llama-grammar.cpp +1138 -1138
- package/cpp/llama-grammar.h +144 -144
- package/cpp/llama-impl.h +181 -181
- package/cpp/llama-sampling.cpp +2348 -2345
- package/cpp/llama-sampling.h +48 -48
- package/cpp/llama-vocab.cpp +1984 -1984
- package/cpp/llama-vocab.h +170 -170
- package/cpp/llama.cpp +22132 -22046
- package/cpp/llama.h +1253 -1255
- package/cpp/log.cpp +401 -401
- package/cpp/log.h +121 -121
- package/cpp/rn-llama.hpp +83 -19
- package/cpp/sampling.cpp +466 -466
- package/cpp/sgemm.cpp +1884 -1276
- package/ios/RNLlama.mm +43 -20
- package/ios/RNLlamaContext.h +9 -3
- package/ios/RNLlamaContext.mm +133 -33
- package/jest/mock.js +0 -1
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/index.js +52 -15
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/index.js +51 -15
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +29 -5
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +12 -5
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +41 -6
- package/src/index.ts +82 -27
- package/cpp/json-schema-to-grammar.cpp +0 -1045
- package/cpp/json-schema-to-grammar.h +0 -8
- package/cpp/json.hpp +0 -24766
package/lib/module/index.js
CHANGED
@@ -3,8 +3,8 @@ import RNLlama from './NativeRNLlama';
 import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar';
 import { formatChat } from './chat';
 export { SchemaGrammarConverter, convertJsonSchemaToGrammar };
+const EVENT_ON_INIT_CONTEXT_PROGRESS = '@RNLlama_onInitContextProgress';
 const EVENT_ON_TOKEN = '@RNLlama_onToken';
-const EVENT_ON_MODEL_PROGRESS = '@RNLlama_onModelProgress';
 let EventEmitter;
 if (Platform.OS === 'ios') {
   // @ts-ignore
@@ -45,16 +45,18 @@ export class LlamaContext {
   async saveSession(filepath, options) {
     return RNLlama.saveSession(this.id, filepath, (options === null || options === void 0 ? void 0 : options.tokenSize) || -1);
   }
-  async getFormattedChat(messages) {
+  async getFormattedChat(messages, template) {
     var _this$model;
     const chat = formatChat(messages);
-    return RNLlama.getFormattedChat(this.id, chat, (_this$model = this.model) !== null && _this$model !== void 0 && _this$model.isChatTemplateSupported ? undefined : 'chatml');
+    let tmpl = (_this$model = this.model) !== null && _this$model !== void 0 && _this$model.isChatTemplateSupported ? undefined : 'chatml';
+    if (template) tmpl = template; // Force replace if provided
+    return RNLlama.getFormattedChat(this.id, chat, tmpl);
   }
   async completion(params, callback) {
     let finalPrompt = params.prompt;
     if (params.messages) {
       // messages always win
-      finalPrompt = await this.getFormattedChat(params.messages);
+      finalPrompt = await this.getFormattedChat(params.messages, params.chatTemplate);
     }
     let tokenListener = callback && EventEmitter.addListener(EVENT_ON_TOKEN, evt => {
       const {
@@ -94,8 +96,8 @@ export class LlamaContext {
   detokenize(tokens) {
     return RNLlama.detokenize(this.id, tokens);
   }
-  embedding(text) {
-    return RNLlama.embedding(this.id, text);
+  embedding(text, params) {
+    return RNLlama.embedding(this.id, text, params || {});
   }
   async bench(pp, tg, pl, nr) {
     const result = await RNLlama.bench(this.id, pp, tg, pl, nr);
@@ -120,30 +122,64 @@ export async function getCpuFeatures() {
 export async function setContextLimit(limit) {
   return RNLlama.setContextLimit(limit);
 }
-export async function initLlama(_ref2, progressCallback) {
+let contextIdCounter = 0;
+const contextIdRandom = () => process.env.NODE_ENV === 'test' ? 0 : Math.floor(Math.random() * 100000);
+const modelInfoSkip = [
+// Large fields
+'tokenizer.ggml.tokens', 'tokenizer.ggml.token_type', 'tokenizer.ggml.merges'];
+export async function loadLlamaModelInfo(model) {
+  let path = model;
+  if (path.startsWith('file://')) path = path.slice(7);
+  return RNLlama.modelInfo(path, modelInfoSkip);
+}
+const poolTypeMap = {
+  // -1 is unspecified as undefined
+  none: 0,
+  mean: 1,
+  cls: 2,
+  last: 3,
+  rank: 4
+};
+export async function initLlama(_ref2, onProgress) {
+  var _loraPath, _removeProgressListen2;
   let {
     model,
     is_model_asset: isModelAsset,
+    pooling_type: poolingType,
+    lora,
     ...rest
   } = _ref2;
   let path = model;
   if (path.startsWith('file://')) path = path.slice(7);
-  const modelProgressListener = EventEmitter.addListener(EVENT_ON_MODEL_PROGRESS, event => {
-    if (event.progress && progressCallback) progressCallback(event.progress);
-    if (event.progress === 100) {
-      modelProgressListener.remove();
-    }
-  });
+  let loraPath = lora;
+  if ((_loraPath = loraPath) !== null && _loraPath !== void 0 && _loraPath.startsWith('file://')) loraPath = loraPath.slice(7);
+  const contextId = contextIdCounter + contextIdRandom();
+  contextIdCounter += 1;
+  let removeProgressListener = null;
+  if (onProgress) {
+    removeProgressListener = EventEmitter.addListener(EVENT_ON_INIT_CONTEXT_PROGRESS, evt => {
+      if (evt.contextId !== contextId) return;
+      onProgress(evt.progress);
+    });
+  }
+  const poolType = poolTypeMap[poolingType];
  const {
-    contextId,
     gpu,
     reasonNoGPU,
     model: modelDetails
-  } = await RNLlama.initContext({
+  } = await RNLlama.initContext(contextId, {
     model: path,
     is_model_asset: !!isModelAsset,
+    use_progress_callback: !!onProgress,
+    pooling_type: poolType,
+    lora: loraPath,
     ...rest
+  }).catch(err => {
+    var _removeProgressListen;
+    (_removeProgressListen = removeProgressListener) === null || _removeProgressListen === void 0 ? void 0 : _removeProgressListen.remove();
+    throw err;
   });
+  (_removeProgressListen2 = removeProgressListener) === null || _removeProgressListen2 === void 0 ? void 0 : _removeProgressListen2.remove();
  return new LlamaContext({
     contextId,
     gpu,
package/lib/module/index.js.map
CHANGED
package/lib/typescript/NativeRNLlama.d.ts
CHANGED
@@ -1,12 +1,27 @@
 import type { TurboModule } from 'react-native';
+export type NativeEmbeddingParams = {
+    embd_normalize?: number;
+};
 export type NativeContextParams = {
     model: string;
     is_model_asset?: boolean;
-    embedding?: boolean;
+    use_progress_callback?: boolean;
     n_ctx?: number;
     n_batch?: number;
     n_threads?: number;
     n_gpu_layers?: number;
+    /**
+     * Enable flash attention, only recommended in GPU device (Experimental in llama.cpp)
+     */
+    flash_attn?: boolean;
+    /**
+     * KV cache data type for the K (Experimental in llama.cpp)
+     */
+    cache_type_k?: string;
+    /**
+     * KV cache data type for the V (Experimental in llama.cpp)
+     */
+    cache_type_v?: string;
     use_mlock?: boolean;
     use_mmap?: boolean;
     vocab_only?: boolean;
@@ -14,6 +29,9 @@ export type NativeContextParams = {
     lora_scaled?: number;
     rope_freq_base?: number;
     rope_freq_scale?: number;
+    pooling_type?: number;
+    embedding?: boolean;
+    embd_normalize?: number;
 };
 export type NativeCompletionParams = {
     prompt: string;
@@ -25,8 +43,8 @@ export type NativeCompletionParams = {
     top_k?: number;
     top_p?: number;
     min_p?: number;
-
-
+    xtc_threshold?: number;
+    xtc_probability?: number;
     typical_p?: number;
     temperature?: number;
     penalty_last_n?: number;
@@ -38,6 +56,11 @@ export type NativeCompletionParams = {
     mirostat_eta?: number;
     penalize_nl?: boolean;
     seed?: number;
+    dry_multiplier?: number;
+    dry_base?: number;
+    dry_allowed_length?: number;
+    dry_penalty_last_n?: number;
+    dry_sequence_breakers?: Array<string>;
     ignore_eos?: boolean;
     logit_bias?: Array<Array<number>>;
     emit_partial_completion: boolean;
@@ -100,7 +123,8 @@ export type NativeCPUFeatures = {
 };
 export interface Spec extends TurboModule {
     setContextLimit(limit: number): Promise<void>;
-    initContext(params: NativeContextParams): Promise<NativeLlamaContext>;
+    modelInfo(path: string, skip?: string[]): Promise<Object>;
+    initContext(contextId: number, params: NativeContextParams): Promise<NativeLlamaContext>;
     loadSession(contextId: number, filepath: string): Promise<NativeSessionLoadResult>;
     saveSession(contextId: number, filepath: string, size: number): Promise<number>;
     completion(contextId: number, params: NativeCompletionParams): Promise<NativeCompletionResult>;
@@ -110,7 +134,7 @@ export interface Spec extends TurboModule {
     getCpuFeatures(): Promise<NativeCPUFeatures>;
     getFormattedChat(contextId: number, messages: NativeLlamaChatMessage[], chatTemplate?: string): Promise<string>;
     detokenize(contextId: number, tokens: number[]): Promise<string>;
-    embedding(contextId: number, text: string): Promise<NativeEmbeddingResult>;
+    embedding(contextId: number, text: string, params: NativeEmbeddingParams): Promise<NativeEmbeddingResult>;
     bench(contextId: number, pp: number, tg: number, pl: number, nr: number): Promise<string>;
     releaseContext(contextId: number): Promise<void>;
     releaseAllContexts(): Promise<void>;
package/lib/typescript/NativeRNLlama.d.ts.map
CHANGED
package/lib/typescript/index.d.ts
CHANGED
@@ -1,4 +1,4 @@
-import type { NativeContextParams, NativeLlamaContext, NativeCompletionParams, NativeCompletionTokenProb, NativeCompletionResult, NativeTokenizeResult, NativeEmbeddingResult, NativeSessionLoadResult, NativeCPUFeatures } from './NativeRNLlama';
+import type { NativeContextParams, NativeLlamaContext, NativeCompletionParams, NativeCompletionTokenProb, NativeCompletionResult, NativeTokenizeResult, NativeEmbeddingResult, NativeSessionLoadResult, NativeCPUFeatures, NativeEmbeddingParams } from './NativeRNLlama';
 import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar';
 import type { RNLlamaOAICompatibleMessage } from './chat';
 export { SchemaGrammarConverter, convertJsonSchemaToGrammar };
@@ -6,10 +6,16 @@ export type TokenData = {
     token: string;
     completion_probabilities?: Array<NativeCompletionTokenProb>;
 };
-export type ContextParams = NativeContextParams;
+export type ContextParams = Omit<NativeContextParams, 'cache_type_k' | 'cache_type_v' | 'pooling_type'> & {
+    cache_type_k?: 'f16' | 'f32' | 'q8_0' | 'q4_0' | 'q4_1' | 'iq4_nl' | 'q5_0' | 'q5_1';
+    cache_type_v?: 'f16' | 'f32' | 'q8_0' | 'q4_0' | 'q4_1' | 'iq4_nl' | 'q5_0' | 'q5_1';
+    pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank';
+};
+export type EmbeddingParams = NativeEmbeddingParams;
 export type CompletionParams = Omit<NativeCompletionParams, 'emit_partial_completion' | 'prompt'> & {
     prompt?: string;
     messages?: RNLlamaOAICompatibleMessage[];
+    chatTemplate?: string;
 };
 export type BenchResult = {
     modelDesc: string;
@@ -38,18 +44,19 @@ export declare class LlamaContext {
     saveSession(filepath: string, options?: {
         tokenSize: number;
     }): Promise<number>;
-    getFormattedChat(messages: RNLlamaOAICompatibleMessage[]): Promise<string>;
+    getFormattedChat(messages: RNLlamaOAICompatibleMessage[], template?: string): Promise<string>;
     completion(params: CompletionParams, callback?: (data: TokenData) => void): Promise<NativeCompletionResult>;
     stopCompletion(): Promise<void>;
     tokenizeAsync(text: string): Promise<NativeTokenizeResult>;
     tokenizeSync(text: string): NativeTokenizeResult;
     detokenize(tokens: number[]): Promise<string>;
-    embedding(text: string): Promise<NativeEmbeddingResult>;
+    embedding(text: string, params?: EmbeddingParams): Promise<NativeEmbeddingResult>;
     bench(pp: number, tg: number, pl: number, nr: number): Promise<BenchResult>;
     release(): Promise<void>;
 }
 export declare function getCpuFeatures(): Promise<NativeCPUFeatures>;
 export declare function setContextLimit(limit: number): Promise<void>;
-export declare function
+export declare function loadLlamaModelInfo(model: string): Promise<Object>;
+export declare function initLlama({ model, is_model_asset: isModelAsset, pooling_type: poolingType, lora, ...rest }: ContextParams, onProgress?: (progress: number) => void): Promise<LlamaContext>;
 export declare function releaseAllLlama(): Promise<void>;
 //# sourceMappingURL=index.d.ts.map
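
The public embedding API now threads EmbeddingParams through to the native module. A sketch, assuming a LlamaContext created with embedding enabled; the normalization value is illustrative:

// Assumes `context` is a LlamaContext initialized with embedding: true.
const { embedding } = await context.embedding('hello world', {
  embd_normalize: 2, // 2 = Euclidean (L2) normalization in llama.cpp
})
console.log(embedding.length) // dimensionality of the returned vector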
package/lib/typescript/index.d.ts.map
CHANGED
package/package.json
CHANGED
package/src/NativeRNLlama.ts
CHANGED
@@ -1,11 +1,14 @@
 import type { TurboModule } from 'react-native'
 import { TurboModuleRegistry } from 'react-native'
 
+export type NativeEmbeddingParams = {
+  embd_normalize?: number
+}
+
 export type NativeContextParams = {
   model: string
   is_model_asset?: boolean
-
-  embedding?: boolean
+  use_progress_callback?: boolean
 
   n_ctx?: number
   n_batch?: number
@@ -13,6 +16,20 @@ export type NativeContextParams = {
   n_threads?: number
   n_gpu_layers?: number
 
+  /**
+   * Enable flash attention, only recommended in GPU device (Experimental in llama.cpp)
+   */
+  flash_attn?: boolean
+
+  /**
+   * KV cache data type for the K (Experimental in llama.cpp)
+   */
+  cache_type_k?: string
+  /**
+   * KV cache data type for the V (Experimental in llama.cpp)
+   */
+  cache_type_v?: string
+
   use_mlock?: boolean
   use_mmap?: boolean
   vocab_only?: boolean
@@ -22,6 +39,12 @@ export type NativeContextParams = {
 
   rope_freq_base?: number
   rope_freq_scale?: number
+
+  pooling_type?: number
+
+  // Embedding params
+  embedding?: boolean
+  embd_normalize?: number
 }
 
 export type NativeCompletionParams = {
@@ -35,8 +58,8 @@ export type NativeCompletionParams = {
   top_k?: number
   top_p?: number
   min_p?: number
-
-
+  xtc_threshold?: number
+  xtc_probability?: number
   typical_p?: number
   temperature?: number // -> temp
   penalty_last_n?: number
@@ -49,6 +72,12 @@ export type NativeCompletionParams = {
   penalize_nl?: boolean
   seed?: number
 
+  dry_multiplier?: number
+  dry_base?: number
+  dry_allowed_length?: number
+  dry_penalty_last_n?: number
+  dry_sequence_breakers?: Array<string>
+
   ignore_eos?: boolean
   logit_bias?: Array<Array<number>>
 
@@ -125,7 +154,9 @@ export type NativeCPUFeatures = {
 
 export interface Spec extends TurboModule {
   setContextLimit(limit: number): Promise<void>
-  initContext(params: NativeContextParams): Promise<NativeLlamaContext>
+
+  modelInfo(path: string, skip?: string[]): Promise<Object>
+  initContext(contextId: number, params: NativeContextParams): Promise<NativeLlamaContext>
 
   loadSession(
     contextId: number,
@@ -150,7 +181,11 @@ export interface Spec extends TurboModule {
     chatTemplate?: string,
   ): Promise<string>
   detokenize(contextId: number, tokens: number[]): Promise<string>
-  embedding(contextId: number, text: string): Promise<NativeEmbeddingResult>
+  embedding(
+    contextId: number,
+    text: string,
+    params: NativeEmbeddingParams,
+  ): Promise<NativeEmbeddingResult>
  bench(
     contextId: number,
     pp: number,
package/src/index.ts
CHANGED
@@ -11,6 +11,7 @@ import type {
   NativeEmbeddingResult,
   NativeSessionLoadResult,
   NativeCPUFeatures,
+  NativeEmbeddingParams,
 } from './NativeRNLlama'
 import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar'
 import type { RNLlamaOAICompatibleMessage } from './chat'
@@ -18,10 +19,9 @@ import { formatChat } from './chat'
 
 export { SchemaGrammarConverter, convertJsonSchemaToGrammar }
 
+const EVENT_ON_INIT_CONTEXT_PROGRESS = '@RNLlama_onInitContextProgress'
 const EVENT_ON_TOKEN = '@RNLlama_onToken'
 
-const EVENT_ON_MODEL_PROGRESS = '@RNLlama_onModelProgress'
-
 let EventEmitter: NativeEventEmitter | DeviceEventEmitterStatic
 if (Platform.OS === 'ios') {
   // @ts-ignore
@@ -41,7 +41,16 @@ type TokenNativeEvent = {
   tokenResult: TokenData
 }
 
-export type ContextParams = NativeContextParams
+export type ContextParams = Omit<
+  NativeContextParams,
+  'cache_type_k' | 'cache_type_v' | 'pooling_type'
+> & {
+  cache_type_k?: 'f16' | 'f32' | 'q8_0' | 'q4_0' | 'q4_1' | 'iq4_nl' | 'q5_0' | 'q5_1'
+  cache_type_v?: 'f16' | 'f32' | 'q8_0' | 'q4_0' | 'q4_1' | 'iq4_nl' | 'q5_0' | 'q5_1'
+  pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank'
+}
+
+export type EmbeddingParams = NativeEmbeddingParams
 
 export type CompletionParams = Omit<
   NativeCompletionParams,
@@ -49,6 +58,7 @@ export type CompletionParams = Omit<
 > & {
   prompt?: string
   messages?: RNLlamaOAICompatibleMessage[]
+  chatTemplate?: string
 }
 
 export type BenchResult = {
@@ -100,23 +110,22 @@ export class LlamaContext {
 
   async getFormattedChat(
     messages: RNLlamaOAICompatibleMessage[],
+    template?: string,
   ): Promise<string> {
     const chat = formatChat(messages)
-    return RNLlama.getFormattedChat(
-      this.id,
-      chat,
-      this.model?.isChatTemplateSupported ? undefined : 'chatml',
-    )
+    let tmpl = this.model?.isChatTemplateSupported ? undefined : 'chatml'
+    if (template) tmpl = template // Force replace if provided
+    return RNLlama.getFormattedChat(this.id, chat, tmpl)
   }
 
   async completion(
     params: CompletionParams,
     callback?: (data: TokenData) => void,
   ): Promise<NativeCompletionResult> {
-
     let finalPrompt = params.prompt
-    if (params.messages) {
-      finalPrompt = await this.getFormattedChat(params.messages)
+    if (params.messages) {
+      // messages always win
+      finalPrompt = await this.getFormattedChat(params.messages, params.chatTemplate)
     }
 
     let tokenListener: any =
@@ -162,8 +171,11 @@ export class LlamaContext {
     return RNLlama.detokenize(this.id, tokens)
   }
 
-  embedding(text: string): Promise<NativeEmbeddingResult> {
-    return RNLlama.embedding(this.id, text)
+  embedding(
+    text: string,
+    params?: EmbeddingParams,
+  ): Promise<NativeEmbeddingResult> {
+    return RNLlama.embedding(this.id, text, params || {})
   }
 
   async bench(
@@ -199,35 +211,78 @@ export async function setContextLimit(limit: number): Promise<void> {
   return RNLlama.setContextLimit(limit)
 }
 
-
+let contextIdCounter = 0
+const contextIdRandom = () =>
+  process.env.NODE_ENV === 'test' ? 0 : Math.floor(Math.random() * 100000)
+
+const modelInfoSkip = [
+  // Large fields
+  'tokenizer.ggml.tokens',
+  'tokenizer.ggml.token_type',
+  'tokenizer.ggml.merges',
+]
+export async function loadLlamaModelInfo(model: string): Promise<Object> {
+  let path = model
+  if (path.startsWith('file://')) path = path.slice(7)
+  return RNLlama.modelInfo(path, modelInfoSkip)
+}
+
+const poolTypeMap = {
+  // -1 is unspecified as undefined
+  none: 0,
+  mean: 1,
+  cls: 2,
+  last: 3,
+  rank: 4,
+}
+
+export async function initLlama(
+  {
     model,
     is_model_asset: isModelAsset,
+    pooling_type: poolingType,
+    lora,
     ...rest
-  }: ContextParams,
-  progressCallback?: (progress: number) => void,
+  }: ContextParams,
+  onProgress?: (progress: number) => void,
 ): Promise<LlamaContext> {
   let path = model
   if (path.startsWith('file://')) path = path.slice(7)
-
-  const modelProgressListener = EventEmitter.addListener(EVENT_ON_MODEL_PROGRESS, (event) => {
-    if(event.progress && progressCallback)
-      progressCallback(event.progress)
-    if(event.progress === 100) {
-      modelProgressListener.remove()
-    }
-  })
 
+  let loraPath = lora
+  if (loraPath?.startsWith('file://')) loraPath = loraPath.slice(7)
+
+  const contextId = contextIdCounter + contextIdRandom()
+  contextIdCounter += 1
+
+  let removeProgressListener: any = null
+  if (onProgress) {
+    removeProgressListener = EventEmitter.addListener(
+      EVENT_ON_INIT_CONTEXT_PROGRESS,
+      (evt: { contextId: number; progress: number }) => {
+        if (evt.contextId !== contextId) return
+        onProgress(evt.progress)
+      },
+    )
+  }
+
+  const poolType = poolTypeMap[poolingType as keyof typeof poolTypeMap]
   const {
-    contextId,
     gpu,
     reasonNoGPU,
     model: modelDetails,
-  } = await RNLlama.initContext({
+  } = await RNLlama.initContext(contextId, {
     model: path,
     is_model_asset: !!isModelAsset,
+    use_progress_callback: !!onProgress,
+    pooling_type: poolType,
+    lora: loraPath,
     ...rest,
+  }).catch((err: any) => {
+    removeProgressListener?.remove()
+    throw err
   })
-
+  removeProgressListener?.remove()
   return new LlamaContext({ contextId, gpu, reasonNoGPU, model: modelDetails })
 }
 
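
Taken together, the source changes above replace the old global model-progress event with a per-context init-progress callback, and add model metadata inspection via loadLlamaModelInfo. A usage sketch; the model and LoRA paths are hypothetical:

import { initLlama, loadLlamaModelInfo } from 'cui-llama.rn'

// Inspect GGUF metadata without creating a context; the large tokenizer
// fields are skipped via modelInfoSkip.
const info = await loadLlamaModelInfo('file:///models/example.gguf')

const context = await initLlama(
  {
    model: 'file:///models/example.gguf',
    pooling_type: 'mean', // string here; mapped to a numeric value via poolTypeMap
    lora: 'file:///models/example-lora.gguf', // optional; file:// prefix is stripped
  },
  (progress) => {
    // Fired per @RNLlama_onInitContextProgress event matching this contextId.
    console.log(`init progress: ${progress}`)
  },
)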