web-llm-runner 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +211 -0
- package/README.md +564 -0
- package/lib/cache_util.d.ts +19 -0
- package/lib/cache_util.d.ts.map +1 -0
- package/lib/config.d.ts +199 -0
- package/lib/config.d.ts.map +1 -0
- package/lib/conversation.d.ts +107 -0
- package/lib/conversation.d.ts.map +1 -0
- package/lib/embedding.d.ts +38 -0
- package/lib/embedding.d.ts.map +1 -0
- package/lib/engine.d.ts +140 -0
- package/lib/engine.d.ts.map +1 -0
- package/lib/error.d.ts +208 -0
- package/lib/error.d.ts.map +1 -0
- package/lib/extension_service_worker.d.ts +54 -0
- package/lib/extension_service_worker.d.ts.map +1 -0
- package/lib/index.d.ts +13 -0
- package/lib/index.d.ts.map +1 -0
- package/lib/index.js +13767 -0
- package/lib/index.js.map +1 -0
- package/lib/integrity.d.ts +44 -0
- package/lib/integrity.d.ts.map +1 -0
- package/lib/llm_chat.d.ts +258 -0
- package/lib/llm_chat.d.ts.map +1 -0
- package/lib/message.d.ts +87 -0
- package/lib/message.d.ts.map +1 -0
- package/lib/openai_api_protocols/chat_completion.d.ts +834 -0
- package/lib/openai_api_protocols/chat_completion.d.ts.map +1 -0
- package/lib/openai_api_protocols/completion.d.ts +270 -0
- package/lib/openai_api_protocols/completion.d.ts.map +1 -0
- package/lib/openai_api_protocols/embedding.d.ts +125 -0
- package/lib/openai_api_protocols/embedding.d.ts.map +1 -0
- package/lib/openai_api_protocols/index.d.ts +20 -0
- package/lib/openai_api_protocols/index.d.ts.map +1 -0
- package/lib/service_worker.d.ts +53 -0
- package/lib/service_worker.d.ts.map +1 -0
- package/lib/support.d.ts +117 -0
- package/lib/support.d.ts.map +1 -0
- package/lib/types.d.ts +202 -0
- package/lib/types.d.ts.map +1 -0
- package/lib/utils.d.ts +7 -0
- package/lib/utils.d.ts.map +1 -0
- package/lib/web_worker.d.ts +132 -0
- package/lib/web_worker.d.ts.map +1 -0
- package/lib/wrapper/WebLLMWrapper.d.ts +20 -0
- package/lib/wrapper/WebLLMWrapper.d.ts.map +1 -0
- package/lib/wrapper/llm-worker.d.ts +2 -0
- package/lib/wrapper/llm-worker.d.ts.map +1 -0
- package/package.json +60 -0
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SRI (Subresource Integrity) hash string.
|
|
3
|
+
* Format: "sha256-BASE64", "sha384-BASE64", or "sha512-BASE64".
|
|
4
|
+
* See https://developer.mozilla.org/en-US/docs/Web/Security/Subresource_Integrity
|
|
5
|
+
*/
|
|
6
|
+
export type SRIString = string;
|
|
7
|
+
/** Map of filename to SRI hash for per-file verification. */
|
|
8
|
+
export type FileIntegrityMap = Record<string, SRIString>;
|
|
9
|
+
/**
|
|
10
|
+
* Integrity configuration for a model's artifacts.
|
|
11
|
+
* All fields are optional — only specified artifacts will be verified.
|
|
12
|
+
*
|
|
13
|
+
* @param config SRI hash for the model's `mlc-chat-config.json`.
|
|
14
|
+
* @param model_lib SRI hash for the WASM model library file.
|
|
15
|
+
* @param tokenizer SRI hashes for tokenizer files, keyed by filename
|
|
16
|
+
* (e.g. `"tokenizer.json"` or `"tokenizer.model"`).
|
|
17
|
+
* @param onFailure Behavior on verification failure:
|
|
18
|
+
* `"error"` (default) throws an `IntegrityError`;
|
|
19
|
+
* `"warn"` logs a warning and continues.
|
|
20
|
+
*/
|
|
21
|
+
export interface ModelIntegrity {
|
|
22
|
+
config?: SRIString;
|
|
23
|
+
model_lib?: SRIString;
|
|
24
|
+
tokenizer?: FileIntegrityMap;
|
|
25
|
+
onFailure?: "error" | "warn";
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Verify an ArrayBuffer against an SRI hash using the Web Crypto API.
|
|
29
|
+
*
|
|
30
|
+
* @param data The raw bytes to verify.
|
|
31
|
+
* @param expectedSRI The expected SRI hash (e.g. `"sha256-abc123..."`).
|
|
32
|
+
* @param url The URL of the artifact, used for error messages.
|
|
33
|
+
* @param onFailure `"error"` to throw on mismatch, `"warn"` to log and continue.
|
|
34
|
+
* @throws {IntegrityError} When the hash does not match and `onFailure` is `"error"`.
|
|
35
|
+
*/
|
|
36
|
+
export declare function verifyIntegrity(data: ArrayBuffer, expectedSRI: SRIString, url: string, onFailure?: "error" | "warn"): Promise<void>;
|
|
37
|
+
/**
|
|
38
|
+
* Validate that a string is a well-formed SRI hash.
|
|
39
|
+
*
|
|
40
|
+
* @param sri The string to validate.
|
|
41
|
+
* @returns `true` if `sri` matches the SRI format.
|
|
42
|
+
*/
|
|
43
|
+
export declare function isValidSRI(sri: string): boolean;
|
|
44
|
+
//# sourceMappingURL=integrity.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"integrity.d.ts","sourceRoot":"","sources":["../src/integrity.ts"],"names":[],"mappings":"AAGA;;;;GAIG;AACH,MAAM,MAAM,SAAS,GAAG,MAAM,CAAC;AAE/B,6DAA6D;AAC7D,MAAM,MAAM,gBAAgB,GAAG,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AAEzD;;;;;;;;;;;GAWG;AACH,MAAM,WAAW,cAAc;IAC7B,MAAM,CAAC,EAAE,SAAS,CAAC;IACnB,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB,SAAS,CAAC,EAAE,gBAAgB,CAAC;IAC7B,SAAS,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;CAC9B;AAgED;;;;;;;;GAQG;AACH,wBAAsB,eAAe,CACnC,IAAI,EAAE,WAAW,EACjB,WAAW,EAAE,SAAS,EACtB,GAAG,EAAE,MAAM,EACX,SAAS,GAAE,OAAO,GAAG,MAAgB,GACpC,OAAO,CAAC,IAAI,CAAC,CA+Bf;AAED;;;;;GAKG;AACH,wBAAgB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAE/C"}
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
import * as tvmjs from "@mlc-ai/web-runtime";
|
|
2
|
+
import { Tokenizer } from "@mlc-ai/web-tokenizers";
|
|
3
|
+
import { ChatConfig, GenerationConfig, Role } from "./config";
|
|
4
|
+
import { Conversation } from "./conversation";
|
|
5
|
+
import { LogitProcessor, LatencyBreakdown } from "./types";
|
|
6
|
+
import { ChatCompletionFinishReason, ChatCompletionTokenLogprob } from "./openai_api_protocols/index";
|
|
7
|
+
export declare class LLMChatPipeline {
|
|
8
|
+
private config;
|
|
9
|
+
private tokenizer;
|
|
10
|
+
private tvm;
|
|
11
|
+
private device;
|
|
12
|
+
private vm;
|
|
13
|
+
private prefill;
|
|
14
|
+
private decoding;
|
|
15
|
+
private image_embed;
|
|
16
|
+
private embed;
|
|
17
|
+
private fapplyBitmask;
|
|
18
|
+
private fapplyPenalty;
|
|
19
|
+
private fapplyLogitBias;
|
|
20
|
+
private fsoftmaxWithTemperature;
|
|
21
|
+
private fsampleWithTopP;
|
|
22
|
+
private fargsortProbs;
|
|
23
|
+
private fclearKVCaches;
|
|
24
|
+
private fKVCacheAddSequence;
|
|
25
|
+
private fKVCacheRemoveSequence;
|
|
26
|
+
private fKVCacheBeginForward;
|
|
27
|
+
private fKVCacheEndForward;
|
|
28
|
+
private fKVCacheEnableSlidingWindowForSeq;
|
|
29
|
+
private params;
|
|
30
|
+
private kvCache;
|
|
31
|
+
private logitsOnCPU?;
|
|
32
|
+
private filledKVCacheLength;
|
|
33
|
+
private bosTokenId;
|
|
34
|
+
private contextWindowSize;
|
|
35
|
+
private slidingWindowSize;
|
|
36
|
+
private attentionSinkSize;
|
|
37
|
+
private prefillChunkSize;
|
|
38
|
+
private resetStatsPerPrefill;
|
|
39
|
+
private stopStr;
|
|
40
|
+
private stopTokens;
|
|
41
|
+
private outputMessage;
|
|
42
|
+
private outputIds;
|
|
43
|
+
private stopTriggered;
|
|
44
|
+
private finishReason;
|
|
45
|
+
private appearedTokensFreq;
|
|
46
|
+
private conversation;
|
|
47
|
+
private tokenLogprobArray;
|
|
48
|
+
private decodingTotalTime;
|
|
49
|
+
private decodingTotalTokens;
|
|
50
|
+
private prefillTotalTime;
|
|
51
|
+
private prefillTotalTokens;
|
|
52
|
+
private curRoundDecodingTotalTokens;
|
|
53
|
+
private curRoundPrefillTotalTokens;
|
|
54
|
+
private curRoundDecodingTotalTime;
|
|
55
|
+
private curRoundPrefillTotalTime;
|
|
56
|
+
curRoundLatencyBreakdown: LatencyBreakdown;
|
|
57
|
+
private logitProcessor?;
|
|
58
|
+
private grammarMatcher?;
|
|
59
|
+
private responseFormatCacheKey?;
|
|
60
|
+
private xgTokenizerInfo?;
|
|
61
|
+
private grammarCompiler?;
|
|
62
|
+
private bitmaskSize;
|
|
63
|
+
private fullVocabSize;
|
|
64
|
+
private token_postproc_method;
|
|
65
|
+
private prepend_space_in_encode;
|
|
66
|
+
private curRoundGrammarInitTotalTime;
|
|
67
|
+
private curRoundGrammarPerTokenTotalTime;
|
|
68
|
+
private sampleIndices;
|
|
69
|
+
private sampleIndicesDevice;
|
|
70
|
+
private topPDevice;
|
|
71
|
+
constructor(tvm: tvmjs.Instance, tokenizer: Tokenizer, config: ChatConfig, logitProcessor?: LogitProcessor);
|
|
72
|
+
dispose(): void;
|
|
73
|
+
/**
|
|
74
|
+
* Get the current message.
|
|
75
|
+
*/
|
|
76
|
+
getMessage(): string;
|
|
77
|
+
/**
|
|
78
|
+
* Reset the runtime statistics
|
|
79
|
+
*/
|
|
80
|
+
resetRuntimeStats(): void;
|
|
81
|
+
/**
|
|
82
|
+
* Reset the chat history
|
|
83
|
+
*/
|
|
84
|
+
resetChat(keepStats?: boolean): void;
|
|
85
|
+
/**
|
|
86
|
+
* Reset KV Cache
|
|
87
|
+
*/
|
|
88
|
+
resetKVCache(): void;
|
|
89
|
+
/**
|
|
90
|
+
* @returns Whether stop is triggered.
|
|
91
|
+
*/
|
|
92
|
+
stopped(): boolean;
|
|
93
|
+
/**
|
|
94
|
+
* @returns Finish reason; undefined if generation not started/stopped yet.
|
|
95
|
+
*/
|
|
96
|
+
getFinishReason(): ChatCompletionFinishReason | undefined;
|
|
97
|
+
/**
|
|
98
|
+
* @returns tokenLogprobArray for this current round of autoregressive generation.
|
|
99
|
+
* Updated upon each sampled token, cleared upon each prefillStep().
|
|
100
|
+
*/
|
|
101
|
+
getTokenLogprobArray(): Array<ChatCompletionTokenLogprob>;
|
|
102
|
+
/**
|
|
103
|
+
* @returns the number of tokens decoded for a single request or a single choice in the request.
|
|
104
|
+
*/
|
|
105
|
+
getCurRoundDecodingTotalTokens(): number;
|
|
106
|
+
/**
|
|
107
|
+
* @returns the number of tokens prefilled for a single request or a single choice in the request.
|
|
108
|
+
*/
|
|
109
|
+
getCurRoundPrefillTotalTokens(): number;
|
|
110
|
+
/**
|
|
111
|
+
* @returns the time spent on decode for a single request or a single choice in the request.
|
|
112
|
+
*/
|
|
113
|
+
getCurRoundDecodingTotalTime(): number;
|
|
114
|
+
/**
|
|
115
|
+
* @returns the time spent on prefill for a single request or a single choice in the request.
|
|
116
|
+
*/
|
|
117
|
+
getCurRoundPrefillTotalTime(): number;
|
|
118
|
+
/**
|
|
119
|
+
* @returns the time (seconds) spent on initializing the grammar matcher for a single request.
|
|
120
|
+
*/
|
|
121
|
+
getCurRoundGrammarInitTotalTime(): number;
|
|
122
|
+
/**
|
|
123
|
+
* @returns the total time (seconds) the grammar matcher spent creating bitmasks and accepting tokens
|
|
124
|
+
* for all the generated tokens in a single request.
|
|
125
|
+
*/
|
|
126
|
+
getCurRoundGrammarPerTokenTotalTime(): number;
|
|
127
|
+
/**
|
|
128
|
+
* @returns the breakdown of latencies for sampling each token for a single request.
|
|
129
|
+
*/
|
|
130
|
+
getCurRoundLatencyBreakdown(): LatencyBreakdown;
|
|
131
|
+
/**
|
|
132
|
+
* @returns Runtime stats information.
|
|
133
|
+
*/
|
|
134
|
+
runtimeStatsText(): string;
|
|
135
|
+
/**
|
|
136
|
+
* @returns Runtime stats information, starting from the last prefill performed.
|
|
137
|
+
*/
|
|
138
|
+
curRoundRuntimeStatsText(): string;
|
|
139
|
+
/**
|
|
140
|
+
* @returns Prefill tokens per second, starting from the last prefill performed.
|
|
141
|
+
*/
|
|
142
|
+
getCurRoundPrefillTokensPerSec(): number;
|
|
143
|
+
/**
|
|
144
|
+
* @returns Decoding tokens per second, starting from the last prefill performed.
|
|
145
|
+
*/
|
|
146
|
+
getCurRoundDecodingTokensPerSec(): number;
|
|
147
|
+
/**
|
|
148
|
+
* Set the seed for the RNG `this.tvm.rng`.
|
|
149
|
+
*/
|
|
150
|
+
setSeed(seed: number): void;
|
|
151
|
+
private getResponseFormatKey;
|
|
152
|
+
/**
|
|
153
|
+
* @returns The conversation object (not a deep copy).
|
|
154
|
+
*/
|
|
155
|
+
getConversationObject(): Conversation;
|
|
156
|
+
/**
|
|
157
|
+
* Set this.conversation to a new conversation object.
|
|
158
|
+
*/
|
|
159
|
+
setConversation(newConv: Conversation): void;
|
|
160
|
+
asyncLoadWebGPUPipelines(): Promise<void>;
|
|
161
|
+
/**
|
|
162
|
+
* Generate the first token given input prompt
|
|
163
|
+
*/
|
|
164
|
+
prefillStep(inp: string, msgRole: Role, // either user or tool
|
|
165
|
+
inp_role_str?: string, genConfig?: GenerationConfig): Promise<void>;
|
|
166
|
+
decodeStep(genConfig?: GenerationConfig): Promise<void>;
|
|
167
|
+
/**
|
|
168
|
+
* Manually trigger stop if it is not stopped.
|
|
169
|
+
*/
|
|
170
|
+
triggerStop(): void;
|
|
171
|
+
/**
|
|
172
|
+
* Add a generated token and check for stop.
|
|
173
|
+
*
|
|
174
|
+
* @param nextToken The next token.
|
|
175
|
+
* @param genConfig Configs that override `this.config` for this round of generation.
|
|
176
|
+
*/
|
|
177
|
+
private processNextToken;
|
|
178
|
+
/**
|
|
179
|
+
* Given input tokens, return embeddings of them by calling embed kernel.
|
|
180
|
+
*
|
|
181
|
+
* @note precondition: inputTokens.length <= prefillChunkSize, since we take care of
|
|
182
|
+
* chunking in `getChunkedPrefillInputData()`.
|
|
183
|
+
*/
|
|
184
|
+
private getTokensEmbeddings;
|
|
185
|
+
/**
|
|
186
|
+
* Calculate resize dimensions for Phi3-V model.
|
|
187
|
+
* Based on vlm_utils.cc CalculateResizeShape
|
|
188
|
+
*/
|
|
189
|
+
private calculateResizeShape;
|
|
190
|
+
/**
|
|
191
|
+
* Calculate crop dimensions for Phi3-V model.
|
|
192
|
+
* Based on vlm_utils.cc CalculateCropShape / CalculatePadShape
|
|
193
|
+
*/
|
|
194
|
+
private calculateCropShape;
|
|
195
|
+
/**
|
|
196
|
+
* Embed an image input.
|
|
197
|
+
*/
|
|
198
|
+
private getImageEmbeddings;
|
|
199
|
+
/**
|
|
200
|
+
* Embed and forward input data, that can be either array of tokens, or an image.
|
|
201
|
+
* This will increment `this.filledKVCacheLength`.
|
|
202
|
+
*
|
|
203
|
+
* @param inputData data to embed and forward
|
|
204
|
+
* @param inputDataLen length of this inputData, should be smaller than the prefill chunk size.
|
|
205
|
+
* @returns The logits returned by this forward as tvmjs.Tensor on GPU.
|
|
206
|
+
*
|
|
207
|
+
* @note Precondition: inputData's data length is smaller than prefill chunk size
|
|
208
|
+
*/
|
|
209
|
+
private embedAndForward;
|
|
210
|
+
private updateLogitsOnCPU;
|
|
211
|
+
private sampleTokenFromLogits;
|
|
212
|
+
/**
|
|
213
|
+
* Return an array containing a mixture of token arrays and imageURLs (which cannot be represented
|
|
214
|
+
* as tokens). Also return the number of tokens this represents.
|
|
215
|
+
*
|
|
216
|
+
* We first convert the Conversation into a prompt array to be prefilled. Then we encode the
|
|
217
|
+
* text parts, leaving the imageURLs as they are.
|
|
218
|
+
* Example prompts:
|
|
219
|
+
* [
|
|
220
|
+
* "<|system|>\nSome system prompt\n",
|
|
221
|
+
* [
|
|
222
|
+
* "<|user|>\n",
|
|
223
|
+
* imageURL1,
|
|
224
|
+
* "\n",
|
|
225
|
+
* imageURL2,
|
|
226
|
+
* "\n",
|
|
227
|
+
* "Some user input<|end|>\n"
|
|
228
|
+
* ],
|
|
229
|
+
* ]
|
|
230
|
+
*
|
|
231
|
+
* Expected output:
|
|
232
|
+
* [
|
|
233
|
+
* token array for "<|system|>\nSome system prompt\n<|user|>\n",
|
|
234
|
+
* imageUrl1,
|
|
235
|
+
* token array for "\n",
|
|
236
|
+
* imageUrl2,
|
|
237
|
+
* token array for "\nSome user input<|end|>\n"
|
|
238
|
+
*/
|
|
239
|
+
private getInputData;
|
|
240
|
+
forwardTokensAndSample(inputIds: Array<number>, isPrefill: boolean): Promise<number>;
|
|
241
|
+
/**
|
|
242
|
+
* Based on `sampledToken` and `this.logitsOnCPU`, which becomes a distribution after
|
|
243
|
+
* calling `this.tvm.applySoftmaxWithTemperature()`, generate `ChatCompletionTokenLogprob` and
|
|
244
|
+
* update `this.tokenLogprobArray`.
|
|
245
|
+
*
|
|
246
|
+
* @param sampledToken The token ID sampled.
|
|
247
|
+
* @param top_logprobs Number of top tokens to include; `top_logprobs` in `ChatCompletionRequest`.
|
|
248
|
+
*
|
|
249
|
+
* @returns The `ChatCompletionTokenLogprob` for this single autoregressive step.
|
|
250
|
+
*/
|
|
251
|
+
private getTokenLogprob;
|
|
252
|
+
/**
|
|
253
|
+
* Synchronize the device.
|
|
254
|
+
*/
|
|
255
|
+
sync(): Promise<void>;
|
|
256
|
+
evaluate(): Promise<void>;
|
|
257
|
+
}
|
|
258
|
+
//# sourceMappingURL=llm_chat.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llm_chat.d.ts","sourceRoot":"","sources":["../src/llm_chat.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,KAAK,MAAM,qBAAqB,CAAC;AAG7C,OAAO,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAC;AACnD,OAAO,EAAE,UAAU,EAAE,gBAAgB,EAAE,IAAI,EAAE,MAAM,UAAU,CAAC;AAC9D,OAAO,EAAmB,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC/D,OAAO,EAAE,cAAc,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAS3D,OAAO,EACL,0BAA0B,EAC1B,0BAA0B,EAI3B,MAAM,8BAA8B,CAAC;AAgBtC,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAa;IAC3B,OAAO,CAAC,SAAS,CAAY;IAG7B,OAAO,CAAC,GAAG,CAAiB;IAC5B,OAAO,CAAC,MAAM,CAAiB;IAC/B,OAAO,CAAC,EAAE,CAAuB;IACjC,OAAO,CAAC,OAAO,CAAmB;IAClC,OAAO,CAAC,QAAQ,CAAmB;IACnC,OAAO,CAAC,WAAW,CAA+B;IAClD,OAAO,CAAC,KAAK,CAAmB;IAChC,OAAO,CAAC,aAAa,CAAmB;IACxC,OAAO,CAAC,aAAa,CAAmB;IACxC,OAAO,CAAC,eAAe,CAAmB;IAC1C,OAAO,CAAC,uBAAuB,CAAmB;IAClD,OAAO,CAAC,eAAe,CAAmB;IAC1C,OAAO,CAAC,aAAa,CAAmB;IAGxC,OAAO,CAAC,cAAc,CAAmB;IACzC,OAAO,CAAC,mBAAmB,CAAmB;IAC9C,OAAO,CAAC,sBAAsB,CAAmB;IACjD,OAAO,CAAC,oBAAoB,CAAmB;IAC/C,OAAO,CAAC,kBAAkB,CAAmB;IAC7C,OAAO,CAAC,iCAAiC,CAAmB;IAG5D,OAAO,CAAC,MAAM,CAAkB;IAChC,OAAO,CAAC,OAAO,CAAkB;IACjC,OAAO,CAAC,WAAW,CAAC,CAA2B;IAC/C,OAAO,CAAC,mBAAmB,CAAK;IAGhC,OAAO,CAAC,UAAU,CAAK;IACvB,OAAO,CAAC,iBAAiB,CAAM;IAC/B,OAAO,CAAC,iBAAiB,CAAM;IAC/B,OAAO,CAAC,iBAAiB,CAAM;IAC/B,OAAO,CAAC,gBAAgB,CAAM;IAC9B,OAAO,CAAC,oBAAoB,CAAQ;IACpC,OAAO,CAAC,OAAO,CAAW;IAC1B,OAAO,CAAC,UAAU,CAAgB;IAGlC,OAAO,CAAC,aAAa,CAAM;IAC3B,OAAO,CAAC,SAAS,CAAqB;IACtC,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,YAAY,CAAqD;IAEzE,OAAO,CAAC,kBAAkB,CAA6B;IACvD,OAAO,CAAC,YAAY,CAAe;IAInC,OAAO,CAAC,iBAAiB,CAAyC;IAGlE,OAAO,CAAC,iBAAiB,CAAK;IAC9B,OAAO,CAAC,mBAAmB,CAAK;IAChC,OAAO,CAAC,gBAAgB,CAAK;IAC7B,OAAO,CAAC,kBAAkB,CAAK;IAE/B,OAAO,CAAC,2BAA2B,CAAK;IACxC,OAAO,CAAC,0BAA0B,CAAK;IACvC,OAAO,CAAC,yBAAyB,CAAK;IACtC,OAAO,CAAC,wBAAwB,CAAK;IAG9B,wBAAwB,EAAE,gBAAgB,CAO/C;IAGF,OAAO,CAAC,cAAc,CAAC,CAA6B;IAKpD,OAAO,CAAC,cAAc,CAAC,CAAiC;IAIxD,OAAO,CAAC,sBAAsB,CAAC,CAAqB;IAGpD,OAAO,CAAC,eAAe,CAAC,CAAgC;IAExD,OAAO,CAAC,eAAe,CAAC,CAAkC;IAE1D,OAAO,CAAC,WAAW,CAAS;IAG5B,OAAO,CAAC,aAAa,CAAS;IAE9B,O
AAO,CAAC,qBAAqB,CAAS;IAEtC,OAAO,CAAC,uBAAuB,CAAU;IAGzC,OAAO,CAAC,4BAA4B,CAAK;IAEzC,OAAO,CAAC,gCAAgC,CAAK;IAE7C,OAAO,CAAC,aAAa,CAAa;IAClC,OAAO,CAAC,mBAAmB,CAAe;IAC1C,OAAO,CAAC,UAAU,CAAe;gBAG/B,GAAG,EAAE,KAAK,CAAC,QAAQ,EACnB,SAAS,EAAE,SAAS,EACpB,MAAM,EAAE,UAAU,EAClB,cAAc,CAAC,EAAE,cAAc;IA+LjC,OAAO;IAkBP;;OAEG;IACH,UAAU;IAIV;;OAEG;IACH,iBAAiB;IAOjB;;OAEG;IACH,SAAS,CAAC,SAAS,UAAQ;IAY3B;;OAEG;IACH,YAAY;IAaZ;;OAEG;IACH,OAAO,IAAI,OAAO;IAIlB;;OAEG;IACH,eAAe,IAAI,0BAA0B,GAAG,SAAS;IAIzD;;;OAGG;IACH,oBAAoB,IAAI,KAAK,CAAC,0BAA0B,CAAC;IAIzD;;OAEG;IACH,8BAA8B,IAAI,MAAM;IAIxC;;OAEG;IACH,6BAA6B,IAAI,MAAM;IAIvC;;OAEG;IACH,4BAA4B,IAAI,MAAM;IAItC;;OAEG;IACH,2BAA2B,IAAI,MAAM;IAIrC;;OAEG;IACH,+BAA+B,IAAI,MAAM;IAIzC;;;OAGG;IACH,mCAAmC,IAAI,MAAM;IAI7C;;OAEG;IACH,2BAA2B,IAAI,gBAAgB;IAI/C;;OAEG;IACH,gBAAgB,IAAI,MAAM;IAO1B;;OAEG;IACH,wBAAwB,IAAI,MAAM;IAOlC;;OAEG;IACH,8BAA8B,IAAI,MAAM;IAIxC;;OAEG;IACH,+BAA+B,IAAI,MAAM;IAIzC;;OAEG;IACH,OAAO,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAI3B,OAAO,CAAC,oBAAoB;IAyB5B;;OAEG;IACH,qBAAqB,IAAI,YAAY;IAIrC;;OAEG;IACH,eAAe,CAAC,OAAO,EAAE,YAAY;IAM/B,wBAAwB;IAI9B;;OAEG;IACG,WAAW,CACf,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,IAAI,EAAE,sBAAsB;IACrC,YAAY,CAAC,EAAE,MAAM,EACrB,SAAS,CAAC,EAAE,gBAAgB,GAC3B,OAAO,CAAC,IAAI,CAAC;IA0LV,UAAU,CAAC,SAAS,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAoC7D;;OAEG;IACH,WAAW;IAWX;;;;;OAKG;IACH,OAAO,CAAC,gBAAgB;IAgGxB;;;;;OAKG;IACH,OAAO,CAAC,mBAAmB;IAqB3B;;;OAGG;IACH,OAAO,CAAC,oBAAoB;IAgB5B;;;OAGG;IACH,OAAO,CAAC,kBAAkB;IAa1B;;OAEG;YACW,kBAAkB;IAkDhC;;;;;;;;;OASG;YACW,eAAe;IA2D7B,OAAO,CAAC,iBAAiB;YAcX,qBAAqB;IA4XnC;;;;;;;;;;;;;;;;;;;;;;;;;;OA0BG;IACH,OAAO,CAAC,YAAY;IAuEd,sBAAsB,CAC1B,QAAQ,EAAE,KAAK,CAAC,MAAM,CAAC,EACvB,SAAS,EAAE,OAAO,GACjB,OAAO,CAAC,MAAM,CAAC;IA+ClB;;;;;;;;;OASG;IACH,OAAO,CAAC,eAAe;IAsCvB;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAKrB,QAAQ;CA0Cf"}
|
package/lib/message.d.ts
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import { AppConfig, ChatOptions } from "./config";
|
|
2
|
+
import { InitProgressReport, LogLevel } from "./types";
|
|
3
|
+
import { ChatCompletionRequestStreaming, ChatCompletionRequestNonStreaming, ChatCompletion, ChatCompletionChunk, CompletionCreateParamsNonStreaming, CompletionCreateParamsStreaming, Completion, EmbeddingCreateParams, CreateEmbeddingResponse } from "./openai_api_protocols/index";
|
|
4
|
+
/**
|
|
5
|
+
* Message kind used by worker
|
|
6
|
+
*/
|
|
7
|
+
type RequestKind = "reload" | "runtimeStatsText" | "interruptGenerate" | "unload" | "resetChat" | "getMaxStorageBufferBindingSize" | "getGPUVendor" | "forwardTokensAndSample" | "chatCompletionNonStreaming" | "completionNonStreaming" | "embedding" | "getMessage" | "chatCompletionStreamInit" | "completionStreamInit" | "completionStreamNextChunk" | "customRequest" | "keepAlive" | "setLogLevel" | "setAppConfig";
|
|
8
|
+
export interface ReloadParams {
|
|
9
|
+
modelId: string[];
|
|
10
|
+
chatOpts?: ChatOptions[];
|
|
11
|
+
}
|
|
12
|
+
export interface ResetChatParams {
|
|
13
|
+
keepStats: boolean;
|
|
14
|
+
modelId?: string;
|
|
15
|
+
}
|
|
16
|
+
export interface GetMessageParams {
|
|
17
|
+
modelId?: string;
|
|
18
|
+
}
|
|
19
|
+
export interface RuntimeStatsTextParams {
|
|
20
|
+
modelId?: string;
|
|
21
|
+
}
|
|
22
|
+
export interface ForwardTokensAndSampleParams {
|
|
23
|
+
inputIds: Array<number>;
|
|
24
|
+
isPrefill: boolean;
|
|
25
|
+
modelId?: string;
|
|
26
|
+
}
|
|
27
|
+
export interface ChatCompletionNonStreamingParams {
|
|
28
|
+
request: ChatCompletionRequestNonStreaming;
|
|
29
|
+
modelId: string[];
|
|
30
|
+
chatOpts?: ChatOptions[];
|
|
31
|
+
}
|
|
32
|
+
export interface ChatCompletionStreamInitParams {
|
|
33
|
+
request: ChatCompletionRequestStreaming;
|
|
34
|
+
selectedModelId: string;
|
|
35
|
+
modelId: string[];
|
|
36
|
+
chatOpts?: ChatOptions[];
|
|
37
|
+
}
|
|
38
|
+
export interface CompletionNonStreamingParams {
|
|
39
|
+
request: CompletionCreateParamsNonStreaming;
|
|
40
|
+
modelId: string[];
|
|
41
|
+
chatOpts?: ChatOptions[];
|
|
42
|
+
}
|
|
43
|
+
export interface CompletionStreamInitParams {
|
|
44
|
+
request: CompletionCreateParamsStreaming;
|
|
45
|
+
selectedModelId: string;
|
|
46
|
+
modelId: string[];
|
|
47
|
+
chatOpts?: ChatOptions[];
|
|
48
|
+
}
|
|
49
|
+
export interface EmbeddingParams {
|
|
50
|
+
request: EmbeddingCreateParams;
|
|
51
|
+
modelId: string[];
|
|
52
|
+
chatOpts?: ChatOptions[];
|
|
53
|
+
}
|
|
54
|
+
export interface CompletionStreamNextChunkParams {
|
|
55
|
+
selectedModelId: string;
|
|
56
|
+
}
|
|
57
|
+
export interface CustomRequestParams {
|
|
58
|
+
requestName: string;
|
|
59
|
+
requestMessage: string;
|
|
60
|
+
}
|
|
61
|
+
export type MessageContent = ReloadParams | ResetChatParams | GetMessageParams | RuntimeStatsTextParams | ForwardTokensAndSampleParams | ChatCompletionNonStreamingParams | ChatCompletionStreamInitParams | CompletionNonStreamingParams | CompletionStreamInitParams | EmbeddingParams | CompletionStreamNextChunkParams | CustomRequestParams | InitProgressReport | LogLevel | string | null | number | ChatCompletion | ChatCompletionChunk | CreateEmbeddingResponse | Completion | AppConfig | void;
|
|
62
|
+
/**
|
|
63
|
+
* The message used in exchange between worker
|
|
64
|
+
* and the main thread.
|
|
65
|
+
*/
|
|
66
|
+
export type WorkerRequest = {
|
|
67
|
+
kind: RequestKind;
|
|
68
|
+
uuid: string;
|
|
69
|
+
content: MessageContent;
|
|
70
|
+
};
|
|
71
|
+
type HeartbeatWorkerResponse = {
|
|
72
|
+
kind: "heartbeat";
|
|
73
|
+
uuid: string;
|
|
74
|
+
};
|
|
75
|
+
type OneTimeWorkerResponse = {
|
|
76
|
+
kind: "return" | "throw";
|
|
77
|
+
uuid: string;
|
|
78
|
+
content: MessageContent;
|
|
79
|
+
};
|
|
80
|
+
type InitProgressWorkerResponse = {
|
|
81
|
+
kind: "initProgressCallback";
|
|
82
|
+
uuid: string;
|
|
83
|
+
content: InitProgressReport;
|
|
84
|
+
};
|
|
85
|
+
export type WorkerResponse = OneTimeWorkerResponse | InitProgressWorkerResponse | HeartbeatWorkerResponse;
|
|
86
|
+
export {};
|
|
87
|
+
//# sourceMappingURL=message.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"message.d.ts","sourceRoot":"","sources":["../src/message.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAClD,OAAO,EAAE,kBAAkB,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AACvD,OAAO,EACL,8BAA8B,EAC9B,iCAAiC,EACjC,cAAc,EACd,mBAAmB,EACnB,kCAAkC,EAClC,+BAA+B,EAC/B,UAAU,EACV,qBAAqB,EACrB,uBAAuB,EACxB,MAAM,8BAA8B,CAAC;AAEtC;;GAEG;AACH,KAAK,WAAW,GACZ,QAAQ,GACR,kBAAkB,GAClB,mBAAmB,GACnB,QAAQ,GACR,WAAW,GACX,gCAAgC,GAChC,cAAc,GACd,wBAAwB,GACxB,4BAA4B,GAC5B,wBAAwB,GACxB,WAAW,GACX,YAAY,GACZ,0BAA0B,GAC1B,sBAAsB,GACtB,2BAA2B,GAC3B,eAAe,GACf,WAAW,GACX,aAAa,GACb,cAAc,CAAC;AAKnB,MAAM,WAAW,YAAY;IAC3B,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,QAAQ,CAAC,EAAE,WAAW,EAAE,CAAC;CAC1B;AACD,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,OAAO,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AACD,MAAM,WAAW,gBAAgB;IAC/B,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AACD,MAAM,WAAW,sBAAsB;IACrC,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AACD,MAAM,WAAW,4BAA4B;IAC3C,QAAQ,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACxB,SAAS,EAAE,OAAO,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAaD,MAAM,WAAW,gCAAgC;IAC/C,OAAO,EAAE,iCAAiC,CAAC;IAC3C,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,QAAQ,CAAC,EAAE,WAAW,EAAE,CAAC;CAC1B;AACD,MAAM,WAAW,8BAA8B;IAC7C,OAAO,EAAE,8BAA8B,CAAC;IACxC,eAAe,EAAE,MAAM,CAAC;IACxB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,QAAQ,CAAC,EAAE,WAAW,EAAE,CAAC;CAC1B;AACD,MAAM,WAAW,4BAA4B;IAC3C,OAAO,EAAE,kCAAkC,CAAC;IAC5C,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,QAAQ,CAAC,EAAE,WAAW,EAAE,CAAC;CAC1B;AACD,MAAM,WAAW,0BAA0B;IACzC,OAAO,EAAE,+BAA+B,CAAC;IACzC,eAAe,EAAE,MAAM,CAAC;IACxB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,QAAQ,CAAC,EAAE,WAAW,EAAE,CAAC;CAC1B;AACD,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,qBAAqB,CAAC;IAC/B,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,QAAQ,CAAC,EAAE,WAAW,EAAE,CAAC;CAC1B;AACD,MAAM,WAAW,+BAA+B;IAC9C,eAAe,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,mBAAmB;IAClC,WAAW,EAAE,MAAM,CAAC;IACpB,cAAc,EAAE,MAAM,CAAC;CACxB;AACD,MAAM,MAAM,cAAc,GACtB,YAAY,GACZ,eAAe,GACf,gBAAgB,GAChB,sBAAsB,GACtB,4BAA4B,GAC5B,gCAAgC,GAChC,8BAA8B,GAC9B,4BAA4B,GAC5B,0BAA0B,GAC1B,eAAe,GACf,+BAA+B,GAC/B,mBAAmB,GACnB,kBAAk
B,GAClB,QAAQ,GACR,MAAM,GACN,IAAI,GACJ,MAAM,GACN,cAAc,GACd,mBAAmB,GACnB,uBAAuB,GACvB,UAAU,GACV,SAAS,GACT,IAAI,CAAC;AACT;;;GAGG;AAEH,MAAM,MAAM,aAAa,GAAG;IAC1B,IAAI,EAAE,WAAW,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,cAAc,CAAC;CACzB,CAAC;AAEF,KAAK,uBAAuB,GAAG;IAC7B,IAAI,EAAE,WAAW,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC;CACd,CAAC;AAEF,KAAK,qBAAqB,GAAG;IAC3B,IAAI,EAAE,QAAQ,GAAG,OAAO,CAAC;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,cAAc,CAAC;CACzB,CAAC;AAEF,KAAK,0BAA0B,GAAG;IAChC,IAAI,EAAE,sBAAsB,CAAC;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,kBAAkB,CAAC;CAC7B,CAAC;AAEF,MAAM,MAAM,cAAc,GACtB,qBAAqB,GACrB,0BAA0B,GAC1B,uBAAuB,CAAC"}
|