@agorapete/wllama 3.5.1-q2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/.gitmodules +3 -0
  2. package/.prettierignore +38 -0
  3. package/AGENTS.md +1 -0
  4. package/CMakeLists.txt +131 -0
  5. package/LICENCE +21 -0
  6. package/README-dev.md +178 -0
  7. package/README.md +225 -0
  8. package/README_banner.png +0 -0
  9. package/assets/screenshot_0.png +0 -0
  10. package/cpp/generate_glue_prototype.js +115 -0
  11. package/cpp/glue.hpp +664 -0
  12. package/cpp/test_glue.cpp +80 -0
  13. package/cpp/wllama-context.h +1172 -0
  14. package/cpp/wllama-fs.h +148 -0
  15. package/cpp/wllama.cpp +187 -0
  16. package/cpp/wllama.h +6 -0
  17. package/esm/cache-manager.d.ts +130 -0
  18. package/esm/debug.d.ts +28 -0
  19. package/esm/glue/glue.d.ts +22 -0
  20. package/esm/glue/messages.d.ts +146 -0
  21. package/esm/huggingface.d.ts +31 -0
  22. package/esm/index.cjs +3406 -0
  23. package/esm/index.d.ts +8 -0
  24. package/esm/index.js +3387 -0
  25. package/esm/index.min.js +1 -0
  26. package/esm/index.min.js.map +1 -0
  27. package/esm/model-manager.d.ts +136 -0
  28. package/esm/storage/cos.d.ts +36 -0
  29. package/esm/storage/index.d.ts +33 -0
  30. package/esm/storage/opfs.d.ts +12 -0
  31. package/esm/types/oai-compat.d.ts +278 -0
  32. package/esm/types/types.d.ts +112 -0
  33. package/esm/utils.d.ts +119 -0
  34. package/esm/wasm/source-map.d.ts +1 -0
  35. package/esm/wasm/wllama.wasm +0 -0
  36. package/esm/wasm-from-cdn.d.ts +8 -0
  37. package/esm/wllama.d.ts +397 -0
  38. package/esm/worker.d.ts +92 -0
  39. package/esm/workers-code/generated.d.ts +4 -0
  40. package/guides/intro-v2.md +132 -0
  41. package/guides/intro-v3.1.md +40 -0
  42. package/guides/intro-v3.md +230 -0
  43. package/index.ts +1 -0
  44. package/package.json +71 -0
  45. package/scripts/bisect_test.sh +33 -0
  46. package/scripts/build_hf_space.sh +26 -0
  47. package/scripts/build_source_map.js +269 -0
  48. package/scripts/build_wasm.sh +19 -0
  49. package/scripts/build_worker.sh +38 -0
  50. package/scripts/check_debug_build.js +30 -0
  51. package/scripts/check_package_size.js +25 -0
  52. package/scripts/docker-compose.yml +76 -0
  53. package/scripts/generate_wasm_from_cdn.js +24 -0
  54. package/scripts/http_server.js +44 -0
  55. package/scripts/post_build.sh +32 -0
  56. package/src/cache-manager.ts +358 -0
  57. package/src/debug.ts +111 -0
  58. package/src/glue/glue.ts +291 -0
  59. package/src/glue/messages.ts +773 -0
  60. package/src/huggingface.ts +151 -0
  61. package/src/index.ts +8 -0
  62. package/src/mjs.test.ts +44 -0
  63. package/src/model-manager.test.ts +200 -0
  64. package/src/model-manager.ts +359 -0
  65. package/src/storage/cos.test.ts +83 -0
  66. package/src/storage/cos.ts +171 -0
  67. package/src/storage/index.ts +40 -0
  68. package/src/storage/opfs.ts +119 -0
  69. package/src/types/oai-compat.ts +342 -0
  70. package/src/types/types.ts +133 -0
  71. package/src/utils.test.ts +231 -0
  72. package/src/utils.ts +403 -0
  73. package/src/wasm/source-map.ts +7 -0
  74. package/src/wasm/wllama.js +1 -0
  75. package/src/wasm/wllama.wasm +0 -0
  76. package/src/wasm-from-cdn.ts +13 -0
  77. package/src/wllama.test.ts +392 -0
  78. package/src/wllama.ts +1138 -0
  79. package/src/wllama.wgpu.test.ts +62 -0
  80. package/src/worker.ts +443 -0
  81. package/src/workers-code/generated.ts +11 -0
  82. package/src/workers-code/llama-cpp.js +511 -0
  83. package/src/workers-code/opfs-utils.js +150 -0
  84. package/tsconfig.build.json +34 -0
  85. package/tsup.config.ts +23 -0
  86. package/vitest.config.ts +61 -0
@@ -0,0 +1,119 @@
1
+ import { createWorker, isSafariMobile } from '../utils';
2
+ import { OPFS_UTILS_WORKER_CODE } from '../workers-code/generated';
3
+ import type { StorageBackend } from './index';
4
+
5
/**
 * Storage backend that keeps entries as files in the Origin Private File
 * System (OPFS). Files live in the directory returned by `getCacheDir()`, and
 * writes are delegated to the handle returned by `openWritable()`.
 */
export class OPFSBackend implements StorageBackend {
  /**
   * @returns true when `navigator.storage.getDirectory` exists in the current
   * environment, false otherwise (including when `navigator` is undefined).
   */
  isSupported(): boolean {
    return (
      typeof navigator !== 'undefined' &&
      'storage' in navigator &&
      !!navigator.storage?.getDirectory
    );
  }

  /**
   * Reads the entry stored under `key`.
   *
   * @returns the stored file, or `null` when the entry cannot be opened for
   * any reason (deliberate best-effort: every error is treated as a miss).
   */
  async read(key: string): Promise<Blob | null> {
    try {
      const cacheDir = await getCacheDir();
      const fileHandle = await cacheDir.getFileHandle(key);
      return await fileHandle.getFile();
    } catch {
      // NotFoundError or similar
      return null;
    }
  }

  /**
   * Streams `stream` into the entry stored under `key`, replacing any
   * previous content.
   *
   * The writable is always closed and the reader lock is always released,
   * whether the copy succeeds or fails. When the copy itself fails, that
   * error is the one reported even if closing the writable fails as well.
   *
   * @throws whatever the stream, the writable or (on an otherwise successful
   * copy) `close()` rejects with.
   */
  async write(key: string, stream: ReadableStream): Promise<void> {
    const writable = await openWritable(key);
    let reader: ReadableStreamDefaultReader | undefined;
    let failed = false;
    try {
      await writable.truncate(0);
      // Acquired inside the try so that a locked stream still closes `writable`.
      reader = stream.getReader();
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        await writable.write(value);
      }
    } catch (e) {
      failed = true;
      throw e;
    } finally {
      // No read is pending here, so releasing the lock cannot throw; it lets
      // the caller cancel or re-read the stream after a failure.
      reader?.releaseLock();
      try {
        await writable.close();
      } catch (closeErr) {
        // Do not let a secondary close failure hide the primary error.
        if (!failed) throw closeErr;
      }
    }
  }

  /**
   * @returns the size in bytes of the entry stored under `key`, or `-1` when
   * it cannot be opened.
   */
  async getSize(key: string): Promise<number> {
    try {
      const cacheDir = await getCacheDir();
      const fileHandle = await cacheDir.getFileHandle(key);
      const file = await fileHandle.getFile();
      return file.size;
    } catch {
      return -1;
    }
  }

  /**
   * Lists every file directly inside the cache directory together with its
   * size. Entries whose `kind` is not `'file'` are skipped.
   */
  async list(): Promise<Array<{ key: string; size: number }>> {
    const cacheDir = await getCacheDir();
    const result: Array<{ key: string; size: number }> = [];
    // `entries()` may be missing from the DOM typings in use; kept as
    // ts-ignore (not ts-expect-error) so it compiles either way.
    // @ts-ignore
    for await (const [name, handle] of cacheDir.entries()) {
      if (handle.kind === 'file') {
        const file = await (handle as FileSystemFileHandle).getFile();
        result.push({ key: name, size: file.size });
      }
    }
    return result;
  }

  /**
   * Removes the entry stored under `key`. A missing entry is not an error.
   *
   * @throws any failure whose `name` is not `'NotFoundError'`.
   */
  async delete(key: string): Promise<void> {
    try {
      const cacheDir = await getCacheDir();
      await cacheDir.removeEntry(key);
    } catch (e: unknown) {
      const errName = (e as { name?: unknown } | null | undefined)?.name;
      if (errName !== 'NotFoundError') throw e;
    }
  }
}
73
+
74
/**
 * Resolves the directory handle that holds all cached entries: the `cache`
 * folder directly under the OPFS root. The folder is created if it does not
 * exist yet.
 */
async function getCacheDir(): Promise<FileSystemDirectoryHandle> {
  const root = await navigator.storage.getDirectory();
  const cacheHandle = await root.getDirectoryHandle('cache', { create: true });
  return cacheHandle;
}
78
+
79
/**
 * Opens `fileName` for writing through a dedicated worker created from
 * `OPFS_UTILS_WORKER_CODE`, and returns a small writable-like handle.
 *
 * Requests are sent to the worker one at a time; each request resolves when
 * the worker answers `{ ok }` and rejects when it answers `{ err }` or raises
 * an error event. Callers must await each call before issuing the next one,
 * because only a single in-flight request is tracked.
 *
 * The worker is terminated when `close()` finishes (successfully or not) and
 * also when the initial `open` request fails, so it is never left running.
 *
 * @param fileName name sent to the worker in the `open` request
 * @returns handle with `truncate` (no-op), `write` and `close`
 * @throws whatever the worker reports for the `open` request
 */
async function openWritable(fileName: string): Promise<{
  truncate(offset: number): Promise<void>;
  write(value: Uint8Array): Promise<void>;
  close(): Promise<void>;
}> {
  const worker = createWorker(OPFS_UTILS_WORKER_CODE);
  // Settlers of the single in-flight request; undefined until the first one.
  let pResolve: ((v: any) => void) | undefined;
  let pReject: ((v: any) => void) | undefined;
  worker.onmessage = (e: MessageEvent<any>) => {
    if (e.data.ok) pResolve?.(null);
    else if (e.data.err) pReject?.(e.data.err);
  };
  worker.onerror = (e) => pReject?.(e.message ?? e);
  const workerExec = (
    data:
      | { action: 'open'; filename: string }
      | { action: 'write'; buf: Uint8Array }
      | { action: 'close' }
  ) =>
    new Promise<void>((resolve, reject) => {
      pResolve = resolve;
      pReject = reject;
      worker.postMessage(
        data,
        // On mobile Safari the chunk is copied; elsewhere its underlying
        // buffer is transferred to the worker.
        isSafariMobile()
          ? undefined
          : { transfer: 'buf' in data && data.buf ? [data.buf.buffer] : [] }
      );
    });
  try {
    await workerExec({ action: 'open', filename: fileName });
  } catch (e) {
    // Nobody will ever call close() on a handle that was never returned.
    worker.terminate();
    throw e;
  }
  return {
    truncate: async () => {
      /* worker's openFile already calls truncate(0) on open */
    },
    write: (value) => workerExec({ action: 'write', buf: value }),
    close: async () => {
      try {
        await workerExec({ action: 'close' });
      } finally {
        // Terminate even when the close request itself fails.
        worker.terminate();
      }
    },
  };
}
@@ -0,0 +1,342 @@
1
+ import type { SamplingParams } from './types';
2
+
3
+ // Message content types
4
+
5
+ export interface ChatCompletionMessageText {
6
+ type: 'text';
7
+ text: string;
8
+ }
9
+
10
+ export interface ChatCompletionMessageImage {
11
+ type: 'image';
12
+ data: ArrayBuffer;
13
+ }
14
+
15
+ export interface ChatCompletionMessageAudio {
16
+ type: 'audio';
17
+ data: ArrayBuffer;
18
+ }
19
+
20
/**
 * One content part of a message whose `content` is an array (see
 * `ChatCompletionUserMessage.content`). Each member has a distinct literal
 * `type` ('text' | 'image' | 'audio'), so the union can be narrowed on it.
 */
+ export type ChatCompletionMessageContent =
21
+ | ChatCompletionMessageText
22
+ | ChatCompletionMessageImage
23
+ | ChatCompletionMessageAudio;
24
+
25
+ // Tool definitions
26
+
27
+ export interface ChatCompletionToolFunctionParameters {
28
+ type: 'object';
29
+ properties: Record<
30
+ string,
31
+ {
32
+ type: string;
33
+ description?: string;
34
+ enum?: string[];
35
+ [key: string]: unknown;
36
+ }
37
+ >;
38
+ required?: string[];
39
+ additionalProperties?: boolean;
40
+ }
41
+
42
+ export interface ChatCompletionToolFunction {
43
+ name: string;
44
+ description?: string;
45
+ parameters?: ChatCompletionToolFunctionParameters;
46
+ strict?: boolean;
47
+ }
48
+
49
+ export interface ChatCompletionTool {
50
+ type: 'function';
51
+ function: ChatCompletionToolFunction;
52
+ }
53
+
54
+ export type ChatCompletionToolChoice =
55
+ | 'none'
56
+ | 'auto'
57
+ | 'required'
58
+ | { type: 'function'; function: { name: string } };
59
+
60
+ // Message types
61
+
62
+ export interface ChatCompletionSystemMessage {
63
+ role: 'system';
64
+ content: string;
65
+ name?: string;
66
+ }
67
+
68
+ export interface ChatCompletionUserMessage {
69
+ role: 'user';
70
+ content: string | ChatCompletionMessageContent[];
71
+ name?: string;
72
+ }
73
+
74
/**
 * A single function call, as carried in the `tool_calls` array of
 * `ChatCompletionAssistantMessage`. `function.arguments` is a string, not a
 * parsed object; it has to be JSON-decoded by the consumer.
 */
+ export interface ChatCompletionToolCall {
75
+ id: string;
76
+ type: 'function';
77
+ function: {
78
+ name: string;
79
+ arguments: string; // JSON-encoded string
80
+ };
81
+ }
82
+
83
+ export interface ChatCompletionAssistantMessage {
84
+ role: 'assistant';
85
+ content?: string | null;
86
+ name?: string;
87
+ tool_calls?: ChatCompletionToolCall[];
88
+ }
89
+
90
+ export interface ChatCompletionToolMessage {
91
+ role: 'tool';
92
+ content: string;
93
+ tool_call_id: string;
94
+ }
95
+
96
+ export type ChatCompletionMessage =
97
+ | ChatCompletionSystemMessage
98
+ | ChatCompletionUserMessage
99
+ | ChatCompletionAssistantMessage
100
+ | ChatCompletionToolMessage;
101
+
102
+ // Request params
103
+
104
/**
 * Request parameters for a chat completion: the fields listed here
 * intersected with every field of `SamplingParams`. Only `messages` is
 * required.
 *
 * NOTE(review): `logit_bias` is declared here as `Record<string, number>` but
 * `SamplingParams.logit_bias` is an array of `{ token, bias }`. Because the two
 * declarations are intersected, the resulting property type requires a value
 * that satisfies both shapes. Confirm which shape the implementation expects.
 *
 * NOTE(review): both `temperature` (here) and `temp` (from `SamplingParams`)
 * exist on the resulting type; which one takes precedence is not visible here.
 */
+ export type ChatCompletionParams = {
105
+ messages: ChatCompletionMessage[];
106
+ stream?: boolean;
107
+ model?: string;
108
+ abortSignal?: AbortSignal;
109
+ // sampling
110
+ temperature?: number;
111
+ max_tokens?: number;
112
+ // stop?: string | string[];
113
+ // n?: number;
114
+ logprobs?: boolean;
115
+ top_logprobs?: number;
116
+ logit_bias?: Record<string, number>;
117
+ // tools
118
+ tools?: ChatCompletionTool[];
119
+ tool_choice?: ChatCompletionToolChoice;
120
+ // parallel_tool_calls?: boolean;
121
+ // response format
122
+ response_format?: {
123
+ type: 'text' | 'json_object' | 'json_schema';
124
+ json_schema?: { name: string; schema: unknown; strict?: boolean };
125
+ };
126
+ // user-facing
127
+ user?: string;
128
+ // llama-server-specific
129
+ chat_template_kwargs?: Record<string, any>;
130
+ cache_prompt?: boolean;
131
+ return_tokens?: boolean;
132
+ timings_per_token?: boolean;
133
+ } & SamplingParams;
134
+
135
+ // Response types
136
+
137
+ export interface ChatCompletionLogprob {
138
+ token: string;
139
+ logprob: number;
140
+ bytes: number[] | null;
141
+ }
142
+
143
+ export interface ChatCompletionLogprobsContent extends ChatCompletionLogprob {
144
+ top_logprobs: ChatCompletionLogprob[];
145
+ }
146
+
147
+ export interface ChatCompletionChoiceLogprobs {
148
+ content: ChatCompletionLogprobsContent[] | null;
149
+ refusal: ChatCompletionLogprobsContent[] | null;
150
+ }
151
+
152
+ export interface ChatCompletionChoice {
153
+ index: number;
154
+ message: ChatCompletionAssistantMessage;
155
+ finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | null;
156
+ logprobs: ChatCompletionChoiceLogprobs | null;
157
+ }
158
+
159
+ export interface ChatCompletionUsage {
160
+ prompt_tokens: number;
161
+ completion_tokens: number;
162
+ total_tokens: number;
163
+ prompt_tokens_details?: { cached_tokens: number; audio_tokens: number };
164
+ completion_tokens_details?: {
165
+ reasoning_tokens: number;
166
+ audio_tokens: number;
167
+ accepted_prediction_tokens: number;
168
+ rejected_prediction_tokens: number;
169
+ };
170
+ }
171
+
172
+ /** Response when stream=false (or omitted) */
173
+ export interface ChatCompletionResponse {
174
+ id: string;
175
+ object: 'chat.completion';
176
+ created: number;
177
+ model: string;
178
+ choices: ChatCompletionChoice[];
179
+ usage: ChatCompletionUsage;
180
+ system_fingerprint?: string;
181
+ service_tier?: string;
182
+ }
183
+
184
+ // Streaming response types
185
+
186
+ export interface ChatCompletionChunkDelta {
187
+ role?: 'assistant';
188
+ content?: string | null;
189
+ tool_calls?: Array<{
190
+ index: number;
191
+ id?: string;
192
+ type?: 'function';
193
+ function?: { name?: string; arguments?: string };
194
+ }>;
195
+ refusal?: string | null;
196
+ }
197
+
198
+ export interface ChatCompletionChunkChoice {
199
+ index: number;
200
+ delta: ChatCompletionChunkDelta;
201
+ finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | null;
202
+ logprobs: ChatCompletionChoiceLogprobs | null;
203
+ }
204
+
205
/**
 * Timing counters exposed through the optional `timings` field of
 * `ChatCompletionChunk`, `RawCompletionResponse` and `RawCompletionChunk`.
 * All fields are required numbers, split into a `prompt_*` group and a
 * `predicted_*` group plus `cache_n`.
 * NOTE(review): the `_ms` suffix presumably means milliseconds; confirm
 * against the producer of these values.
 */
+ export interface ResultTimings {
206
+ cache_n: number;
207
+ prompt_n: number;
208
+ prompt_ms: number;
209
+ prompt_per_token_ms: number;
210
+ prompt_per_second: number;
211
+ predicted_n: number;
212
+ predicted_ms: number;
213
+ predicted_per_token_ms: number;
214
+ predicted_per_second: number;
215
+ }
216
+
217
+ /** Response when stream=true — one chunk per SSE event */
218
+ export interface ChatCompletionChunk {
219
+ id: string;
220
+ object: 'chat.completion.chunk';
221
+ created: number;
222
+ model: string;
223
+ choices: ChatCompletionChunkChoice[];
224
+ usage?: ChatCompletionUsage | null;
225
+ timings?: ResultTimings;
226
+ }
227
+
228
+ // Raw (text) completion
229
+
230
/**
 * Request parameters for a raw (text) completion: the fields listed here
 * intersected with every field of `SamplingParams`. Only `prompt` is
 * required; it may be a single string or an array of strings.
 *
 * `top_p` and `seed` are declared both here and in `SamplingParams` with
 * compatible types.
 *
 * NOTE(review): `logit_bias` is declared here as `Record<string, number>` but
 * `SamplingParams.logit_bias` is an array of `{ token, bias }`. Because the two
 * declarations are intersected, the resulting property type requires a value
 * that satisfies both shapes. Confirm which shape the implementation expects.
 */
+ export type RawCompletionParams = {
231
+ prompt: string | string[];
232
+ stream?: boolean;
233
+ model?: string;
234
+ abortSignal?: AbortSignal;
235
+ suffix?: string;
236
+ max_tokens?: number;
237
+ temperature?: number;
238
+ top_p?: number;
239
+ n?: number;
240
+ logprobs?: number | null;
241
+ echo?: boolean;
242
+ stop?: string | string[];
243
+ presence_penalty?: number;
244
+ frequency_penalty?: number;
245
+ best_of?: number;
246
+ logit_bias?: Record<string, number>;
247
+ seed?: number;
248
+ user?: string;
249
+ } & SamplingParams;
250
+
251
+ export interface RawCompletionChoice {
252
+ text: string;
253
+ index: number;
254
+ finish_reason: 'stop' | 'length' | 'content_filter' | null;
255
+ logprobs: {
256
+ tokens: string[];
257
+ token_logprobs: number[];
258
+ top_logprobs: Array<Record<string, number>>;
259
+ text_offset: number[];
260
+ } | null;
261
+ }
262
+
263
+ /** Response when stream=false */
264
+ export interface RawCompletionResponse {
265
+ id: string;
266
+ object: 'text_completion';
267
+ created: number;
268
+ model: string;
269
+ choices: RawCompletionChoice[];
270
+ usage: ChatCompletionUsage;
271
+ system_fingerprint?: string;
272
+ timings?: ResultTimings;
273
+ }
274
+
275
+ /** One chunk when stream=true */
276
+ export interface RawCompletionChunk {
277
+ id: string;
278
+ object: 'text_completion';
279
+ created: number;
280
+ model: string;
281
+ choices: Array<{
282
+ text: string;
283
+ index: number;
284
+ finish_reason: 'stop' | 'length' | 'content_filter' | null;
285
+ logprobs: null;
286
+ }>;
287
+ usage?: ChatCompletionUsage | null;
288
+ timings?: ResultTimings;
289
+ }
290
+
291
+ // Embeddings
292
+
293
/**
 * Request parameters for creating embeddings. `input` accepts a string, an
 * array of strings, an array of numbers, or an array of number arrays.
 * NOTE(review): the numeric forms are presumably pre-tokenized input; confirm
 * against the implementation.
 * `encoding_format` selects between `'float'` and `'base64'`; see
 * `Embedding.embedding` for the corresponding result shapes.
 */
+ export interface EmbeddingCreateParams {
294
+ input: string | string[] | number[] | number[][];
295
+ model?: string;
296
+ encoding_format?: 'float' | 'base64';
297
+ // dimensions?: number; // unsupported by llama.cpp
298
+ // user?: string;
299
+ }
300
+
301
/**
 * One embedding result, as listed in `CreateEmbeddingResponse.data`.
 * `embedding` is either a number array or a string; consumers must check
 * which one they received before using it.
 */
+ export interface Embedding {
302
+ object: 'embedding';
303
+ index: number;
304
+ embedding: number[] | string; // float array or base64 string depending on encoding_format
305
+ }
306
+
307
+ export interface EmbeddingUsage {
308
+ prompt_tokens: number;
309
+ total_tokens: number;
310
+ }
311
+
312
+ export interface CreateEmbeddingResponse {
313
+ object: 'list';
314
+ data: Embedding[];
315
+ model: string;
316
+ usage: EmbeddingUsage;
317
+ }
318
+
319
+ // Reranking (NOT official OAI-compat, but is a commonly-used API schema)
320
+
321
+ export interface RerankParams {
322
+ query: string;
323
+ documents: string[];
324
+ top_n?: number;
325
+ }
326
+
327
+ export interface RerankResult {
328
+ index: number;
329
+ relevance_score: number;
330
+ }
331
+
332
+ export interface RerankUsage {
333
+ prompt_tokens: number;
334
+ total_tokens: number;
335
+ }
336
+
337
+ export interface RerankResponse {
338
+ model: string;
339
+ object: 'list';
340
+ usage: RerankUsage;
341
+ results: RerankResult[];
342
+ }
@@ -0,0 +1,133 @@
1
+ // Note: snake_case is used to match llama.cpp's naming convention
2
/**
 * Options accepted when loading a model. Every field is optional.
 *
 * `pooling_type` accepts both the legacy `LLAMA_POOLING_TYPE_*` names and the
 * newer short names, whereas `rope_scaling_type` only offers the
 * `LLAMA_ROPE_SCALING_TYPE_*` names.
 *
 * NOTE(review): both `offload_kqv` and `no_kv_offload` are declared; how they
 * interact when both are set is not visible here.
 */
+ export interface LoadModelParams {
3
+ log_level?: LogLevel;
4
+ seed?: number;
5
+ n_ctx?: number;
6
+ n_batch?: number;
7
+ // by default, all layers are offloaded if WebGPU is available
8
+ n_gpu_layers?: number;
9
+ // by default, on multi-thread build, we take half number of available threads (hardwareConcurrency / 2)
10
+ n_threads?: number;
11
+ embeddings?: boolean;
12
+ offload_kqv?: boolean;
13
+ pooling_type?: // legacy values
14
+ | 'LLAMA_POOLING_TYPE_UNSPECIFIED'
15
+ | 'LLAMA_POOLING_TYPE_NONE'
16
+ | 'LLAMA_POOLING_TYPE_MEAN'
17
+ | 'LLAMA_POOLING_TYPE_CLS'
18
+ // new values
19
+ | 'unspecified'
20
+ | 'none'
21
+ | 'mean'
22
+ | 'cls'
23
+ | 'last'
24
+ | 'rank';
25
+ // context extending
26
+ rope_scaling_type?:
27
+ | 'LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED'
28
+ | 'LLAMA_ROPE_SCALING_TYPE_NONE'
29
+ | 'LLAMA_ROPE_SCALING_TYPE_LINEAR'
30
+ | 'LLAMA_ROPE_SCALING_TYPE_YARN';
31
+ rope_freq_base?: number;
32
+ rope_freq_scale?: number;
33
+ yarn_ext_factor?: number;
34
+ yarn_attn_factor?: number;
35
+ yarn_beta_fast?: number;
36
+ yarn_beta_slow?: number;
37
+ yarn_orig_ctx?: number;
38
+ // optimizations
39
+ cache_type_k?: 'f32' | 'f16' | 'q8_0' | 'q5_1' | 'q5_0' | 'q4_1' | 'q4_0';
40
+ cache_type_v?: 'f32' | 'f16' | 'q8_0' | 'q5_1' | 'q5_0' | 'q4_1' | 'q4_0';
41
+ flash_attn?: boolean; // true is auto, false is disabled
42
+ swa_full?: boolean;
43
+ chat_template?: string;
44
+ jinja?: boolean;
45
+ reasoning?: boolean;
46
+ image_min_tokens?: number;
47
+ image_max_tokens?: number;
48
+ warmup?: boolean;
49
+ no_kv_offload?: boolean;
50
+ mmproj_offload?: boolean;
51
+ cont_batching?: boolean;
52
+ n_keep?: number;
53
+ ctx_shift?: boolean;
54
+ cache_idle_slots?: boolean;
55
+ n_cache_reuse?: number;
56
+ lora_adapters?: { path: string; scale?: number }[];
57
+ lora_init_without_apply?: boolean;
58
+ spec_draft_model?: string;
59
+ spec_draft_ngl?: number;
60
+ spec_draft_n_max?: number;
61
+ spec_draft_n_min?: number;
62
+ spec_draft_p_min?: number;
63
+ spec_draft_threads?: number;
64
+ spec_draft_threads_batch?: number;
65
+ kv_overrides?: Record<string, string>;
66
+ reasoning_budget_tokens?: number;
67
+ reasoning_budget_message?: string;
68
+ reasoning_format?: 'none' | 'deepseek-legacy' | 'deepseek';
69
+ skip_chat_parsing?: boolean;
70
+ prefill_assistant?: boolean;
71
+ default_template_kwargs?: Record<string, any>;
72
+ }
73
+
74
+ // Note: snake_case is used to match llama.cpp's naming convention
75
/**
 * Information about a loaded context/model. Every field is required.
 * `metadata` is a flat string-to-string map, `list_tokens_eog` is an array of
 * token ids, and the `has_*` / `add_*` fields are booleans.
 * NOTE(review): presumably produced once a model has been loaded; the
 * producer is not visible here.
 */
+ export interface LoadedContextInfo {
76
+ n_vocab: number;
77
+ n_ctx: number;
78
+ n_batch: number;
79
+ n_ubatch: number;
80
+ n_ctx_train: number;
81
+ n_embd: number;
82
+ n_layer: number;
83
+ metadata: Record<string, string>;
84
+ token_bos: number;
85
+ token_eos: number;
86
+ token_eot: number;
87
+ list_tokens_eog: number[];
88
+ has_encoder: boolean;
89
+ token_decoder_start: number;
90
+ add_bos_token: boolean;
91
+ add_eos_token: boolean;
92
+ has_image_input: boolean;
93
+ has_audio_input: boolean;
94
+ }
95
+
96
+ // Note: snake_case is used to match llama.cpp's naming convention
97
/**
 * Sampling options. Every field is optional, and most are additionally typed
 * `| undefined` so they may be passed explicitly as `undefined`.
 *
 * Both `typ_p` and `typical_p` are declared.
 * NOTE(review): they look like aliases of the same setting; confirm which one
 * the implementation reads.
 *
 * `logit_bias` here is an array of `{ token, bias }` pairs.
 */
+ export interface SamplingParams {
98
+ // See sampling.h for more details
99
+ seed?: number;
100
+ mirostat?: number | undefined; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
101
+ mirostat_eta?: number | undefined;
102
+ mirostat_tau?: number | undefined;
103
+ samplers_sequence?: string[] | undefined; // unused for now
104
+ temp?: number | undefined; // temperature
105
+ top_p?: number | undefined;
106
+ top_k?: number | undefined;
107
+ penalty_last_n?: number | undefined;
108
+ penalty_repeat?: number | undefined;
109
+ penalty_freq?: number | undefined;
110
+ penalty_present?: number | undefined;
111
+ dynatemp_range?: number | undefined;
112
+ dynatemp_exponent?: number | undefined;
113
+ grammar?: string;
114
+ n_prev?: number | undefined;
115
+ n_probs?: number | undefined;
116
+ min_p?: number | undefined;
117
+ typ_p?: number | undefined;
118
+ typical_p?: number | undefined;
119
+ logit_bias?: { token: number; bias: number }[] | undefined;
120
+ ignore_eos?: boolean | undefined;
121
+ }
122
+
123
/**
 * Streaming option bundle: `stream` is the literal `true`, and `onData` is a
 * callback that receives one value of type `T` per call and returns nothing.
 */
+ export interface StreamParams<T> {
124
+ stream: true;
125
+ onData: (data: T) => void;
126
+ }
127
+
128
/**
 * Log levels usable as `LoadModelParams.log_level`. This is a numeric enum
 * whose values increase from DEBUG (1) to ERROR (4).
 */
+ export enum LogLevel {
129
+ DEBUG = 1,
130
+ INFO = 2,
131
+ WARN = 3,
132
+ ERROR = 4,
133
+ }
+ }