@shenghuabi/llama 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1029 @@
1
+ import { ExternalCallBaseService } from '@cyia/external-call';
2
+ import { LlamaConfigInputType, LlamaServerType } from './define';
3
+ import { DownloadFileOptions } from '@cyia/dl';
4
+ /**
5
+ * Write the configuration first, then initialize.
6
+ */
7
+ export declare class LlamaSwapService extends ExternalCallBaseService {
8
+ #private; // marker the declaration emitter writes when the class has ECMAScript #private members; their names and types are not exposed
9
+ logName: string; // NOTE(review): presumably the label used when logging — confirm against the implementation
10
+ start$: import("static-injector").WritableSignal<boolean>; // writable boolean signal; NOTE(review): presumably reflects whether the service has been started — confirm
11
+ llamaDir$$: import("static-injector").Signal<string>; // read-only string signal; NOTE(review): presumably the llama directory path — confirm
12
+ llamaSwapDir$$: import("static-injector").Signal<string>; // read-only string signal; NOTE(review): presumably the llama-swap directory path — confirm
13
+ startPath$$: import("static-injector").Signal<string>; // read-only string signal; NOTE(review): meaning not visible in this declaration — confirm
14
+ execPath$$: import("static-injector").Signal<string>; // read-only string signal; NOTE(review): presumably the executable path — confirm
15
+ checkFilePath$$: import("static-injector").Signal<string>; // read-only string signal; NOTE(review): presumably the file probed by checkExist() — confirm
16
+ configExist$: import("static-injector").WritableSignal<boolean>; // writable boolean signal; NOTE(review): presumably updated by checkConfigExist() — confirm
17
+ checkConfigExist(): void; // synchronous, returns nothing; NOTE(review): presumably refreshes configExist$ — confirm
18
+ checkExist(): Promise<void>; // asynchronous, resolves with no value; NOTE(review): what is checked is not visible in this declaration — confirm
19
+ init(): void; // synchronous; the class-level comment states the configuration must be written before initializing
20
+ startup(): Promise<{ // resolves to the object below, or to undefined (NOTE(review): the undefined case is not explained by this declaration — confirm)
21
+ instance: import("execa").ResultPromise<{}>; // execa result promise for the launched process
22
+ abortController: AbortController; // NOTE(review): presumably the controller whose signal can abort `instance` — confirm
23
+ } | undefined>;
24
+ convertOllamaModel(originList: any[], llamaVersion: string, OLLAMA_MODELS?: string): Promise<({ // resolves to an array whose elements are a union of two object shapes; `originList` is untyped (any[]); NOTE(review): presumably converts Ollama model entries into llama-server configs — confirm
25
+ config: {
26
+ common: {
27
+ 'verbose-prompt'?: {
28
+ enable?: boolean | undefined;
29
+ } | undefined;
30
+ threads?: {
31
+ enable?: boolean | undefined;
32
+ value?: [number] | undefined;
33
+ } | undefined;
34
+ 'threads-batch'?: {
35
+ enable?: boolean | undefined;
36
+ value?: [number] | undefined;
37
+ } | undefined;
38
+ 'cpu-mask'?: {
39
+ enable?: boolean | undefined;
40
+ value?: [string] | undefined;
41
+ } | undefined;
42
+ 'cpu-range'?: {
43
+ enable?: boolean | undefined;
44
+ value?: [string] | undefined;
45
+ } | undefined;
46
+ 'cpu-strict'?: {
47
+ enable?: boolean | undefined;
48
+ value?: ["0" | "1"] | undefined;
49
+ } | undefined;
50
+ prio?: {
51
+ enable?: boolean | undefined;
52
+ value?: [0 | 2 | 1 | 3] | undefined;
53
+ } | undefined;
54
+ poll?: {
55
+ enable?: boolean | undefined;
56
+ value?: [string] | undefined;
57
+ } | undefined;
58
+ 'cpu-mask-batch'?: {
59
+ enable?: boolean | undefined;
60
+ value?: [string] | undefined;
61
+ } | undefined;
62
+ 'cpu-range-batch'?: {
63
+ enable?: boolean | undefined;
64
+ value?: [string] | undefined;
65
+ } | undefined;
66
+ 'cpu-strict-batch'?: {
67
+ enable?: boolean | undefined;
68
+ value?: ["0" | "1"] | undefined;
69
+ } | undefined;
70
+ 'prio-batch'?: {
71
+ enable?: boolean | undefined;
72
+ value?: [0 | 2 | 1 | 3] | undefined;
73
+ } | undefined;
74
+ 'poll-batch'?: {
75
+ enable?: boolean | undefined;
76
+ value?: ["0" | "1"] | undefined;
77
+ } | undefined;
78
+ 'ctx-size'?: {
79
+ enable?: boolean | undefined;
80
+ value?: [number] | undefined;
81
+ } | undefined;
82
+ predict?: {
83
+ enable?: boolean | undefined;
84
+ value?: [number] | undefined;
85
+ } | undefined;
86
+ 'batch-size'?: {
87
+ enable?: boolean | undefined;
88
+ value?: [number] | undefined;
89
+ } | undefined;
90
+ 'ubatch-size'?: {
91
+ enable?: boolean | undefined;
92
+ value?: [number] | undefined;
93
+ } | undefined;
94
+ keep?: {
95
+ enable?: boolean | undefined;
96
+ value?: [number] | undefined;
97
+ } | undefined;
98
+ 'swa-full'?: {
99
+ enable?: boolean | undefined;
100
+ } | undefined;
101
+ 'kv-unified'?: {
102
+ enable?: boolean | undefined;
103
+ } | undefined;
104
+ 'flash-attn'?: {
105
+ enable?: boolean | undefined;
106
+ value?: ["on" | "off" | "auto"] | undefined;
107
+ } | undefined;
108
+ 'no-perf'?: {
109
+ enable?: boolean | undefined;
110
+ } | undefined;
111
+ escape?: {
112
+ enable?: boolean | undefined;
113
+ } | undefined;
114
+ 'no-escape'?: {
115
+ enable?: boolean | undefined;
116
+ } | undefined;
117
+ 'rope-scaling'?: {
118
+ enable?: boolean | undefined;
119
+ value?: ["none" | "linear" | "yarn"] | undefined;
120
+ } | undefined;
121
+ 'rope-scale'?: {
122
+ enable?: boolean | undefined;
123
+ value?: [number] | undefined;
124
+ } | undefined;
125
+ 'rope-freq-base'?: {
126
+ enable?: boolean | undefined;
127
+ value?: [number] | undefined;
128
+ } | undefined;
129
+ 'rope-freq-scale'?: {
130
+ enable?: boolean | undefined;
131
+ value?: [number] | undefined;
132
+ } | undefined;
133
+ 'yarn-orig-ctx'?: {
134
+ enable?: boolean | undefined;
135
+ value?: [number] | undefined;
136
+ } | undefined;
137
+ 'yarn-ext-factor'?: {
138
+ enable?: boolean | undefined;
139
+ value?: [number] | undefined;
140
+ } | undefined;
141
+ 'yarn-attn-factor'?: {
142
+ enable?: boolean | undefined;
143
+ value?: [number] | undefined;
144
+ } | undefined;
145
+ 'yarn-beta-slow'?: {
146
+ enable?: boolean | undefined;
147
+ value?: [number] | undefined;
148
+ } | undefined;
149
+ 'yarn-beta-fast'?: {
150
+ enable?: boolean | undefined;
151
+ value?: [number] | undefined;
152
+ } | undefined;
153
+ 'no-kv-offload'?: {
154
+ enable?: boolean | undefined;
155
+ } | undefined;
156
+ 'no-repack'?: {
157
+ enable?: boolean | undefined;
158
+ } | undefined;
159
+ 'cache-type-k'?: {
160
+ enable?: boolean | undefined;
161
+ value?: ["f32" | "f16" | "bf16" | "q8_0" | "q4_0" | "q4_1" | "iq4_nl" | "q5_0" | "q5_1"] | undefined;
162
+ } | undefined;
163
+ 'cache-type-v'?: {
164
+ enable?: boolean | undefined;
165
+ value?: ["f32" | "f16" | "bf16" | "q8_0" | "q4_0" | "q4_1" | "iq4_nl" | "q5_0" | "q5_1"] | undefined;
166
+ } | undefined;
167
+ 'defrag-thold'?: {
168
+ enable?: boolean | undefined;
169
+ value?: [number] | undefined;
170
+ } | undefined;
171
+ parallel?: {
172
+ enable?: boolean | undefined;
173
+ value?: [number] | undefined;
174
+ } | undefined;
175
+ mlock?: {
176
+ enable?: boolean | undefined;
177
+ } | undefined;
178
+ 'no-mmap'?: {
179
+ enable?: boolean | undefined;
180
+ } | undefined;
181
+ numa?: {
182
+ enable?: boolean | undefined;
183
+ value?: ["distribute" | "isolate" | "numactl"] | undefined;
184
+ } | undefined;
185
+ device?: {
186
+ enable?: boolean | undefined;
187
+ value?: [string] | undefined;
188
+ } | undefined;
189
+ 'override-tensor'?: {
190
+ enable?: boolean | undefined;
191
+ value?: [string] | undefined;
192
+ } | undefined;
193
+ 'cpu-moe'?: {
194
+ enable?: boolean | undefined;
195
+ } | undefined;
196
+ 'n-cpu-moe'?: {
197
+ enable?: boolean | undefined;
198
+ value?: [number] | undefined;
199
+ } | undefined;
200
+ 'gpu-layers'?: {
201
+ enable?: boolean | undefined;
202
+ value?: [number] | undefined;
203
+ } | undefined;
204
+ 'split-mode'?: {
205
+ enable?: boolean | undefined;
206
+ value?: ["none" | "layer" | "row"] | undefined;
207
+ } | undefined;
208
+ 'tensor-split'?: {
209
+ enable?: boolean | undefined;
210
+ value?: [string] | undefined;
211
+ } | undefined;
212
+ 'main-gpu'?: {
213
+ enable?: boolean | undefined;
214
+ value?: [number] | undefined;
215
+ } | undefined;
216
+ 'check-tensors'?: {
217
+ enable?: boolean | undefined;
218
+ } | undefined;
219
+ 'override-kv'?: {
220
+ enable?: boolean | undefined;
221
+ value?: [string] | undefined;
222
+ } | undefined;
223
+ 'no-op-offload'?: {
224
+ enable?: boolean | undefined;
225
+ } | undefined;
226
+ lora?: {
227
+ enable?: boolean | undefined;
228
+ value?: [string] | undefined;
229
+ } | undefined;
230
+ 'lora-scaled'?: {
231
+ enable?: boolean | undefined;
232
+ value?: [string, number] | undefined;
233
+ } | undefined;
234
+ 'control-vector'?: {
235
+ enable?: boolean | undefined;
236
+ value?: [string] | undefined;
237
+ } | undefined;
238
+ 'control-vector-scaled'?: {
239
+ enable?: boolean | undefined;
240
+ value?: [string, number] | undefined;
241
+ } | undefined;
242
+ 'control-vector-layer-range'?: {
243
+ enable?: boolean | undefined;
244
+ value?: [string, string] | undefined;
245
+ } | undefined;
246
+ model: { // the only required key in `common`; it is a union of a single-element tuple form and a required enable/string[] form
247
+ enable?: boolean | undefined;
248
+ value?: [string] | undefined;
249
+ } | {
250
+ enable: boolean;
251
+ value: string[];
252
+ };
253
+ 'model-url'?: {
254
+ enable?: boolean | undefined;
255
+ value?: [string] | undefined;
256
+ } | undefined;
257
+ 'docker-repo'?: {
258
+ enable?: boolean | undefined;
259
+ value?: ["<repo>/]<model>[:quant"] | undefined;
260
+ } | undefined;
261
+ 'hf-repo'?: {
262
+ enable?: boolean | undefined;
263
+ value?: [string] | undefined;
264
+ } | undefined;
265
+ 'hf-repo-draft'?: {
266
+ enable?: boolean | undefined;
267
+ value?: [string] | undefined;
268
+ } | undefined;
269
+ 'hf-file'?: {
270
+ enable?: boolean | undefined;
271
+ value?: [string] | undefined;
272
+ } | undefined;
273
+ 'hf-repo-v'?: {
274
+ enable?: boolean | undefined;
275
+ value?: [string] | undefined;
276
+ } | undefined;
277
+ 'hf-file-v'?: {
278
+ enable?: boolean | undefined;
279
+ value?: [string] | undefined;
280
+ } | undefined;
281
+ 'hf-token'?: {
282
+ enable?: boolean | undefined;
283
+ value?: [string] | undefined;
284
+ } | undefined;
285
+ 'log-disable'?: {
286
+ enable?: boolean | undefined;
287
+ } | undefined;
288
+ 'log-file'?: {
289
+ enable?: boolean | undefined;
290
+ value?: [string] | undefined;
291
+ } | undefined;
292
+ 'log-colors'?: {
293
+ enable?: boolean | undefined;
294
+ value?: ["on" | "off" | "auto"] | undefined;
295
+ } | undefined;
296
+ verbose?: {
297
+ enable?: boolean | undefined;
298
+ } | undefined;
299
+ offline?: {
300
+ enable?: boolean | undefined;
301
+ } | undefined;
302
+ verbosity?: {
303
+ enable?: boolean | undefined;
304
+ value?: [number] | undefined;
305
+ } | undefined;
306
+ 'log-prefix'?: {
307
+ enable?: boolean | undefined;
308
+ } | undefined;
309
+ 'log-timestamps'?: {
310
+ enable?: boolean | undefined;
311
+ } | undefined;
312
+ 'cache-type-k-draft'?: {
313
+ enable?: boolean | undefined;
314
+ value?: [string] | undefined;
315
+ } | undefined;
316
+ 'cache-type-v-draft'?: {
317
+ enable?: boolean | undefined;
318
+ value?: [string] | undefined;
319
+ } | undefined;
320
+ };
321
+ };
322
+ exec: { // in this variant both fields are plain strings
323
+ version: string;
324
+ device: string;
325
+ };
326
+ model: string;
327
+ } | { // second element shape: only a `config` typed as LlamaServerType
328
+ config: LlamaServerType;
329
+ })[]>;
330
+ stop(): void; // synchronous, returns nothing; NOTE(review): presumably stops what startup() launched — confirm
331
+ writeConfig(config: LlamaConfigInputType): Promise<void>; // asynchronous; the class-level comment states the configuration is written before initializing
332
+ downloadExec(version: string, options?: { // asynchronous, resolves with no value; NOTE(review): presumably downloads the executable for `version` — confirm
333
+ progressMessage?: DownloadFileOptions['message']; // optional; typed as the `message` member of @cyia/dl's DownloadFileOptions
334
+ }): Promise<void>;
335
+ downloadModel(modelOptions: { // every field of modelOptions is optional; resolves to a string or undefined (NOTE(review): presumably the local path of the downloaded model — confirm)
336
+ url?: string;
337
+ repo?: string;
338
+ token?: string;
339
+ fileName?: string;
340
+ vendor?: 'ollama'; // the only accepted literal is 'ollama'
341
+ }, options?: {
342
+ progressMessage?: DownloadFileOptions['message']; // optional; typed as the `message` member of @cyia/dl's DownloadFileOptions
343
+ }): Promise<string | undefined>;
344
+ checkOrDownloadLlama(dirName: string, version: string, device: string, options?: { // same parameter list as downloadLlama; NOTE(review): presumably downloads only when the build is not already present — confirm
345
+ progressMessage?: DownloadFileOptions['message']; // optional; typed as the `message` member of @cyia/dl's DownloadFileOptions
346
+ }): Promise<void>;
347
+ downloadLlama(dirName: string, version: string, device: string, options?: { // asynchronous, resolves with no value; NOTE(review): presumably fetches the llama build for `version`/`device` into `dirName` — confirm
348
+ progressMessage?: DownloadFileOptions['message']; // optional; typed as the `message` member of @cyia/dl's DownloadFileOptions
349
+ }): Promise<void>;
350
+ createModelConfig(model: string, options?: { // NOTE(review): presumably builds the configuration entry for `model` — confirm against the implementation
351
+ progressMessage?: DownloadFileOptions['message']; // optional; typed as the `message` member of @cyia/dl's DownloadFileOptions
352
+ }): Promise<{ // resolves to an object in which every top-level field is optional
353
+ config?: { // holds three optional option groups: common, exampleSpecific, sampling
354
+ common?: { // first option group; each entry has a required `enable` and, for most keys, an optional `value` tuple
355
+ 'verbose-prompt'?: {
356
+ enable: boolean;
357
+ } | undefined;
358
+ threads?: {
359
+ enable: boolean;
360
+ value?: [number] | undefined;
361
+ } | undefined;
362
+ 'threads-batch'?: {
363
+ enable: boolean;
364
+ value?: [number] | undefined;
365
+ } | undefined;
366
+ 'cpu-mask'?: {
367
+ enable: boolean;
368
+ value?: [string] | undefined;
369
+ } | undefined;
370
+ 'cpu-range'?: {
371
+ enable: boolean;
372
+ value?: [string] | undefined;
373
+ } | undefined;
374
+ 'cpu-strict'?: {
375
+ enable: boolean;
376
+ value?: ["0" | "1"] | undefined;
377
+ } | undefined;
378
+ prio?: {
379
+ enable: boolean;
380
+ value?: [0 | 2 | 1 | 3] | undefined;
381
+ } | undefined;
382
+ poll?: {
383
+ enable: boolean;
384
+ value?: [string] | undefined;
385
+ } | undefined;
386
+ 'cpu-mask-batch'?: {
387
+ enable: boolean;
388
+ value?: [string] | undefined;
389
+ } | undefined;
390
+ 'cpu-range-batch'?: {
391
+ enable: boolean;
392
+ value?: [string] | undefined;
393
+ } | undefined;
394
+ 'cpu-strict-batch'?: {
395
+ enable: boolean;
396
+ value?: ["0" | "1"] | undefined;
397
+ } | undefined;
398
+ 'prio-batch'?: {
399
+ enable: boolean;
400
+ value?: [0 | 2 | 1 | 3] | undefined;
401
+ } | undefined;
402
+ 'poll-batch'?: {
403
+ enable: boolean;
404
+ value?: ["0" | "1"] | undefined;
405
+ } | undefined;
406
+ 'ctx-size'?: {
407
+ enable: boolean;
408
+ value?: [number] | undefined;
409
+ } | undefined;
410
+ predict?: {
411
+ enable: boolean;
412
+ value?: [number] | undefined;
413
+ } | undefined;
414
+ 'batch-size'?: {
415
+ enable: boolean;
416
+ value?: [number] | undefined;
417
+ } | undefined;
418
+ 'ubatch-size'?: {
419
+ enable: boolean;
420
+ value?: [number] | undefined;
421
+ } | undefined;
422
+ keep?: {
423
+ enable: boolean;
424
+ value?: [number] | undefined;
425
+ } | undefined;
426
+ 'swa-full'?: {
427
+ enable: boolean;
428
+ } | undefined;
429
+ 'kv-unified'?: {
430
+ enable: boolean;
431
+ } | undefined;
432
+ 'flash-attn'?: {
433
+ enable: boolean;
434
+ value?: ["on" | "off" | "auto"] | undefined;
435
+ } | undefined;
436
+ 'no-perf'?: {
437
+ enable: boolean;
438
+ } | undefined;
439
+ escape?: {
440
+ enable: boolean;
441
+ } | undefined;
442
+ 'no-escape'?: {
443
+ enable: boolean;
444
+ } | undefined;
445
+ 'rope-scaling'?: {
446
+ enable: boolean;
447
+ value?: ["none" | "linear" | "yarn"] | undefined;
448
+ } | undefined;
449
+ 'rope-scale'?: {
450
+ enable: boolean;
451
+ value?: [number] | undefined;
452
+ } | undefined;
453
+ 'rope-freq-base'?: {
454
+ enable: boolean;
455
+ value?: [number] | undefined;
456
+ } | undefined;
457
+ 'rope-freq-scale'?: {
458
+ enable: boolean;
459
+ value?: [number] | undefined;
460
+ } | undefined;
461
+ 'yarn-orig-ctx'?: {
462
+ enable: boolean;
463
+ value?: [number] | undefined;
464
+ } | undefined;
465
+ 'yarn-ext-factor'?: {
466
+ enable: boolean;
467
+ value?: [number] | undefined;
468
+ } | undefined;
469
+ 'yarn-attn-factor'?: {
470
+ enable: boolean;
471
+ value?: [number] | undefined;
472
+ } | undefined;
473
+ 'yarn-beta-slow'?: {
474
+ enable: boolean;
475
+ value?: [number] | undefined;
476
+ } | undefined;
477
+ 'yarn-beta-fast'?: {
478
+ enable: boolean;
479
+ value?: [number] | undefined;
480
+ } | undefined;
481
+ 'no-kv-offload'?: {
482
+ enable: boolean;
483
+ } | undefined;
484
+ 'no-repack'?: {
485
+ enable: boolean;
486
+ } | undefined;
487
+ 'cache-type-k'?: {
488
+ enable: boolean;
489
+ value?: ["f32" | "f16" | "bf16" | "q8_0" | "q4_0" | "q4_1" | "iq4_nl" | "q5_0" | "q5_1"] | undefined;
490
+ } | undefined;
491
+ 'cache-type-v'?: {
492
+ enable: boolean;
493
+ value?: ["f32" | "f16" | "bf16" | "q8_0" | "q4_0" | "q4_1" | "iq4_nl" | "q5_0" | "q5_1"] | undefined;
494
+ } | undefined;
495
+ 'defrag-thold'?: {
496
+ enable: boolean;
497
+ value?: [number] | undefined;
498
+ } | undefined;
499
+ parallel?: {
500
+ enable: boolean;
501
+ value?: [number] | undefined;
502
+ } | undefined;
503
+ mlock?: {
504
+ enable: boolean;
505
+ } | undefined;
506
+ 'no-mmap'?: {
507
+ enable: boolean;
508
+ } | undefined;
509
+ numa?: {
510
+ enable: boolean;
511
+ value?: ["distribute" | "isolate" | "numactl"] | undefined;
512
+ } | undefined;
513
+ device?: {
514
+ enable: boolean;
515
+ value?: [string] | undefined;
516
+ } | undefined;
517
+ 'override-tensor'?: {
518
+ enable: boolean;
519
+ value?: [string] | undefined;
520
+ } | undefined;
521
+ 'cpu-moe'?: {
522
+ enable: boolean;
523
+ } | undefined;
524
+ 'n-cpu-moe'?: {
525
+ enable: boolean;
526
+ value?: [number] | undefined;
527
+ } | undefined;
528
+ 'gpu-layers'?: {
529
+ enable: boolean;
530
+ value?: [number] | undefined;
531
+ } | undefined;
532
+ 'split-mode'?: {
533
+ enable: boolean;
534
+ value?: ["none" | "layer" | "row"] | undefined;
535
+ } | undefined;
536
+ 'tensor-split'?: {
537
+ enable: boolean;
538
+ value?: [string] | undefined;
539
+ } | undefined;
540
+ 'main-gpu'?: {
541
+ enable: boolean;
542
+ value?: [number] | undefined;
543
+ } | undefined;
544
+ 'check-tensors'?: {
545
+ enable: boolean;
546
+ } | undefined;
547
+ 'override-kv'?: {
548
+ enable: boolean;
549
+ value?: [string] | undefined;
550
+ } | undefined;
551
+ 'no-op-offload'?: {
552
+ enable: boolean;
553
+ } | undefined;
554
+ lora?: {
555
+ enable: boolean;
556
+ value?: [string] | undefined;
557
+ } | undefined;
558
+ 'lora-scaled'?: {
559
+ enable: boolean;
560
+ value?: [string, number] | undefined;
561
+ } | undefined;
562
+ 'control-vector'?: {
563
+ enable: boolean;
564
+ value?: [string] | undefined;
565
+ } | undefined;
566
+ 'control-vector-scaled'?: {
567
+ enable: boolean;
568
+ value?: [string, number] | undefined;
569
+ } | undefined;
570
+ 'control-vector-layer-range'?: {
571
+ enable: boolean;
572
+ value?: [string, string] | undefined;
573
+ } | undefined;
574
+ model?: {
575
+ enable: boolean;
576
+ value?: [string] | undefined;
577
+ } | undefined;
578
+ 'model-url'?: {
579
+ enable: boolean;
580
+ value?: [string] | undefined;
581
+ } | undefined;
582
+ 'docker-repo'?: {
583
+ enable: boolean;
584
+ value?: ["<repo>/]<model>[:quant"] | undefined;
585
+ } | undefined;
586
+ 'hf-repo'?: {
587
+ enable: boolean;
588
+ value?: [string] | undefined;
589
+ } | undefined;
590
+ 'hf-repo-draft'?: {
591
+ enable: boolean;
592
+ value?: [string] | undefined;
593
+ } | undefined;
594
+ 'hf-file'?: {
595
+ enable: boolean;
596
+ value?: [string] | undefined;
597
+ } | undefined;
598
+ 'hf-repo-v'?: {
599
+ enable: boolean;
600
+ value?: [string] | undefined;
601
+ } | undefined;
602
+ 'hf-file-v'?: {
603
+ enable: boolean;
604
+ value?: [string] | undefined;
605
+ } | undefined;
606
+ 'hf-token'?: {
607
+ enable: boolean;
608
+ value?: [string] | undefined;
609
+ } | undefined;
610
+ 'log-disable'?: {
611
+ enable: boolean;
612
+ } | undefined;
613
+ 'log-file'?: {
614
+ enable: boolean;
615
+ value?: [string] | undefined;
616
+ } | undefined;
617
+ 'log-colors'?: {
618
+ enable: boolean;
619
+ value?: ["on" | "off" | "auto"] | undefined;
620
+ } | undefined;
621
+ verbose?: {
622
+ enable: boolean;
623
+ } | undefined;
624
+ offline?: {
625
+ enable: boolean;
626
+ } | undefined;
627
+ verbosity?: {
628
+ enable: boolean;
629
+ value?: [number] | undefined;
630
+ } | undefined;
631
+ 'log-prefix'?: {
632
+ enable: boolean;
633
+ } | undefined;
634
+ 'log-timestamps'?: {
635
+ enable: boolean;
636
+ } | undefined;
637
+ 'cache-type-k-draft'?: {
638
+ enable: boolean;
639
+ value?: [string] | undefined;
640
+ } | undefined;
641
+ 'cache-type-v-draft'?: {
642
+ enable: boolean;
643
+ value?: [string] | undefined;
644
+ } | undefined;
645
+ } | undefined;
646
+ exampleSpecific?: { // second option group; same enable/value entry shape as `common`
647
+ 'swa-checkpoints'?: {
648
+ enable: boolean;
649
+ value?: [number] | undefined;
650
+ } | undefined;
651
+ 'no-context-shift'?: {
652
+ enable: boolean;
653
+ } | undefined;
654
+ 'context-shift'?: {
655
+ enable: boolean;
656
+ } | undefined;
657
+ 'reverse-prompt'?: {
658
+ enable: boolean;
659
+ value?: [string] | undefined;
660
+ } | undefined;
661
+ special?: {
662
+ enable: boolean;
663
+ } | undefined;
664
+ 'no-warmup'?: {
665
+ enable: boolean;
666
+ } | undefined;
667
+ 'spm-infill'?: {
668
+ enable: boolean;
669
+ } | undefined;
670
+ pooling?: {
671
+ enable: boolean;
672
+ value?: ["none" | "mean" | "cls" | "last" | "rank"] | undefined;
673
+ } | undefined;
674
+ 'cont-batching'?: {
675
+ enable: boolean;
676
+ } | undefined;
677
+ 'no-cont-batching'?: {
678
+ enable: boolean;
679
+ } | undefined;
680
+ mmproj?: {
681
+ enable: boolean;
682
+ value?: [string] | undefined;
683
+ } | undefined;
684
+ 'mmproj-url'?: {
685
+ enable: boolean;
686
+ value?: [string] | undefined;
687
+ } | undefined;
688
+ 'no-mmproj'?: {
689
+ enable: boolean;
690
+ } | undefined;
691
+ 'no-mmproj-offload'?: {
692
+ enable: boolean;
693
+ } | undefined;
694
+ 'override-tensor-draft'?: {
695
+ enable: boolean;
696
+ value?: [string] | undefined;
697
+ } | undefined;
698
+ 'cpu-moe-draft'?: {
699
+ enable: boolean;
700
+ } | undefined;
701
+ 'n-cpu-moe-draft'?: {
702
+ enable: boolean;
703
+ value?: [number] | undefined;
704
+ } | undefined;
705
+ alias?: {
706
+ enable: boolean;
707
+ value?: [string] | undefined;
708
+ } | undefined;
709
+ host?: {
710
+ enable: boolean;
711
+ value?: [string] | undefined;
712
+ } | undefined;
713
+ port?: {
714
+ enable: boolean;
715
+ value?: [string] | undefined;
716
+ } | undefined;
717
+ path?: {
718
+ enable: boolean;
719
+ value?: [string] | undefined;
720
+ } | undefined;
721
+ 'api-prefix'?: {
722
+ enable: boolean;
723
+ value?: [string] | undefined;
724
+ } | undefined;
725
+ 'no-webui'?: {
726
+ enable: boolean;
727
+ } | undefined;
728
+ embedding?: {
729
+ enable: boolean;
730
+ } | undefined;
731
+ reranking?: {
732
+ enable: boolean;
733
+ } | undefined;
734
+ 'api-key'?: {
735
+ enable: boolean;
736
+ value?: [string] | undefined;
737
+ } | undefined;
738
+ 'api-key-file'?: {
739
+ enable: boolean;
740
+ value?: [string] | undefined;
741
+ } | undefined;
742
+ 'ssl-key-file'?: {
743
+ enable: boolean;
744
+ value?: [string] | undefined;
745
+ } | undefined;
746
+ 'ssl-cert-file'?: {
747
+ enable: boolean;
748
+ value?: [string] | undefined;
749
+ } | undefined;
750
+ 'chat-template-kwargs'?: {
751
+ enable: boolean;
752
+ value?: [string] | undefined;
753
+ } | undefined;
754
+ timeout?: {
755
+ enable: boolean;
756
+ value?: [number] | undefined;
757
+ } | undefined;
758
+ 'threads-http'?: {
759
+ enable: boolean;
760
+ value?: [number] | undefined;
761
+ } | undefined;
762
+ 'cache-reuse'?: {
763
+ enable: boolean;
764
+ value?: [number] | undefined;
765
+ } | undefined;
766
+ metrics?: {
767
+ enable: boolean;
768
+ } | undefined;
769
+ props?: {
770
+ enable: boolean;
771
+ } | undefined;
772
+ slots?: {
773
+ enable: boolean;
774
+ } | undefined;
775
+ 'no-slots'?: {
776
+ enable: boolean;
777
+ } | undefined;
778
+ 'slot-save-path'?: {
779
+ enable: boolean;
780
+ value?: [string] | undefined;
781
+ } | undefined;
782
+ jinja?: {
783
+ enable: boolean;
784
+ } | undefined;
785
+ 'reasoning-format'?: {
786
+ enable: boolean;
787
+ value?: ["none" | "deepseek"] | undefined;
788
+ } | undefined;
789
+ 'reasoning-budget'?: {
790
+ enable: boolean;
791
+ value?: [number] | undefined;
792
+ } | undefined;
793
+ 'chat-template'?: {
794
+ enable: boolean;
795
+ value?: [string] | undefined;
796
+ } | undefined;
797
+ 'chat-template-file'?: {
798
+ enable: boolean;
799
+ value?: [string] | undefined;
800
+ } | undefined;
801
+ 'no-prefill-assistant'?: {
802
+ enable: boolean;
803
+ } | undefined;
804
+ 'slot-prompt-similarity'?: {
805
+ enable: boolean;
806
+ value?: [string] | undefined;
807
+ } | undefined;
808
+ 'lora-init-without-apply'?: {
809
+ enable: boolean;
810
+ } | undefined;
811
+ 'threads-draft'?: {
812
+ enable: boolean;
813
+ value?: [number] | undefined;
814
+ } | undefined;
815
+ 'threads-batch-draft'?: {
816
+ enable: boolean;
817
+ value?: [number] | undefined;
818
+ } | undefined;
819
+ 'draft-max'?: {
820
+ enable: boolean;
821
+ value?: [number] | undefined;
822
+ } | undefined;
823
+ 'draft-min'?: {
824
+ enable: boolean;
825
+ value?: [number] | undefined;
826
+ } | undefined;
827
+ 'draft-p-min'?: {
828
+ enable: boolean;
829
+ value?: [string] | undefined;
830
+ } | undefined;
831
+ 'ctx-size-draft'?: {
832
+ enable: boolean;
833
+ value?: [number] | undefined;
834
+ } | undefined;
835
+ 'device-draft'?: {
836
+ enable: boolean;
837
+ value?: [string] | undefined;
838
+ } | undefined;
839
+ 'gpu-layers-draft'?: {
840
+ enable: boolean;
841
+ value?: [number] | undefined;
842
+ } | undefined;
843
+ 'model-draft'?: {
844
+ enable: boolean;
845
+ value?: [string] | undefined;
846
+ } | undefined;
847
+ 'spec-replace'?: {
848
+ enable: boolean;
849
+ value?: [string, string] | undefined;
850
+ } | undefined;
851
+ 'model-vocoder'?: {
852
+ enable: boolean;
853
+ value?: [string] | undefined;
854
+ } | undefined;
855
+ 'tts-use-guide-tokens'?: {
856
+ enable: boolean;
857
+ } | undefined;
858
+ 'embd-bge-small-en-default'?: {
859
+ enable: boolean;
860
+ } | undefined;
861
+ 'embd-e5-small-en-default'?: {
862
+ enable: boolean;
863
+ } | undefined;
864
+ 'embd-gte-small-default'?: {
865
+ enable: boolean;
866
+ } | undefined;
867
+ 'fim-qwen-1.5b-default'?: {
868
+ enable: boolean;
869
+ } | undefined;
870
+ 'fim-qwen-3b-default'?: {
871
+ enable: boolean;
872
+ } | undefined;
873
+ 'fim-qwen-7b-default'?: {
874
+ enable: boolean;
875
+ } | undefined;
876
+ 'fim-qwen-7b-spec'?: {
877
+ enable: boolean;
878
+ } | undefined;
879
+ 'fim-qwen-14b-spec'?: {
880
+ enable: boolean;
881
+ } | undefined;
882
+ 'fim-qwen-30b-default'?: {
883
+ enable: boolean;
884
+ } | undefined;
885
+ } | undefined;
886
+ sampling?: { // third option group; same enable/value entry shape as `common`
887
+ samplers?: {
888
+ enable: boolean;
889
+ value?: [string] | undefined;
890
+ } | undefined;
891
+ seed?: {
892
+ enable: boolean;
893
+ value?: [string] | undefined;
894
+ } | undefined;
895
+ 'sampling-seq'?: {
896
+ enable: boolean;
897
+ value?: [string] | undefined;
898
+ } | undefined;
899
+ 'ignore-eos'?: {
900
+ enable: boolean;
901
+ } | undefined;
902
+ temp?: {
903
+ enable: boolean;
904
+ value?: [number] | undefined;
905
+ } | undefined;
906
+ 'top-k'?: {
907
+ enable: boolean;
908
+ value?: [number] | undefined;
909
+ } | undefined;
910
+ 'top-p'?: {
911
+ enable: boolean;
912
+ value?: [number] | undefined;
913
+ } | undefined;
914
+ 'min-p'?: {
915
+ enable: boolean;
916
+ value?: [number] | undefined;
917
+ } | undefined;
918
+ 'top-nsigma'?: {
919
+ enable: boolean;
920
+ value?: [number] | undefined;
921
+ } | undefined;
922
+ 'xtc-probability'?: {
923
+ enable: boolean;
924
+ value?: [number] | undefined;
925
+ } | undefined;
926
+ 'xtc-threshold'?: {
927
+ enable: boolean;
928
+ value?: [number] | undefined;
929
+ } | undefined;
930
+ typical?: {
931
+ enable: boolean;
932
+ value?: [number] | undefined;
933
+ } | undefined;
934
+ 'repeat-last-n'?: {
935
+ enable: boolean;
936
+ value?: [number] | undefined;
937
+ } | undefined;
938
+ 'repeat-penalty'?: {
939
+ enable: boolean;
940
+ value?: [number] | undefined;
941
+ } | undefined;
942
+ 'presence-penalty'?: {
943
+ enable: boolean;
944
+ value?: [number] | undefined;
945
+ } | undefined;
946
+ 'frequency-penalty'?: {
947
+ enable: boolean;
948
+ value?: [number] | undefined;
949
+ } | undefined;
950
+ 'dry-multiplier'?: {
951
+ enable: boolean;
952
+ value?: [number] | undefined;
953
+ } | undefined;
954
+ 'dry-base'?: {
955
+ enable: boolean;
956
+ value?: [number] | undefined;
957
+ } | undefined;
958
+ 'dry-allowed-length'?: {
959
+ enable: boolean;
960
+ value?: [number] | undefined;
961
+ } | undefined;
962
+ 'dry-penalty-last-n'?: {
963
+ enable: boolean;
964
+ value?: [number] | undefined;
965
+ } | undefined;
966
+ 'dry-sequence-breaker'?: {
967
+ enable: boolean;
968
+ value?: [string] | undefined;
969
+ } | undefined;
970
+ 'dynatemp-range'?: {
971
+ enable: boolean;
972
+ value?: [number] | undefined;
973
+ } | undefined;
974
+ 'dynatemp-exp'?: {
975
+ enable: boolean;
976
+ value?: [number] | undefined;
977
+ } | undefined;
978
+ mirostat?: {
979
+ enable: boolean;
980
+ value?: [number] | undefined;
981
+ } | undefined;
982
+ 'mirostat-lr'?: {
983
+ enable: boolean;
984
+ value?: [number] | undefined;
985
+ } | undefined;
986
+ 'mirostat-ent'?: {
987
+ enable: boolean;
988
+ value?: [number] | undefined;
989
+ } | undefined;
990
+ 'logit-bias'?: {
991
+ enable: boolean;
992
+ value?: [string] | undefined;
993
+ } | undefined;
994
+ grammar?: {
995
+ enable: boolean;
996
+ value?: [string] | undefined;
997
+ } | undefined;
998
+ 'grammar-file'?: {
999
+ enable: boolean;
1000
+ value?: [string] | undefined;
1001
+ } | undefined;
1002
+ 'json-schema'?: {
1003
+ enable: boolean;
1004
+ value?: [string] | undefined;
1005
+ } | undefined;
1006
+ 'json-schema-file'?: {
1007
+ enable: boolean;
1008
+ value?: [string] | undefined;
1009
+ } | undefined;
1010
+ } | undefined;
1011
+ } | undefined;
1012
+ aliases?: string[] | undefined;
1013
+ env?: { // open string-keyed map with untyped (any) values
1014
+ [x: string]: any;
1015
+ } | undefined;
1016
+ ttl?: number | undefined;
1017
+ checkEndpoint?: string | undefined;
1018
+ useModelName?: string | undefined;
1019
+ exec?: string | { // either a plain string or a version/device pair whose device is limited to the literals listed below
1020
+ version: string;
1021
+ device: "cpu" | "cuda12.4" | "hip-radeon" | "sycl" | "vulkan";
1022
+ } | undefined;
1023
+ model?: string | undefined;
1024
+ proxy?: string | undefined;
1025
+ }>;
1026
+ getVersion(): Promise<string | undefined>; // resolves to a version string or undefined; NOTE(review): which component's version, and when it is undefined, is not visible here — confirm
1027
+ getLlamaSwapConfig(): Promise<any>; // result is untyped (any), so callers get no compile-time checking of its shape
1028
+ destroy(): void; // synchronous, returns nothing; NOTE(review): presumably releases what the service holds — confirm against the implementation
1029
+ }