@fugood/llama.node 0.3.6 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -2
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +3 -1
- package/lib/index.js +16 -1
- package/lib/index.ts +16 -0
- package/package.json +1 -1
- package/src/EmbeddingWorker.cpp +4 -3
- package/src/LlamaCompletionWorker.cpp +4 -2
- package/src/LlamaContext.cpp +61 -6
- package/src/LlamaContext.h +1 -0
- package/src/common.hpp +6 -11
- package/src/llama.cpp/.github/workflows/build.yml +19 -17
- package/src/llama.cpp/.github/workflows/docker.yml +77 -30
- package/src/llama.cpp/.github/workflows/editorconfig.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +22 -3
- package/src/llama.cpp/CMakeLists.txt +49 -24
- package/src/llama.cpp/common/arg.cpp +82 -26
- package/src/llama.cpp/common/arg.h +3 -0
- package/src/llama.cpp/common/common.cpp +192 -72
- package/src/llama.cpp/common/common.h +51 -18
- package/src/llama.cpp/common/ngram-cache.cpp +12 -12
- package/src/llama.cpp/common/ngram-cache.h +2 -2
- package/src/llama.cpp/common/sampling.cpp +11 -6
- package/src/llama.cpp/common/speculative.cpp +18 -15
- package/src/llama.cpp/docs/build.md +2 -0
- package/src/llama.cpp/examples/batched/batched.cpp +9 -7
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +10 -8
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +11 -8
- package/src/llama.cpp/examples/cvector-generator/mean.hpp +1 -1
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +8 -7
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +7 -6
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +8 -7
- package/src/llama.cpp/examples/gguf/gguf.cpp +10 -6
- package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +1 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +8 -7
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +13 -10
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +13 -12
- package/src/llama.cpp/examples/infill/infill.cpp +23 -24
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +44 -13
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -6
- package/src/llama.cpp/examples/llava/clip.cpp +4 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +9 -6
- package/src/llama.cpp/examples/llava/llava.cpp +2 -2
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +8 -4
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +11 -8
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +6 -7
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +4 -9
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +3 -7
- package/src/llama.cpp/examples/lookup/lookup.cpp +5 -6
- package/src/llama.cpp/examples/main/main.cpp +51 -29
- package/src/llama.cpp/examples/parallel/parallel.cpp +5 -6
- package/src/llama.cpp/examples/passkey/passkey.cpp +7 -5
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +37 -23
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -14
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +8 -8
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +12 -0
- package/src/llama.cpp/examples/run/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +1351 -0
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +114 -0
- package/src/llama.cpp/examples/run/run.cpp +175 -61
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -25
- package/src/llama.cpp/examples/server/CMakeLists.txt +1 -0
- package/src/llama.cpp/examples/server/httplib.h +1295 -409
- package/src/llama.cpp/examples/server/server.cpp +387 -181
- package/src/llama.cpp/examples/server/tests/requirements.txt +1 -0
- package/src/llama.cpp/examples/server/utils.hpp +170 -58
- package/src/llama.cpp/examples/simple/simple.cpp +9 -8
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +16 -12
- package/src/llama.cpp/examples/speculative/speculative.cpp +22 -23
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +8 -12
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +17 -5
- package/src/llama.cpp/examples/tts/tts.cpp +64 -23
- package/src/llama.cpp/ggml/CMakeLists.txt +5 -21
- package/src/llama.cpp/ggml/include/ggml-backend.h +2 -0
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -0
- package/src/llama.cpp/ggml/include/ggml.h +36 -145
- package/src/llama.cpp/ggml/include/gguf.h +202 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +6 -3
- package/src/llama.cpp/ggml/src/ggml-alloc.c +5 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -1
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +79 -49
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +33 -23
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +57 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +87 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +335 -66
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +10 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1090 -378
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +2 -2
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +3 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-impl.h +11 -16
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +16 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +154 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +9 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +18 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +40 -95
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +48 -48
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +24 -24
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +238 -164
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +105 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +8 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +3 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +7 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +74 -4
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +314 -116
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +9 -3
- package/src/llama.cpp/ggml/src/ggml.c +117 -1327
- package/src/llama.cpp/ggml/src/gguf.cpp +1329 -0
- package/src/llama.cpp/include/llama-cpp.h +6 -1
- package/src/llama.cpp/include/llama.h +138 -75
- package/src/llama.cpp/src/CMakeLists.txt +13 -1
- package/src/llama.cpp/src/llama-adapter.cpp +347 -0
- package/src/llama.cpp/src/llama-adapter.h +74 -0
- package/src/llama.cpp/src/llama-arch.cpp +1487 -0
- package/src/llama.cpp/src/llama-arch.h +400 -0
- package/src/llama.cpp/src/llama-batch.cpp +368 -0
- package/src/llama.cpp/src/llama-batch.h +88 -0
- package/src/llama.cpp/src/llama-chat.cpp +578 -0
- package/src/llama.cpp/src/llama-chat.h +52 -0
- package/src/llama.cpp/src/llama-context.cpp +1775 -0
- package/src/llama.cpp/src/llama-context.h +128 -0
- package/src/llama.cpp/src/llama-cparams.cpp +1 -0
- package/src/llama.cpp/src/llama-cparams.h +37 -0
- package/src/llama.cpp/src/llama-grammar.cpp +5 -4
- package/src/llama.cpp/src/llama-grammar.h +3 -1
- package/src/llama.cpp/src/llama-hparams.cpp +71 -0
- package/src/llama.cpp/src/llama-hparams.h +139 -0
- package/src/llama.cpp/src/llama-impl.cpp +167 -0
- package/src/llama.cpp/src/llama-impl.h +16 -136
- package/src/llama.cpp/src/llama-kv-cache.cpp +718 -0
- package/src/llama.cpp/src/llama-kv-cache.h +218 -0
- package/src/llama.cpp/src/llama-mmap.cpp +589 -0
- package/src/llama.cpp/src/llama-mmap.h +67 -0
- package/src/llama.cpp/src/llama-model-loader.cpp +1124 -0
- package/src/llama.cpp/src/llama-model-loader.h +167 -0
- package/src/llama.cpp/src/llama-model.cpp +3953 -0
- package/src/llama.cpp/src/llama-model.h +370 -0
- package/src/llama.cpp/src/llama-quant.cpp +934 -0
- package/src/llama.cpp/src/llama-quant.h +1 -0
- package/src/llama.cpp/src/llama-sampling.cpp +147 -32
- package/src/llama.cpp/src/llama-sampling.h +3 -19
- package/src/llama.cpp/src/llama-vocab.cpp +1832 -575
- package/src/llama.cpp/src/llama-vocab.h +97 -142
- package/src/llama.cpp/src/llama.cpp +7160 -20314
- package/src/llama.cpp/src/unicode.cpp +8 -3
- package/src/llama.cpp/tests/CMakeLists.txt +2 -0
- package/src/llama.cpp/tests/test-autorelease.cpp +3 -3
- package/src/llama.cpp/tests/test-backend-ops.cpp +370 -59
- package/src/llama.cpp/tests/test-chat-template.cpp +162 -125
- package/src/llama.cpp/tests/test-gguf.cpp +222 -187
- package/src/llama.cpp/tests/test-model-load-cancel.cpp +1 -1
- package/src/llama.cpp/tests/test-sampling.cpp +0 -1
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +4 -4
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +9 -7
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +8 -6
package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h (new file)

@@ -0,0 +1,114 @@
+/* linenoise.h -- VERSION 1.0
+ *
+ * Guerrilla line editing library against the idea that a line editing lib
+ * needs to be 20,000 lines of C++ code.
+ *
+ * See linenoise.cpp for more information.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * Copyright (c) 2010-2023, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2010-2013, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2025, Eric Curtin <ericcurtin17 at gmail dot com>
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __LINENOISE_H
+#define __LINENOISE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h> /* For size_t. */
+
+extern const char *linenoiseEditMore;
+
+/* The linenoiseState structure represents the state during line editing.
+ * We pass this state to functions implementing specific editing
+ * functionalities. */
+struct linenoiseState {
+    int in_completion;  /* The user pressed TAB and we are now in completion
+                         * mode, so input is handled by completeLine(). */
+    size_t completion_idx; /* Index of next completion to propose. */
+    int ifd;            /* Terminal stdin file descriptor. */
+    int ofd;            /* Terminal stdout file descriptor. */
+    char *buf;          /* Edited line buffer. */
+    size_t buflen;      /* Edited line buffer size. */
+    const char *prompt; /* Prompt to display. */
+    size_t plen;        /* Prompt length. */
+    size_t pos;         /* Current cursor position. */
+    size_t oldpos;      /* Previous refresh cursor position. */
+    size_t len;         /* Current edited line length. */
+    size_t cols;        /* Number of columns in terminal. */
+    size_t oldrows;     /* Rows used by last refrehsed line (multiline mode) */
+    int history_index;  /* The history index we are currently editing. */
+};
+
+typedef struct linenoiseCompletions {
+  size_t len;
+  char **cvec;
+} linenoiseCompletions;
+
+/* Non blocking API. */
+int linenoiseEditStart(struct linenoiseState *l, int stdin_fd, int stdout_fd, char *buf, size_t buflen, const char *prompt);
+const char *linenoiseEditFeed(struct linenoiseState *l);
+void linenoiseEditStop(struct linenoiseState *l);
+void linenoiseHide(struct linenoiseState *l);
+void linenoiseShow(struct linenoiseState *l);
+
+/* Blocking API. */
+const char *linenoise(const char *prompt);
+void linenoiseFree(void *ptr);
+
+/* Completion API. */
+typedef void(linenoiseCompletionCallback)(const char *, linenoiseCompletions *);
+typedef const char*(linenoiseHintsCallback)(const char *, int *color, int *bold);
+typedef void(linenoiseFreeHintsCallback)(const char *);
+void linenoiseSetCompletionCallback(linenoiseCompletionCallback *);
+void linenoiseSetHintsCallback(linenoiseHintsCallback *);
+void linenoiseSetFreeHintsCallback(linenoiseFreeHintsCallback *);
+void linenoiseAddCompletion(linenoiseCompletions *, const char *);
+
+/* History API. */
+int linenoiseHistoryAdd(const char *line);
+int linenoiseHistorySetMaxLen(int len);
+int linenoiseHistorySave(const char *filename);
+int linenoiseHistoryLoad(const char *filename);
+
+/* Other utilities. */
+void linenoiseClearScreen(void);
+void linenoiseSetMultiLine(int ml);
+void linenoisePrintKeyCodes(void);
+void linenoiseMaskModeEnable(void);
+void linenoiseMaskModeDisable(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __LINENOISE_H */
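The blocking calls declared above (`linenoise()`, `linenoiseHistoryAdd()`, `linenoiseFree()`) are what `llama-run` drives in its new `read_user_input()` (see the run.cpp hunks below). A minimal usage sketch, assuming linenoise.cpp is compiled and linked alongside the caller, looks roughly like this:

```cpp
// Illustrative sketch only (not code from this package): a tiny REPL built on
// the blocking linenoise API declared in the header above.
#include <cstdio>
#include <string>

#include "linenoise.cpp/linenoise.h"

int main() {
    while (true) {
        // linenoise() returns a heap-allocated line, or nullptr on EOF/Ctrl-D.
        const char * line = linenoise("> ");
        if (line == nullptr) {
            break;
        }

        const std::string input = line;
        if (!input.empty()) {
            linenoiseHistoryAdd(line);  // make the line recallable with the arrow keys
        }
        linenoiseFree(const_cast<char *>(line));  // caller owns and must release the buffer

        if (input == "/bye") {
            break;
        }
        printf("you typed: %s\n", input.c_str());
    }
    return 0;
}
```

run.cpp itself wraps the returned buffer in a `std::unique_ptr<char, decltype(&std::free)>` rather than calling `linenoiseFree()` directly, but the lifetime rule is the same: the caller owns the returned line.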
package/src/llama.cpp/examples/run/run.cpp (removed lines that the registry diff viewer truncated appear below as bare or partial `-` lines)

@@ -1,5 +1,6 @@
 #if defined(_WIN32)
 #    include <windows.h>
+#    include <io.h>
 #else
 #    include <sys/file.h>
 #    include <sys/ioctl.h>
@@ -10,20 +11,31 @@
 #    include <curl/curl.h>
 #endif
 
+#include <signal.h>
+
 #include <climits>
 #include <cstdarg>
 #include <cstdio>
 #include <cstring>
 #include <filesystem>
 #include <iostream>
+#include <list>
 #include <sstream>
 #include <string>
 #include <vector>
 
 #include "common.h"
 #include "json.hpp"
+#include "linenoise.cpp/linenoise.h"
 #include "llama-cpp.h"
 
+#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32)
+[[noreturn]] static void sigint_handler(int) {
+    printf("\n\033[0m");
+    exit(0); // not ideal, but it's the only way to guarantee exit in all cases
+}
+#endif
+
 GGML_ATTRIBUTE_FORMAT(1, 2)
 static std::string fmt(const char * fmt, ...) {
     va_list ap;
@@ -55,29 +67,52 @@ static int printe(const char * fmt, ...) {
 class Opt {
   public:
     int init(int argc, const char ** argv) {
+        ctx_params = llama_context_default_params();
+        model_params = llama_model_default_params();
+        context_size_default = ctx_params.n_batch;
+        ngl_default = model_params.n_gpu_layers;
+        common_params_sampling sampling;
+        temperature_default = sampling.temp;
+
+        if (argc < 2) {
+            printe("Error: No arguments provided.\n");
+            print_help();
+            return 1;
+        }
+
         // Parse arguments
         if (parse(argc, argv)) {
            printe("Error: Failed to parse arguments.\n");
-
+            print_help();
            return 1;
        }
 
         // If help is requested, show help and exit
-        if (
-
+        if (help) {
+            print_help();
            return 2;
        }
 
+        ctx_params.n_batch = context_size >= 0 ? context_size : context_size_default;
+        ctx_params.n_ctx = ctx_params.n_batch;
+        model_params.n_gpu_layers = ngl >= 0 ? ngl : ngl_default;
+        temperature = temperature >= 0 ? temperature : temperature_default;
+
        return 0; // Success
     }
 
+    llama_context_params ctx_params;
+    llama_model_params model_params;
     std::string model_;
-    std::string
-    int
-
+    std::string user;
+    int context_size = -1, ngl = -1;
+    float temperature = -1;
+    bool verbose = false;
 
   private:
-
+    int context_size_default = -1, ngl_default = -1;
+    float temperature_default = -1;
+    bool help = false;
 
     bool parse_flag(const char ** argv, int i, const char * short_opt, const char * long_opt) {
         return strcmp(argv[i], short_opt) == 0 || strcmp(argv[i], long_opt) == 0;
@@ -89,6 +124,17 @@ class Opt {
         }
 
         option_value = std::atoi(argv[++i]);
+
+        return 0;
+    }
+
+    int handle_option_with_value(int argc, const char ** argv, int & i, float & option_value) {
+        if (i + 1 >= argc) {
+            return 1;
+        }
+
+        option_value = std::atof(argv[++i]);
+
         return 0;
     }
 
@@ -96,18 +142,22 @@ class Opt {
         bool options_parsing = true;
         for (int i = 1, positional_args_i = 0; i < argc; ++i) {
             if (options_parsing && (strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "--context-size") == 0)) {
-                if (handle_option_with_value(argc, argv, i,
+                if (handle_option_with_value(argc, argv, i, context_size) == 1) {
                     return 1;
                 }
             } else if (options_parsing && (strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "--ngl") == 0)) {
-                if (handle_option_with_value(argc, argv, i,
+                if (handle_option_with_value(argc, argv, i, ngl) == 1) {
+                    return 1;
+                }
+            } else if (options_parsing && strcmp(argv[i], "--temp") == 0) {
+                if (handle_option_with_value(argc, argv, i, temperature) == 1) {
                     return 1;
                 }
             } else if (options_parsing &&
                        (parse_flag(argv, i, "-v", "--verbose") || parse_flag(argv, i, "-v", "--log-verbose"))) {
-
+                verbose = true;
             } else if (options_parsing && parse_flag(argv, i, "-h", "--help")) {
-
+                help = true;
                 return 0;
             } else if (options_parsing && strcmp(argv[i], "--") == 0) {
                 options_parsing = false;
@@ -120,16 +170,16 @@ class Opt {
                 model_ = argv[i];
             } else if (positional_args_i == 1) {
                 ++positional_args_i;
-
+                user = argv[i];
             } else {
-
+                user += " " + std::string(argv[i]);
             }
         }
 
         return 0;
     }
 
-    void
+    void print_help() const {
         printf(
             "Description:\n"
             "  Runs a llm\n"
@@ -142,6 +192,8 @@ class Opt {
             "      Context size (default: %d)\n"
             "  -n, --ngl <value>\n"
             "      Number of GPU layers (default: %d)\n"
+            "  --temp <value>\n"
+            "      Temperature (default: %.1f)\n"
             "  -v, --verbose, --log-verbose\n"
             "      Set verbosity level to infinity (i.e. log all messages, useful for debugging)\n"
             "  -h, --help\n"
@@ -170,7 +222,7 @@ class Opt {
             "  llama-run file://some-file3.gguf\n"
             "  llama-run --ngl 999 some-file4.gguf\n"
             "  llama-run --ngl 999 some-file5.gguf Hello World\n",
-
+            context_size_default, ngl_default, temperature_default);
     }
 };
 
@@ -214,7 +266,7 @@ class File {
                 return 1;
             }
 
-            OVERLAPPED overlapped = {
+            OVERLAPPED overlapped = {};
             if (!LockFileEx(hFile, LOCKFILE_EXCLUSIVE_LOCK | LOCKFILE_FAIL_IMMEDIATELY, 0, MAXDWORD, MAXDWORD,
                             &overlapped)) {
                 fd = -1;
@@ -238,7 +290,7 @@ class File {
         if (fd >= 0) {
 #    ifdef _WIN32
             if (hFile != INVALID_HANDLE_VALUE) {
-                OVERLAPPED overlapped = {
+                OVERLAPPED overlapped = {};
                 UnlockFileEx(hFile, 0, MAXDWORD, MAXDWORD, &overlapped);
             }
 #    else
@@ -254,7 +306,7 @@ class File {
   private:
     int fd = -1;
 #    ifdef _WIN32
-    HANDLE hFile;
+    HANDLE hFile = nullptr;
 #    endif
 };
 
@@ -425,7 +477,7 @@ class HttpClient {
         return (now_downloaded_plus_file_size * 100) / total_to_download;
     }
 
-    static std::string generate_progress_prefix(curl_off_t percentage) { return fmt("%3ld%% |", percentage); }
+    static std::string generate_progress_prefix(curl_off_t percentage) { return fmt("%3ld%% |", static_cast<long int>(percentage)); }
 
     static double calculate_speed(curl_off_t now_downloaded, const std::chrono::steady_clock::time_point & start_time) {
         const auto now = std::chrono::steady_clock::now();
@@ -486,7 +538,7 @@ class LlamaData {
     llama_sampler_ptr sampler;
     llama_context_ptr context;
     std::vector<llama_chat_message> messages;
-    std::
+    std::list<std::string> msg_strs;
     std::vector<char> fmtted;
 
     int init(Opt & opt) {
@@ -495,12 +547,12 @@ class LlamaData {
             return 1;
         }
 
-        context = initialize_context(model, opt
+        context = initialize_context(model, opt);
         if (!context) {
             return 1;
         }
 
-        sampler = initialize_sampler();
+        sampler = initialize_sampler(opt);
         return 0;
     }
 
@@ -619,14 +671,12 @@ class LlamaData {
     // Initializes the model and returns a unique pointer to it
     llama_model_ptr initialize_model(Opt & opt) {
         ggml_backend_load_all();
-        llama_model_params model_params = llama_model_default_params();
-        model_params.n_gpu_layers = opt.ngl_ >= 0 ? opt.ngl_ : model_params.n_gpu_layers;
         resolve_model(opt.model_);
         printe(
             "\r%*s"
             "\rLoading model",
             get_terminal_width(), " ");
-        llama_model_ptr model(
+        llama_model_ptr model(llama_model_load_from_file(opt.model_.c_str(), opt.model_params));
         if (!model) {
             printe("%s: error: unable to load model from file: %s\n", __func__, opt.model_.c_str());
         }
@@ -636,10 +686,8 @@ class LlamaData {
     }
 
     // Initializes the context with the specified parameters
-    llama_context_ptr initialize_context(const llama_model_ptr & model, const
-
-        ctx_params.n_ctx = ctx_params.n_batch = n_ctx >= 0 ? n_ctx : ctx_params.n_batch;
-        llama_context_ptr context(llama_new_context_with_model(model.get(), ctx_params));
+    llama_context_ptr initialize_context(const llama_model_ptr & model, const Opt & opt) {
+        llama_context_ptr context(llama_init_from_model(model.get(), opt.ctx_params));
         if (!context) {
             printe("%s: error: failed to create the llama_context\n", __func__);
         }
@@ -648,10 +696,10 @@ class LlamaData {
     }
 
     // Initializes and configures the sampler
-    llama_sampler_ptr initialize_sampler() {
+    llama_sampler_ptr initialize_sampler(const Opt & opt) {
         llama_sampler_ptr sampler(llama_sampler_chain_init(llama_sampler_chain_default_params()));
         llama_sampler_chain_add(sampler.get(), llama_sampler_init_min_p(0.05f, 1));
-        llama_sampler_chain_add(sampler.get(), llama_sampler_init_temp(
+        llama_sampler_chain_add(sampler.get(), llama_sampler_init_temp(opt.temperature));
         llama_sampler_chain_add(sampler.get(), llama_sampler_init_dist(LLAMA_DEFAULT_SEED));
 
         return sampler;
@@ -667,11 +715,11 @@ static void add_message(const char * role, const std::string & text, LlamaData &
 // Function to apply the chat template and resize `formatted` if needed
 static int apply_chat_template(LlamaData & llama_data, const bool append) {
     int result = llama_chat_apply_template(
-        llama_data.model.get(),
+        llama_model_chat_template(llama_data.model.get()), llama_data.messages.data(), llama_data.messages.size(), append,
         append ? llama_data.fmtted.data() : nullptr, append ? llama_data.fmtted.size() : 0);
     if (append && result > static_cast<int>(llama_data.fmtted.size())) {
         llama_data.fmtted.resize(result);
-        result = llama_chat_apply_template(llama_data.model.get(),
+        result = llama_chat_apply_template(llama_model_chat_template(llama_data.model.get()), llama_data.messages.data(),
                                            llama_data.messages.size(), append, llama_data.fmtted.data(),
                                            llama_data.fmtted.size());
     }
@@ -680,11 +728,11 @@ static int apply_chat_template(LlamaData & llama_data, const bool append) {
 }
 
 // Function to tokenize the prompt
-static int tokenize_prompt(const
+static int tokenize_prompt(const llama_vocab * vocab, const std::string & prompt,
                            std::vector<llama_token> & prompt_tokens) {
-    const int n_prompt_tokens = -llama_tokenize(
+    const int n_prompt_tokens = -llama_tokenize(vocab, prompt.c_str(), prompt.size(), NULL, 0, true, true);
     prompt_tokens.resize(n_prompt_tokens);
-    if (llama_tokenize(
+    if (llama_tokenize(vocab, prompt.c_str(), prompt.size(), prompt_tokens.data(), prompt_tokens.size(), true,
                        true) < 0) {
         printe("failed to tokenize the prompt\n");
         return -1;
@@ -707,9 +755,9 @@ static int check_context_size(const llama_context_ptr & ctx, const llama_batch &
 }
 
 // convert the token to a string
-static int convert_token_to_string(const
+static int convert_token_to_string(const llama_vocab * vocab, const llama_token token_id, std::string & piece) {
     char buf[256];
-    int n = llama_token_to_piece(
+    int n = llama_token_to_piece(vocab, token_id, buf, sizeof(buf), 0, true);
     if (n < 0) {
         printe("failed to convert token to piece\n");
         return 1;
@@ -727,8 +775,10 @@ static void print_word_and_concatenate_to_response(const std::string & piece, st
 
 // helper function to evaluate a prompt and generate a response
 static int generate(LlamaData & llama_data, const std::string & prompt, std::string & response) {
+    const llama_vocab * vocab = llama_model_get_vocab(llama_data.model.get());
+
     std::vector<llama_token> tokens;
-    if (tokenize_prompt(
+    if (tokenize_prompt(vocab, prompt, tokens) < 0) {
         return 1;
     }
 
@@ -744,12 +794,12 @@ static int generate(LlamaData & llama_data, const std::string & prompt, std::str
 
         // sample the next token, check is it an end of generation?
         new_token_id = llama_sampler_sample(llama_data.sampler.get(), llama_data.context.get(), -1);
-        if (
+        if (llama_vocab_is_eog(vocab, new_token_id)) {
             break;
         }
 
         std::string piece;
-        if (convert_token_to_string(
+        if (convert_token_to_string(vocab, new_token_id, piece)) {
             return 1;
         }
 
@@ -759,12 +809,45 @@ static int generate(LlamaData & llama_data, const std::string & prompt, std::str
         batch = llama_batch_get_one(&new_token_id, 1);
     }
 
+    printf("\033[0m");
     return 0;
 }
 
-static int read_user_input(std::string &
-
-
+static int read_user_input(std::string & user_input) {
+    static const char * prompt_prefix = "> ";
+#ifdef WIN32
+    printf(
+        "\r%*s"
+        "\r\033[0m%s",
+        get_terminal_width(), " ", prompt_prefix);
+
+    std::getline(std::cin, user_input);
+    if (std::cin.eof()) {
+        printf("\n");
+        return 1;
+    }
+#else
+    std::unique_ptr<char, decltype(&std::free)> line(const_cast<char *>(linenoise(prompt_prefix)), free);
+    if (!line) {
+        return 1;
+    }
+
+    user_input = line.get();
+#endif
+
+    if (user_input == "/bye") {
+        return 1;
+    }
+
+    if (user_input.empty()) {
+        return 2;
+    }
+
+#ifndef WIN32
+    linenoiseHistoryAdd(line.get());
+#endif
+
+    return 0; // Should have data in happy path
 }
 
 // Function to generate a response based on the prompt
@@ -798,16 +881,12 @@ static int apply_chat_template_with_error_handling(LlamaData & llama_data, const
 }
 
 // Helper function to handle user input
-static int handle_user_input(std::string & user_input, const std::string &
-    if (!
-        user_input =
+static int handle_user_input(std::string & user_input, const std::string & user) {
+    if (!user.empty()) {
+        user_input = user;
         return 0; // No need for interactive input
     }
 
-    printf(
-        "\r%*s"
-        "\r\033[32m> \033[0m",
-        get_terminal_width(), " ");
     return read_user_input(user_input); // Returns true if input ends the loop
 }
 
@@ -831,18 +910,37 @@ static bool is_stdout_a_terminal() {
 #endif
 }
 
-// Function to
-static int
+// Function to handle user input
+static int get_user_input(std::string & user_input, const std::string & user) {
+    while (true) {
+        const int ret = handle_user_input(user_input, user);
+        if (ret == 1) {
+            return 1;
+        }
+
+        if (ret == 2) {
+            continue;
+        }
+
+        break;
+    }
+
+    return 0;
+}
+
+// Main chat loop function
+static int chat_loop(LlamaData & llama_data, const std::string & user) {
     int prev_len = 0;
     llama_data.fmtted.resize(llama_n_ctx(llama_data.context.get()));
     static const bool stdout_a_terminal = is_stdout_a_terminal();
     while (true) {
         // Get user input
         std::string user_input;
-
+        if (get_user_input(user_input, user) == 1) {
+            return 0;
         }
 
-        add_message("user",
+        add_message("user", user.empty() ? user_input : user, llama_data);
         int new_len;
         if (apply_chat_template_with_error_handling(llama_data, true, new_len) < 0) {
             return 1;
@@ -854,7 +952,7 @@ static int chat_loop(LlamaData & llama_data, const std::string & user_) {
             return 1;
         }
 
-        if (!
+        if (!user.empty()) {
             break;
         }
 
@@ -869,7 +967,7 @@ static int chat_loop(LlamaData & llama_data, const std::string & user_) {
 
 static void log_callback(const enum ggml_log_level level, const char * text, void * p) {
     const Opt * opt = static_cast<Opt *>(p);
-    if (opt->
+    if (opt->verbose || level == GGML_LOG_LEVEL_ERROR) {
         printe("%s", text);
     }
 }
@@ -880,7 +978,23 @@ static std::string read_pipe_data() {
     return result.str();
 }
 
+static void ctrl_c_handling() {
+#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
+    struct sigaction sigint_action;
+    sigint_action.sa_handler = sigint_handler;
+    sigemptyset(&sigint_action.sa_mask);
+    sigint_action.sa_flags = 0;
+    sigaction(SIGINT, &sigint_action, NULL);
+#elif defined(_WIN32)
+    auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
+        return (ctrl_type == CTRL_C_EVENT) ? (sigint_handler(SIGINT), true) : false;
+    };
+    SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
+#endif
+}
+
 int main(int argc, const char ** argv) {
+    ctrl_c_handling();
     Opt opt;
     const int ret = opt.init(argc, argv);
     if (ret == 2) {
@@ -890,11 +1004,11 @@ int main(int argc, const char ** argv) {
     }
 
     if (!is_stdin_a_terminal()) {
-        if (!opt.
-            opt.
+        if (!opt.user.empty()) {
+            opt.user += "\n\n";
        }
 
-        opt.
+        opt.user += read_pipe_data();
     }
 
     llama_log_set(log_callback, &opt);
@@ -903,7 +1017,7 @@ int main(int argc, const char ** argv) {
        return 1;
    }
 
-    if (chat_loop(llama_data, opt.
+    if (chat_loop(llama_data, opt.user)) {
        return 1;
    }
 
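The run.cpp hunks above track the llama.cpp API rework bundled in this release (see `include/llama.h` +138 -75 in the file list): model loading via `llama_model_load_from_file`, context creation via `llama_init_from_model`, and tokenization/EOG checks going through a `llama_vocab *` handle. A rough, self-contained sketch of that new call flow, with a placeholder model path, prompt, layer count, and token budget (not code from the package), is:

```cpp
// Illustrative sketch of the updated llama.cpp C API used by run.cpp above.
// Assumes a llama.cpp build at the revision vendored in this package; error
// handling and cleanup on failure are trimmed for brevity.
#include <cstdio>
#include <string>
#include <vector>

#include "llama.h"

int main() {
    ggml_backend_load_all();

    llama_model_params mparams = llama_model_default_params();
    mparams.n_gpu_layers = 99;  // example value; llama-run takes this from --ngl

    llama_model * model = llama_model_load_from_file("model.gguf", mparams);
    const llama_vocab * vocab = llama_model_get_vocab(model);

    llama_context_params cparams = llama_context_default_params();
    cparams.n_ctx = cparams.n_batch = 2048;  // example value; llama-run takes this from --context-size
    llama_context * ctx = llama_init_from_model(model, cparams);

    // Same sampler chain shape as initialize_sampler() above: min-p, temperature, dist.
    llama_sampler * smpl = llama_sampler_chain_init(llama_sampler_chain_default_params());
    llama_sampler_chain_add(smpl, llama_sampler_init_min_p(0.05f, 1));
    llama_sampler_chain_add(smpl, llama_sampler_init_temp(0.8f));  // llama-run would pass opt.temperature
    llama_sampler_chain_add(smpl, llama_sampler_init_dist(LLAMA_DEFAULT_SEED));

    // Tokenize with the vocab handle (the old API took the model directly).
    const std::string prompt = "Hello";
    const int n_tokens = -llama_tokenize(vocab, prompt.c_str(), prompt.size(), NULL, 0, true, true);
    std::vector<llama_token> tokens(n_tokens);
    llama_tokenize(vocab, prompt.c_str(), prompt.size(), tokens.data(), tokens.size(), true, true);

    llama_batch batch = llama_batch_get_one(tokens.data(), tokens.size());
    llama_token new_token;
    for (int i = 0; i < 64; ++i) {          // arbitrary token budget for the sketch
        if (llama_decode(ctx, batch)) {
            break;                           // decode failed (e.g. context full)
        }
        new_token = llama_sampler_sample(smpl, ctx, -1);
        if (llama_vocab_is_eog(vocab, new_token)) {
            break;                           // end-of-generation token reached
        }
        char buf[256];
        const int n = llama_token_to_piece(vocab, new_token, buf, sizeof(buf), 0, true);
        if (n >= 0) {
            printf("%.*s", n, buf);
        }
        batch = llama_batch_get_one(&new_token, 1);
    }
    printf("\n");

    llama_sampler_free(smpl);
    llama_free(ctx);
    llama_model_free(model);
    return 0;
}
```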