llama-cpp-capacitor 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cpp/LICENSE +21 -0
- package/cpp/README.md +4 -0
- package/cpp/anyascii.c +22223 -0
- package/cpp/anyascii.h +42 -0
- package/cpp/chat-parser.cpp +393 -0
- package/cpp/chat-parser.h +120 -0
- package/cpp/chat.cpp +2315 -0
- package/cpp/chat.h +221 -0
- package/cpp/common.cpp +1619 -0
- package/cpp/common.h +744 -0
- package/cpp/ggml-alloc.c +1028 -0
- package/cpp/ggml-alloc.h +76 -0
- package/cpp/ggml-backend-impl.h +255 -0
- package/cpp/ggml-backend-reg.cpp +600 -0
- package/cpp/ggml-backend.cpp +2118 -0
- package/cpp/ggml-backend.h +354 -0
- package/cpp/ggml-common.h +1878 -0
- package/cpp/ggml-cpp.h +39 -0
- package/cpp/ggml-cpu/amx/amx.cpp +221 -0
- package/cpp/ggml-cpu/amx/amx.h +8 -0
- package/cpp/ggml-cpu/amx/common.h +91 -0
- package/cpp/ggml-cpu/amx/mmq.cpp +2512 -0
- package/cpp/ggml-cpu/amx/mmq.h +10 -0
- package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +3650 -0
- package/cpp/ggml-cpu/arch/arm/repack.cpp +1891 -0
- package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml-cpu/arch/x86/quants.c +3820 -0
- package/cpp/ggml-cpu/arch/x86/repack.cpp +6307 -0
- package/cpp/ggml-cpu/arch-fallback.h +215 -0
- package/cpp/ggml-cpu/binary-ops.cpp +158 -0
- package/cpp/ggml-cpu/binary-ops.h +16 -0
- package/cpp/ggml-cpu/common.h +73 -0
- package/cpp/ggml-cpu/ggml-cpu-impl.h +525 -0
- package/cpp/ggml-cpu/ggml-cpu.c +3578 -0
- package/cpp/ggml-cpu/ggml-cpu.cpp +672 -0
- package/cpp/ggml-cpu/ops.cpp +10587 -0
- package/cpp/ggml-cpu/ops.h +114 -0
- package/cpp/ggml-cpu/quants.c +1193 -0
- package/cpp/ggml-cpu/quants.h +97 -0
- package/cpp/ggml-cpu/repack.cpp +1982 -0
- package/cpp/ggml-cpu/repack.h +120 -0
- package/cpp/ggml-cpu/simd-mappings.h +1184 -0
- package/cpp/ggml-cpu/traits.cpp +36 -0
- package/cpp/ggml-cpu/traits.h +38 -0
- package/cpp/ggml-cpu/unary-ops.cpp +186 -0
- package/cpp/ggml-cpu/unary-ops.h +28 -0
- package/cpp/ggml-cpu/vec.cpp +348 -0
- package/cpp/ggml-cpu/vec.h +1121 -0
- package/cpp/ggml-cpu.h +145 -0
- package/cpp/ggml-impl.h +622 -0
- package/cpp/ggml-metal-impl.h +688 -0
- package/cpp/ggml-metal.h +66 -0
- package/cpp/ggml-metal.m +6833 -0
- package/cpp/ggml-opt.cpp +1093 -0
- package/cpp/ggml-opt.h +256 -0
- package/cpp/ggml-quants.c +5324 -0
- package/cpp/ggml-quants.h +106 -0
- package/cpp/ggml-threading.cpp +12 -0
- package/cpp/ggml-threading.h +14 -0
- package/cpp/ggml.c +7108 -0
- package/cpp/ggml.h +2492 -0
- package/cpp/gguf.cpp +1358 -0
- package/cpp/gguf.h +202 -0
- package/cpp/json-partial.cpp +256 -0
- package/cpp/json-partial.h +38 -0
- package/cpp/json-schema-to-grammar.cpp +985 -0
- package/cpp/json-schema-to-grammar.h +21 -0
- package/cpp/llama-adapter.cpp +388 -0
- package/cpp/llama-adapter.h +76 -0
- package/cpp/llama-arch.cpp +2355 -0
- package/cpp/llama-arch.h +499 -0
- package/cpp/llama-batch.cpp +875 -0
- package/cpp/llama-batch.h +160 -0
- package/cpp/llama-chat.cpp +783 -0
- package/cpp/llama-chat.h +65 -0
- package/cpp/llama-context.cpp +2748 -0
- package/cpp/llama-context.h +306 -0
- package/cpp/llama-cparams.cpp +5 -0
- package/cpp/llama-cparams.h +41 -0
- package/cpp/llama-cpp.h +30 -0
- package/cpp/llama-grammar.cpp +1229 -0
- package/cpp/llama-grammar.h +173 -0
- package/cpp/llama-graph.cpp +1891 -0
- package/cpp/llama-graph.h +810 -0
- package/cpp/llama-hparams.cpp +180 -0
- package/cpp/llama-hparams.h +233 -0
- package/cpp/llama-impl.cpp +167 -0
- package/cpp/llama-impl.h +61 -0
- package/cpp/llama-io.cpp +15 -0
- package/cpp/llama-io.h +35 -0
- package/cpp/llama-kv-cache-iswa.cpp +318 -0
- package/cpp/llama-kv-cache-iswa.h +135 -0
- package/cpp/llama-kv-cache.cpp +2059 -0
- package/cpp/llama-kv-cache.h +374 -0
- package/cpp/llama-kv-cells.h +491 -0
- package/cpp/llama-memory-hybrid.cpp +258 -0
- package/cpp/llama-memory-hybrid.h +137 -0
- package/cpp/llama-memory-recurrent.cpp +1146 -0
- package/cpp/llama-memory-recurrent.h +179 -0
- package/cpp/llama-memory.cpp +59 -0
- package/cpp/llama-memory.h +119 -0
- package/cpp/llama-mmap.cpp +600 -0
- package/cpp/llama-mmap.h +68 -0
- package/cpp/llama-model-loader.cpp +1164 -0
- package/cpp/llama-model-loader.h +170 -0
- package/cpp/llama-model-saver.cpp +282 -0
- package/cpp/llama-model-saver.h +37 -0
- package/cpp/llama-model.cpp +19042 -0
- package/cpp/llama-model.h +491 -0
- package/cpp/llama-sampling.cpp +2575 -0
- package/cpp/llama-sampling.h +32 -0
- package/cpp/llama-vocab.cpp +3792 -0
- package/cpp/llama-vocab.h +176 -0
- package/cpp/llama.cpp +358 -0
- package/cpp/llama.h +1373 -0
- package/cpp/log.cpp +427 -0
- package/cpp/log.h +103 -0
- package/cpp/minja/chat-template.hpp +550 -0
- package/cpp/minja/minja.hpp +3009 -0
- package/cpp/nlohmann/json.hpp +25526 -0
- package/cpp/nlohmann/json_fwd.hpp +187 -0
- package/cpp/regex-partial.cpp +204 -0
- package/cpp/regex-partial.h +56 -0
- package/cpp/rn-completion.cpp +681 -0
- package/cpp/rn-completion.h +116 -0
- package/cpp/rn-llama.cpp +345 -0
- package/cpp/rn-llama.h +149 -0
- package/cpp/rn-mtmd.hpp +602 -0
- package/cpp/rn-tts.cpp +591 -0
- package/cpp/rn-tts.h +59 -0
- package/cpp/sampling.cpp +579 -0
- package/cpp/sampling.h +107 -0
- package/cpp/tools/mtmd/clip-impl.h +473 -0
- package/cpp/tools/mtmd/clip.cpp +4322 -0
- package/cpp/tools/mtmd/clip.h +106 -0
- package/cpp/tools/mtmd/miniaudio/miniaudio.h +93468 -0
- package/cpp/tools/mtmd/mtmd-audio.cpp +769 -0
- package/cpp/tools/mtmd/mtmd-audio.h +47 -0
- package/cpp/tools/mtmd/mtmd-helper.cpp +460 -0
- package/cpp/tools/mtmd/mtmd-helper.h +91 -0
- package/cpp/tools/mtmd/mtmd.cpp +1066 -0
- package/cpp/tools/mtmd/mtmd.h +298 -0
- package/cpp/tools/mtmd/stb/stb_image.h +7988 -0
- package/cpp/unicode-data.cpp +7034 -0
- package/cpp/unicode-data.h +20 -0
- package/cpp/unicode.cpp +1061 -0
- package/cpp/unicode.h +68 -0
- package/package.json +2 -1
package/cpp/gguf.h
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
// This file contains functionality related to "GGUF" files, the binary file format used by ggml.
|
|
2
|
+
// GGUF files have the following structure:
|
|
3
|
+
//
|
|
4
|
+
// 1. File magic "GGUF" (4 bytes).
|
|
5
|
+
// 2. File version (uint32_t).
|
|
6
|
+
// 3. Number of ggml tensors in file (int64_t).
|
|
7
|
+
// 4. Number of key-value-pairs in file (int64_t).
|
|
8
|
+
// 5. For each KV pair:
|
|
9
|
+
// 1. The key (string).
|
|
10
|
+
// 2. The value type (lm_gguf_type).
|
|
11
|
+
// 3a. If the value type is LM_GGUF_TYPE_ARRAY:
|
|
12
|
+
// 1. The type of the array (lm_gguf_type).
|
|
13
|
+
// 2. The number of elements in the array (uint64_t).
|
|
14
|
+
// 3. The binary representation of each element in the array.
|
|
15
|
+
// 3b. Otherwise:
|
|
16
|
+
// 1. The binary representation of the value.
|
|
17
|
+
// 6. For each ggml tensor:
|
|
18
|
+
// 1. The tensor name (string).
|
|
19
|
+
// 2. The number of dimensions of the tensor (uint32_t).
|
|
20
|
+
// 3. For each dimension:
|
|
21
|
+
// 1. The size of the tensor in the dimension (int64_t).
|
|
22
|
+
// 4. The tensor data type (lm_ggml_type).
|
|
23
|
+
// 5. The tensor data offset in the tensor data binary blob (uint64_t).
|
|
24
|
+
// 7. The tensor data binary blob (optional, aligned).
|
|
25
|
+
//
|
|
26
|
+
// Strings are serialized as the string length (uint64_t) followed by the C string without the null terminator.
|
|
27
|
+
// All enums are stored as int32_t.
|
|
28
|
+
// All bool values are stored as int8_t.
|
|
29
|
+
// If the special key "general.alignment" (uint32_t) is defined it is used for alignment,
|
|
30
|
+
// otherwise LM_GGUF_DEFAULT_ALIGNMENT is used.
|
|
31
|
+
//
|
|
32
|
+
// Module maintainer: Johannes Gäßler (@JohannesGaessler, johannesg@5d6.de)
|
|
33
|
+
|
|
34
|
+
#pragma once
|
|
35
|
+
|
|
36
|
+
#include "ggml.h"
|
|
37
|
+
|
|
38
|
+
#include <stdbool.h>
|
|
39
|
+
#include <stdint.h>
|
|
40
|
+
|
|
41
|
+
#define LM_GGUF_MAGIC "GGUF"
|
|
42
|
+
#define LM_GGUF_VERSION 3
|
|
43
|
+
|
|
44
|
+
#define LM_GGUF_KEY_GENERAL_ALIGNMENT "general.alignment"
|
|
45
|
+
|
|
46
|
+
#define LM_GGUF_DEFAULT_ALIGNMENT 32
|
|
47
|
+
|
|
48
|
+
#ifdef __cplusplus
|
|
49
|
+
extern "C" {
|
|
50
|
+
#endif
|
|
51
|
+
|
|
52
|
+
// types that can be stored as GGUF KV data
|
|
53
|
+
enum lm_gguf_type {
|
|
54
|
+
LM_GGUF_TYPE_UINT8 = 0,
|
|
55
|
+
LM_GGUF_TYPE_INT8 = 1,
|
|
56
|
+
LM_GGUF_TYPE_UINT16 = 2,
|
|
57
|
+
LM_GGUF_TYPE_INT16 = 3,
|
|
58
|
+
LM_GGUF_TYPE_UINT32 = 4,
|
|
59
|
+
LM_GGUF_TYPE_INT32 = 5,
|
|
60
|
+
LM_GGUF_TYPE_FLOAT32 = 6,
|
|
61
|
+
LM_GGUF_TYPE_BOOL = 7,
|
|
62
|
+
LM_GGUF_TYPE_STRING = 8,
|
|
63
|
+
LM_GGUF_TYPE_ARRAY = 9,
|
|
64
|
+
LM_GGUF_TYPE_UINT64 = 10,
|
|
65
|
+
LM_GGUF_TYPE_INT64 = 11,
|
|
66
|
+
LM_GGUF_TYPE_FLOAT64 = 12,
|
|
67
|
+
LM_GGUF_TYPE_COUNT, // marks the end of the enum
|
|
68
|
+
};
|
|
69
|
+
|
|
70
|
+
struct lm_gguf_context;
|
|
71
|
+
|
|
72
|
+
struct lm_gguf_init_params {
|
|
73
|
+
bool no_alloc;
|
|
74
|
+
|
|
75
|
+
// if not NULL, create a lm_ggml_context and allocate the tensor data in it
|
|
76
|
+
struct lm_ggml_context ** ctx;
|
|
77
|
+
};
|
|
78
|
+
|
|
79
|
+
LM_GGML_API struct lm_gguf_context * lm_gguf_init_empty(void);
|
|
80
|
+
LM_GGML_API struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gguf_init_params params);
|
|
81
|
+
//LM_GGML_API struct lm_gguf_context * lm_gguf_init_from_buffer(..);
|
|
82
|
+
|
|
83
|
+
LM_GGML_API void lm_gguf_free(struct lm_gguf_context * ctx);
|
|
84
|
+
|
|
85
|
+
LM_GGML_API const char * lm_gguf_type_name(enum lm_gguf_type type);
|
|
86
|
+
|
|
87
|
+
LM_GGML_API uint32_t lm_gguf_get_version (const struct lm_gguf_context * ctx);
|
|
88
|
+
LM_GGML_API size_t lm_gguf_get_alignment (const struct lm_gguf_context * ctx);
|
|
89
|
+
LM_GGML_API size_t lm_gguf_get_data_offset(const struct lm_gguf_context * ctx);
|
|
90
|
+
|
|
91
|
+
LM_GGML_API int64_t lm_gguf_get_n_kv(const struct lm_gguf_context * ctx);
|
|
92
|
+
LM_GGML_API int64_t lm_gguf_find_key(const struct lm_gguf_context * ctx, const char * key); // returns -1 if key is not found
|
|
93
|
+
LM_GGML_API const char * lm_gguf_get_key (const struct lm_gguf_context * ctx, int64_t key_id);
|
|
94
|
+
|
|
95
|
+
LM_GGML_API enum lm_gguf_type lm_gguf_get_kv_type (const struct lm_gguf_context * ctx, int64_t key_id);
|
|
96
|
+
LM_GGML_API enum lm_gguf_type lm_gguf_get_arr_type(const struct lm_gguf_context * ctx, int64_t key_id);
|
|
97
|
+
|
|
98
|
+
// will abort if the wrong type is used for the key
|
|
99
|
+
LM_GGML_API uint8_t lm_gguf_get_val_u8 (const struct lm_gguf_context * ctx, int64_t key_id);
|
|
100
|
+
LM_GGML_API int8_t lm_gguf_get_val_i8 (const struct lm_gguf_context * ctx, int64_t key_id);
|
|
101
|
+
LM_GGML_API uint16_t lm_gguf_get_val_u16 (const struct lm_gguf_context * ctx, int64_t key_id);
|
|
102
|
+
LM_GGML_API int16_t lm_gguf_get_val_i16 (const struct lm_gguf_context * ctx, int64_t key_id);
|
|
103
|
+
LM_GGML_API uint32_t lm_gguf_get_val_u32 (const struct lm_gguf_context * ctx, int64_t key_id);
|
|
104
|
+
LM_GGML_API int32_t lm_gguf_get_val_i32 (const struct lm_gguf_context * ctx, int64_t key_id);
|
|
105
|
+
LM_GGML_API float lm_gguf_get_val_f32 (const struct lm_gguf_context * ctx, int64_t key_id);
|
|
106
|
+
LM_GGML_API uint64_t lm_gguf_get_val_u64 (const struct lm_gguf_context * ctx, int64_t key_id);
|
|
107
|
+
LM_GGML_API int64_t lm_gguf_get_val_i64 (const struct lm_gguf_context * ctx, int64_t key_id);
|
|
108
|
+
LM_GGML_API double lm_gguf_get_val_f64 (const struct lm_gguf_context * ctx, int64_t key_id);
|
|
109
|
+
LM_GGML_API bool lm_gguf_get_val_bool(const struct lm_gguf_context * ctx, int64_t key_id);
|
|
110
|
+
LM_GGML_API const char * lm_gguf_get_val_str (const struct lm_gguf_context * ctx, int64_t key_id);
|
|
111
|
+
LM_GGML_API const void * lm_gguf_get_val_data(const struct lm_gguf_context * ctx, int64_t key_id);
|
|
112
|
+
LM_GGML_API size_t lm_gguf_get_arr_n (const struct lm_gguf_context * ctx, int64_t key_id);
|
|
113
|
+
|
|
114
|
+
// get raw pointer to the first element of the array with the given key_id
|
|
115
|
+
// for bool arrays, note that they are always stored as int8 on all platforms (usually this makes no difference)
|
|
116
|
+
LM_GGML_API const void * lm_gguf_get_arr_data(const struct lm_gguf_context * ctx, int64_t key_id);
|
|
117
|
+
|
|
118
|
+
// get ith C string from array with given key_id
|
|
119
|
+
LM_GGML_API const char * lm_gguf_get_arr_str (const struct lm_gguf_context * ctx, int64_t key_id, size_t i);
|
|
120
|
+
|
|
121
|
+
LM_GGML_API int64_t lm_gguf_get_n_tensors (const struct lm_gguf_context * ctx);
|
|
122
|
+
LM_GGML_API int64_t lm_gguf_find_tensor (const struct lm_gguf_context * ctx, const char * name); // returns -1 if the tensor is not found
|
|
123
|
+
LM_GGML_API size_t lm_gguf_get_tensor_offset(const struct lm_gguf_context * ctx, int64_t tensor_id);
|
|
124
|
+
LM_GGML_API const char * lm_gguf_get_tensor_name (const struct lm_gguf_context * ctx, int64_t tensor_id);
|
|
125
|
+
LM_GGML_API enum lm_ggml_type lm_gguf_get_tensor_type (const struct lm_gguf_context * ctx, int64_t tensor_id);
|
|
126
|
+
LM_GGML_API size_t lm_gguf_get_tensor_size (const struct lm_gguf_context * ctx, int64_t tensor_id);
|
|
127
|
+
|
|
128
|
+
// removes key if it exists, returns id that the key had prior to removal (-1 if it didn't exist)
|
|
129
|
+
LM_GGML_API int64_t lm_gguf_remove_key(struct lm_gguf_context * ctx, const char * key);
|
|
130
|
+
|
|
131
|
+
// overrides an existing KV pair or adds a new one, the new KV pair is always at the back
|
|
132
|
+
LM_GGML_API void lm_gguf_set_val_u8 (struct lm_gguf_context * ctx, const char * key, uint8_t val);
|
|
133
|
+
LM_GGML_API void lm_gguf_set_val_i8 (struct lm_gguf_context * ctx, const char * key, int8_t val);
|
|
134
|
+
LM_GGML_API void lm_gguf_set_val_u16 (struct lm_gguf_context * ctx, const char * key, uint16_t val);
|
|
135
|
+
LM_GGML_API void lm_gguf_set_val_i16 (struct lm_gguf_context * ctx, const char * key, int16_t val);
|
|
136
|
+
LM_GGML_API void lm_gguf_set_val_u32 (struct lm_gguf_context * ctx, const char * key, uint32_t val);
|
|
137
|
+
LM_GGML_API void lm_gguf_set_val_i32 (struct lm_gguf_context * ctx, const char * key, int32_t val);
|
|
138
|
+
LM_GGML_API void lm_gguf_set_val_f32 (struct lm_gguf_context * ctx, const char * key, float val);
|
|
139
|
+
LM_GGML_API void lm_gguf_set_val_u64 (struct lm_gguf_context * ctx, const char * key, uint64_t val);
|
|
140
|
+
LM_GGML_API void lm_gguf_set_val_i64 (struct lm_gguf_context * ctx, const char * key, int64_t val);
|
|
141
|
+
LM_GGML_API void lm_gguf_set_val_f64 (struct lm_gguf_context * ctx, const char * key, double val);
|
|
142
|
+
LM_GGML_API void lm_gguf_set_val_bool(struct lm_gguf_context * ctx, const char * key, bool val);
|
|
143
|
+
LM_GGML_API void lm_gguf_set_val_str (struct lm_gguf_context * ctx, const char * key, const char * val);
|
|
144
|
+
|
|
145
|
+
// creates a new array with n elements of the given type and copies the corresponding number of bytes from data
|
|
146
|
+
LM_GGML_API void lm_gguf_set_arr_data(struct lm_gguf_context * ctx, const char * key, enum lm_gguf_type type, const void * data, size_t n);
|
|
147
|
+
|
|
148
|
+
// creates a new array with n strings and copies the corresponding strings from data
|
|
149
|
+
LM_GGML_API void lm_gguf_set_arr_str (struct lm_gguf_context * ctx, const char * key, const char ** data, size_t n);
|
|
150
|
+
|
|
151
|
+
// set or add KV pairs from another context
|
|
152
|
+
LM_GGML_API void lm_gguf_set_kv(struct lm_gguf_context * ctx, const struct lm_gguf_context * src);
|
|
153
|
+
|
|
154
|
+
// add tensor to GGUF context, tensor name must be unique
|
|
155
|
+
LM_GGML_API void lm_gguf_add_tensor(struct lm_gguf_context * ctx, const struct lm_ggml_tensor * tensor);
|
|
156
|
+
|
|
157
|
+
// after changing a tensor's type, the offsets of all tensors with higher indices are immediately recalculated
|
|
158
|
+
// in such a way that the tensor data remains as one contiguous block (except for padding)
|
|
159
|
+
LM_GGML_API void lm_gguf_set_tensor_type(struct lm_gguf_context * ctx, const char * name, enum lm_ggml_type type);
|
|
160
|
+
|
|
161
|
+
// assumes that at least lm_gguf_get_tensor_size bytes can be read from data
|
|
162
|
+
LM_GGML_API void lm_gguf_set_tensor_data(struct lm_gguf_context * ctx, const char * name, const void * data);
|
|
163
|
+
|
|
164
|
+
// writing gguf files can be done in 3 ways:
|
|
165
|
+
//
|
|
166
|
+
// - write the entire lm_gguf_context to a binary file in a single pass:
|
|
167
|
+
//
|
|
168
|
+
// lm_gguf_write_to_file(ctx, fname, /*only_meta =*/ false);
|
|
169
|
+
//
|
|
170
|
+
// - write only the meta data to a file, then re-open the file and append the tensor data:
|
|
171
|
+
//
|
|
172
|
+
// lm_gguf_write_to_file(ctx, fname, /*only_meta =*/ true);
|
|
173
|
+
// FILE * f = fopen(fname, "ab");
|
|
174
|
+
// fwrite(..., f); // write tensor data
|
|
175
|
+
// fclose(f);
|
|
176
|
+
//
|
|
177
|
+
// - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
|
|
178
|
+
//
|
|
179
|
+
// FILE * f = fopen(fname, "wb");
|
|
180
|
+
// const size_t size_meta = lm_gguf_get_meta_size(ctx);
|
|
181
|
+
// fseek(f, size_meta, SEEK_SET);
|
|
182
|
+
// fwrite(..., f); // write tensor data
|
|
183
|
+
// void * data = malloc(size_meta);
|
|
184
|
+
// lm_gguf_get_meta_data(ctx, data);
|
|
185
|
+
// rewind(f);
|
|
186
|
+
// fwrite(data, 1, size_meta, f);
|
|
187
|
+
// free(data);
|
|
188
|
+
// fclose(f);
|
|
189
|
+
//
|
|
190
|
+
|
|
191
|
+
// write the entire context to a binary file
|
|
192
|
+
LM_GGML_API bool lm_gguf_write_to_file(const struct lm_gguf_context * ctx, const char * fname, bool only_meta);
|
|
193
|
+
|
|
194
|
+
// get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
|
|
195
|
+
LM_GGML_API size_t lm_gguf_get_meta_size(const struct lm_gguf_context * ctx);
|
|
196
|
+
|
|
197
|
+
// writes the meta data to pointer "data"
|
|
198
|
+
LM_GGML_API void lm_gguf_get_meta_data(const struct lm_gguf_context * ctx, void * data);
|
|
199
|
+
|
|
200
|
+
#ifdef __cplusplus
|
|
201
|
+
}
|
|
202
|
+
#endif
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
#include "json-partial.h"
|
|
2
|
+
|
|
3
|
+
#include "log.h"
|
|
4
|
+
|
|
5
|
+
#include "nlohmann/json.hpp"
|
|
6
|
+
|
|
7
|
+
#include <string>
|
|
8
|
+
|
|
9
|
+
using json = nlohmann::ordered_json;
|
|
10
|
+
|
|
11
|
+
enum common_json_stack_element_type {
|
|
12
|
+
COMMON_JSON_STACK_ELEMENT_OBJECT,
|
|
13
|
+
COMMON_JSON_STACK_ELEMENT_KEY,
|
|
14
|
+
COMMON_JSON_STACK_ELEMENT_ARRAY,
|
|
15
|
+
};
|
|
16
|
+
|
|
17
|
+
struct common_json_stack_element {
|
|
18
|
+
common_json_stack_element_type type;
|
|
19
|
+
std::string key;
|
|
20
|
+
};
|
|
21
|
+
|
|
22
|
+
bool common_json_parse(
|
|
23
|
+
const std::string & input,
|
|
24
|
+
const std::string & healing_marker,
|
|
25
|
+
common_json & out)
|
|
26
|
+
{
|
|
27
|
+
std::string::const_iterator it = input.begin();
|
|
28
|
+
const auto end = input.end();
|
|
29
|
+
return common_json_parse(it, end, healing_marker, out);
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
bool common_json_parse(
|
|
33
|
+
std::string::const_iterator & it,
|
|
34
|
+
const std::string::const_iterator & end,
|
|
35
|
+
const std::string & healing_marker,
|
|
36
|
+
common_json & out)
|
|
37
|
+
{
|
|
38
|
+
// SAX handler that records where parsing failed; see https://json.nlohmann.me/features/parsing/sax_interface/
|
|
39
|
+
struct json_error_locator : public nlohmann::json_sax<json> {
|
|
40
|
+
std::size_t position;
|
|
41
|
+
bool found_error;
|
|
42
|
+
std::string last_token;
|
|
43
|
+
std::string exception_message;
|
|
44
|
+
std::vector<common_json_stack_element> stack;
|
|
45
|
+
|
|
46
|
+
json_error_locator() : position(0), found_error(false) {}
|
|
47
|
+
|
|
48
|
+
bool parse_error(std::size_t position, const std::string & last_token, const json::exception & ex) override { // NOLINT
|
|
49
|
+
this->position = position - 1;
|
|
50
|
+
this->found_error = true;
|
|
51
|
+
this->last_token = last_token;
|
|
52
|
+
this->exception_message = ex.what();
|
|
53
|
+
return false;
|
|
54
|
+
}
|
|
55
|
+
void close_value() {
|
|
56
|
+
if (!stack.empty() && (stack.back().type == COMMON_JSON_STACK_ELEMENT_KEY)) {
|
|
57
|
+
stack.pop_back();
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
bool null() override { // NOLINT
|
|
61
|
+
close_value();
|
|
62
|
+
return true;
|
|
63
|
+
}
|
|
64
|
+
bool boolean(bool) override { // NOLINT
|
|
65
|
+
close_value();
|
|
66
|
+
return true;
|
|
67
|
+
}
|
|
68
|
+
bool number_integer(number_integer_t) override { // NOLINT
|
|
69
|
+
close_value();
|
|
70
|
+
return true;
|
|
71
|
+
}
|
|
72
|
+
bool number_unsigned(number_unsigned_t) override { // NOLINT
|
|
73
|
+
close_value();
|
|
74
|
+
return true;
|
|
75
|
+
}
|
|
76
|
+
bool number_float(number_float_t, const string_t &) override { // NOLINT
|
|
77
|
+
close_value();
|
|
78
|
+
return true;
|
|
79
|
+
}
|
|
80
|
+
bool string(string_t &) override { // NOLINT
|
|
81
|
+
close_value();
|
|
82
|
+
return true;
|
|
83
|
+
}
|
|
84
|
+
bool binary(binary_t &) override { // NOLINT
|
|
85
|
+
close_value();
|
|
86
|
+
return true;
|
|
87
|
+
}
|
|
88
|
+
bool start_object(std::size_t) override { // NOLINT
|
|
89
|
+
stack.push_back({COMMON_JSON_STACK_ELEMENT_OBJECT, ""});
|
|
90
|
+
return true;
|
|
91
|
+
}
|
|
92
|
+
bool end_object() override {
|
|
93
|
+
LM_GGML_ASSERT(!stack.empty() && stack.back().type == COMMON_JSON_STACK_ELEMENT_OBJECT);
|
|
94
|
+
stack.pop_back();
|
|
95
|
+
close_value();
|
|
96
|
+
return true;
|
|
97
|
+
}
|
|
98
|
+
bool key(string_t & key) override { // NOLINT
|
|
99
|
+
stack.push_back({COMMON_JSON_STACK_ELEMENT_KEY, key});
|
|
100
|
+
return true;
|
|
101
|
+
}
|
|
102
|
+
bool start_array(std::size_t) override { // NOLINT
|
|
103
|
+
stack.push_back({COMMON_JSON_STACK_ELEMENT_ARRAY, ""});
|
|
104
|
+
return true;
|
|
105
|
+
}
|
|
106
|
+
bool end_array() override {
|
|
107
|
+
LM_GGML_ASSERT(!stack.empty() && stack.back().type == COMMON_JSON_STACK_ELEMENT_ARRAY);
|
|
108
|
+
stack.pop_back();
|
|
109
|
+
close_value();
|
|
110
|
+
return true;
|
|
111
|
+
}
|
|
112
|
+
};
|
|
113
|
+
json_error_locator err_loc;
|
|
114
|
+
auto start = it;
|
|
115
|
+
json::sax_parse(it, end, &err_loc);
|
|
116
|
+
|
|
117
|
+
if (err_loc.found_error) {
|
|
118
|
+
it = start;
|
|
119
|
+
auto temptative_end = it + err_loc.position;
|
|
120
|
+
// LOG_DBG("Error at position %zu (is_end = %s): %s\n", err_loc.position, temptative_end == end ? "true" : "false", err_loc.exception_message.c_str());
|
|
121
|
+
|
|
122
|
+
auto input = std::string(it, temptative_end);
|
|
123
|
+
try {
|
|
124
|
+
out.json = json::parse(input);
|
|
125
|
+
// out.json = json::parse(it, temptative_end);
|
|
126
|
+
it = temptative_end;
|
|
127
|
+
return true;
|
|
128
|
+
} catch (const std::exception & ex) {
|
|
129
|
+
// No, needs healing.
|
|
130
|
+
LOG_DBG("Failed to parse up to error: %s: <<<%s>>>\n", ex.what(), std::string(it, temptative_end).c_str());
|
|
131
|
+
}
|
|
132
|
+
auto can_parse = [](const std::string & str) {
|
|
133
|
+
try {
|
|
134
|
+
auto _ = json::parse(str); // NOLINT
|
|
135
|
+
return true;
|
|
136
|
+
} catch (const std::exception &) {
|
|
137
|
+
return false;
|
|
138
|
+
}
|
|
139
|
+
};
|
|
140
|
+
if (!healing_marker.empty() && !err_loc.stack.empty()) {
|
|
141
|
+
std::string str(it, temptative_end);
|
|
142
|
+
auto last_non_sp_pos = str.find_last_not_of(" \n\r\t");
|
|
143
|
+
if (last_non_sp_pos == std::string::npos) {
|
|
144
|
+
throw std::runtime_error("Cannot heal a truncated JSON that stopped in an unknown location");
|
|
145
|
+
}
|
|
146
|
+
auto last_non_sp_char = str[last_non_sp_pos];
|
|
147
|
+
// Used to detect stops on a number, which may not be complete.
|
|
148
|
+
auto was_maybe_number = [&]() {
|
|
149
|
+
if (!str.empty() && std::isspace(str.back())) {
|
|
150
|
+
return false;
|
|
151
|
+
}
|
|
152
|
+
return std::isdigit(last_non_sp_char) ||
|
|
153
|
+
last_non_sp_char == '.' ||
|
|
154
|
+
last_non_sp_char == 'e' ||
|
|
155
|
+
last_non_sp_char == 'E' ||
|
|
156
|
+
last_non_sp_char == '-';
|
|
157
|
+
};
|
|
158
|
+
|
|
159
|
+
std::string closing;
|
|
160
|
+
for (size_t i = err_loc.stack.size(); i > 0; i--) {
|
|
161
|
+
auto & el = err_loc.stack[i - 1];
|
|
162
|
+
if (el.type == COMMON_JSON_STACK_ELEMENT_OBJECT) {
|
|
163
|
+
closing += "}";
|
|
164
|
+
} else if (el.type == COMMON_JSON_STACK_ELEMENT_ARRAY) {
|
|
165
|
+
closing += "]";
|
|
166
|
+
} else if (el.type != COMMON_JSON_STACK_ELEMENT_KEY) {
|
|
167
|
+
throw std::runtime_error("Unexpected stack element type");
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
const auto & magic_seed = out.healing_marker.marker = healing_marker;//"$llama.cpp.json$";
|
|
172
|
+
|
|
173
|
+
if (err_loc.stack.back().type == COMMON_JSON_STACK_ELEMENT_KEY) {
|
|
174
|
+
// We're inside an object value
|
|
175
|
+
if (last_non_sp_char == ':' && can_parse(str + "1" + closing)) {
|
|
176
|
+
// Was about to create an object value
|
|
177
|
+
str += (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
|
|
178
|
+
} else if (can_parse(str + ": 1" + closing)) {
|
|
179
|
+
str += (out.healing_marker.json_dump_marker = ":\"" + magic_seed) + "\"" + closing;
|
|
180
|
+
} else if (last_non_sp_char == '{' && can_parse(str + closing)) {
|
|
181
|
+
// Was about to create an object
|
|
182
|
+
str += (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\": 1" + closing;
|
|
183
|
+
} else if (can_parse(str + "\"" + closing)) {
|
|
184
|
+
// Was inside an object value string
|
|
185
|
+
str += (out.healing_marker.json_dump_marker = magic_seed) + "\"" + closing;
|
|
186
|
+
} else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\"" + closing)) {
|
|
187
|
+
// Was inside an object value string after an escape
|
|
188
|
+
str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\"" + closing;
|
|
189
|
+
} else {
|
|
190
|
+
// find last :
|
|
191
|
+
auto last_pos = str.find_last_of(':');
|
|
192
|
+
if (last_pos == std::string::npos) {
|
|
193
|
+
throw std::runtime_error("Cannot heal a truncated JSON that stopped in an unknown location");
|
|
194
|
+
}
|
|
195
|
+
// Cutting back to opening : for object value
|
|
196
|
+
str = str.substr(0, last_pos + 1) + (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
|
|
197
|
+
}
|
|
198
|
+
} else if (err_loc.stack.back().type == COMMON_JSON_STACK_ELEMENT_ARRAY) {
|
|
199
|
+
if ((last_non_sp_char == ',' || last_non_sp_char == '[') && can_parse(str + "1" + closing)) {
|
|
200
|
+
// Was about to create an array value
|
|
201
|
+
str += (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
|
|
202
|
+
} else if (can_parse(str + "\"" + closing)) {
|
|
203
|
+
// Was inside an array value string
|
|
204
|
+
str += (out.healing_marker.json_dump_marker = magic_seed) + "\"" + closing;
|
|
205
|
+
} else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\"" + closing)) {
|
|
206
|
+
// Was inside an array value string after an escape
|
|
207
|
+
str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\"" + closing;
|
|
208
|
+
} else if (!was_maybe_number() && can_parse(str + ", 1" + closing)) {
|
|
209
|
+
// Had just finished a value
|
|
210
|
+
str += (out.healing_marker.json_dump_marker = ",\"" + magic_seed) + "\"" + closing;
|
|
211
|
+
} else {
|
|
212
|
+
auto last_pos = str.find_last_of("[,");
|
|
213
|
+
if (last_pos == std::string::npos) {
|
|
214
|
+
throw std::runtime_error("Cannot heal a truncated JSON array stopped in an unknown location");
|
|
215
|
+
}
|
|
216
|
+
// Cutting back to last [ or , for array value
|
|
217
|
+
str = str.substr(0, last_pos + 1) + (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
|
|
218
|
+
}
|
|
219
|
+
} else if (err_loc.stack.back().type == COMMON_JSON_STACK_ELEMENT_OBJECT) {
|
|
220
|
+
if ((last_non_sp_char == '{' && can_parse(str + closing)) ||
|
|
221
|
+
(last_non_sp_char == ',' && can_parse(str + "\"\": 1" + closing))) {
|
|
222
|
+
// Was about to create an object key+value
|
|
223
|
+
str += (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\": 1" + closing;
|
|
224
|
+
} else if (!was_maybe_number() && can_parse(str + ",\"\": 1" + closing)) {
|
|
225
|
+
// Had just finished a value; a further key+value can be appended after a comma
|
|
226
|
+
str += (out.healing_marker.json_dump_marker = ",\"" + magic_seed) + "\": 1" + closing;
|
|
227
|
+
} else if (can_parse(str + "\": 1" + closing)) {
|
|
228
|
+
// Was inside an object key string
|
|
229
|
+
str += (out.healing_marker.json_dump_marker = magic_seed) + "\": 1" + closing;
|
|
230
|
+
} else if (str[str.length() - 1] == '\\' && can_parse(str + "\\\": 1" + closing)) {
|
|
231
|
+
// Was inside an object key string after an escape
|
|
232
|
+
str += (out.healing_marker.json_dump_marker = "\\" + magic_seed) + "\": 1" + closing;
|
|
233
|
+
} else {
|
|
234
|
+
auto last_pos = str.find_last_of(':');
|
|
235
|
+
if (last_pos == std::string::npos) {
|
|
236
|
+
throw std::runtime_error("Cannot heal a truncated JSON object stopped in an unknown location");
|
|
237
|
+
}
|
|
238
|
+
// fprintf(stderr, "Cutting back to last : for object key+value\n");
|
|
239
|
+
str = str.substr(0, last_pos + 1) + (out.healing_marker.json_dump_marker = "\"" + magic_seed) + "\"" + closing;
|
|
240
|
+
}
|
|
241
|
+
} else {
|
|
242
|
+
throw std::runtime_error("Cannot heal a truncated JSON object stopped in an unknown location");
|
|
243
|
+
}
|
|
244
|
+
// fprintf(stderr, "HEALED:\nSTRING <<<\n%s\n>>>\n\nmagic_cut: <<<\n%s\n>>>\n\n", str.c_str(), out.healing_marker.json_dump_marker.c_str());
|
|
245
|
+
out.json = json::parse(str);
|
|
246
|
+
it = temptative_end;
|
|
247
|
+
return true;
|
|
248
|
+
}
|
|
249
|
+
// TODO: handle unclosed top-level primitive if the stack was empty but we got an error (e.g. "tru", "\"", etc...)
|
|
250
|
+
// fprintf(stderr, "Closing: TODO\n");
|
|
251
|
+
return false;
|
|
252
|
+
}
|
|
253
|
+
out.json = json::parse(it, end);
|
|
254
|
+
it = end;
|
|
255
|
+
return true;
|
|
256
|
+
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "nlohmann/json.hpp"
|
|
4
|
+
|
|
5
|
+
// Healing marker (empty if the JSON was fully parsed / wasn't healed).
|
|
6
|
+
struct common_healing_marker {
|
|
7
|
+
// Raw marker.
|
|
8
|
+
std::string marker;
|
|
9
|
+
|
|
10
|
+
// Cutting the `common_json.json.dump()` string at the (only) occurrence of this marker should yield the original partial JSON string (modulo spaces / if it had the same dump format).
|
|
11
|
+
std::string json_dump_marker;
|
|
12
|
+
};
|
|
13
|
+
|
|
14
|
+
// Represents a parsed JSON object, with its optional healing marker (a JSON dump fragment that can be used to find the position of healing in the JSON dump string)
|
|
15
|
+
struct common_json {
|
|
16
|
+
nlohmann::ordered_json json;
|
|
17
|
+
|
|
18
|
+
common_healing_marker healing_marker;
|
|
19
|
+
};
|
|
20
|
+
|
|
21
|
+
// Parse the JSON string, healing (closing) any partial JSON if `healing_marker` is not empty.
|
|
22
|
+
//
|
|
23
|
+
// Healing completes partial JSON strings by adding a (possibly modified) healing marker, then whatever is needed to close the JSON.
|
|
24
|
+
// This makes it possible to parse the resulting healed JSON string and still cut it again at the healing marker if needed.
|
|
25
|
+
// (this is used when parsing JSON outputs from the models, then crafting partial JSONs for the partial tool calls in OAI format).
|
|
26
|
+
//
|
|
27
|
+
// For instance, parsing `{` with a healing marker `foo` will produce a healed JSON `{"foo":1}`, w/ json_dump_marker = `"foo` (opening quote + marker, no closing quote), which can be used to break the JSON again.
|
|
28
|
+
bool common_json_parse(
|
|
29
|
+
const std::string & input,
|
|
30
|
+
const std::string & healing_marker,
|
|
31
|
+
common_json & out);
|
|
32
|
+
|
|
33
|
+
// Parse the JSON string (see overload above); when the (potentially partial) parsing succeeds, `it` is advanced past the consumed input: to `end` if everything parsed, otherwise to the position where parsing stopped.
|
|
34
|
+
bool common_json_parse(
|
|
35
|
+
std::string::const_iterator & it,
|
|
36
|
+
const std::string::const_iterator & end,
|
|
37
|
+
const std::string & healing_marker,
|
|
38
|
+
common_json & out);
|