@fugood/llama.node 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187)
  1. package/CMakeLists.txt +1 -10
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/package.json +6 -4
  17. package/src/LlamaCompletionWorker.cpp +6 -6
  18. package/src/LlamaContext.cpp +7 -9
  19. package/src/common.hpp +2 -1
  20. package/src/llama.cpp/.github/workflows/build.yml +98 -24
  21. package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
  22. package/src/llama.cpp/.github/workflows/docker.yml +43 -34
  23. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
  24. package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
  25. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
  26. package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
  27. package/src/llama.cpp/.github/workflows/server.yml +7 -0
  28. package/src/llama.cpp/CMakeLists.txt +20 -8
  29. package/src/llama.cpp/common/CMakeLists.txt +12 -10
  30. package/src/llama.cpp/common/arg.cpp +2006 -0
  31. package/src/llama.cpp/common/arg.h +77 -0
  32. package/src/llama.cpp/common/common.cpp +496 -1632
  33. package/src/llama.cpp/common/common.h +161 -63
  34. package/src/llama.cpp/common/console.cpp +3 -0
  35. package/src/llama.cpp/common/log.cpp +401 -0
  36. package/src/llama.cpp/common/log.h +66 -698
  37. package/src/llama.cpp/common/ngram-cache.cpp +3 -0
  38. package/src/llama.cpp/common/sampling.cpp +348 -350
  39. package/src/llama.cpp/common/sampling.h +62 -139
  40. package/src/llama.cpp/common/stb_image.h +5990 -6398
  41. package/src/llama.cpp/common/train.cpp +2 -0
  42. package/src/llama.cpp/docs/build.md +36 -1
  43. package/src/llama.cpp/examples/CMakeLists.txt +0 -1
  44. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1 -2
  45. package/src/llama.cpp/examples/batched/batched.cpp +39 -55
  46. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +34 -44
  47. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
  48. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +15 -15
  49. package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
  50. package/src/llama.cpp/examples/embedding/embedding.cpp +143 -87
  51. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +33 -33
  52. package/src/llama.cpp/examples/export-lora/export-lora.cpp +36 -35
  53. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
  54. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +5 -0
  55. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
  56. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
  57. package/src/llama.cpp/examples/gritlm/gritlm.cpp +34 -27
  58. package/src/llama.cpp/examples/imatrix/imatrix.cpp +59 -62
  59. package/src/llama.cpp/examples/infill/infill.cpp +117 -132
  60. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +265 -58
  61. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +29 -22
  62. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  63. package/src/llama.cpp/examples/llava/clip.cpp +685 -150
  64. package/src/llama.cpp/examples/llava/clip.h +11 -2
  65. package/src/llama.cpp/examples/llava/llava-cli.cpp +47 -58
  66. package/src/llama.cpp/examples/llava/llava.cpp +110 -24
  67. package/src/llama.cpp/examples/llava/llava.h +2 -3
  68. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
  69. package/src/llama.cpp/examples/llava/requirements.txt +1 -0
  70. package/src/llama.cpp/examples/lookahead/lookahead.cpp +42 -43
  71. package/src/llama.cpp/examples/lookup/lookup-create.cpp +10 -8
  72. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +23 -22
  73. package/src/llama.cpp/examples/lookup/lookup.cpp +40 -43
  74. package/src/llama.cpp/examples/main/main.cpp +210 -262
  75. package/src/llama.cpp/examples/parallel/parallel.cpp +49 -49
  76. package/src/llama.cpp/examples/passkey/passkey.cpp +42 -50
  77. package/src/llama.cpp/examples/perplexity/perplexity.cpp +187 -200
  78. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  79. package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
  80. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -3
  81. package/src/llama.cpp/examples/retrieval/retrieval.cpp +49 -44
  82. package/src/llama.cpp/examples/rpc/rpc-server.cpp +24 -1
  83. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +32 -35
  84. package/src/llama.cpp/examples/server/CMakeLists.txt +3 -5
  85. package/src/llama.cpp/examples/server/server.cpp +1027 -1073
  86. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
  87. package/src/llama.cpp/examples/server/utils.hpp +107 -105
  88. package/src/llama.cpp/examples/simple/simple.cpp +35 -41
  89. package/src/llama.cpp/examples/speculative/speculative.cpp +129 -103
  90. package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
  91. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
  92. package/src/llama.cpp/examples/tokenize/tokenize.cpp +25 -27
  93. package/src/llama.cpp/ggml/CMakeLists.txt +14 -3
  94. package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
  95. package/src/llama.cpp/ggml/include/ggml-backend.h +145 -60
  96. package/src/llama.cpp/ggml/include/ggml-blas.h +3 -3
  97. package/src/llama.cpp/ggml/include/ggml-cann.h +15 -19
  98. package/src/llama.cpp/ggml/include/ggml-cuda.h +16 -16
  99. package/src/llama.cpp/ggml/include/ggml-metal.h +5 -8
  100. package/src/llama.cpp/ggml/include/ggml-rpc.h +5 -5
  101. package/src/llama.cpp/ggml/include/ggml-sycl.h +8 -8
  102. package/src/llama.cpp/ggml/include/ggml-vulkan.h +7 -7
  103. package/src/llama.cpp/ggml/include/ggml.h +293 -186
  104. package/src/llama.cpp/ggml/src/CMakeLists.txt +86 -44
  105. package/src/llama.cpp/ggml/src/ggml-aarch64.c +2135 -1119
  106. package/src/llama.cpp/ggml/src/ggml-alloc.c +6 -0
  107. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +152 -70
  108. package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +606 -286
  109. package/src/llama.cpp/ggml/src/ggml-blas.cpp +9 -10
  110. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
  111. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
  112. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
  113. package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
  114. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
  115. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
  116. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
  117. package/src/llama.cpp/ggml/src/ggml-cann.cpp +215 -216
  118. package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
  119. package/src/llama.cpp/ggml/src/ggml-cpu-impl.h +614 -0
  120. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  121. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
  122. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  123. package/src/llama.cpp/ggml/src/ggml-impl.h +49 -603
  124. package/src/llama.cpp/ggml/src/ggml-kompute.cpp +4 -24
  125. package/src/llama.cpp/ggml/src/ggml-quants.c +972 -92
  126. package/src/llama.cpp/ggml/src/ggml-quants.h +15 -0
  127. package/src/llama.cpp/ggml/src/ggml-rpc.cpp +116 -66
  128. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
  129. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +11 -0
  130. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +52 -0
  131. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
  132. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
  133. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
  134. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
  135. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
  136. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
  137. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +16 -3
  138. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
  140. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
  141. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1 -1
  142. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +6 -3
  143. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +2 -0
  144. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
  145. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
  146. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
  147. package/src/llama.cpp/ggml/src/ggml-sycl.cpp +97 -169
  148. package/src/llama.cpp/ggml/src/ggml-vulkan.cpp +1508 -1124
  149. package/src/llama.cpp/ggml/src/ggml.c +3001 -1647
  150. package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +192 -0
  151. package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +2 -0
  152. package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +88 -40
  153. package/src/llama.cpp/include/llama.h +241 -264
  154. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
  155. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
  156. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
  157. package/src/llama.cpp/src/llama-grammar.cpp +721 -122
  158. package/src/llama.cpp/src/llama-grammar.h +120 -15
  159. package/src/llama.cpp/src/llama-impl.h +156 -1
  160. package/src/llama.cpp/src/llama-sampling.cpp +1375 -303
  161. package/src/llama.cpp/src/llama-sampling.h +20 -47
  162. package/src/llama.cpp/src/llama-vocab.cpp +343 -120
  163. package/src/llama.cpp/src/llama-vocab.h +33 -17
  164. package/src/llama.cpp/src/llama.cpp +4247 -1525
  165. package/src/llama.cpp/src/unicode-data.cpp +6 -4
  166. package/src/llama.cpp/src/unicode-data.h +4 -4
  167. package/src/llama.cpp/src/unicode.cpp +15 -7
  168. package/src/llama.cpp/tests/CMakeLists.txt +3 -0
  169. package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
  170. package/src/llama.cpp/tests/test-backend-ops.cpp +1592 -289
  171. package/src/llama.cpp/tests/test-barrier.cpp +93 -0
  172. package/src/llama.cpp/tests/test-grad0.cpp +187 -70
  173. package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
  174. package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
  175. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +6 -4
  176. package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
  177. package/src/llama.cpp/tests/test-log.cpp +39 -0
  178. package/src/llama.cpp/tests/test-quantize-fns.cpp +6 -0
  179. package/src/llama.cpp/tests/test-rope.cpp +1 -1
  180. package/src/llama.cpp/tests/test-sampling.cpp +157 -98
  181. package/src/llama.cpp/tests/test-tokenizer-0.cpp +55 -35
  182. package/patches/llama.patch +0 -22
  183. package/src/llama.cpp/.github/workflows/bench.yml +0 -310
  184. package/src/llama.cpp/common/grammar-parser.cpp +0 -536
  185. package/src/llama.cpp/common/grammar-parser.h +0 -29
  186. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
  187. package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275

package/src/llama.cpp/examples/llava/clip.h

@@ -18,14 +18,17 @@
 #    define CLIP_API
 #endif
 
-struct clip_ctx;
-
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 struct clip_ctx;
 
+struct clip_image_size {
+    int width;
+    int height;
+};
+
 struct clip_image_u8_batch {
     struct clip_image_u8 * data;
     size_t size;
@@ -55,6 +58,10 @@ CLIP_API const int32_t * clip_image_grid(const struct clip_ctx * ctx);
 CLIP_API int clip_n_patches    (const struct clip_ctx * ctx);
 CLIP_API int clip_n_mmproj_embd(const struct clip_ctx * ctx);
 
+CLIP_API int clip_uhd_num_image_embeds_col(struct clip_ctx * ctx_clip);
+CLIP_API void clip_add_load_image_size(struct clip_ctx * ctx_clip, struct clip_image_size * load_image_size);
+
+CLIP_API struct clip_image_size * clip_image_size_init();
 CLIP_API struct clip_image_u8  * clip_image_u8_init ();
 CLIP_API struct clip_image_f32 * clip_image_f32_init();
 
@@ -78,6 +85,8 @@ CLIP_API bool clip_image_batch_encode(struct clip_ctx * ctx, int n_threads, cons
 
 CLIP_API bool clip_model_quantize(const char * fname_inp, const char * fname_out, int itype);
 
+CLIP_API int clip_is_minicpmv(const struct clip_ctx * ctx);
+
 #ifdef __cplusplus
 }
 #endif
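
The clip.h hunks above add the public surface used by the new MiniCPM-V path. Below is a minimal, hypothetical sketch of how those entry points compose; it assumes a context obtained from clip_model_load() (already part of clip.h), and the model path and 448x448 size are placeholders rather than values taken from the diff.

// sketch only: wiring up the new clip_image_size / clip_is_minicpmv API
#include "clip.h"
#include <cstdio>

int main(int argc, char ** argv) {
    if (argc < 2) {
        return 1;
    }
    struct clip_ctx * ctx_clip = clip_model_load(argv[1], /*verbosity=*/1);
    if (!ctx_clip) {
        return 1;
    }
    // clip_is_minicpmv() returns 0 for other models; the llava.cpp hunks further down
    // treat a non-zero value as the MiniCPM-V projector version (2 or 3).
    if (clip_is_minicpmv(ctx_clip)) {
        // register the preprocessed slice size before encoding
        struct clip_image_size * load_image_size = clip_image_size_init();
        load_image_size->width  = 448; // placeholder; real code uses the slice dimensions
        load_image_size->height = 448;
        clip_add_load_image_size(ctx_clip, load_image_size);
    }
    printf("mmproj embedding dim: %d\n", clip_n_mmproj_embd(ctx_clip));
    clip_free(ctx_clip);
    return 0;
}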

package/src/llama.cpp/examples/llava/llava-cli.cpp

@@ -1,14 +1,16 @@
-#include "ggml.h"
+#include "arg.h"
+#include "base64.hpp"
 #include "log.h"
 #include "common.h"
+#include "sampling.h"
 #include "clip.h"
 #include "llava.h"
 #include "llama.h"
-
-#include "base64.hpp"
+#include "ggml.h"
 
 #include <cstdio>
 #include <cstdlib>
+#include <cstring>
 #include <vector>
 
 static bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_token> tokens, int n_batch, int * n_past) {
@@ -19,7 +21,7 @@ static bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_toke
             n_eval = n_batch;
         }
         if (llama_decode(ctx_llama, llama_batch_get_one(&tokens[i], n_eval, *n_past, 0))) {
-            LOG_TEE("%s : failed to eval. token %d/%d (batch size %d, n_past %d)\n", __func__, i, N, n_batch, *n_past);
+            LOG_ERR("%s : failed to eval. token %d/%d (batch size %d, n_past %d)\n", __func__, i, N, n_batch, *n_past);
             return false;
         }
         *n_past += n_eval;
@@ -40,11 +42,11 @@ static bool eval_string(struct llama_context * ctx_llama, const char* str, int n
     return true;
 }
 
-static const char * sample(struct llama_sampling_context * ctx_sampling,
+static const char * sample(struct gpt_sampler * smpl,
                            struct llama_context * ctx_llama,
                            int * n_past) {
-    const llama_token id = llama_sampling_sample(ctx_sampling, ctx_llama, NULL);
-    llama_sampling_accept(ctx_sampling, ctx_llama, id, true);
+    const llama_token id = gpt_sampler_sample(smpl, ctx_llama, -1);
+    gpt_sampler_accept(smpl, id, true);
     static std::string ret;
     if (llama_token_is_eog(llama_get_model(ctx_llama), id)) {
         ret = "</s>";
@@ -74,7 +76,7 @@ static llava_image_embed * llava_image_embed_make_with_prompt_base64(struct clip
     size_t img_base64_str_start, img_base64_str_end;
     find_image_tag_in_prompt(prompt, img_base64_str_start, img_base64_str_end);
     if (img_base64_str_start == std::string::npos || img_base64_str_end == std::string::npos) {
-        LOG_TEE("%s: invalid base64 image tag. must be %s<base64 byte string>%s\n", __func__, IMG_BASE64_TAG_BEGIN, IMG_BASE64_TAG_END);
+        LOG_ERR("%s: invalid base64 image tag. must be %s<base64 byte string>%s\n", __func__, IMG_BASE64_TAG_BEGIN, IMG_BASE64_TAG_END);
         return NULL;
     }
 
@@ -88,7 +90,7 @@ static llava_image_embed * llava_image_embed_make_with_prompt_base64(struct clip
 
     auto embed = llava_image_embed_make_with_bytes(ctx_clip, n_threads, img_bytes.data(), img_bytes.size());
     if (!embed) {
-        LOG_TEE("%s: could not load image from base64 string.\n", __func__);
+        LOG_ERR("%s: could not load image from base64 string.\n", __func__);
         return NULL;
     }
 
@@ -112,12 +114,10 @@ struct llava_context {
     struct llama_model * model = NULL;
 };
 
-static void print_usage(int argc, char ** argv, const gpt_params & params) {
-    gpt_params_print_usage(argc, argv, params);
-
-    LOG_TEE("\n example usage:\n");
-    LOG_TEE("\n %s -m <llava-v1.5-7b/ggml-model-q5_k.gguf> --mmproj <llava-v1.5-7b/mmproj-model-f16.gguf> --image <path/to/an/image.jpg> --image <path/to/another/image.jpg> [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]);
-    LOG_TEE("\n note: a lower temperature value like 0.1 is recommended for better quality.\n");
+static void print_usage(int, char ** argv) {
+    LOG("\n example usage:\n");
+    LOG("\n %s -m <llava-v1.5-7b/ggml-model-q5_k.gguf> --mmproj <llava-v1.5-7b/mmproj-model-f16.gguf> --image <path/to/an/image.jpg> --image <path/to/another/image.jpg> [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]);
+    LOG("\n note: a lower temperature value like 0.1 is recommended for better quality.\n");
 }
 
 static struct llava_image_embed * load_image(llava_context * ctx_llava, gpt_params * params, const std::string & fname) {
@@ -127,16 +127,16 @@ static struct llava_image_embed * load_image(llava_context * ctx_llava, gpt_para
     auto prompt = params->prompt;
     if (prompt_contains_image(prompt)) {
         if (!params->image.empty()) {
-            LOG_TEE("using base64 encoded image instead of command line image path\n");
+            LOG_INF("using base64 encoded image instead of command line image path\n");
         }
-        embed = llava_image_embed_make_with_prompt_base64(ctx_llava->ctx_clip, params->n_threads, prompt);
+        embed = llava_image_embed_make_with_prompt_base64(ctx_llava->ctx_clip, params->cpuparams.n_threads, prompt);
         if (!embed) {
-            LOG_TEE("%s: can't load image from prompt\n", __func__);
+            LOG_ERR("%s: can't load image from prompt\n", __func__);
             return NULL;
         }
         params->prompt = remove_image_from_prompt(prompt);
     } else {
-        embed = llava_image_embed_make_with_filename(ctx_llava->ctx_clip, params->n_threads, fname.c_str());
+        embed = llava_image_embed_make_with_filename(ctx_llava->ctx_clip, params->cpuparams.n_threads, fname.c_str());
         if (!embed) {
             fprintf(stderr, "%s: is %s really an image file?\n", __func__, fname.c_str());
             return NULL;
@@ -157,18 +157,18 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
         // new templating mode: Provide the full prompt including system message and use <image> as a placeholder for the image
         system_prompt = prompt.substr(0, image_pos);
         user_prompt = prompt.substr(image_pos + std::string("<image>").length());
-        LOG_TEE("system_prompt: %s\n", system_prompt.c_str());
+        LOG_INF("system_prompt: %s\n", system_prompt.c_str());
         if (params->verbose_prompt) {
             auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, system_prompt, true, true);
             for (int i = 0; i < (int) tmp.size(); i++) {
-                LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
+                LOG_INF("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
             }
         }
-        LOG_TEE("user_prompt: %s\n", user_prompt.c_str());
+        LOG_INF("user_prompt: %s\n", user_prompt.c_str());
         if (params->verbose_prompt) {
             auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
             for (int i = 0; i < (int) tmp.size(); i++) {
-                LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
+                LOG_INF("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
             }
         }
     } else {
@@ -178,7 +178,7 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
         if (params->verbose_prompt) {
             auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
             for (int i = 0; i < (int) tmp.size(); i++) {
-                LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
+                LOG_INF("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
             }
         }
     }
@@ -189,21 +189,21 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
 
     // generate the response
 
-    LOG_TEE("\n");
+    LOG("\n");
 
-    struct llama_sampling_context * ctx_sampling = llama_sampling_init(params->sparams);
-    if (!ctx_sampling) {
-        fprintf(stderr, "%s: failed to initialize sampling subsystem\n", __func__);
+    struct gpt_sampler * smpl = gpt_sampler_init(ctx_llava->model, params->sparams);
+    if (!smpl) {
+        LOG_ERR("%s: failed to initialize sampling subsystem\n", __func__);
         exit(1);
     }
 
     std::string response = "";
     for (int i = 0; i < max_tgt_len; i++) {
-        const char * tmp = sample(ctx_sampling, ctx_llava->ctx_llama, &n_past);
+        const char * tmp = sample(smpl, ctx_llava->ctx_llama, &n_past);
         response += tmp;
         if (strcmp(tmp, "</s>") == 0) break;
         if (strstr(tmp, "###")) break; // Yi-VL behavior
-        printf("%s", tmp);
+        LOG("%s", tmp);
         if (strstr(response.c_str(), "<|im_end|>")) break; // Yi-34B llava-1.6 - for some reason those decode not as the correct token (tokenizer works)
         if (strstr(response.c_str(), "<|im_start|>")) break; // Yi-34B llava-1.6
         if (strstr(response.c_str(), "USER:")) break; // mistral llava-1.6
@@ -211,8 +211,8 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
         fflush(stdout);
     }
 
-    llama_sampling_free(ctx_sampling);
-    printf("\n");
+    gpt_sampler_free(smpl);
+    LOG("\n");
 }
 
 static struct llama_model * llava_init(gpt_params * params) {
@@ -223,7 +223,7 @@ static struct llama_model * llava_init(gpt_params * params) {
 
     llama_model * model = llama_load_model_from_file(params->model.c_str(), model_params);
     if (model == NULL) {
-        LOG_TEE("%s: error: unable to load model\n" , __func__);
+        LOG_ERR("%s: unable to load model\n" , __func__);
         return NULL;
     }
     return model;
@@ -246,11 +246,11 @@ static struct llava_context * llava_init_context(gpt_params * params, llama_mode
     llama_context * ctx_llama = llama_new_context_with_model(model, ctx_params);
 
     if (ctx_llama == NULL) {
-        LOG_TEE("%s: error: failed to create the llama_context\n" , __func__);
+        LOG_ERR("%s: failed to create the llama_context\n" , __func__);
         return NULL;
     }
 
-    auto ctx_llava = (struct llava_context *)malloc(sizeof(llava_context));
+    auto * ctx_llava = (struct llava_context *)malloc(sizeof(llava_context));
 
     ctx_llava->ctx_llama = ctx_llama;
     ctx_llava->ctx_clip = ctx_clip;
@@ -269,65 +269,54 @@ static void llava_free(struct llava_context * ctx_llava) {
     llama_backend_free();
 }
 
-static void llama_log_callback_logTee(ggml_log_level level, const char * text, void * user_data) {
-    (void) level;
-    (void) user_data;
-    LOG_TEE("%s", text);
-}
-
 int main(int argc, char ** argv) {
     ggml_time_init();
 
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        print_usage(argc, argv, params);
+    if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_LLAVA, print_usage)) {
         return 1;
     }
 
-#ifndef LOG_DISABLE_LOGS
-    log_set_target(log_filename_generator("llava", "log"));
-    LOG_TEE("Log start\n");
-    log_dump_cmdline(argc, argv);
-    llama_log_set(llama_log_callback_logTee, nullptr);
-#endif // LOG_DISABLE_LOGS
+    gpt_init();
 
     if (params.mmproj.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
-        print_usage(argc, argv, {});
+        print_usage(argc, argv);
        return 1;
    }
-    auto model = llava_init(&params);
+
+    auto * model = llava_init(&params);
    if (model == NULL) {
        fprintf(stderr, "%s: error: failed to init llava model\n", __func__);
        return 1;
    }
 
    if (prompt_contains_image(params.prompt)) {
-        auto ctx_llava = llava_init_context(&params, model);
+        auto * ctx_llava = llava_init_context(&params, model);
 
-        auto image_embed = load_image(ctx_llava, &params, "");
+        auto * image_embed = load_image(ctx_llava, &params, "");
 
        // process the prompt
        process_prompt(ctx_llava, image_embed, &params, params.prompt);
 
-        llama_print_timings(ctx_llava->ctx_llama);
+        llama_perf_context_print(ctx_llava->ctx_llama);
        llava_image_embed_free(image_embed);
        ctx_llava->model = NULL;
        llava_free(ctx_llava);
    } else {
        for (auto & image : params.image) {
-            auto ctx_llava = llava_init_context(&params, model);
+            auto * ctx_llava = llava_init_context(&params, model);
 
-            auto image_embed = load_image(ctx_llava, &params, image);
+            auto * image_embed = load_image(ctx_llava, &params, image);
            if (!image_embed) {
-                std::cerr << "error: failed to load image " << image << ". Terminating\n\n";
+                LOG_ERR("%s: failed to load image %s. Terminating\n\n", __func__, image.c_str());
                return 1;
            }
 
            // process the prompt
            process_prompt(ctx_llava, image_embed, &params, params.prompt);
 
-            llama_print_timings(ctx_llava->ctx_llama);
+            llama_perf_context_print(ctx_llava->ctx_llama);
            llava_image_embed_free(image_embed);
            ctx_llava->model = NULL;
            llava_free(ctx_llava);
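
The llava-cli.cpp changes above track the common-library rework that ships in this release (common/arg.cpp, common/sampling.cpp, common/log.cpp): llama_sampling_* becomes the gpt_sampler API, argument parsing takes an example id and a usage callback, and output goes through LOG/LOG_INF/LOG_ERR. Below is a minimal sketch of the new sampling sequence, using only functions that appear in the hunks above; the helper name and loop bounds are illustrative, not part of the diff.

#include "common.h"
#include "sampling.h"
#include "llama.h"

// sketch: draw up to n_predict tokens with the reworked gpt_sampler API
static void sample_n_tokens(gpt_params & params, llama_model * model, llama_context * ctx, int n_predict) {
    struct gpt_sampler * smpl = gpt_sampler_init(model, params.sparams); // init now takes the model
    for (int i = 0; i < n_predict; i++) {
        const llama_token id = gpt_sampler_sample(smpl, ctx, -1); // -1: sample from the last set of logits
        gpt_sampler_accept(smpl, id, true);                       // no llama_context argument anymore
        if (llama_token_is_eog(model, id)) {
            break;
        }
        // ... detokenize `id` and feed it back through llama_decode(), as eval_tokens() does above ...
    }
    gpt_sampler_free(smpl);
}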

package/src/llama.cpp/examples/llava/llava.cpp

@@ -1,13 +1,23 @@
 #include "clip.h"
-#include "common.h"
-#include "llama.h"
 #include "llava.h"
-#include "base64.hpp"
 
+#include "llama.h"
+
+#include <algorithm>
+#include <cerrno>
 #include <cstdio>
 #include <cstdlib>
+#include <cstring>
+#include <limits>
 #include <vector>
-#include <numeric>
+
+#define die(msg) do { fputs("error: " msg "\n", stderr); exit(1); } while (0)
+#define die_fmt(fmt, ...) do { fprintf(stderr, "error: " fmt "\n", __VA_ARGS__); exit(1); } while (0)
+
+#define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
+#define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
+#define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
+#define LOG_DBG(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
 
 // RGB uint8 image
 struct clip_image_u8 {
@@ -54,7 +64,7 @@ static std::pair<int, int> select_best_resolution(const std::pair<int, int>& ori
         int downscaled_height = static_cast<int>(original_height * scale);
         int effective_resolution = std::min(downscaled_width * downscaled_height, original_width * original_height);
         int wasted_resolution = (width * height) - effective_resolution;
-        // LOG_TEE("resolution: %d %d, scale: %f, downscaled: %d %d, effective: %d, wasted: %d\n", width, height, scale, downscaled_width, downscaled_height, effective_resolution, wasted_resolution);
+        // LOG_DBG("resolution: %d %d, scale: %f, downscaled: %d %d, effective: %d, wasted: %d\n", width, height, scale, downscaled_width, downscaled_height, effective_resolution, wasted_resolution);
         if (effective_resolution > max_effective_resolution || (effective_resolution == max_effective_resolution && wasted_resolution < min_wasted_resolution)) {
             max_effective_resolution = effective_resolution;
             min_wasted_resolution = wasted_resolution;
@@ -184,7 +194,7 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *>
     // ggml_tensor_printf(flatten,"flatten",__LINE__,false,false);
     ggml_build_forward_expand(gf, flatten);
     ggml_graph_compute_with_ctx(model.ctx, gf, 1);
-    struct ggml_tensor* result = gf->nodes[gf->n_nodes - 1];
+    struct ggml_tensor* result = ggml_graph_node(gf, -1);
 
     memcpy(image_embd_out, image_embd_v[0], clip_embd_nbytes(ctx_clip)); // main image as global context
     // append without newline tokens (default behavior in llava_arch when not using unpad ):
@@ -202,6 +212,33 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *>
     return true;
 }
 
+static clip_image_f32 * only_v2_5_reshape_by_patch(clip_image_f32 * image, int patch_size) {
+    int width = image->nx;
+    int height = image->ny;
+    int num_patches = (height / patch_size) * (width / patch_size);
+    clip_image_f32 * patch = clip_image_f32_init();
+    patch->nx = patch_size * num_patches;
+    patch->ny = patch_size;
+    patch->buf.resize(3 * patch->nx * patch->ny);
+
+    int patch_index = 0;
+
+    for (int i = 0; i < height; i += patch_size) {
+        for (int j = 0; j < width; j += patch_size) {
+            for (int pi = 0; pi < patch_size; ++pi) {
+                for (int pj = 0; pj < patch_size; ++pj) {
+                    int input_index = ((i + pi) * width + (j + pj)) * 3;
+                    int output_index = (pi * patch_size * num_patches + patch_index * patch_size + pj) * 3;
+                    patch->buf[output_index] = image->buf[input_index];
+                    patch->buf[output_index+1] = image->buf[input_index+1];
+                    patch->buf[output_index+2] = image->buf[input_index+2];
+                }
+            }
+            patch_index++;
+        }
+    }
+    return patch;
+}
 
 static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float * image_embd, int * n_img_pos) {
     // std::vector<clip_image_f32*> img_res_v; // format VectN x H x W x RGB (N x 336 x 336 x 3), so interleaved RGB - different to the python implementation which is N x 3 x 336 x 336
@@ -209,7 +246,7 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
     img_res_v.size = 0;
     img_res_v.data = nullptr;
     if (!clip_image_preprocess(ctx_clip, img, &img_res_v)) {
-        LOG_TEE("%s: unable to preprocess image\n", __func__);
+        LOG_ERR("%s: unable to preprocess image\n", __func__);
         delete[] img_res_v.data;
         return false;
     }
@@ -218,17 +255,62 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
 
     const char * mm_patch_merge_type = clip_patch_merge_type(ctx_clip);
 
-    if (strcmp(mm_patch_merge_type, "spatial_unpad") != 0) {
+    if (clip_is_minicpmv(ctx_clip)) {
+        std::vector<float *> image_embd_v;
+        image_embd_v.resize(img_res_v.size);
+        struct clip_image_size * load_image_size = clip_image_size_init();
+        for (size_t i = 0; i < img_res_v.size; i++) {
+            const int64_t t_img_enc_step_start_us = ggml_time_us();
+            image_embd_v[i] = (float *)malloc(clip_embd_nbytes(ctx_clip));
+            int patch_size=14;
+            load_image_size->width = img_res_v.data[i].nx;
+            load_image_size->height = img_res_v.data[i].ny;
+            clip_add_load_image_size(ctx_clip, load_image_size);
+            bool encoded = false;
+            int has_minicpmv_projector = clip_is_minicpmv(ctx_clip);
+            if (has_minicpmv_projector == 2) {
+                encoded = clip_image_encode(ctx_clip, n_threads, only_v2_5_reshape_by_patch(&img_res_v.data[i], patch_size), image_embd_v[i]);
+            }
+            else if (has_minicpmv_projector == 3) {
+                encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[i], image_embd_v[i]);
+            }
+            if (!encoded) {
+                LOG_ERR("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) img_res_v.size);
+                return false;
+            }
+            const int64_t t_img_enc_steop_batch_us = ggml_time_us();
+            LOG_INF("%s: step %d of %d encoded in %8.2f ms\n", __func__, (int)i+1, (int)img_res_v.size, (t_img_enc_steop_batch_us - t_img_enc_step_start_us) / 1000.0);
+        }
+        const int64_t t_img_enc_batch_us = ggml_time_us();
+        LOG_INF("%s: all %d segments encoded in %8.2f ms\n", __func__, (int)img_res_v.size, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0);
+
+        int n_img_pos_out = 0;
+        for (size_t i = 0; i < image_embd_v.size(); i++) {
+            std::memcpy(image_embd + n_img_pos_out * clip_n_mmproj_embd(ctx_clip), image_embd_v[i], clip_embd_nbytes(ctx_clip));
+            n_img_pos_out += clip_n_patches(ctx_clip);
+        }
+        *n_img_pos = n_img_pos_out;
+        for (size_t i = 0; i < image_embd_v.size(); i++) {
+            free(image_embd_v[i]);
+        }
+        image_embd_v.clear();
+        load_image_size->width = img->nx;
+        load_image_size->height = img->ny;
+        clip_add_load_image_size(ctx_clip, load_image_size);
+        LOG_INF("%s: load_image_size %d %d\n", __func__, load_image_size->width, load_image_size->height);
+    }
+    else if (strcmp(mm_patch_merge_type, "spatial_unpad") != 0) {
         // flat / default llava-1.5 type embedding
         *n_img_pos = clip_n_patches(ctx_clip);
         bool encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[0], image_embd); // image_embd shape is 576 x 4096
         delete[] img_res_v.data;
         if (!encoded) {
-            LOG_TEE("Unable to encode image\n");
+            LOG_ERR("Unable to encode image\n");
 
             return false;
         }
-    } else {
+    }
+    else {
         // spatial_unpad llava-1.6 type embedding
         // TODO: CLIP needs batching support - in HF the llm projection is separate after encoding, which might be a solution to quickly get batching working
         std::vector<float *> image_embd_v;
@@ -237,12 +319,12 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
             image_embd_v[i] = (float *)malloc(clip_embd_nbytes(ctx_clip)); // 576 patches * 4096 embeddings * 4 bytes = 9437184
             const bool encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[i], image_embd_v[i]); // image data is in 3x336x336 format and will be converted to 336x336x3 inside
             if (!encoded) {
-                LOG_TEE("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) img_res_v.size);
+                LOG_ERR("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) img_res_v.size);
                 return false;
             }
         }
         const int64_t t_img_enc_batch_us = ggml_time_us();
-        LOG_TEE("%s: %d segments encoded in %8.2f ms\n", __func__, (int)img_res_v.size, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0);
+        LOG_INF("%s: %d segments encoded in %8.2f ms\n", __func__, (int)img_res_v.size, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0);
 
         const int32_t * image_grid = clip_image_grid(ctx_clip);
 
@@ -275,12 +357,12 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
         // clip_image_save_to_bmp(*tmp, "image_feature.bmp");
     }
 
-    LOG_TEE("%s: image embedding created: %d tokens\n", __func__, *n_img_pos);
+    LOG_INF("%s: image embedding created: %d tokens\n", __func__, *n_img_pos);
 
     const int64_t t_img_enc_end_us = ggml_time_us();
     float t_img_enc_ms = (t_img_enc_end_us - t_img_enc_start_us) / 1000.0;
 
-    LOG_TEE("\n%s: image encoded in %8.2f ms by CLIP (%8.2f ms per image patch)\n", __func__, t_img_enc_ms, t_img_enc_ms / *n_img_pos);
+    LOG_INF("\n%s: image encoded in %8.2f ms by CLIP (%8.2f ms per image patch)\n", __func__, t_img_enc_ms, t_img_enc_ms / *n_img_pos);
 
     return true;
 }
@@ -290,22 +372,26 @@ bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx *
     int n_llama_embd = llama_n_embd(llama_get_model(ctx_llama));
     auto n_image_embd = clip_n_mmproj_embd(ctx_clip);
     if (n_image_embd != n_llama_embd) {
-        LOG_TEE("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_image_embd, n_llama_embd);
+        LOG_ERR("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_image_embd, n_llama_embd);
         return false;
     }
     return true;
 }
 
 bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out) {
-    float * image_embd = (float *)malloc(clip_embd_nbytes(ctx_clip)*6); // TODO: base on gridsize/llava model
+    int num_max_patches = 6;
+    if (clip_is_minicpmv(ctx_clip)) {
+        num_max_patches = 10;
+    }
+    float * image_embd = (float *)malloc(clip_embd_nbytes(ctx_clip)*num_max_patches); // TODO: base on gridsize/llava model
     if (!image_embd) {
-        LOG_TEE("Unable to allocate memory for image embeddings\n");
+        LOG_ERR("Unable to allocate memory for image embeddings\n");
         return false;
     }
 
     int n_img_pos;
     if (!encode_image_with_clip(ctx_clip, n_threads, img, image_embd, &n_img_pos)) {
-        LOG_TEE("%s: cannot encode image, aborting\n", __func__);
+        LOG_ERR("%s: cannot encode image, aborting\n", __func__);
         free(image_embd);
         return false;
     }
@@ -325,7 +411,7 @@ bool llava_eval_image_embed(llama_context * ctx_llama, const struct llava_image_
     }
     llama_batch batch = {int32_t(n_eval), nullptr, (image_embed->embed+i*n_embd), nullptr, nullptr, nullptr, nullptr, *n_past, 1, 0, };
     if (llama_decode(ctx_llama, batch)) {
-        LOG_TEE("%s : failed to eval\n", __func__);
+        LOG_ERR("%s : failed to eval\n", __func__);
         return false;
     }
     *n_past += n_eval;
@@ -337,7 +423,7 @@ struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * c
     clip_image_u8 * img = clip_image_u8_init();
     if (!clip_image_load_from_bytes(image_bytes, image_bytes_length, img)) {
         clip_image_u8_free(img);
-        LOG_TEE("%s: can't load image from bytes, is it a valid image?", __func__);
+        LOG_ERR("%s: can't load image from bytes, is it a valid image?", __func__);
         return NULL;
     }
 
@@ -346,7 +432,7 @@ struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * c
     bool image_embed_result = llava_image_embed_make_with_clip_img(ctx_clip, n_threads, img, &image_embed, &n_image_pos);
     if (!image_embed_result) {
         clip_image_u8_free(img);
-        LOG_TEE("%s: coulnd't embed the image\n", __func__);
+        LOG_ERR("%s: coulnd't embed the image\n", __func__);
         return NULL;
     }
 
@@ -360,7 +446,7 @@ struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * c
 static bool load_file_to_bytes(const char* path, unsigned char** bytesOut, long *sizeOut) {
     auto file = fopen(path, "rb");
     if (file == NULL) {
-        LOG_TEE("%s: can't read file %s\n", __func__, path);
+        LOG_ERR("%s: can't read file %s\n", __func__, path);
         return false;
     }
 
@@ -370,7 +456,7 @@ static bool load_file_to_bytes(const char* path, unsigned char** bytesOut, long
 
     auto buffer = (unsigned char *)malloc(fileSize); // Allocate memory to hold the file data
     if (buffer == NULL) {
-        LOG_TEE("%s: failed to alloc %ld bytes for file %s\n", __func__, fileSize, path);
+        LOG_ERR("%s: failed to alloc %ld bytes for file %s\n", __func__, fileSize, path);
         perror("Memory allocation error");
         fclose(file);
         return false;
@@ -395,7 +481,7 @@ struct llava_image_embed * llava_image_embed_make_with_filename(struct clip_ctx
     long image_bytes_length;
     auto loaded = load_file_to_bytes(image_path, &image_bytes, &image_bytes_length);
    if (!loaded) {
-        LOG_TEE("%s: failed to load %s\n", __func__, image_path);
+        LOG_ERR("%s: failed to load %s\n", __func__, image_path);
        return NULL;
    }
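
One sizing consequence of the llava.cpp changes above: llava_image_embed_make_with_clip_img() no longer hard-codes a factor of 6 for the embedding scratch buffer, growing it to 10 when a MiniCPM-V projector is detected, since preprocessing can produce more slices. A small sketch of just that decision, with a hypothetical helper name; clip_embd_nbytes() is the per-slice embedding size already used by the 0.3.0 code.

#include "clip.h"
#include <cstdlib>

// sketch: allocate the image-embedding scratch buffer the way 0.3.2 sizes it
static float * alloc_image_embd(struct clip_ctx * ctx_clip) {
    int num_max_patches = 6;      // llava-1.5/1.6 default, unchanged from 0.3.0
    if (clip_is_minicpmv(ctx_clip)) {
        num_max_patches = 10;     // room for the larger MiniCPM-V slice grid
    }
    return (float *) malloc(clip_embd_nbytes(ctx_clip) * num_max_patches);
}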
 
package/src/llama.cpp/examples/llava/llava.h

@@ -17,12 +17,11 @@
 #    define LLAVA_API
 #endif
 
-struct clip_ctx;
-
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+struct clip_ctx;
 struct llava_image_embed {
     float * embed;
     int n_image_pos;
@@ -37,8 +36,8 @@ LLAVA_API bool llava_image_embed_make_with_clip_img(struct clip_ctx * ctx_clip,
 LLAVA_API struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length);
 /** build an image embed from a path to an image filename */
 LLAVA_API struct llava_image_embed * llava_image_embed_make_with_filename(struct clip_ctx * ctx_clip, int n_threads, const char * image_path);
-LLAVA_API void llava_image_embed_free(struct llava_image_embed * embed);
 /** free an embedding made with llava_image_embed_make_* */
+LLAVA_API void llava_image_embed_free(struct llava_image_embed * embed);
 
 /** write the image represented by embed into the llama context with batch size n_batch, starting at context pos n_past. on completion, n_past points to the next position in the context after the image embed. */
 LLAVA_API bool llava_eval_image_embed(struct llama_context * ctx_llama, const struct llava_image_embed * embed, int n_batch, int * n_past);