@fugood/llama.node 0.3.17 → 0.4.1

This diff shows the content changes between publicly released versions of the package as they appear in their public registry. The information is provided for informational purposes only.
Files changed (193)
  1. package/CMakeLists.txt +3 -1
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +39 -2
  19. package/lib/index.js +132 -1
  20. package/lib/index.ts +203 -3
  21. package/package.json +2 -1
  22. package/src/EmbeddingWorker.cpp +1 -1
  23. package/src/LlamaCompletionWorker.cpp +366 -19
  24. package/src/LlamaCompletionWorker.h +30 -10
  25. package/src/LlamaContext.cpp +213 -5
  26. package/src/LlamaContext.h +12 -0
  27. package/src/common.hpp +15 -0
  28. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +133 -24
  29. package/src/llama.cpp/.github/workflows/build.yml +41 -762
  30. package/src/llama.cpp/.github/workflows/docker.yml +5 -2
  31. package/src/llama.cpp/.github/workflows/release.yml +716 -0
  32. package/src/llama.cpp/.github/workflows/server.yml +12 -12
  33. package/src/llama.cpp/CMakeLists.txt +5 -17
  34. package/src/llama.cpp/cmake/build-info.cmake +8 -2
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
  36. package/src/llama.cpp/common/CMakeLists.txt +31 -3
  37. package/src/llama.cpp/common/arg.cpp +48 -29
  38. package/src/llama.cpp/common/chat.cpp +128 -106
  39. package/src/llama.cpp/common/chat.h +2 -0
  40. package/src/llama.cpp/common/common.cpp +37 -1
  41. package/src/llama.cpp/common/common.h +18 -9
  42. package/src/llama.cpp/common/llguidance.cpp +1 -0
  43. package/src/llama.cpp/common/minja/chat-template.hpp +9 -5
  44. package/src/llama.cpp/common/minja/minja.hpp +69 -36
  45. package/src/llama.cpp/common/regex-partial.cpp +204 -0
  46. package/src/llama.cpp/common/regex-partial.h +56 -0
  47. package/src/llama.cpp/common/sampling.cpp +57 -50
  48. package/src/llama.cpp/examples/CMakeLists.txt +2 -23
  49. package/src/llama.cpp/examples/embedding/embedding.cpp +2 -11
  50. package/src/llama.cpp/examples/parallel/parallel.cpp +86 -14
  51. package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
  52. package/src/llama.cpp/examples/training/finetune.cpp +96 -0
  53. package/src/llama.cpp/ggml/CMakeLists.txt +27 -0
  54. package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
  55. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
  56. package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
  57. package/src/llama.cpp/ggml/include/ggml.h +10 -7
  58. package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -1
  59. package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
  60. package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
  61. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +20 -13
  62. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -2
  63. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +306 -6
  64. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +4 -13
  65. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +29 -16
  66. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
  67. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
  68. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
  69. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +501 -0
  70. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +0 -13
  71. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +0 -6
  72. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
  73. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +36 -11
  74. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -2
  75. package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
  76. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
  77. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +41 -27
  78. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
  79. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +9 -8
  80. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +121 -232
  81. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +7 -15
  82. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
  83. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
  84. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  85. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
  86. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -23
  87. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  88. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +338 -166
  89. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
  90. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
  91. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
  92. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -70
  93. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +657 -193
  94. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +20 -0
  95. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +123 -29
  96. package/src/llama.cpp/ggml/src/ggml.c +29 -20
  97. package/src/llama.cpp/ggml/src/gguf.cpp +33 -33
  98. package/src/llama.cpp/include/llama.h +52 -11
  99. package/src/llama.cpp/requirements/requirements-all.txt +3 -3
  100. package/src/llama.cpp/scripts/xxd.cmake +1 -1
  101. package/src/llama.cpp/src/CMakeLists.txt +1 -0
  102. package/src/llama.cpp/src/llama-adapter.cpp +6 -0
  103. package/src/llama.cpp/src/llama-arch.cpp +3 -0
  104. package/src/llama.cpp/src/llama-batch.cpp +5 -1
  105. package/src/llama.cpp/src/llama-batch.h +2 -1
  106. package/src/llama.cpp/src/llama-chat.cpp +17 -7
  107. package/src/llama.cpp/src/llama-chat.h +1 -0
  108. package/src/llama.cpp/src/llama-context.cpp +389 -501
  109. package/src/llama.cpp/src/llama-context.h +44 -32
  110. package/src/llama.cpp/src/llama-cparams.h +1 -0
  111. package/src/llama.cpp/src/llama-graph.cpp +20 -38
  112. package/src/llama.cpp/src/llama-graph.h +12 -8
  113. package/src/llama.cpp/src/llama-kv-cache.cpp +1503 -389
  114. package/src/llama.cpp/src/llama-kv-cache.h +271 -85
  115. package/src/llama.cpp/src/llama-memory.h +11 -1
  116. package/src/llama.cpp/src/llama-model-loader.cpp +24 -15
  117. package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
  118. package/src/llama.cpp/src/llama-model-saver.h +37 -0
  119. package/src/llama.cpp/src/llama-model.cpp +316 -69
  120. package/src/llama.cpp/src/llama-model.h +8 -1
  121. package/src/llama.cpp/src/llama-quant.cpp +15 -13
  122. package/src/llama.cpp/src/llama-sampling.cpp +18 -6
  123. package/src/llama.cpp/src/llama-vocab.cpp +42 -4
  124. package/src/llama.cpp/src/llama-vocab.h +6 -0
  125. package/src/llama.cpp/src/llama.cpp +14 -0
  126. package/src/llama.cpp/tests/CMakeLists.txt +10 -2
  127. package/src/llama.cpp/tests/test-backend-ops.cpp +107 -47
  128. package/src/llama.cpp/tests/test-chat-template.cpp +10 -11
  129. package/src/llama.cpp/tests/test-chat.cpp +3 -1
  130. package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
  131. package/src/llama.cpp/tests/test-opt.cpp +33 -21
  132. package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
  133. package/src/llama.cpp/tests/test-sampling.cpp +1 -1
  134. package/src/llama.cpp/tools/CMakeLists.txt +39 -0
  135. package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +2 -2
  136. package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
  137. package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +495 -348
  138. package/src/llama.cpp/{examples → tools}/main/main.cpp +6 -9
  139. package/src/llama.cpp/{examples/llava → tools/mtmd}/CMakeLists.txt +1 -35
  140. package/src/llama.cpp/{examples/llava → tools/mtmd}/clip-impl.h +25 -5
  141. package/src/llama.cpp/{examples/llava → tools/mtmd}/clip.cpp +1440 -1349
  142. package/src/llama.cpp/tools/mtmd/clip.h +99 -0
  143. package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd-cli.cpp +70 -44
  144. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
  145. package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd.cpp +251 -281
  146. package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
  147. package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +4 -2
  148. package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +13 -76
  149. package/src/llama.cpp/{examples → tools}/rpc/rpc-server.cpp +70 -74
  150. package/src/llama.cpp/{examples → tools}/run/run.cpp +18 -4
  151. package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
  152. package/src/llama.cpp/{examples → tools}/server/server.cpp +291 -76
  153. package/src/llama.cpp/{examples → tools}/server/utils.hpp +377 -5
  154. package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
  155. package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
  156. package/src/llama.cpp/examples/infill/infill.cpp +0 -590
  157. package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
  158. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
  159. package/src/llama.cpp/examples/llava/clip.h +0 -135
  160. package/src/llama.cpp/examples/llava/llava.cpp +0 -586
  161. package/src/llama.cpp/examples/llava/llava.h +0 -49
  162. package/src/llama.cpp/examples/llava/mtmd.h +0 -168
  163. package/src/llama.cpp/examples/llava/qwen2vl-test.cpp +0 -636
  164. /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
  165. /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
  166. /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
  167. /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
  168. /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
  169. /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
  170. /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
  171. /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
  172. /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
  173. /package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +0 -0
  174. /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
  175. /package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +0 -0
  176. /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
  177. /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
  178. /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
  179. /package/src/llama.cpp/{examples/llava → tools/mtmd}/deprecation-warning.cpp +0 -0
  180. /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
  181. /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
  182. /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
  183. /package/src/llama.cpp/{examples → tools}/rpc/CMakeLists.txt +0 -0
  184. /package/src/llama.cpp/{examples → tools}/run/CMakeLists.txt +0 -0
  185. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
  186. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
  187. /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
  188. /package/src/llama.cpp/{examples → tools}/server/httplib.h +0 -0
  189. /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
  190. /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
  191. /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
  192. /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
  193. /package/src/llama.cpp/{examples → tools}/tts/tts.cpp +0 -0
package/CMakeLists.txt CHANGED
@@ -73,11 +73,13 @@ set(LLAMA_CURL OFF CACHE BOOL "Build curl")
 
 set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries")
 add_subdirectory("src/llama.cpp")
+add_subdirectory("src/llama.cpp/tools/mtmd")
 
 include_directories(
   ${CMAKE_JS_INC}
   "src/llama.cpp"
   "src/llama.cpp/src"
+  "src/tools/mtmd"
 )
 
 file(
@@ -104,7 +106,7 @@ file(
 
 add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC})
 set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node")
-target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB} llama ggml common)
+target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB} llama ggml common mtmd ${CMAKE_THREAD_LIBS_INIT})
 
 add_custom_target(copy_assets ALL DEPENDS ${PROJECT_NAME})
 
package/bin/** CHANGED (prebuilt llama-node.node and node.lib binaries; binary contents not shown)
package/lib/binding.ts CHANGED
@@ -1,8 +1,17 @@
 import * as path from 'path'
 
+export type MessagePart = {
+  type: string,
+  text?: string,
+  image_url?: {
+    url?: string
+  }
+}
+
 export type ChatMessage = {
   role: string
-  content: string
+  content?: string | MessagePart[]
 }
 
 export type LlamaModelOptions = {
@@ -93,6 +102,13 @@ export type LlamaCompletionOptions = {
   grammar_lazy?: boolean
   grammar_triggers?: { type: number; word: string; at_start: boolean }[]
   preserved_tokens?: string[]
+  /**
+   * Path(s) to image file(s) to process before generating text.
+   * When provided, the image(s) will be processed and added to the context.
+   * Requires multimodal support to be enabled via initMultimodal.
+   * Supports both file paths and base64 data URLs.
+   */
+  image_paths?: string | string[]
 }
 
 export type LlamaCompletionResult = {
@@ -154,9 +170,30 @@ export interface LlamaContext {
   applyLoraAdapters(adapters: { path: string; scaled: number }[]): void
   removeLoraAdapters(adapters: { path: string }[]): void
   getLoadedLoraAdapters(): { path: string; scaled: number }[]
+  /**
+   * Initialize multimodal support with a mmproj file
+   * @param mmproj_path Path to the multimodal projector file
+   * @returns Promise resolving to true if initialization was successful
+   */
+  initMultimodal(options: { path: string; use_gpu?: boolean }): Promise<boolean>
+
+  /**
+   * Check if multimodal support is enabled
+   * @returns Promise resolving to true if multimodal is enabled
+   */
+  isMultimodalEnabled(): Promise<boolean>
+
+  /**
+   * Release multimodal support
+   */
+  releaseMultimodal(): Promise<void>
+
   // static
   loadModelInfo(path: string, skip: string[]): Promise<Object>
-  toggleNativeLog(enable: boolean, callback: (level: string, text: string) => void): void
+  toggleNativeLog(
+    enable: boolean,
+    callback: (level: string, text: string) => void,
+  ): void
 }
 
 export interface Module {
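
A minimal usage sketch of the new multimodal surface described by these typings. The `model` option name, the GGUF/mmproj/image paths, and the shape of the logged result are illustrative assumptions, not taken from this diff; only `loadModel`, `initMultimodal`, `completion`, `releaseMultimodal`, `release`, and the message/part shapes come from the typings above.

```ts
import { loadModel } from '@fugood/llama.node'
import type { ChatMessage } from '@fugood/llama.node'

async function describeImage() {
  // Hypothetical paths; substitute your own GGUF model and projector files.
  const ctx = await loadModel({ model: './models/vision-model.gguf' })

  // New in 0.4.x: enable multimodal support via a mmproj file.
  const ok = await ctx.initMultimodal({ path: './models/mmproj.gguf', use_gpu: true })
  if (!ok) throw new Error('failed to initialize multimodal support')

  // Message content may now be an array of parts, including image_url parts
  // (file paths or base64 data URLs).
  const messages: ChatMessage[] = [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'Describe this image.' },
        { type: 'image_url', image_url: { url: './images/example.png' } },
      ],
    },
  ]

  const result = await ctx.completion({ messages })
  console.log(result)

  await ctx.releaseMultimodal()
  await ctx.release()
}
```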
package/lib/index.js CHANGED
@@ -51,12 +51,143 @@ function addNativeLogListener(listener) {
         },
     };
 }
+const getJsonSchema = (responseFormat) => {
+    var _a;
+    if ((responseFormat === null || responseFormat === void 0 ? void 0 : responseFormat.type) === 'json_schema') {
+        return (_a = responseFormat.json_schema) === null || _a === void 0 ? void 0 : _a.schema;
+    }
+    if ((responseFormat === null || responseFormat === void 0 ? void 0 : responseFormat.type) === 'json_object') {
+        return responseFormat.schema || {};
+    }
+    return null;
+};
+class LlamaContextWrapper {
+    constructor(nativeCtx) {
+        this.ctx = nativeCtx;
+    }
+    getSystemInfo() {
+        return this.ctx.getSystemInfo();
+    }
+    getModelInfo() {
+        return this.ctx.getModelInfo();
+    }
+    isJinjaSupported() {
+        const { minja } = this.ctx.getModelInfo().chatTemplates;
+        return !!(minja === null || minja === void 0 ? void 0 : minja.toolUse) || !!(minja === null || minja === void 0 ? void 0 : minja.default);
+    }
+    isLlamaChatSupported() {
+        return !!this.ctx.getModelInfo().chatTemplates.llamaChat;
+    }
+    _formatImageChat(messages) {
+        if (!messages)
+            return {
+                messages,
+                has_image: false,
+            };
+        const imagePaths = [];
+        return {
+            messages: messages.map((msg) => {
+                if (Array.isArray(msg.content)) {
+                    const content = msg.content.map((part) => {
+                        var _a;
+                        // Handle multimodal content
+                        if (part.type === 'image_url') {
+                            let path = ((_a = part.image_url) === null || _a === void 0 ? void 0 : _a.url) || '';
+                            imagePaths.push(path);
+                            return {
+                                type: 'text',
+                                text: '<__image__>',
+                            };
+                        }
+                        return part;
+                    });
+                    return Object.assign(Object.assign({}, msg), { content });
+                }
+                return msg;
+            }),
+            has_image: imagePaths.length > 0,
+            image_paths: imagePaths,
+        };
+    }
+    getFormattedChat(messages, template, params) {
+        const { messages: chat, has_image, image_paths, } = this._formatImageChat(messages);
+        const useJinja = this.isJinjaSupported() && (params === null || params === void 0 ? void 0 : params.jinja);
+        let tmpl = this.isLlamaChatSupported() || useJinja ? undefined : 'chatml';
+        if (template)
+            tmpl = template; // Force replace if provided
+        const jsonSchema = getJsonSchema(params === null || params === void 0 ? void 0 : params.response_format);
+        const result = this.ctx.getFormattedChat(chat, tmpl, {
+            jinja: useJinja,
+            json_schema: jsonSchema,
+            tools: params === null || params === void 0 ? void 0 : params.tools,
+            parallel_tool_calls: params === null || params === void 0 ? void 0 : params.parallel_tool_calls,
+            tool_choice: params === null || params === void 0 ? void 0 : params.tool_choice,
+        });
+        if (!useJinja) {
+            return {
+                type: 'llama-chat',
+                prompt: result,
+                has_image,
+                image_paths,
+            };
+        }
+        const jinjaResult = result;
+        jinjaResult.type = 'jinja';
+        jinjaResult.has_image = has_image;
+        jinjaResult.image_paths = image_paths;
+        return jinjaResult;
+    }
+    completion(options, callback) {
+        const { messages, image_paths = options.image_paths } = this._formatImageChat(options.messages);
+        return this.ctx.completion(Object.assign(Object.assign({}, options), { messages, image_paths: options.image_paths || image_paths }), callback || (() => { }));
+    }
+    stopCompletion() {
+        return this.ctx.stopCompletion();
+    }
+    tokenize(text) {
+        return this.ctx.tokenize(text);
+    }
+    detokenize(tokens) {
+        return this.ctx.detokenize(tokens);
+    }
+    embedding(text) {
+        return this.ctx.embedding(text);
+    }
+    saveSession(path) {
+        return this.ctx.saveSession(path);
+    }
+    loadSession(path) {
+        return this.ctx.loadSession(path);
+    }
+    release() {
+        return this.ctx.release();
+    }
+    applyLoraAdapters(adapters) {
+        return this.ctx.applyLoraAdapters(adapters);
+    }
+    removeLoraAdapters(adapters) {
+        return this.ctx.removeLoraAdapters(adapters);
+    }
+    getLoadedLoraAdapters() {
+        return this.ctx.getLoadedLoraAdapters();
+    }
+    initMultimodal(options) {
+        return this.ctx.initMultimodal(options);
+    }
+    isMultimodalEnabled() {
+        return this.ctx.isMultimodalEnabled();
+    }
+    releaseMultimodal() {
+        return this.ctx.releaseMultimodal();
+    }
+}
 const loadModel = (options) => __awaiter(void 0, void 0, void 0, function* () {
     var _a, _b;
     const variant = (_a = options.lib_variant) !== null && _a !== void 0 ? _a : 'default';
     (_b = mods[variant]) !== null && _b !== void 0 ? _b : (mods[variant] = yield (0, binding_1.loadModule)(options.lib_variant));
     refreshNativeLogSetup();
-    return new mods[variant].LlamaContext(options);
+    const nativeCtx = new mods[variant].LlamaContext(options);
+    return new LlamaContextWrapper(nativeCtx);
 });
 exports.loadModel = loadModel;
 exports.initLlama = binding_1.loadModule;
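
The wrapper above passes the completion callback and `stopCompletion` straight through to the native context, so streaming with an early stop can be driven as in the rough sketch below. The model path is hypothetical, the early-stop threshold is arbitrary, and the shape of the streamed token object is not shown in this diff, so it is only counted here.

```ts
import { loadModel } from '@fugood/llama.node'

async function streamWithEarlyStop() {
  // Hypothetical model path.
  const ctx = await loadModel({ model: './models/model.gguf' })

  let tokenCount = 0
  const result = await ctx.completion(
    { messages: [{ role: 'user', content: 'Tell me a short story.' }] },
    () => {
      tokenCount += 1
      // Ask the native side to stop generating after a handful of tokens.
      if (tokenCount >= 16) ctx.stopCompletion()
    },
  )

  console.log(`received ${tokenCount} streamed tokens`, result)
  await ctx.release()
}
```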
package/lib/index.ts CHANGED
@@ -1,5 +1,16 @@
 import { loadModule, LlamaModelOptions } from './binding'
-import type { Module, LlamaContext, LibVariant } from './binding'
+import type {
+  Module,
+  LlamaContext,
+  LibVariant,
+  ChatMessage,
+  LlamaCompletionOptions,
+  LlamaCompletionToken,
+  LlamaCompletionResult,
+  TokenizeResult,
+  EmbeddingResult,
+  CompletionResponseFormat,
+} from './binding'
 
 export * from './binding'
 
@@ -39,13 +50,202 @@ export function addNativeLogListener(
   }
 }
 
+const getJsonSchema = (responseFormat?: CompletionResponseFormat) => {
+  if (responseFormat?.type === 'json_schema') {
+    return responseFormat.json_schema?.schema
+  }
+  if (responseFormat?.type === 'json_object') {
+    return responseFormat.schema || {}
+  }
+  return null
+}
+
+class LlamaContextWrapper {
+  ctx: any
+
+  constructor(nativeCtx: any) {
+    this.ctx = nativeCtx
+  }
+
+  getSystemInfo(): string {
+    return this.ctx.getSystemInfo()
+  }
+
+  getModelInfo(): object {
+    return this.ctx.getModelInfo()
+  }
+
+  isJinjaSupported(): boolean {
+    const { minja } = this.ctx.getModelInfo().chatTemplates
+    return !!minja?.toolUse || !!minja?.default
+  }
+
+  isLlamaChatSupported(): boolean {
+    return !!this.ctx.getModelInfo().chatTemplates.llamaChat
+  }
+
+  _formatImageChat(messages: ChatMessage[] | undefined): {
+    messages: ChatMessage[] | undefined
+    has_image: boolean
+    image_paths?: string[]
+  } {
+    if (!messages)
+      return {
+        messages,
+        has_image: false,
+      }
+    const imagePaths: string[] = []
+    return {
+      messages: messages.map((msg) => {
+        if (Array.isArray(msg.content)) {
+          const content = msg.content.map((part) => {
+            // Handle multimodal content
+            if (part.type === 'image_url') {
+              let path = part.image_url?.url || ''
+              imagePaths.push(path)
+              return {
+                type: 'text',
+                text: '<__image__>',
+              }
+            }
+            return part
+          })
+
+          return {
+            ...msg,
+            content,
+          }
+        }
+        return msg
+      }),
+      has_image: imagePaths.length > 0,
+      image_paths: imagePaths,
+    }
+  }
+
+  getFormattedChat(
+    messages: ChatMessage[],
+    template?: string,
+    params?: {
+      jinja?: boolean
+      response_format?: CompletionResponseFormat
+      tools?: object
+      parallel_tool_calls?: object
+      tool_choice?: string
+    },
+  ): object {
+    const {
+      messages: chat,
+      has_image,
+      image_paths,
+    } = this._formatImageChat(messages)
+
+    const useJinja = this.isJinjaSupported() && params?.jinja
+    let tmpl = this.isLlamaChatSupported() || useJinja ? undefined : 'chatml'
+    if (template) tmpl = template // Force replace if provided
+    const jsonSchema = getJsonSchema(params?.response_format)
+
+    const result = this.ctx.getFormattedChat(chat, tmpl, {
+      jinja: useJinja,
+      json_schema: jsonSchema,
+      tools: params?.tools,
+      parallel_tool_calls: params?.parallel_tool_calls,
+      tool_choice: params?.tool_choice,
+    })
+
+    if (!useJinja) {
+      return {
+        type: 'llama-chat',
+        prompt: result as string,
+        has_image,
+        image_paths,
+      }
+    }
+    const jinjaResult = result
+    jinjaResult.type = 'jinja'
+    jinjaResult.has_image = has_image
+    jinjaResult.image_paths = image_paths
+    return jinjaResult
+  }
+
+  completion(
+    options: LlamaCompletionOptions,
+    callback?: (token: LlamaCompletionToken) => void,
+  ): Promise<LlamaCompletionResult> {
+    const { messages, image_paths = options.image_paths } =
+      this._formatImageChat(options.messages)
+    return this.ctx.completion({
+      ...options,
+      messages,
+      image_paths: options.image_paths || image_paths,
+    }, callback || (() => {}))
+  }
+
+  stopCompletion(): void {
+    return this.ctx.stopCompletion()
+  }
+
+  tokenize(text: string): Promise<TokenizeResult> {
+    return this.ctx.tokenize(text)
+  }
+
+  detokenize(tokens: number[]): Promise<string> {
+    return this.ctx.detokenize(tokens)
+  }
+
+  embedding(text: string): Promise<EmbeddingResult> {
+    return this.ctx.embedding(text)
+  }
+
+  saveSession(path: string): Promise<void> {
+    return this.ctx.saveSession(path)
+  }
+
+  loadSession(path: string): Promise<void> {
+    return this.ctx.loadSession(path)
+  }
+
+  release(): Promise<void> {
+    return this.ctx.release()
+  }
+
+  applyLoraAdapters(adapters: { path: string; scaled: number }[]): void {
+    return this.ctx.applyLoraAdapters(adapters)
+  }
+
+  removeLoraAdapters(adapters: { path: string }[]): void {
+    return this.ctx.removeLoraAdapters(adapters)
+  }
+
+  getLoadedLoraAdapters(): { path: string; scaled: number }[] {
+    return this.ctx.getLoadedLoraAdapters()
+  }
+
+  initMultimodal(options: {
+    path: string
+    use_gpu?: boolean
+  }): Promise<boolean> {
+    return this.ctx.initMultimodal(options)
+  }
+
+  isMultimodalEnabled(): Promise<boolean> {
+    return this.ctx.isMultimodalEnabled()
+  }
+
+  releaseMultimodal(): Promise<void> {
+    return this.ctx.releaseMultimodal()
+  }
+}
+
 export const loadModel = async (
   options: LlamaModelOptionsExtended,
-): Promise<LlamaContext> => {
+): Promise<LlamaContextWrapper> => {
   const variant = options.lib_variant ?? 'default'
   mods[variant] ??= await loadModule(options.lib_variant)
   refreshNativeLogSetup()
-  return new mods[variant].LlamaContext(options)
+
+  const nativeCtx = new mods[variant].LlamaContext(options)
+  return new LlamaContextWrapper(nativeCtx)
 }
 
 export const initLlama = loadModule
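
A short sketch of the wrapper's chat-formatting entry point as typed above. Whether the jinja branch is taken depends on the loaded model's chat templates; the model path, the `model` option name, and the `response_format` value are illustrative assumptions rather than details taken from this diff.

```ts
import { loadModel } from '@fugood/llama.node'

async function inspectFormatting() {
  // Hypothetical model path.
  const ctx = await loadModel({ model: './models/chat-model.gguf' })

  // Returns either a 'llama-chat' prompt object or a 'jinja' result object,
  // with has_image/image_paths attached by the wrapper's _formatImageChat.
  const formatted = ctx.getFormattedChat(
    [
      { role: 'system', content: 'You are a helpful assistant.' },
      { role: 'user', content: 'Summarize llama.cpp in one sentence.' },
    ],
    undefined,
    { jinja: true, response_format: { type: 'json_object' } },
  )
  console.log(formatted)

  await ctx.release()
}
```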
package/package.json CHANGED
@@ -1,10 +1,11 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.3.17",
+  "version": "0.4.1",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
+    "pretest": "node scripts/download-test-models.js",
     "test": "jest",
     "build": "tsc",
     "prepack": "yarn build",
package/src/EmbeddingWorker.cpp CHANGED
@@ -6,7 +6,7 @@ EmbeddingWorker::EmbeddingWorker(const Napi::CallbackInfo &info,
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text), _params(params) {}
 
 void EmbeddingWorker::Execute() {
-  llama_kv_cache_clear(_sess->context());
+  llama_kv_self_clear(_sess->context());
   auto tokens = ::common_tokenize(_sess->context(), _text, true);
   // add SEP if not present
   auto vocab = llama_model_get_vocab(_sess->model());