@fugood/llama.node 0.4.6 → 0.5.0

This diff shows the content of publicly released versions of the package as they appear in their public registries, and is provided for informational purposes only.
Files changed (89)
  1. package/bin/darwin/arm64/llama-node.node +0 -0
  2. package/bin/darwin/x64/llama-node.node +0 -0
  3. package/bin/linux/arm64/llama-node.node +0 -0
  4. package/bin/linux/x64/llama-node.node +0 -0
  5. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  6. package/bin/linux-cuda/x64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  9. package/bin/win32/arm64/llama-node.node +0 -0
  10. package/bin/win32/arm64/node.lib +0 -0
  11. package/bin/win32/x64/llama-node.node +0 -0
  12. package/bin/win32/x64/node.lib +0 -0
  13. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  14. package/bin/win32-vulkan/arm64/node.lib +0 -0
  15. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/x64/node.lib +0 -0
  17. package/lib/binding.ts +22 -4
  18. package/lib/index.js +42 -18
  19. package/lib/index.ts +57 -23
  20. package/package.json +1 -1
  21. package/src/LlamaCompletionWorker.cpp +22 -381
  22. package/src/LlamaCompletionWorker.h +2 -4
  23. package/src/LlamaContext.cpp +40 -100
  24. package/src/LlamaContext.h +1 -0
  25. package/src/TokenizeWorker.cpp +33 -4
  26. package/src/TokenizeWorker.h +2 -5
  27. package/src/common.hpp +389 -0
  28. package/src/llama.cpp/.github/workflows/build.yml +2 -2
  29. package/src/llama.cpp/.github/workflows/release.yml +152 -129
  30. package/src/llama.cpp/.github/workflows/winget.yml +42 -0
  31. package/src/llama.cpp/common/arg.cpp +14 -13
  32. package/src/llama.cpp/common/common.cpp +4 -75
  33. package/src/llama.cpp/common/common.h +7 -12
  34. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -13
  35. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -11
  36. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -9
  37. package/src/llama.cpp/examples/retrieval/retrieval.cpp +6 -6
  38. package/src/llama.cpp/examples/simple/simple.cpp +1 -1
  39. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
  40. package/src/llama.cpp/examples/sycl/run-llama2.sh +4 -4
  41. package/src/llama.cpp/examples/sycl/run-llama3.sh +28 -0
  42. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
  43. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +9 -0
  44. package/src/llama.cpp/ggml/include/ggml-opt.h +2 -0
  45. package/src/llama.cpp/ggml/include/ggml.h +11 -0
  46. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +274 -0
  47. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +27 -0
  48. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +18 -2
  49. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1 -0
  50. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +107 -0
  51. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +16 -0
  52. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
  53. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -155
  54. package/src/llama.cpp/ggml/src/ggml-opt.cpp +5 -0
  55. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +43 -12
  56. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +171 -112
  57. package/src/llama.cpp/ggml/src/ggml.c +64 -18
  58. package/src/llama.cpp/include/llama.h +24 -124
  59. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
  60. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
  61. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  62. package/src/llama.cpp/src/llama-batch.cpp +3 -1
  63. package/src/llama.cpp/src/llama-context.cpp +60 -110
  64. package/src/llama.cpp/src/llama-graph.cpp +137 -233
  65. package/src/llama.cpp/src/llama-graph.h +49 -7
  66. package/src/llama.cpp/src/llama-hparams.cpp +17 -1
  67. package/src/llama.cpp/src/llama-hparams.h +34 -5
  68. package/src/llama.cpp/src/llama-kv-cache.cpp +654 -321
  69. package/src/llama.cpp/src/llama-kv-cache.h +201 -85
  70. package/src/llama.cpp/src/llama-memory.h +3 -2
  71. package/src/llama.cpp/src/llama-model.cpp +273 -94
  72. package/src/llama.cpp/src/llama-model.h +4 -1
  73. package/src/llama.cpp/tests/test-arg-parser.cpp +1 -1
  74. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +1 -0
  75. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +13 -2
  76. package/src/llama.cpp/tools/mtmd/clip-impl.h +108 -11
  77. package/src/llama.cpp/tools/mtmd/clip.cpp +466 -88
  78. package/src/llama.cpp/tools/mtmd/clip.h +6 -4
  79. package/src/llama.cpp/tools/mtmd/miniaudio.h +93468 -0
  80. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +855 -0
  81. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +62 -0
  82. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +21 -14
  83. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +36 -49
  84. package/src/llama.cpp/tools/mtmd/mtmd.cpp +362 -98
  85. package/src/llama.cpp/tools/mtmd/mtmd.h +52 -21
  86. package/src/llama.cpp/tools/run/run.cpp +2 -2
  87. package/src/llama.cpp/tools/server/server.cpp +158 -47
  88. package/src/llama.cpp/tools/server/utils.hpp +71 -43
  89. package/src/llama.cpp/tools/tts/tts.cpp +4 -2
package/lib/binding.ts CHANGED
@@ -6,6 +6,11 @@ export type MessagePart = {
   text?: string,
   image_url?: {
     url?: string
+  },
+  input_audio?: {
+    format: string
+    data?: string
+    url?: string
   }
 }
 
@@ -103,12 +108,12 @@ export type LlamaCompletionOptions = {
   grammar_triggers?: { type: number; word: string; at_start: boolean }[]
   preserved_tokens?: string[]
   /**
-   * Path(s) to image file(s) to process before generating text.
-   * When provided, the image(s) will be processed and added to the context.
+   * Path(s) to media file(s) to process before generating text.
+   * When provided, the media will be processed and added to the context.
    * Requires multimodal support to be enabled via initMultimodal.
    * Supports both file paths and base64 data URLs.
    */
-  image_paths?: string | string[]
+  media_paths?: string | string[]
 }
 
 export type LlamaCompletionResult = {
@@ -137,6 +142,10 @@ export type LlamaCompletionToken = {
 
 export type TokenizeResult = {
   tokens: Int32Array
+  has_media: boolean
+  bitmap_hashes: string[]
+  chunk_pos: number[]
+  chunk_pos_media: number[]
 }
 
 export type EmbeddingResult = {
@@ -163,7 +172,7 @@ export interface LlamaContext {
     callback?: (token: LlamaCompletionToken) => void,
   ): Promise<LlamaCompletionResult>
   stopCompletion(): void
-  tokenize(text: string): Promise<TokenizeResult>
+  tokenize(text: string, media_paths?: string[]): Promise<TokenizeResult>
   detokenize(tokens: number[]): Promise<string>
   embedding(text: string): Promise<EmbeddingResult>
   saveSession(path: string): Promise<void>
@@ -185,6 +194,15 @@ export interface LlamaContext {
    */
   isMultimodalEnabled(): Promise<boolean>
 
+  /**
+   * Get multimodal support capabilities
+   * @returns Promise resolving to an object with vision and audio support
+   */
+  getMultimodalSupport(): Promise<{
+    vision: boolean
+    audio: boolean
+  }>
+
   /**
    * Release multimodal support
    */
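
A minimal sketch of how the new message shape is consumed, assuming a context created via loadModel with multimodal support already enabled through initMultimodal (whose options are not shown in this diff); the model and media paths below are placeholders:

import { loadModel } from '@fugood/llama.node'

const context = await loadModel({ model: '/path/to/model.gguf' }) // placeholder path
// initMultimodal(...) must be called first; its options are outside this diff.

// image_url and input_audio parts are rewritten to the media marker internally,
// and their paths are collected into media_paths for the native completion call.
const result = await context.completion({
  messages: [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'Describe the image, then transcribe the audio.' },
        { type: 'image_url', image_url: { url: '/path/to/photo.jpg' } },
        // New in 0.5.0: only 'wav' and 'mp3' are accepted, via a file:// url or base64 data
        { type: 'input_audio', input_audio: { format: 'wav', url: 'file:///path/to/clip.wav' } },
      ],
    },
  ],
})
console.log(result.text)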
package/lib/index.js CHANGED
@@ -23,10 +23,11 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
     });
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.loadLlamaModelInfo = exports.initLlama = exports.loadModel = exports.toggleNativeLog = void 0;
+exports.loadLlamaModelInfo = exports.initLlama = exports.loadModel = exports.toggleNativeLog = exports.MTMD_DEFAULT_MEDIA_MARKER = void 0;
 exports.addNativeLogListener = addNativeLogListener;
 const binding_1 = require("./binding");
 __exportStar(require("./binding"), exports);
+exports.MTMD_DEFAULT_MEDIA_MARKER = '<__media__>';
 const mods = {};
 const logListeners = [];
 const logCallback = (level, text) => {
@@ -78,13 +79,13 @@ class LlamaContextWrapper {
     isLlamaChatSupported() {
         return !!this.ctx.getModelInfo().chatTemplates.llamaChat;
     }
-    _formatImageChat(messages) {
+    _formatMediaChat(messages) {
         if (!messages)
             return {
                 messages,
-                has_image: false,
+                has_media: false,
             };
-        const imagePaths = [];
+        const mediaPaths = [];
         return {
             messages: messages.map((msg) => {
                 if (Array.isArray(msg.content)) {
@@ -93,10 +94,30 @@ class LlamaContextWrapper {
                     // Handle multimodal content
                     if (part.type === 'image_url') {
                         let path = ((_a = part.image_url) === null || _a === void 0 ? void 0 : _a.url) || '';
-                        imagePaths.push(path);
+                        mediaPaths.push(path);
                         return {
                             type: 'text',
-                            text: '<__image__>',
+                            text: exports.MTMD_DEFAULT_MEDIA_MARKER,
+                        };
+                    }
+                    else if (part.type === 'input_audio') {
+                        const { input_audio: audio } = part;
+                        if (!audio)
+                            throw new Error('input_audio is required');
+                        const { format } = audio;
+                        if (format != 'wav' && format != 'mp3') {
+                            throw new Error(`Unsupported audio format: ${format}`);
+                        }
+                        if (audio.url) {
+                            const path = audio.url.replace(/file:\/\//, '');
+                            mediaPaths.push(path);
+                        }
+                        else if (audio.data) {
+                            mediaPaths.push(audio.data);
+                        }
+                        return {
+                            type: 'text',
+                            text: exports.MTMD_DEFAULT_MEDIA_MARKER,
                         };
                     }
                     return part;
@@ -105,14 +126,14 @@ class LlamaContextWrapper {
                 }
                 return msg;
             }),
-            has_image: imagePaths.length > 0,
-            image_paths: imagePaths,
+            has_media: mediaPaths.length > 0,
+            media_paths: mediaPaths,
         };
     }
     getFormattedChat(messages, template, params) {
-        const { messages: chat, has_image, image_paths, } = this._formatImageChat(messages);
+        const { messages: chat, has_media, media_paths, } = this._formatMediaChat(messages);
         const useJinja = this.isJinjaSupported() && (params === null || params === void 0 ? void 0 : params.jinja);
-        let tmpl = this.isLlamaChatSupported() || useJinja ? undefined : 'chatml';
+        let tmpl;
         if (template)
             tmpl = template; // Force replace if provided
         const jsonSchema = getJsonSchema(params === null || params === void 0 ? void 0 : params.response_format);
@@ -127,25 +148,25 @@ class LlamaContextWrapper {
             return {
                 type: 'llama-chat',
                 prompt: result,
-                has_image,
-                image_paths,
+                has_media,
+                media_paths,
             };
         }
         const jinjaResult = result;
         jinjaResult.type = 'jinja';
-        jinjaResult.has_image = has_image;
-        jinjaResult.image_paths = image_paths;
+        jinjaResult.has_media = has_media;
+        jinjaResult.media_paths = media_paths;
         return jinjaResult;
     }
     completion(options, callback) {
-        const { messages, image_paths = options.image_paths } = this._formatImageChat(options.messages);
-        return this.ctx.completion(Object.assign(Object.assign({}, options), { messages, image_paths: options.image_paths || image_paths }), callback || (() => { }));
+        const { messages, media_paths = options.media_paths } = this._formatMediaChat(options.messages);
+        return this.ctx.completion(Object.assign(Object.assign({}, options), { messages, media_paths: options.media_paths || media_paths }), callback || (() => { }));
     }
     stopCompletion() {
         return this.ctx.stopCompletion();
     }
-    tokenize(text) {
-        return this.ctx.tokenize(text);
+    tokenize(text, { media_paths } = {}) {
+        return this.ctx.tokenize(text, media_paths);
     }
     detokenize(tokens) {
         return this.ctx.detokenize(tokens);
@@ -180,6 +201,9 @@ class LlamaContextWrapper {
     releaseMultimodal() {
         return this.ctx.releaseMultimodal();
     }
+    getMultimodalSupport() {
+        return this.ctx.getMultimodalSupport();
+    }
 }
 const loadModel = (options) => __awaiter(void 0, void 0, void 0, function* () {
     var _a, _b;
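
The same media plumbing now reaches tokenization: the wrapper's tokenize(text, { media_paths }) forwards to the native tokenize(text, media_paths). A hedged sketch, assuming (as in llama.cpp's mtmd tokenizer) that each media marker in the text is paired with the corresponding entry of media_paths; the path is a placeholder:

import { MTMD_DEFAULT_MEDIA_MARKER } from '@fugood/llama.node'

const { tokens, has_media, bitmap_hashes, chunk_pos, chunk_pos_media } =
  await context.tokenize(`Describe this: ${MTMD_DEFAULT_MEDIA_MARKER}`, {
    media_paths: ['/path/to/photo.jpg'], // placeholder
  })
// tokens: Int32Array of text tokens; has_media flags whether media chunks were
// produced; chunk_pos / chunk_pos_media give chunk offsets; bitmap_hashes
// identifies the decoded media (see the new TokenizeResult fields in binding.ts).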
package/lib/index.ts CHANGED
@@ -14,6 +14,8 @@ import type {
 
 export * from './binding'
 
+export const MTMD_DEFAULT_MEDIA_MARKER = '<__media__>'
+
 export interface LlamaModelOptionsExtended extends LlamaModelOptions {
   lib_variant?: LibVariant
 }
@@ -60,6 +62,13 @@ const getJsonSchema = (responseFormat?: CompletionResponseFormat) => {
   return null
 }
 
+export type FormattedChatResult = {
+  type: 'jinja' | 'llama-chat'
+  prompt: string
+  has_media: boolean
+  media_paths?: Array<string>
+}
+
 class LlamaContextWrapper {
   ctx: any
 
@@ -84,17 +93,17 @@ class LlamaContextWrapper {
     return !!this.ctx.getModelInfo().chatTemplates.llamaChat
   }
 
-  _formatImageChat(messages: ChatMessage[] | undefined): {
+  _formatMediaChat(messages: ChatMessage[] | undefined): {
     messages: ChatMessage[] | undefined
-    has_image: boolean
-    image_paths?: string[]
+    has_media: boolean
+    media_paths?: string[]
   } {
     if (!messages)
       return {
         messages,
-        has_image: false,
+        has_media: false,
       }
-    const imagePaths: string[] = []
+    const mediaPaths: string[] = []
     return {
       messages: messages.map((msg) => {
         if (Array.isArray(msg.content)) {
@@ -102,10 +111,28 @@ class LlamaContextWrapper {
             // Handle multimodal content
             if (part.type === 'image_url') {
               let path = part.image_url?.url || ''
-              imagePaths.push(path)
+              mediaPaths.push(path)
               return {
                 type: 'text',
-                text: '<__image__>',
+                text: MTMD_DEFAULT_MEDIA_MARKER,
+              }
+            } else if (part.type === 'input_audio') {
+              const { input_audio: audio } = part
+              if (!audio) throw new Error('input_audio is required')
+
+              const { format } = audio
+              if (format != 'wav' && format != 'mp3') {
+                throw new Error(`Unsupported audio format: ${format}`)
+              }
+              if (audio.url) {
+                const path = audio.url.replace(/file:\/\//, '')
+                mediaPaths.push(path)
+              } else if (audio.data) {
+                mediaPaths.push(audio.data)
+              }
+              return {
+                type: 'text',
+                text: MTMD_DEFAULT_MEDIA_MARKER,
               }
             }
             return part
@@ -118,8 +145,8 @@ class LlamaContextWrapper {
         }
         return msg
       }),
-      has_image: imagePaths.length > 0,
-      image_paths: imagePaths,
+      has_media: mediaPaths.length > 0,
+      media_paths: mediaPaths,
     }
   }
 
@@ -133,15 +160,15 @@ class LlamaContextWrapper {
       parallel_tool_calls?: object
      tool_choice?: string
     },
-  ): object {
+  ): FormattedChatResult {
     const {
       messages: chat,
-      has_image,
-      image_paths,
-    } = this._formatImageChat(messages)
+      has_media,
+      media_paths,
+    } = this._formatMediaChat(messages)
 
     const useJinja = this.isJinjaSupported() && params?.jinja
-    let tmpl = this.isLlamaChatSupported() || useJinja ? undefined : 'chatml'
+    let tmpl
     if (template) tmpl = template // Force replace if provided
     const jsonSchema = getJsonSchema(params?.response_format)
 
@@ -157,14 +184,14 @@ class LlamaContextWrapper {
       return {
         type: 'llama-chat',
         prompt: result as string,
-        has_image,
-        image_paths,
+        has_media,
+        media_paths,
       }
     }
     const jinjaResult = result
     jinjaResult.type = 'jinja'
-    jinjaResult.has_image = has_image
-    jinjaResult.image_paths = image_paths
+    jinjaResult.has_media = has_media
+    jinjaResult.media_paths = media_paths
     return jinjaResult
   }
 
@@ -172,12 +199,12 @@ class LlamaContextWrapper {
     options: LlamaCompletionOptions,
     callback?: (token: LlamaCompletionToken) => void,
   ): Promise<LlamaCompletionResult> {
-    const { messages, image_paths = options.image_paths } =
-      this._formatImageChat(options.messages)
+    const { messages, media_paths = options.media_paths } =
+      this._formatMediaChat(options.messages)
     return this.ctx.completion({
       ...options,
       messages,
-      image_paths: options.image_paths || image_paths,
+      media_paths: options.media_paths || media_paths,
     }, callback || (() => {}))
   }
 
@@ -185,8 +212,8 @@ class LlamaContextWrapper {
     return this.ctx.stopCompletion()
   }
 
-  tokenize(text: string): Promise<TokenizeResult> {
-    return this.ctx.tokenize(text)
+  tokenize(text: string, { media_paths }: { media_paths?: string[] } = {}): Promise<TokenizeResult> {
+    return this.ctx.tokenize(text, media_paths)
   }
 
   detokenize(tokens: number[]): Promise<string> {
@@ -235,6 +262,13 @@ class LlamaContextWrapper {
   releaseMultimodal(): Promise<void> {
     return this.ctx.releaseMultimodal()
   }
+
+  getMultimodalSupport(): Promise<{
+    vision: boolean
+    audio: boolean
+  }> {
+    return this.ctx.getMultimodalSupport()
+  }
 }
 
 export const loadModel = async (
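
Because a given projector may support vision, audio, or both, callers can gate input_audio parts on the new capability query. A small sketch using only methods shown in this diff:

if (await context.isMultimodalEnabled()) {
  const { vision, audio } = await context.getMultimodalSupport()
  if (!audio) {
    // The loaded projector cannot decode audio: send text/image parts only.
  }
  if (!vision) {
    // Likewise, skip image_url parts when vision is unsupported.
  }
}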
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.4.6",
+  "version": "0.5.0",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {