@shipworthy/ai-sdk-llama-cpp 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/CMakeLists.txt +6 -0
  2. package/LICENSE +21 -0
  3. package/README.md +274 -0
  4. package/dist/binding-bun.d.ts +7 -0
  5. package/dist/binding-bun.d.ts.map +1 -0
  6. package/dist/binding-bun.js +354 -0
  7. package/dist/binding-bun.js.map +1 -0
  8. package/dist/binding-node.d.ts +7 -0
  9. package/dist/binding-node.d.ts.map +1 -0
  10. package/dist/binding-node.js +59 -0
  11. package/dist/binding-node.js.map +1 -0
  12. package/dist/binding.d.ts +67 -0
  13. package/dist/binding.d.ts.map +1 -0
  14. package/dist/binding.js +105 -0
  15. package/dist/binding.js.map +1 -0
  16. package/dist/index.d.ts +5 -0
  17. package/dist/index.d.ts.map +1 -0
  18. package/dist/index.js +8 -0
  19. package/dist/index.js.map +1 -0
  20. package/dist/llama-cpp-embedding-model.d.ts +28 -0
  21. package/dist/llama-cpp-embedding-model.d.ts.map +1 -0
  22. package/dist/llama-cpp-embedding-model.js +78 -0
  23. package/dist/llama-cpp-embedding-model.js.map +1 -0
  24. package/dist/llama-cpp-language-model.d.ts +55 -0
  25. package/dist/llama-cpp-language-model.d.ts.map +1 -0
  26. package/dist/llama-cpp-language-model.js +221 -0
  27. package/dist/llama-cpp-language-model.js.map +1 -0
  28. package/dist/llama-cpp-provider.d.ts +82 -0
  29. package/dist/llama-cpp-provider.d.ts.map +1 -0
  30. package/dist/llama-cpp-provider.js +71 -0
  31. package/dist/llama-cpp-provider.js.map +1 -0
  32. package/dist/native-binding.d.ts +51 -0
  33. package/dist/native-binding.d.ts.map +1 -0
  34. package/dist/native-binding.js +74 -0
  35. package/dist/native-binding.js.map +1 -0
  36. package/native/CMakeLists.txt +74 -0
  37. package/native/binding.cpp +522 -0
  38. package/native/llama-wrapper.cpp +519 -0
  39. package/native/llama-wrapper.h +131 -0
  40. package/package.json +79 -0
  41. package/scripts/postinstall.cjs +74 -0

package/dist/llama-cpp-language-model.js
@@ -0,0 +1,221 @@
+ import { loadModel, unloadModel, generate, generateStream, isModelLoaded, } from "./native-binding.js";
+ export function convertFinishReason(reason) {
+     let unified;
+     switch (reason) {
+         case "stop":
+             unified = "stop";
+             break;
+         case "length":
+             unified = "length";
+             break;
+         default:
+             unified = "other";
+     }
+     return { unified, raw: reason };
+ }
+ export function convertUsage(promptTokens, completionTokens) {
+     return {
+         inputTokens: {
+             total: promptTokens,
+             noCache: undefined,
+             cacheRead: undefined,
+             cacheWrite: undefined,
+         },
+         outputTokens: {
+             total: completionTokens,
+             text: completionTokens,
+             reasoning: undefined,
+         },
+     };
+ }
+ /**
+  * Convert AI SDK messages to simple role/content format for the native layer.
+  * The native layer will apply the appropriate chat template.
+  */
+ export function convertMessages(messages) {
+     const result = [];
+     for (const message of messages) {
+         switch (message.role) {
+             case "system":
+                 result.push({
+                     role: "system",
+                     content: message.content,
+                 });
+                 break;
+             case "user":
+                 // Extract text content from user messages
+                 let userContent = "";
+                 for (const part of message.content) {
+                     if (part.type === "text") {
+                         userContent += part.text;
+                     }
+                     // Note: File parts are not supported in this implementation
+                 }
+                 result.push({
+                     role: "user",
+                     content: userContent,
+                 });
+                 break;
+             case "assistant":
+                 // Extract text content from assistant messages
+                 let assistantContent = "";
+                 for (const part of message.content) {
+                     if (part.type === "text") {
+                         assistantContent += part.text;
+                     }
+                 }
+                 result.push({
+                     role: "assistant",
+                     content: assistantContent,
+                 });
+                 break;
+             case "tool":
+                 // Tool results are not supported in this implementation
+                 break;
+         }
+     }
+     return result;
+ }
+ export class LlamaCppLanguageModel {
+     specificationVersion = "v3";
+     provider = "llama.cpp";
+     modelId;
+     /**
+      * Supported URL patterns - empty since we only support local files
+      */
+     supportedUrls = {};
+     modelHandle = null;
+     config;
+     initPromise = null;
+     constructor(config) {
+         this.config = config;
+         this.modelId = config.modelPath;
+     }
+     async ensureModelLoaded() {
+         if (this.modelHandle !== null && isModelLoaded(this.modelHandle)) {
+             return this.modelHandle;
+         }
+         if (this.initPromise) {
+             await this.initPromise;
+             if (this.modelHandle !== null) {
+                 return this.modelHandle;
+             }
+         }
+         this.initPromise = (async () => {
+             const options = {
+                 modelPath: this.config.modelPath,
+                 contextSize: this.config.contextSize ?? 2048,
+                 gpuLayers: this.config.gpuLayers ?? 99,
+                 threads: this.config.threads ?? 4,
+                 debug: this.config.debug ?? false,
+                 chatTemplate: this.config.chatTemplate ?? "auto",
+             };
+             this.modelHandle = await loadModel(options);
+         })();
+         await this.initPromise;
+         this.initPromise = null;
+         if (this.modelHandle === null) {
+             throw new Error("Failed to load model");
+         }
+         return this.modelHandle;
+     }
+     async dispose() {
+         if (this.modelHandle !== null) {
+             unloadModel(this.modelHandle);
+             this.modelHandle = null;
+         }
+     }
+     async doGenerate(options) {
+         const handle = await this.ensureModelLoaded();
+         const messages = convertMessages(options.prompt);
+         const generateOptions = {
+             messages,
+             maxTokens: options.maxOutputTokens ?? 256,
+             temperature: options.temperature ?? 0.7,
+             topP: options.topP ?? 0.9,
+             topK: options.topK ?? 40,
+             stopSequences: options.stopSequences,
+         };
+         const result = await generate(handle, generateOptions);
+         // Build content array with text content
+         const content = [
+             {
+                 type: "text",
+                 text: result.text,
+                 providerMetadata: undefined,
+             },
+         ];
+         const warnings = [];
+         return {
+             content,
+             finishReason: convertFinishReason(result.finishReason),
+             usage: convertUsage(result.promptTokens, result.completionTokens),
+             warnings,
+             request: {
+                 body: generateOptions,
+             },
+         };
+     }
+     async doStream(options) {
+         const handle = await this.ensureModelLoaded();
+         const messages = convertMessages(options.prompt);
+         const generateOptions = {
+             messages,
+             maxTokens: options.maxOutputTokens ?? 256,
+             temperature: options.temperature ?? 0.7,
+             topP: options.topP ?? 0.9,
+             topK: options.topK ?? 40,
+             stopSequences: options.stopSequences,
+         };
+         const textId = crypto.randomUUID();
+         const stream = new ReadableStream({
+             start: async (controller) => {
+                 try {
+                     // Emit stream start
+                     controller.enqueue({
+                         type: "stream-start",
+                         warnings: [],
+                     });
+                     // Emit text start
+                     controller.enqueue({
+                         type: "text-start",
+                         id: textId,
+                     });
+                     const result = await generateStream(handle, generateOptions, (token) => {
+                         controller.enqueue({
+                             type: "text-delta",
+                             id: textId,
+                             delta: token,
+                         });
+                     });
+                     // Emit text end
+                     controller.enqueue({
+                         type: "text-end",
+                         id: textId,
+                     });
+                     // Emit finish
+                     controller.enqueue({
+                         type: "finish",
+                         finishReason: convertFinishReason(result.finishReason),
+                         usage: convertUsage(result.promptTokens, result.completionTokens),
+                     });
+                     controller.close();
+                 }
+                 catch (error) {
+                     controller.enqueue({
+                         type: "error",
+                         error,
+                     });
+                     controller.close();
+                 }
+             },
+         });
+         return {
+             stream,
+             request: {
+                 body: generateOptions,
+             },
+         };
+     }
+ }
+ //# sourceMappingURL=llama-cpp-language-model.js.map
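
convertMessages above flattens the AI SDK prompt into plain role/content pairs for the native layer: text parts are concatenated, while file parts and tool results are silently dropped. A minimal sketch of that mapping; the prompt literal below is hypothetical and only approximates the AI SDK message shape:

```typescript
import { convertMessages } from "./llama-cpp-language-model.js";

// Hypothetical prompt in the shape the loop above handles: system content is
// a string, user/assistant content is an array of typed parts.
const prompt = [
  { role: "system", content: "You are terse." },
  {
    role: "user",
    content: [
      { type: "text", text: "Hello, " },
      { type: "text", text: "world" },
      { type: "file", data: new Uint8Array(), mediaType: "image/png" }, // dropped
    ],
  },
];

console.log(convertMessages(prompt));
// [
//   { role: "system", content: "You are terse." },
//   { role: "user", content: "Hello, world" }
// ]
```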

package/dist/llama-cpp-language-model.js.map
@@ -0,0 +1 @@
+ {"version":3,"file":"llama-cpp-language-model.js","sourceRoot":"","sources":["../src/llama-cpp-language-model.ts"],"names":[],"mappings":"AAaA,OAAO,EACL,SAAS,EACT,WAAW,EACX,QAAQ,EACR,cAAc,EACd,aAAa,GAId,MAAM,qBAAqB,CAAC;AAgC7B,MAAM,UAAU,mBAAmB,CACjC,MAAc;IAEd,IAAI,OAA+C,CAAC;IACpD,QAAQ,MAAM,EAAE,CAAC;QACf,KAAK,MAAM;YACT,OAAO,GAAG,MAAM,CAAC;YACjB,MAAM;QACR,KAAK,QAAQ;YACX,OAAO,GAAG,QAAQ,CAAC;YACnB,MAAM;QACR;YACE,OAAO,GAAG,OAAO,CAAC;IACtB,CAAC;IACD,OAAO,EAAE,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,CAAC;AAClC,CAAC;AAED,MAAM,UAAU,YAAY,CAC1B,YAAoB,EACpB,gBAAwB;IAExB,OAAO;QACL,WAAW,EAAE;YACX,KAAK,EAAE,YAAY;YACnB,OAAO,EAAE,SAAS;YAClB,SAAS,EAAE,SAAS;YACpB,UAAU,EAAE,SAAS;SACtB;QACD,YAAY,EAAE;YACZ,KAAK,EAAE,gBAAgB;YACvB,IAAI,EAAE,gBAAgB;YACtB,SAAS,EAAE,SAAS;SACrB;KACF,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,eAAe,CAAC,QAAkC;IAChE,MAAM,MAAM,GAAkB,EAAE,CAAC;IAEjC,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,QAAQ,OAAO,CAAC,IAAI,EAAE,CAAC;YACrB,KAAK,QAAQ;gBACX,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,QAAQ;oBACd,OAAO,EAAE,OAAO,CAAC,OAAO;iBACzB,CAAC,CAAC;gBACH,MAAM;YACR,KAAK,MAAM;gBACT,0CAA0C;gBAC1C,IAAI,WAAW,GAAG,EAAE,CAAC;gBACrB,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;oBACnC,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;wBACzB,WAAW,IAAI,IAAI,CAAC,IAAI,CAAC;oBAC3B,CAAC;oBACD,4DAA4D;gBAC9D,CAAC;gBACD,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE,WAAW;iBACrB,CAAC,CAAC;gBACH,MAAM;YACR,KAAK,WAAW;gBACd,+CAA+C;gBAC/C,IAAI,gBAAgB,GAAG,EAAE,CAAC;gBAC1B,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;oBACnC,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;wBACzB,gBAAgB,IAAI,IAAI,CAAC,IAAI,CAAC;oBAChC,CAAC;gBACH,CAAC;gBACD,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,WAAW;oBACjB,OAAO,EAAE,gBAAgB;iBAC1B,CAAC,CAAC;gBACH,MAAM;YACR,KAAK,MAAM;gBACT,wDAAwD;gBACxD,MAAM;QACV,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,OAAO,qBAAqB;IACvB,oBAAoB,GAAG,IAAa,CAAC;IACrC,QAAQ,GAAG,WAAW,CAAC;IACvB,OAAO,CAAS;IAEzB;;OAEG;IACM,aAAa,GAA6B,EAAE,CAAC;IAE9C,WAAW,GAAkB,IAAI,CAAC;IACzB,MAAM,CAAsB;IACrC,WAAW,GAAyB,IAAI,CAAC;IAEjD,YAAY,MAA2B;QACrC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,SAAS,CAAC;IAClC,CAAC;IAEO,KAAK,CAAC,iBAAiB;QAC7B,IAAI,IAAI,CAAC,WAAW,KAAK,IAAI,IAAI,aAAa,CAAC,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC;YACjE,OAAO,IAAI,CAAC,WAAW,CAAC;QAC1B,CAAC;QAED,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACrB,MAAM,IAAI,CAAC,WAAW,CAAC;YACvB,IAAI,IAAI,CAAC,WAAW,KAAK,IAAI,EAAE,CAAC;gBAC9B,OAAO,IAAI,CAAC,WAAW,CAAC;YAC1B,CAAC;QACH,CAAC;QAED,IAAI,CAAC,WAAW,GAAG,CAAC,KAAK,IAAI,EAAE;YAC7B,MAAM,OAAO,GAAqB;gBAChC,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;gBAChC,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW,IAAI,IAAI;gBAC5C,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS,IAAI,EAAE;gBACtC,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,CAAC;gBACjC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK,IAAI,KAAK;gBACjC,YAAY,EAAE,IAAI,CAAC,MAAM,CAAC,YAAY,IAAI,MAAM;aACjD,CAAC;YAEF,IAAI,CAAC,WAAW,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,CAAC;QAC9C,CAAC,CAAC,EAAE,CAAC;QAEL,MAAM,IAAI,CAAC,WAAW,CAAC;QACvB,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;QAExB,IAAI,IAAI,CAAC,WAAW,KAAK,IAAI,EAAE,CAAC;YAC9B,MAAM,IAAI,KAAK,CAAC,sBAAsB,CAAC,CAAC;QAC1C,CAAC;QAED,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;IAED,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,WAAW,KAAK,IAAI,EAAE,CAAC;YAC9B,WAAW,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAC9B,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,KAAK,CAAC,UAAU,CACd,OAAmC;QAEnC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAE9C,MAAM,QAAQ,GAAG,eAAe,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;QAEjD,MAAM,eAAe,GAAoB;YACvC,QAAQ;YACR,SAAS,EAAE,OAAO,CAAC,eAAe,IAAI,GAAG;YACzC,WAAW,EAAE,OAAO,CAAC,WAAW,IAAI,GAAG;YACvC,IAAI,EAAE,OAAO,CAAC,IAAI,IAAI,GAAG;YACzB,IAAI,EAAE,OAAO,CAAC,IAAI,IAAI,EAAE;YACxB,aAAa,EAAE,OAAO,CAAC,aAAa;SACrC,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,MAAM,EAAE,eAAe,CAAC,CAAC;QAEvD,wCAAwC;QACxC,MAAM,OAAO,GAA6B;YACxC;gBACE,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,gBAAgB,EAAE,SAAS;aAC5B;SACF,CAAC;QAEF,MAAM,QAAQ,GAAsB,EAAE,CAAC;QAEvC,OAAO;YACL,OAAO;YACP,YAAY,EAAE,mBAAmB,CAAC,MAAM,CAAC,YAAY,CAAC;YACtD,KAAK,EAAE,YAAY,CAAC,MAAM,CAAC,YAAY,EAAE,MAAM,CAAC,gBAAgB,CAAC;YACjE,QAAQ;YACR,OAAO,EAAE;gBACP,IAAI,EAAE,eAAe;aACtB;SACF,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,QAAQ,CACZ,OAAmC;QAEnC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAE9C,MAAM,QAAQ,GAAG,eAAe,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;QAEjD,MAAM,eAAe,GAAoB;YACvC,QAAQ;YACR,SAAS,EAAE,OAAO,CAAC,eAAe,IAAI,GAAG;YACzC,WAAW,EAAE,OAAO,CAAC,WAAW,IAAI,GAAG;YACvC,IAAI,EAAE,OAAO,CAAC,IAAI,IAAI,GAAG;YACzB,IAAI,EAAE,OAAO,CAAC,IAAI,IAAI,EAAE;YACxB,aAAa,EAAE,OAAO,CAAC,aAAa;SACrC,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,CAAC,UAAU,EAAE,CAAC;QAEnC,MAAM,MAAM,GAAG,IAAI,cAAc,CAA4B;YAC3D,KAAK,EAAE,KAAK,EAAE,UAAU,EAAE,EAAE;gBAC1B,IAAI,CAAC;oBACH,oBAAoB;oBACpB,UAAU,CAAC,OAAO,CAAC;wBACjB,IAAI,EAAE,cAAc;wBACpB,QAAQ,EAAE,EAAE;qBACb,CAAC,CAAC;oBAEH,kBAAkB;oBAClB,UAAU,CAAC,OAAO,CAAC;wBACjB,IAAI,EAAE,YAAY;wBAClB,EAAE,EAAE,MAAM;qBACX,CAAC,CAAC;oBAEH,MAAM,MAAM,GAAG,MAAM,cAAc,CACjC,MAAM,EACN,eAAe,EACf,CAAC,KAAK,EAAE,EAAE;wBACR,UAAU,CAAC,OAAO,CAAC;4BACjB,IAAI,EAAE,YAAY;4BAClB,EAAE,EAAE,MAAM;4BACV,KAAK,EAAE,KAAK;yBACb,CAAC,CAAC;oBACL,CAAC,CACF,CAAC;oBAEF,gBAAgB;oBAChB,UAAU,CAAC,OAAO,CAAC;wBACjB,IAAI,EAAE,UAAU;wBAChB,EAAE,EAAE,MAAM;qBACX,CAAC,CAAC;oBAEH,cAAc;oBACd,UAAU,CAAC,OAAO,CAAC;wBACjB,IAAI,EAAE,QAAQ;wBACd,YAAY,EAAE,mBAAmB,CAAC,MAAM,CAAC,YAAY,CAAC;wBACtD,KAAK,EAAE,YAAY,CAAC,MAAM,CAAC,YAAY,EAAE,MAAM,CAAC,gBAAgB,CAAC;qBAClE,CAAC,CAAC;oBAEH,UAAU,CAAC,KAAK,EAAE,CAAC;gBACrB,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,UAAU,CAAC,OAAO,CAAC;wBACjB,IAAI,EAAE,OAAO;wBACb,KAAK;qBACN,CAAC,CAAC;oBACH,UAAU,CAAC,KAAK,EAAE,CAAC;gBACrB,CAAC;YACH,CAAC;SACF,CAAC,CAAC;QAEH,OAAO;YACL,MAAM;YACN,OAAO,EAAE;gBACP,IAAI,EAAE,eAAe;aACtB;SACF,CAAC;IACJ,CAAC;CACF"}

package/dist/llama-cpp-provider.d.ts
@@ -0,0 +1,82 @@
+ import { LlamaCppLanguageModel } from "./llama-cpp-language-model.js";
+ import { LlamaCppEmbeddingModel } from "./llama-cpp-embedding-model.js";
+ export interface LlamaCppProviderConfig {
+     /**
+      * Path to the GGUF model file.
+      */
+     modelPath: string;
+     /**
+      * Maximum context size (default: 2048).
+      */
+     contextSize?: number;
+     /**
+      * Number of layers to offload to GPU (default: 99, meaning all layers).
+      * Set to 0 to disable GPU acceleration.
+      */
+     gpuLayers?: number;
+     /**
+      * Number of CPU threads to use (default: 4).
+      */
+     threads?: number;
+     /**
+      * Enable verbose debug output from llama.cpp (default: false).
+      */
+     debug?: boolean;
+ }
+ export interface LlamaCppProvider {
+     (config: LlamaCppProviderConfig): LlamaCppLanguageModel;
+     languageModel(config: LlamaCppProviderConfig): LlamaCppLanguageModel;
+     embedding(config: LlamaCppProviderConfig): LlamaCppEmbeddingModel;
+ }
+ /**
+  * Creates a llama.cpp model provider.
+  *
+  * @example
+  * ```typescript
+  * import { llamaCpp } from '@shipworthy/ai-sdk-llama-cpp';
+  * import { embed, embedMany, generateText, streamText } from 'ai';
+  *
+  * const model = llamaCpp({
+  *   modelPath: './models/llama-3.2-1b.gguf'
+  * });
+  *
+  * const embeddingModel = llamaCpp.embedding({
+  *   modelPath: './models/nomic-embed-text-v1.5.Q4_K_M.gguf'
+  * });
+  *
+  * // Non-streaming
+  * const { text } = await generateText({
+  *   model,
+  *   prompt: 'Hello, how are you?'
+  * });
+  *
+  * // Streaming
+  * const { textStream } = await streamText({
+  *   model,
+  *   prompt: 'Tell me a story'
+  * });
+  *
+  * for await (const chunk of textStream) {
+  *   process.stdout.write(chunk);
+  * }
+  *
+  *
+  * // Single embedding
+  * const { embedding } = await embed({
+  *   model: embeddingModel,
+  *   value: 'Hello, world!'
+  * });
+  *
+  * // Multiple embeddings
+  * const { embeddings } = await embedMany({
+  *   model: embeddingModel,
+  *   values: ['Hello', 'World', 'How are you?']
+  * });
+  * ```
+  */
+ export declare const llamaCpp: LlamaCppProvider;
+ /**
+  * Default export for convenience.
+  */
+ export default llamaCpp;
+ //# sourceMappingURL=llama-cpp-provider.d.ts.map
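
The config comments above pin down the defaults (contextSize 2048, gpuLayers 99, threads 4) and note that gpuLayers: 0 disables GPU offload. A hedged sketch of a CPU-only setup; the model path is hypothetical:

```typescript
import { llamaCpp } from "@shipworthy/ai-sdk-llama-cpp";

// Keep every layer on the CPU and size the context and threads explicitly.
const cpuOnlyModel = llamaCpp({
  modelPath: "./models/llama-3.2-1b.gguf", // hypothetical path
  gpuLayers: 0, // 0 = no GPU acceleration
  contextSize: 4096,
  threads: 8,
});
```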

package/dist/llama-cpp-provider.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"llama-cpp-provider.d.ts","sourceRoot":"","sources":["../src/llama-cpp-provider.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,qBAAqB,EAEtB,MAAM,+BAA+B,CAAC;AACvC,OAAO,EACL,sBAAsB,EACvB,MAAM,gCAAgC,CAAC;AAExC,MAAM,WAAW,sBAAsB;IACrC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAElB;;OAEG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB;;;OAGG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB;;OAEG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB;AAGD,MAAM,WAAW,gBAAgB;IAC/B,CAAC,MAAM,EAAE,sBAAsB,GAAG,qBAAqB,CAAC;IACxD,aAAa,CAAC,MAAM,EAAE,sBAAsB,GAAG,qBAAqB,CAAC;IACrE,SAAS,CAAC,MAAM,EAAE,sBAAsB,GAAG,sBAAsB,CAAC;CACnE;AA0BD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6CG;AACH,eAAO,MAAM,QAAQ,kBAAmB,CAAC;AAGzC;;GAEG;AACH,eAAe,QAAQ,CAAC"}

package/dist/llama-cpp-provider.js
@@ -0,0 +1,71 @@
+ import { LlamaCppLanguageModel, } from "./llama-cpp-language-model.js";
+ import { LlamaCppEmbeddingModel, } from "./llama-cpp-embedding-model.js";
+ function createLlamaCpp() {
+     const provider = (config) => {
+         const modelConfig = {
+             modelPath: config.modelPath,
+             contextSize: config.contextSize,
+             gpuLayers: config.gpuLayers,
+             threads: config.threads,
+             debug: config.debug,
+         };
+         return new LlamaCppLanguageModel(modelConfig);
+     };
+     provider.languageModel = provider;
+     provider.embedding = (config) => {
+         return new LlamaCppEmbeddingModel(config);
+     };
+     return provider;
+ }
+ /**
+  * Creates a llama.cpp model provider.
+  *
+  * @example
+  * ```typescript
+  * import { llamaCpp } from '@shipworthy/ai-sdk-llama-cpp';
+  * import { embed, embedMany, generateText, streamText } from 'ai';
+  *
+  * const model = llamaCpp({
+  *   modelPath: './models/llama-3.2-1b.gguf'
+  * });
+  *
+  * const embeddingModel = llamaCpp.embedding({
+  *   modelPath: './models/nomic-embed-text-v1.5.Q4_K_M.gguf'
+  * });
+  *
+  * // Non-streaming
+  * const { text } = await generateText({
+  *   model,
+  *   prompt: 'Hello, how are you?'
+  * });
+  *
+  * // Streaming
+  * const { textStream } = await streamText({
+  *   model,
+  *   prompt: 'Tell me a story'
+  * });
+  *
+  * for await (const chunk of textStream) {
+  *   process.stdout.write(chunk);
+  * }
+  *
+  *
+  * // Single embedding
+  * const { embedding } = await embed({
+  *   model: embeddingModel,
+  *   value: 'Hello, world!'
+  * });
+  *
+  * // Multiple embeddings
+  * const { embeddings } = await embedMany({
+  *   model: embeddingModel,
+  *   values: ['Hello', 'World', 'How are you?']
+  * });
+  * ```
+  */
+ export const llamaCpp = createLlamaCpp();
+ /**
+  * Default export for convenience.
+  */
+ export default llamaCpp;
+ //# sourceMappingURL=llama-cpp-provider.js.map
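
Because createLlamaCpp assigns the factory function to its own languageModel property, calling the provider directly and calling llamaCpp.languageModel(...) are interchangeable. A short illustration; the model path is hypothetical:

```typescript
import { llamaCpp } from "@shipworthy/ai-sdk-llama-cpp";

const config = { modelPath: "./models/llama-3.2-1b.gguf" }; // hypothetical path

// Both calls run through the same factory and yield a LlamaCppLanguageModel.
const direct = llamaCpp(config);
const explicit = llamaCpp.languageModel(config);
```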

package/dist/llama-cpp-provider.js.map
@@ -0,0 +1 @@
+ {"version":3,"file":"llama-cpp-provider.js","sourceRoot":"","sources":["../src/llama-cpp-provider.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,qBAAqB,GAEtB,MAAM,+BAA+B,CAAC;AACvC,OAAO,EACL,sBAAsB,GACvB,MAAM,gCAAgC,CAAC;AAqCxC,SAAS,cAAc;IACrB,MAAM,QAAQ,GAAG,CACf,MAA8B,EACP,EAAE;QACzB,MAAM,WAAW,GAAwB;YACvC,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,KAAK,EAAE,MAAM,CAAC,KAAK;SACpB,CAAC;QAEF,OAAO,IAAI,qBAAqB,CAAC,WAAW,CAAC,CAAC;IAChD,CAAC,CAAA;IAED,QAAQ,CAAC,aAAa,GAAG,QAAQ,CAAC;IAElC,QAAQ,CAAC,SAAS,GAAG,CAAC,MAA8B,EAAE,EAAE;QACtD,OAAO,IAAI,sBAAsB,CAAC,MAAM,CAAC,CAAC;IAC5C,CAAC,CAAA;IAED,OAAO,QAA4B,CAAC;AACtC,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6CG;AACH,MAAM,CAAC,MAAM,QAAQ,GAAG,cAAc,EAAE,CAAC;AAGzC;;GAEG;AACH,eAAe,QAAQ,CAAC"}

package/dist/native-binding.d.ts
@@ -0,0 +1,51 @@
+ export interface LoadModelOptions {
+     modelPath: string;
+     gpuLayers?: number;
+     contextSize?: number;
+     threads?: number;
+     debug?: boolean;
+     /**
+      * Chat template to use for formatting messages.
+      * - "auto" (default): Use the template embedded in the GGUF model file
+      * - Template name: Use a specific built-in template (e.g., "llama3", "chatml", "gemma")
+      */
+     chatTemplate?: string;
+     /**
+      * Whether to load the model for embedding generation.
+      * When true, creates an embedding context with mean pooling enabled.
+      * Default: false
+      */
+     embedding?: boolean;
+ }
+ export interface ChatMessage {
+     role: string;
+     content: string;
+ }
+ export interface GenerateOptions {
+     messages: ChatMessage[];
+     maxTokens?: number;
+     temperature?: number;
+     topP?: number;
+     topK?: number;
+     stopSequences?: string[];
+ }
+ export interface GenerateResult {
+     text: string;
+     promptTokens: number;
+     completionTokens: number;
+     finishReason: 'stop' | 'length' | 'error';
+ }
+ export interface EmbedOptions {
+     texts: string[];
+ }
+ export interface EmbedResult {
+     embeddings: Float32Array[];
+     totalTokens: number;
+ }
+ export declare function loadModel(options: LoadModelOptions): Promise<number>;
+ export declare function unloadModel(handle: number): boolean;
+ export declare function generate(handle: number, options: GenerateOptions): Promise<GenerateResult>;
+ export declare function generateStream(handle: number, options: GenerateOptions, onToken: (token: string) => void): Promise<GenerateResult>;
+ export declare function isModelLoaded(handle: number): boolean;
+ export declare function embed(handle: number, options: EmbedOptions): Promise<EmbedResult>;
+ //# sourceMappingURL=native-binding.d.ts.map
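
These declarations form the promise-based surface over the N-API addon; the AI SDK classes above are thin wrappers over it, and nothing stops you from driving this layer directly. A sketch under the assumption that it runs next to the compiled dist/ output; the model path and prompt are hypothetical:

```typescript
import { loadModel, generate, unloadModel } from "./native-binding.js";

const handle = await loadModel({
  modelPath: "./models/llama-3.2-1b.gguf", // hypothetical path
  contextSize: 2048,
});

const result = await generate(handle, {
  messages: [{ role: "user", content: "Say hello." }],
  maxTokens: 64,
});
console.log(result.text, result.finishReason); // finishReason: 'stop' | 'length' | 'error'

unloadModel(handle); // frees the native model and context
```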

package/dist/native-binding.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"native-binding.d.ts","sourceRoot":"","sources":["../src/native-binding.ts"],"names":[],"mappings":"AAUA,MAAM,WAAW,gBAAgB;IAC/B,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB;;;;OAIG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB;;;;OAIG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,eAAe;IAC9B,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,YAAY,EAAE,MAAM,GAAG,QAAQ,GAAG,OAAO,CAAC;CAC3C;AAED,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,MAAM,EAAE,CAAC;CACjB;AAED,MAAM,WAAW,WAAW;IAC1B,UAAU,EAAE,YAAY,EAAE,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;CACrB;AAiBD,wBAAgB,SAAS,CAAC,OAAO,EAAE,gBAAgB,GAAG,OAAO,CAAC,MAAM,CAAC,CAYpE;AAED,wBAAgB,WAAW,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAEnD;AAED,wBAAgB,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,eAAe,GAAG,OAAO,CAAC,cAAc,CAAC,CAY1F;AAED,wBAAgB,cAAc,CAC5B,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,eAAe,EACxB,OAAO,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,IAAI,GAC/B,OAAO,CAAC,cAAc,CAAC,CAiBzB;AAED,wBAAgB,aAAa,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAErD;AAED,wBAAgB,KAAK,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,WAAW,CAAC,CAYjF"}

package/dist/native-binding.js
@@ -0,0 +1,74 @@
+ import { createRequire } from 'node:module';
+ import { join, dirname } from 'node:path';
+ import { fileURLToPath } from 'node:url';
+ const __dirname = dirname(fileURLToPath(import.meta.url));
+ const require = createRequire(import.meta.url);
+ // Load the native binding
+ const binding = require(join(__dirname, '..', 'build', 'Release', 'llama_binding.node'));
+ export function loadModel(options) {
+     return new Promise((resolve, reject) => {
+         binding.loadModel(options, (error, handle) => {
+             if (error) {
+                 reject(new Error(error));
+             }
+             else if (handle !== null) {
+                 resolve(handle);
+             }
+             else {
+                 reject(new Error('Failed to load model: unknown error'));
+             }
+         });
+     });
+ }
+ export function unloadModel(handle) {
+     return binding.unloadModel(handle);
+ }
+ export function generate(handle, options) {
+     return new Promise((resolve, reject) => {
+         binding.generate(handle, options, (error, result) => {
+             if (error) {
+                 reject(new Error(error));
+             }
+             else if (result) {
+                 resolve(result);
+             }
+             else {
+                 reject(new Error('Failed to generate: unknown error'));
+             }
+         });
+     });
+ }
+ export function generateStream(handle, options, onToken) {
+     return new Promise((resolve, reject) => {
+         binding.generateStream(handle, options, onToken, (error, result) => {
+             if (error) {
+                 reject(new Error(error));
+             }
+             else if (result) {
+                 resolve(result);
+             }
+             else {
+                 reject(new Error('Failed to generate stream: unknown error'));
+             }
+         });
+     });
+ }
+ export function isModelLoaded(handle) {
+     return binding.isModelLoaded(handle);
+ }
+ export function embed(handle, options) {
+     return new Promise((resolve, reject) => {
+         binding.embed(handle, options, (error, result) => {
+             if (error) {
+                 reject(new Error(error));
+             }
+             else if (result) {
+                 resolve(result);
+             }
+             else {
+                 reject(new Error('Failed to generate embeddings: unknown error'));
+             }
+         });
+     });
+ }
+ //# sourceMappingURL=native-binding.js.map
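
Every export here uses the same callback-to-Promise bridge: the addon reports (error, result) and the Promise settles accordingly, with generateStream additionally forwarding each token through onToken as it is produced. A small usage sketch; the handle is assumed to come from a prior loadModel call:

```typescript
import { generateStream } from "./native-binding.js";

declare const handle: number; // from a prior loadModel() call

const result = await generateStream(
  handle,
  { messages: [{ role: "user", content: "Tell me a story." }], maxTokens: 128 },
  (token) => process.stdout.write(token), // invoked once per generated token
);
console.log("\nfinished:", result.finishReason, "after", result.completionTokens, "tokens");
```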

package/dist/native-binding.js.map
@@ -0,0 +1 @@
+ {"version":3,"file":"native-binding.js","sourceRoot":"","sources":["../src/native-binding.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAEzC,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAC1D,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAE/C,0BAA0B;AAC1B,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,OAAO,EAAE,SAAS,EAAE,oBAAoB,CAAC,CAAkB,CAAC;AAmE1G,MAAM,UAAU,SAAS,CAAC,OAAyB;IACjD,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACrC,OAAO,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE;YAC3C,IAAI,KAAK,EAAE,CAAC;gBACV,MAAM,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC;YAC3B,CAAC;iBAAM,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;gBAC3B,OAAO,CAAC,MAAM,CAAC,CAAC;YAClB,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,IAAI,KAAK,CAAC,qCAAqC,CAAC,CAAC,CAAC;YAC3D,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,MAAc;IACxC,OAAO,OAAO,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;AACrC,CAAC;AAED,MAAM,UAAU,QAAQ,CAAC,MAAc,EAAE,OAAwB;IAC/D,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACrC,OAAO,CAAC,QAAQ,CAAC,MAAM,EAAE,OAAO,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE;YAClD,IAAI,KAAK,EAAE,CAAC;gBACV,MAAM,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC;YAC3B,CAAC;iBAAM,IAAI,MAAM,EAAE,CAAC;gBAClB,OAAO,CAAC,MAAM,CAAC,CAAC;YAClB,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC,CAAC;YACzD,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,UAAU,cAAc,CAC5B,MAAc,EACd,OAAwB,EACxB,OAAgC;IAEhC,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACrC,OAAO,CAAC,cAAc,CACpB,MAAM,EACN,OAAO,EACP,OAAO,EACP,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE;YAChB,IAAI,KAAK,EAAE,CAAC;gBACV,MAAM,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC;YAC3B,CAAC;iBAAM,IAAI,MAAM,EAAE,CAAC;gBAClB,OAAO,CAAC,MAAM,CAAC,CAAC;YAClB,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,IAAI,KAAK,CAAC,0CAA0C,CAAC,CAAC,CAAC;YAChE,CAAC;QACH,CAAC,CACF,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,MAAc;IAC1C,OAAO,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;AACvC,CAAC;AAED,MAAM,UAAU,KAAK,CAAC,MAAc,EAAE,OAAqB;IACzD,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACrC,OAAO,CAAC,KAAK,CAAC,MAAM,EAAE,OAAO,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE;YAC/C,IAAI,KAAK,EAAE,CAAC;gBACV,MAAM,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC;YAC3B,CAAC;iBAAM,IAAI,MAAM,EAAE,CAAC;gBAClB,OAAO,CAAC,MAAM,CAAC,CAAC;YAClB,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC,CAAC;YACpE,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC"}

package/native/CMakeLists.txt
@@ -0,0 +1,74 @@
+ cmake_minimum_required(VERSION 3.15)
+ project(llama_binding)
+
+ set(CMAKE_CXX_STANDARD 17)
+ set(CMAKE_CXX_STANDARD_REQUIRED ON)
+ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+
+ # Build llama.cpp as a static library
+ set(LLAMA_STATIC ON CACHE BOOL "Build llama.cpp as static library" FORCE)
+ set(LLAMA_BUILD_TESTS OFF CACHE BOOL "Disable llama.cpp tests" FORCE)
+ set(LLAMA_BUILD_EXAMPLES OFF CACHE BOOL "Disable llama.cpp examples" FORCE)
+ set(LLAMA_BUILD_SERVER OFF CACHE BOOL "Disable llama.cpp server" FORCE)
+
+ # Enable Metal on macOS
+ if(APPLE)
+     set(GGML_METAL ON CACHE BOOL "Enable Metal support" FORCE)
+     set(GGML_METAL_EMBED_LIBRARY ON CACHE BOOL "Embed Metal library" FORCE)
+ endif()
+
+ # Add llama.cpp subdirectory
+ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../llama.cpp ${CMAKE_CURRENT_BINARY_DIR}/llama.cpp)
+
+ # Find Node.js and node-addon-api
+ include_directories(${CMAKE_JS_INC})
+
+ # Find node-addon-api
+ execute_process(
+     COMMAND node -p "require('node-addon-api').include"
+     WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+     OUTPUT_VARIABLE NODE_ADDON_API_DIR
+     OUTPUT_STRIP_TRAILING_WHITESPACE
+ )
+ string(REPLACE "\"" "" NODE_ADDON_API_DIR ${NODE_ADDON_API_DIR})
+ include_directories(${NODE_ADDON_API_DIR})
+
+ # Include llama.cpp headers
+ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../llama.cpp/include)
+ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../llama.cpp/ggml/include)
+
+ # Build the native addon
+ add_library(${PROJECT_NAME} SHARED
+     binding.cpp
+     llama-wrapper.cpp
+ )
+
+ # Set output name and extension
+ set_target_properties(${PROJECT_NAME} PROPERTIES
+     PREFIX ""
+     SUFFIX ".node"
+     OUTPUT_NAME "llama_binding"
+ )
+
+ # Link against llama.cpp
+ target_link_libraries(${PROJECT_NAME} PRIVATE
+     llama
+     ggml
+     ${CMAKE_JS_LIB}
+ )
+
+ # Define NAPI_VERSION
+ target_compile_definitions(${PROJECT_NAME} PRIVATE
+     NAPI_VERSION=8
+     NAPI_CPP_EXCEPTIONS
+ )
+
+ # Platform-specific settings
+ if(APPLE)
+     target_link_libraries(${PROJECT_NAME} PRIVATE
+         "-framework Foundation"
+         "-framework Metal"
+         "-framework MetalKit"
+     )
+ endif()
+
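
dist/native-binding.js expects the addon this CMake project produces at build/Release/llama_binding.node, one directory above it. A hedged pre-flight check for that artifact, useful when the postinstall build may have been skipped; the error message is illustrative:

```typescript
import { existsSync } from "node:fs";
import { dirname, join } from "node:path";
import { fileURLToPath } from "node:url";

const here = dirname(fileURLToPath(import.meta.url));
const addonPath = join(here, "..", "build", "Release", "llama_binding.node");

if (!existsSync(addonPath)) {
  // Hypothetical message; the package's own loader would throw from require() instead.
  throw new Error(`Native addon missing at ${addonPath}; re-run the package's postinstall build.`);
}
```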