@llumiverse/drivers 0.18.0 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/lib/cjs/bedrock/index.js +19 -22
  2. package/lib/cjs/bedrock/index.js.map +1 -1
  3. package/lib/cjs/huggingface_ie.js +1 -1
  4. package/lib/cjs/huggingface_ie.js.map +1 -1
  5. package/lib/cjs/mistral/index.js +1 -1
  6. package/lib/cjs/mistral/index.js.map +1 -1
  7. package/lib/cjs/openai/index.js +10 -14
  8. package/lib/cjs/openai/index.js.map +1 -1
  9. package/lib/cjs/togetherai/index.js +1 -1
  10. package/lib/cjs/togetherai/index.js.map +1 -1
  11. package/lib/cjs/vertexai/index.js +81 -18
  12. package/lib/cjs/vertexai/index.js.map +1 -1
  13. package/lib/cjs/vertexai/models/claude.js +46 -66
  14. package/lib/cjs/vertexai/models/claude.js.map +1 -1
  15. package/lib/cjs/vertexai/models/gemini.js +413 -80
  16. package/lib/cjs/vertexai/models/gemini.js.map +1 -1
  17. package/lib/cjs/vertexai/models/llama.js +182 -0
  18. package/lib/cjs/vertexai/models/llama.js.map +1 -0
  19. package/lib/cjs/vertexai/models.js +4 -0
  20. package/lib/cjs/vertexai/models.js.map +1 -1
  21. package/lib/cjs/watsonx/index.js +1 -1
  22. package/lib/cjs/watsonx/index.js.map +1 -1
  23. package/lib/cjs/xai/index.js +1 -1
  24. package/lib/cjs/xai/index.js.map +1 -1
  25. package/lib/esm/bedrock/index.js +19 -22
  26. package/lib/esm/bedrock/index.js.map +1 -1
  27. package/lib/esm/huggingface_ie.js +1 -1
  28. package/lib/esm/huggingface_ie.js.map +1 -1
  29. package/lib/esm/mistral/index.js +1 -1
  30. package/lib/esm/mistral/index.js.map +1 -1
  31. package/lib/esm/openai/index.js +12 -16
  32. package/lib/esm/openai/index.js.map +1 -1
  33. package/lib/esm/togetherai/index.js +1 -1
  34. package/lib/esm/togetherai/index.js.map +1 -1
  35. package/lib/esm/vertexai/index.js +81 -18
  36. package/lib/esm/vertexai/index.js.map +1 -1
  37. package/lib/esm/vertexai/models/claude.js +46 -66
  38. package/lib/esm/vertexai/models/claude.js.map +1 -1
  39. package/lib/esm/vertexai/models/gemini.js +409 -76
  40. package/lib/esm/vertexai/models/gemini.js.map +1 -1
  41. package/lib/esm/vertexai/models/llama.js +178 -0
  42. package/lib/esm/vertexai/models/llama.js.map +1 -0
  43. package/lib/esm/vertexai/models.js +4 -0
  44. package/lib/esm/vertexai/models.js.map +1 -1
  45. package/lib/esm/watsonx/index.js +1 -1
  46. package/lib/esm/watsonx/index.js.map +1 -1
  47. package/lib/esm/xai/index.js +1 -1
  48. package/lib/esm/xai/index.js.map +1 -1
  49. package/lib/types/bedrock/index.d.ts.map +1 -1
  50. package/lib/types/groq/index.d.ts +1 -1
  51. package/lib/types/groq/index.d.ts.map +1 -1
  52. package/lib/types/huggingface_ie.d.ts +1 -1
  53. package/lib/types/huggingface_ie.d.ts.map +1 -1
  54. package/lib/types/mistral/index.d.ts +1 -1
  55. package/lib/types/mistral/index.d.ts.map +1 -1
  56. package/lib/types/openai/index.d.ts.map +1 -1
  57. package/lib/types/togetherai/index.d.ts +1 -1
  58. package/lib/types/togetherai/index.d.ts.map +1 -1
  59. package/lib/types/vertexai/index.d.ts +17 -7
  60. package/lib/types/vertexai/index.d.ts.map +1 -1
  61. package/lib/types/vertexai/models/claude.d.ts.map +1 -1
  62. package/lib/types/vertexai/models/gemini.d.ts +9 -6
  63. package/lib/types/vertexai/models/gemini.d.ts.map +1 -1
  64. package/lib/types/vertexai/models/llama.d.ts +20 -0
  65. package/lib/types/vertexai/models/llama.d.ts.map +1 -0
  66. package/lib/types/vertexai/models.d.ts +6 -2
  67. package/lib/types/vertexai/models.d.ts.map +1 -1
  68. package/lib/types/watsonx/index.d.ts +1 -1
  69. package/lib/types/watsonx/index.d.ts.map +1 -1
  70. package/lib/types/xai/index.d.ts +1 -1
  71. package/lib/types/xai/index.d.ts.map +1 -1
  72. package/package.json +16 -16
  73. package/src/bedrock/index.ts +19 -22
  74. package/src/groq/index.ts +1 -1
  75. package/src/huggingface_ie.ts +1 -1
  76. package/src/mistral/index.ts +1 -1
  77. package/src/openai/index.ts +12 -16
  78. package/src/togetherai/index.ts +1 -1
  79. package/src/vertexai/index.ts +95 -22
  80. package/src/vertexai/models/claude.ts +54 -69
  81. package/src/vertexai/models/gemini.ts +473 -93
  82. package/src/vertexai/models/llama.ts +261 -0
  83. package/src/vertexai/models.ts +6 -2
  84. package/src/watsonx/index.ts +1 -1
  85. package/src/xai/index.ts +1 -1
package/src/vertexai/models/llama.ts ADDED
@@ -0,0 +1,261 @@
+import {
+    AIModel, Completion, CompletionChunk, CompletionChunkObject, ExecutionOptions, ModelType,
+    PromptOptions, PromptRole, PromptSegment,
+    TextFallbackOptions
+} from "@llumiverse/core";
+import { VertexAIDriver } from "../index.js";
+import { ModelDefinition } from "../models.js";
+import { transformSSEStream } from "@llumiverse/core/async";
+
+interface LLamaMessage {
+    role: string;
+    content: string;
+}
+
+interface LLamaPrompt {
+    messages: LLamaMessage[];
+}
+
+interface LLamaResponse {
+    id: string;
+    object: string;
+    created: number;
+    model: string;
+    choices: {
+        index: number;
+        message: {
+            role: string;
+            content: string;
+            refusal?: string;
+        };
+        finish_reason: string;
+    }[];
+    usage: {
+        prompt_tokens: number;
+        completion_tokens: number;
+        total_tokens: number;
+    };
+}
+
+interface LLamaStreamResponse {
+    id: string;
+    object: string;
+    created: number;
+    model: string;
+    choices: {
+        index: number;
+        delta: {
+            role?: string;
+            content?: string;
+            refusal?: string;
+        };
+        finish_reason?: string;
+    }[];
+    usage?: {
+        prompt_tokens: number;
+        completion_tokens: number;
+        total_tokens: number;
+    };
+}
+
+/**
+ * Convert a stream to a string
+ */
+async function streamToString(stream: any): Promise<string> {
+    const chunks: Buffer[] = [];
+    for await (const chunk of stream) {
+        chunks.push(Buffer.from(chunk));
+    }
+    return Buffer.concat(chunks).toString('utf-8');
+}
+
+/**
+ * Update the conversation messages
+ * @param conversation The previous conversation context
+ * @param prompt The new prompt to add to the conversation
+ * @returns Updated conversation with combined messages
+ */
+function updateConversation(conversation: LLamaPrompt | undefined | null, prompt: LLamaPrompt): LLamaPrompt {
+    const baseMessages = conversation ? conversation.messages : [];
+
+    return {
+        messages: [...baseMessages, ...(prompt.messages || [])],
+    };
+}
+
+export class LLamaModelDefinition implements ModelDefinition<LLamaPrompt> {
+
+    model: AIModel
+
+    constructor(modelId: string) {
+        this.model = {
+            id: modelId,
+            name: modelId,
+            provider: 'vertexai',
+            type: ModelType.Text,
+            can_stream: true,
+        } as AIModel;
+    }
+
+    // Return the appropriate region based on the Llama model
+    getLlamaModelRegion(modelName: string): string {
+        // Llama 4 models are in us-east5, Llama 3.x models are in us-central1
+        if (modelName.startsWith('llama-4')) {
+            return 'us-east5';
+        } else {
+            return 'us-central1';
+        }
+    }
+
+    async createPrompt(_driver: VertexAIDriver, segments: PromptSegment[], options: PromptOptions): Promise<LLamaPrompt> {
+        const messages: LLamaMessage[] = [];
+
+        // Process segments and convert them to the Llama MaaS format
+        for (const segment of segments) {
+            // Convert the prompt segments to messages
+            const role = segment.role === PromptRole.assistant ? 'assistant' : 'user';
+
+            // Combine files and text content if needed
+            let messageContent = segment.content || '';
+
+            if (segment.files && segment.files.length > 0) {
+                for (const file of segment.files) {
+                    if (file.mime_type?.startsWith("text/")) {
+                        const fileStream = await file.getStream();
+                        const fileContent = await streamToString(fileStream);
+                        messageContent += `\n\nFile content:\n${fileContent}`;
+                    }
+                }
+            }
+
+            messages.push({
+                role: role,
+                content: messageContent
+            });
+        }
+
+        if (options.result_schema) {
+            messages.push({
+                role: 'user',
+                content: "The answer must be a JSON object using the following JSON Schema:\n" + JSON.stringify(options.result_schema)
+            });
+        }
+
+        // Return the prompt in the format expected by Llama MaaS API
+        return {
+            messages: messages,
+        };
+    }
+
+    async requestTextCompletion(driver: VertexAIDriver, prompt: LLamaPrompt, options: ExecutionOptions): Promise<Completion> {
+        const splits = options.model.split("/");
+        const modelName = splits[splits.length - 1];
+
+        let conversation = updateConversation(options.conversation as LLamaPrompt, prompt);
+
+        const modelOptions = options.model_options as TextFallbackOptions;
+
+        const payload: Record<string, any> = {
+            model: `meta/${modelName}`,
+            messages: conversation.messages,
+            stream: false,
+            max_tokens: modelOptions?.max_tokens,
+            temperature: modelOptions?.temperature,
+            top_p: modelOptions?.top_p,
+            top_k: modelOptions?.top_k,
+            // Disable llama guard
+            extra_body: {
+                google: {
+                    model_safety_settings: {
+                        enabled: false,
+                        llama_guard_settings: {}
+                    }
+                }
+            }
+        };
+
+        // Make POST request to the Llama MaaS API
+        const region = this.getLlamaModelRegion(modelName);
+        const client = driver.getLLamaClient(region);
+        const openaiEndpoint = `endpoints/openapi/chat/completions`;
+        const result = await client.post(openaiEndpoint, {
+            payload
+        }) as LLamaResponse;
+
+        // Extract response data
+        const assistantMessage = result?.choices[0]?.message;
+        const text = assistantMessage?.content;
+
+        // Update conversation with the response
+        conversation = updateConversation(conversation, {
+            messages: [{
+                role: assistantMessage?.role,
+                content: text
+            }],
+        });
+
+        return {
+            result: text,
+            token_usage: {
+                prompt: result.usage.prompt_tokens,
+                result: result.usage.completion_tokens,
+                total: result.usage.total_tokens
+            },
+            finish_reason: result.choices[0].finish_reason,
+            conversation
+        };
+    }
+
+    async requestTextCompletionStream(driver: VertexAIDriver, prompt: LLamaPrompt, options: ExecutionOptions): Promise<AsyncIterable<CompletionChunk>> {
+        const splits = options.model.split("/");
+        const modelName = splits[splits.length - 1];
+
+        let conversation = updateConversation(options.conversation as LLamaPrompt, prompt);
+
+        const modelOptions = options.model_options as TextFallbackOptions;
+
+        const payload: Record<string, any> = {
+            model: `meta/${modelName}`,
+            messages: conversation.messages,
+            stream: true,
+            max_tokens: modelOptions?.max_tokens,
+            temperature: modelOptions?.temperature,
+            top_p: modelOptions?.top_p,
+            top_k: modelOptions?.top_k,
+            // Disable llama guard
+            extra_body: {
+                google: {
+                    model_safety_settings: {
+                        enabled: false,
+                        llama_guard_settings: {}
+                    }
+                }
+            }
+        };
+
+        // Make POST request to the Llama MaaS API
+        // TODO: Fix error handling with the fetch client; errors will return an empty response
+        // but not throw any error
+        const region = this.getLlamaModelRegion(modelName);
+        const client = driver.getLLamaClient(region);
+        const openaiEndpoint = `endpoints/openapi/chat/completions`;
+        const stream = await client.post(openaiEndpoint, {
+            payload,
+            reader: 'sse'
+        });
+
+        return transformSSEStream(stream, (data: string): CompletionChunkObject => {
+            const json = JSON.parse(data) as LLamaStreamResponse;
+            const choice = json.choices?.[0];
+            return {
+                result: choice?.delta?.content ?? '',
+                finish_reason: choice?.finish_reason,
+                token_usage: json.usage ? {
+                    prompt: json.usage.prompt_tokens,
+                    result: json.usage.completion_tokens,
+                    total: json.usage.total_tokens,
+                } : undefined
+            };
+        });
+    }
+}
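
The new Llama definition follows the OpenAI chat-completions shape end to end: getModelDefinition routes "meta" publishers to it, the last path segment of the model id becomes the upstream model name (sent as meta/<name>), and getLlamaModelRegion picks the MaaS region. A minimal usage sketch follows; the driver setup and the model id are illustrative assumptions, not taken from this diff.

// Sketch only: assumes an already-configured VertexAIDriver and an
// illustrative Llama model id; neither is pinned down by this diff.
import { ExecutionOptions, PromptOptions, PromptRole } from "@llumiverse/core";
import { VertexAIDriver } from "./index.js";
import { getModelDefinition } from "./models.js";

async function runLlama(driver: VertexAIDriver) {
    const modelId = "publishers/meta/models/llama-4-scout-17b-16e-instruct-maas"; // hypothetical id
    const def = getModelDefinition(modelId); // publisher "meta" -> LLamaModelDefinition

    const prompt = await def.createPrompt(driver, [
        { role: PromptRole.user, content: "Say hello in one word." },
    ], {} as PromptOptions);

    // Non-streaming call; getLlamaModelRegion() maps llama-4* ids to us-east5.
    const completion = await def.requestTextCompletion(driver, prompt, {
        model: modelId,
        model_options: { max_tokens: 64, temperature: 0.2 },
    } as ExecutionOptions);

    console.log(completion.result, completion.token_usage);
}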
package/src/vertexai/models.ts CHANGED
@@ -1,14 +1,16 @@
-import { AIModel, Completion, CompletionChunkObject, PromptOptions, PromptSegment, ExecutionOptions } from "@llumiverse/core";
+import { AIModel, Completion, PromptOptions, PromptSegment, ExecutionOptions, CompletionChunk } from "@llumiverse/core";
 import { VertexAIDriver , trimModelName} from "./index.js";
 import { GeminiModelDefinition } from "./models/gemini.js";
 import { ClaudeModelDefinition } from "./models/claude.js";
+import { LLamaModelDefinition } from "./models/llama.js";
 
 export interface ModelDefinition<PromptT = any> {
     model: AIModel;
     versions?: string[]; // the versions of the model that are available. ex: ['001', '002']
     createPrompt: (driver: VertexAIDriver, segments: PromptSegment[], options: PromptOptions) => Promise<PromptT>;
     requestTextCompletion: (driver: VertexAIDriver, prompt: PromptT, options: ExecutionOptions) => Promise<Completion>;
-    requestTextCompletionStream: (driver: VertexAIDriver, prompt: PromptT, options: ExecutionOptions) => Promise<AsyncIterable<CompletionChunkObject>>;
+    requestTextCompletionStream: (driver: VertexAIDriver, prompt: PromptT, options: ExecutionOptions) => Promise<AsyncIterable<CompletionChunk>>;
+    preValidationProcessing?(result: Completion, options: ExecutionOptions): { result: Completion, options: ExecutionOptions };
 }
 
 export function getModelDefinition(model: string): ModelDefinition {
@@ -20,6 +22,8 @@ export function getModelDefinition(model: string): ModelDefinition {
         return new ClaudeModelDefinition(modelName);
     } else if (publisher?.includes("google")) {
         return new GeminiModelDefinition(modelName);
+    } else if (publisher?.includes("meta")) {
+        return new LLamaModelDefinition(modelName);
     }
 
     //Fallback, assume it is Gemini.
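
Besides the wider CompletionChunk stream type, the ModelDefinition interface gains an optional preValidationProcessing hook, letting a definition rewrite a completion (and its options) before result validation runs. A hedged sketch of what an implementation might look like; the fence-stripping logic is illustrative, not from this diff.

// Illustrative only: shows the shape of the new optional hook.
// How and when the driver invokes it is not visible in this diff.
import { Completion, ExecutionOptions } from "@llumiverse/core";

class ExampleDefinition /* implements ModelDefinition<MyPrompt> */ {
    preValidationProcessing(result: Completion, options: ExecutionOptions): { result: Completion, options: ExecutionOptions } {
        // e.g. strip markdown code fences so a JSON answer can be schema-validated
        if (typeof result.result === "string") {
            const cleaned = result.result.replace(/^```(?:json)?\s*|\s*```\s*$/g, "");
            return { result: { ...result, result: cleaned }, options };
        }
        return { result, options };
    }
}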
package/src/watsonx/index.ts CHANGED
@@ -1,6 +1,6 @@
 import { AIModel, AbstractDriver, Completion, CompletionChunk, DriverOptions, EmbeddingsOptions, EmbeddingsResult, ExecutionOptions, TextFallbackOptions } from "@llumiverse/core";
 import { transformSSEStream } from "@llumiverse/core/async";
-import { FetchClient } from "api-fetch-client";
+import { FetchClient } from "@vertesia/api-fetch-client";
 import { GenerateEmbeddingPayload, GenerateEmbeddingResponse, WatsonAuthToken, WatsonxListModelResponse, WatsonxModelSpec, WatsonxTextGenerationPayload, WatsonxTextGenerationResponse } from "./interfaces.js";
 
 interface WatsonxDriverOptions extends DriverOptions {
package/src/xai/index.ts CHANGED
@@ -1,6 +1,6 @@
 import { AIModel, Completion, DriverOptions, ExecutionOptions, PromptOptions, PromptSegment } from "@llumiverse/core";
 import { formatOpenAILikeMultimodalPrompt, OpenAIPromptFormatterOptions } from "@llumiverse/core/formatters";
-import { FetchClient } from "api-fetch-client";
+import { FetchClient } from "@vertesia/api-fetch-client";
 import OpenAI from "openai";
 import { BaseOpenAIDriver } from "../openai/index.js";
 