@llumiverse/drivers 0.18.0 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cjs/bedrock/index.js +19 -22
- package/lib/cjs/bedrock/index.js.map +1 -1
- package/lib/cjs/huggingface_ie.js +1 -1
- package/lib/cjs/huggingface_ie.js.map +1 -1
- package/lib/cjs/mistral/index.js +1 -1
- package/lib/cjs/mistral/index.js.map +1 -1
- package/lib/cjs/openai/index.js +10 -14
- package/lib/cjs/openai/index.js.map +1 -1
- package/lib/cjs/togetherai/index.js +1 -1
- package/lib/cjs/togetherai/index.js.map +1 -1
- package/lib/cjs/vertexai/index.js +81 -18
- package/lib/cjs/vertexai/index.js.map +1 -1
- package/lib/cjs/vertexai/models/claude.js +46 -66
- package/lib/cjs/vertexai/models/claude.js.map +1 -1
- package/lib/cjs/vertexai/models/gemini.js +413 -80
- package/lib/cjs/vertexai/models/gemini.js.map +1 -1
- package/lib/cjs/vertexai/models/llama.js +182 -0
- package/lib/cjs/vertexai/models/llama.js.map +1 -0
- package/lib/cjs/vertexai/models.js +4 -0
- package/lib/cjs/vertexai/models.js.map +1 -1
- package/lib/cjs/watsonx/index.js +1 -1
- package/lib/cjs/watsonx/index.js.map +1 -1
- package/lib/cjs/xai/index.js +1 -1
- package/lib/cjs/xai/index.js.map +1 -1
- package/lib/esm/bedrock/index.js +19 -22
- package/lib/esm/bedrock/index.js.map +1 -1
- package/lib/esm/huggingface_ie.js +1 -1
- package/lib/esm/huggingface_ie.js.map +1 -1
- package/lib/esm/mistral/index.js +1 -1
- package/lib/esm/mistral/index.js.map +1 -1
- package/lib/esm/openai/index.js +12 -16
- package/lib/esm/openai/index.js.map +1 -1
- package/lib/esm/togetherai/index.js +1 -1
- package/lib/esm/togetherai/index.js.map +1 -1
- package/lib/esm/vertexai/index.js +81 -18
- package/lib/esm/vertexai/index.js.map +1 -1
- package/lib/esm/vertexai/models/claude.js +46 -66
- package/lib/esm/vertexai/models/claude.js.map +1 -1
- package/lib/esm/vertexai/models/gemini.js +409 -76
- package/lib/esm/vertexai/models/gemini.js.map +1 -1
- package/lib/esm/vertexai/models/llama.js +178 -0
- package/lib/esm/vertexai/models/llama.js.map +1 -0
- package/lib/esm/vertexai/models.js +4 -0
- package/lib/esm/vertexai/models.js.map +1 -1
- package/lib/esm/watsonx/index.js +1 -1
- package/lib/esm/watsonx/index.js.map +1 -1
- package/lib/esm/xai/index.js +1 -1
- package/lib/esm/xai/index.js.map +1 -1
- package/lib/types/bedrock/index.d.ts.map +1 -1
- package/lib/types/groq/index.d.ts +1 -1
- package/lib/types/groq/index.d.ts.map +1 -1
- package/lib/types/huggingface_ie.d.ts +1 -1
- package/lib/types/huggingface_ie.d.ts.map +1 -1
- package/lib/types/mistral/index.d.ts +1 -1
- package/lib/types/mistral/index.d.ts.map +1 -1
- package/lib/types/openai/index.d.ts.map +1 -1
- package/lib/types/togetherai/index.d.ts +1 -1
- package/lib/types/togetherai/index.d.ts.map +1 -1
- package/lib/types/vertexai/index.d.ts +17 -7
- package/lib/types/vertexai/index.d.ts.map +1 -1
- package/lib/types/vertexai/models/claude.d.ts.map +1 -1
- package/lib/types/vertexai/models/gemini.d.ts +9 -6
- package/lib/types/vertexai/models/gemini.d.ts.map +1 -1
- package/lib/types/vertexai/models/llama.d.ts +20 -0
- package/lib/types/vertexai/models/llama.d.ts.map +1 -0
- package/lib/types/vertexai/models.d.ts +6 -2
- package/lib/types/vertexai/models.d.ts.map +1 -1
- package/lib/types/watsonx/index.d.ts +1 -1
- package/lib/types/watsonx/index.d.ts.map +1 -1
- package/lib/types/xai/index.d.ts +1 -1
- package/lib/types/xai/index.d.ts.map +1 -1
- package/package.json +16 -16
- package/src/bedrock/index.ts +19 -22
- package/src/groq/index.ts +1 -1
- package/src/huggingface_ie.ts +1 -1
- package/src/mistral/index.ts +1 -1
- package/src/openai/index.ts +12 -16
- package/src/togetherai/index.ts +1 -1
- package/src/vertexai/index.ts +95 -22
- package/src/vertexai/models/claude.ts +54 -69
- package/src/vertexai/models/gemini.ts +473 -93
- package/src/vertexai/models/llama.ts +261 -0
- package/src/vertexai/models.ts +6 -2
- package/src/watsonx/index.ts +1 -1
- package/src/xai/index.ts +1 -1
package/src/vertexai/models/llama.ts
ADDED
@@ -0,0 +1,261 @@
+import {
+    AIModel, Completion, CompletionChunk, CompletionChunkObject, ExecutionOptions, ModelType,
+    PromptOptions, PromptRole, PromptSegment,
+    TextFallbackOptions
+} from "@llumiverse/core";
+import { VertexAIDriver } from "../index.js";
+import { ModelDefinition } from "../models.js";
+import { transformSSEStream } from "@llumiverse/core/async";
+
+interface LLamaMessage {
+    role: string;
+    content: string;
+}
+
+interface LLamaPrompt {
+    messages: LLamaMessage[];
+}
+
+interface LLamaResponse {
+    id: string;
+    object: string;
+    created: number;
+    model: string;
+    choices: {
+        index: number;
+        message: {
+            role: string;
+            content: string;
+            refusal?: string;
+        };
+        finish_reason: string;
+    }[];
+    usage: {
+        prompt_tokens: number;
+        completion_tokens: number;
+        total_tokens: number;
+    };
+}
+
+interface LLamaStreamResponse {
+    id: string;
+    object: string;
+    created: number;
+    model: string;
+    choices: {
+        index: number;
+        delta: {
+            role?: string;
+            content?: string;
+            refusal?: string;
+        };
+        finish_reason?: string;
+    }[];
+    usage?: {
+        prompt_tokens: number;
+        completion_tokens: number;
+        total_tokens: number;
+    };
+}
+
+/**
+ * Convert a stream to a string
+ */
+async function streamToString(stream: any): Promise<string> {
+    const chunks: Buffer[] = [];
+    for await (const chunk of stream) {
+        chunks.push(Buffer.from(chunk));
+    }
+    return Buffer.concat(chunks).toString('utf-8');
+}
+
+/**
+ * Update the conversation messages
+ * @param conversation The previous conversation context
+ * @param prompt The new prompt to add to the conversation
+ * @returns Updated conversation with combined messages
+ */
+function updateConversation(conversation: LLamaPrompt | undefined | null, prompt: LLamaPrompt): LLamaPrompt {
+    const baseMessages = conversation ? conversation.messages : [];
+
+    return {
+        messages: [...baseMessages, ...(prompt.messages || [])],
+    };
+}
+
+export class LLamaModelDefinition implements ModelDefinition<LLamaPrompt> {
+
+    model: AIModel
+
+    constructor(modelId: string) {
+        this.model = {
+            id: modelId,
+            name: modelId,
+            provider: 'vertexai',
+            type: ModelType.Text,
+            can_stream: true,
+        } as AIModel;
+    }
+
+    // Return the appropriate region based on the Llama model
+    getLlamaModelRegion(modelName: string): string {
+        // Llama 4 models are in us-east5, Llama 3.x models are in us-central1
+        if (modelName.startsWith('llama-4')) {
+            return 'us-east5';
+        } else {
+            return 'us-central1';
+        }
+    }
+
+    async createPrompt(_driver: VertexAIDriver, segments: PromptSegment[], options: PromptOptions): Promise<LLamaPrompt> {
+        const messages: LLamaMessage[] = [];
+
+        // Process segments and convert them to the Llama MaaS format
+        for (const segment of segments) {
+            // Convert the prompt segments to messages
+            const role = segment.role === PromptRole.assistant ? 'assistant' : 'user';
+
+            // Combine files and text content if needed
+            let messageContent = segment.content || '';
+
+            if (segment.files && segment.files.length > 0) {
+                for (const file of segment.files) {
+                    if (file.mime_type?.startsWith("text/")) {
+                        const fileStream = await file.getStream();
+                        const fileContent = await streamToString(fileStream);
+                        messageContent += `\n\nFile content:\n${fileContent}`;
+                    }
+                }
+            }
+
+            messages.push({
+                role: role,
+                content: messageContent
+            });
+        }
+
+        if (options.result_schema) {
+            messages.push({
+                role: 'user',
+                content: "The answer must be a JSON object using the following JSON Schema:\n" + JSON.stringify(options.result_schema)
+            });
+        }
+
+        // Return the prompt in the format expected by Llama MaaS API
+        return {
+            messages: messages,
+        };
+    }
+
+    async requestTextCompletion(driver: VertexAIDriver, prompt: LLamaPrompt, options: ExecutionOptions): Promise<Completion> {
+        const splits = options.model.split("/");
+        const modelName = splits[splits.length - 1];
+
+        let conversation = updateConversation(options.conversation as LLamaPrompt, prompt);
+
+        const modelOptions = options.model_options as TextFallbackOptions;
+
+        const payload: Record<string, any> = {
+            model: `meta/${modelName}`,
+            messages: conversation.messages,
+            stream: false,
+            max_tokens: modelOptions?.max_tokens,
+            temperature: modelOptions?.temperature,
+            top_p: modelOptions?.top_p,
+            top_k: modelOptions?.top_k,
+            // Disable llama guard
+            extra_body: {
+                google: {
+                    model_safety_settings: {
+                        enabled: false,
+                        llama_guard_settings: {}
+                    }
+                }
+            }
+        };
+
+        // Make POST request to the Llama MaaS API
+        const region = this.getLlamaModelRegion(modelName);
+        const client = driver.getLLamaClient(region);
+        const openaiEndpoint = `endpoints/openapi/chat/completions`;
+        const result = await client.post(openaiEndpoint, {
+            payload
+        }) as LLamaResponse;
+
+        // Extract response data
+        const assistantMessage = result?.choices[0]?.message;
+        const text = assistantMessage?.content;
+
+        // Update conversation with the response
+        conversation = updateConversation(conversation, {
+            messages: [{
+                role: assistantMessage?.role,
+                content: text
+            }],
+        });
+
+        return {
+            result: text,
+            token_usage: {
+                prompt: result.usage.prompt_tokens,
+                result: result.usage.completion_tokens,
+                total: result.usage.total_tokens
+            },
+            finish_reason: result.choices[0].finish_reason,
+            conversation
+        };
+    }
+
+    async requestTextCompletionStream(driver: VertexAIDriver, prompt: LLamaPrompt, options: ExecutionOptions): Promise<AsyncIterable<CompletionChunk>> {
+        const splits = options.model.split("/");
+        const modelName = splits[splits.length - 1];
+
+        let conversation = updateConversation(options.conversation as LLamaPrompt, prompt);
+
+        const modelOptions = options.model_options as TextFallbackOptions;
+
+        const payload: Record<string, any> = {
+            model: `meta/${modelName}`,
+            messages: conversation.messages,
+            stream: true,
+            max_tokens: modelOptions?.max_tokens,
+            temperature: modelOptions?.temperature,
+            top_p: modelOptions?.top_p,
+            top_k: modelOptions?.top_k,
+            // Disable llama guard
+            extra_body: {
+                google: {
+                    model_safety_settings: {
+                        enabled: false,
+                        llama_guard_settings: {}
+                    }
+                }
+            }
+        };
+
+        // Make POST request to the Llama MaaS API
+        //TODO: Fix error handling with the fetch client, errors will return a empty response
+        //But not throw any error
+        const region = this.getLlamaModelRegion(modelName);
+        const client = driver.getLLamaClient(region);
+        const openaiEndpoint = `endpoints/openapi/chat/completions`;
+        const stream = await client.post(openaiEndpoint, {
+            payload,
+            reader: 'sse'
+        });
+
+        return transformSSEStream(stream, (data: string): CompletionChunkObject => {
+            const json = JSON.parse(data) as LLamaStreamResponse;
+            const choice = json.choices?.[0];
+            return {
+                result: choice?.delta?.content ?? '',
+                finish_reason: choice?.finish_reason,
+                token_usage: json.usage ? {
+                    prompt: json.usage.prompt_tokens,
+                    result: json.usage.completion_tokens,
+                    total: json.usage.total_tokens,
+                } : undefined
+            };
+        });
+    }
+}
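The new llama.ts module wires Meta's Llama models, served through Vertex AI's OpenAI-compatible MaaS endpoint, into the driver. A minimal usage sketch follows; the driver constructor option names and the execute() entry point are assumptions based on the common llumiverse driver pattern, not something this diff confirms:

    import { VertexAIDriver } from "@llumiverse/drivers";
    import { PromptRole } from "@llumiverse/core";

    // Hypothetical setup: the option names (project, region) are assumptions.
    const driver = new VertexAIDriver({ project: "my-gcp-project", region: "us-central1" });

    // The "meta" publisher segment routes to LLamaModelDefinition (see models.ts below);
    // the last path segment becomes the model name sent to the API as `meta/<name>`.
    const completion = await driver.execute(
        [{ role: PromptRole.user, content: "Say hello" }],
        {
            model: "publishers/meta/models/llama-3.3-70b-instruct-maas", // hypothetical model path
            model_options: { max_tokens: 256, temperature: 0.7 },
        } as any,
    );
    console.log(completion.result);

Note that getLlamaModelRegion() pins requests to us-east5 for model names starting with "llama-4" and to us-central1 otherwise, independently of whatever region the driver itself was configured with.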
package/src/vertexai/models.ts
CHANGED
@@ -1,14 +1,16 @@
-import { AIModel, Completion,
+import { AIModel, Completion, PromptOptions, PromptSegment, ExecutionOptions, CompletionChunk } from "@llumiverse/core";
 import { VertexAIDriver , trimModelName} from "./index.js";
 import { GeminiModelDefinition } from "./models/gemini.js";
 import { ClaudeModelDefinition } from "./models/claude.js";
+import { LLamaModelDefinition } from "./models/llama.js";
 
 export interface ModelDefinition<PromptT = any> {
     model: AIModel;
     versions?: string[]; // the versions of the model that are available. ex: ['001', '002']
     createPrompt: (driver: VertexAIDriver, segments: PromptSegment[], options: PromptOptions) => Promise<PromptT>;
     requestTextCompletion: (driver: VertexAIDriver, prompt: PromptT, options: ExecutionOptions) => Promise<Completion>;
-    requestTextCompletionStream: (driver: VertexAIDriver, prompt: PromptT, options: ExecutionOptions) => Promise<AsyncIterable<
+    requestTextCompletionStream: (driver: VertexAIDriver, prompt: PromptT, options: ExecutionOptions) => Promise<AsyncIterable<CompletionChunk>>;
+    preValidationProcessing?(result: Completion, options: ExecutionOptions): { result: Completion, options: ExecutionOptions };
 }
 
 export function getModelDefinition(model: string): ModelDefinition {
@@ -20,6 +22,8 @@ export function getModelDefinition(model: string): ModelDefinition {
         return new ClaudeModelDefinition(modelName);
     } else if (publisher?.includes("google")) {
         return new GeminiModelDefinition(modelName);
+    } else if (publisher?.includes("meta")) {
+        return new LLamaModelDefinition(modelName);
     }
 
     //Fallback, assume it is Gemini.
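With the new meta branch, getModelDefinition() dispatches on the publisher segment of the model path. A sketch of the routing behavior, assuming fully qualified Vertex AI model paths (the import below is hypothetical, and the Claude branch's publisher check is cut off by this hunk, though it presumably matches "anthropic"):

    import { getModelDefinition } from "./models.js"; // hypothetical relative import

    // publisher "meta" -> LLamaModelDefinition (new in 0.19.0)
    getModelDefinition("publishers/meta/models/llama-3.3-70b-instruct-maas");
    // publisher "google" -> GeminiModelDefinition
    getModelDefinition("publishers/google/models/gemini-1.5-pro");
    // any unmatched publisher falls through to the Gemini fallback below the hunk

The optional preValidationProcessing() hook added to ModelDefinition lets a model implementation rewrite a Completion (and its options) before result validation; only its signature is visible in this diff.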
package/src/watsonx/index.ts
CHANGED
@@ -1,6 +1,6 @@
 import { AIModel, AbstractDriver, Completion, CompletionChunk, DriverOptions, EmbeddingsOptions, EmbeddingsResult, ExecutionOptions, TextFallbackOptions } from "@llumiverse/core";
 import { transformSSEStream } from "@llumiverse/core/async";
-import { FetchClient } from "api-fetch-client";
+import { FetchClient } from "@vertesia/api-fetch-client";
 import { GenerateEmbeddingPayload, GenerateEmbeddingResponse, WatsonAuthToken, WatsonxListModelResponse, WatsonxModelSpec, WatsonxTextGenerationPayload, WatsonxTextGenerationResponse } from "./interfaces.js";
 
 interface WatsonxDriverOptions extends DriverOptions {
package/src/xai/index.ts
CHANGED
@@ -1,6 +1,6 @@
 import { AIModel, Completion, DriverOptions, ExecutionOptions, PromptOptions, PromptSegment } from "@llumiverse/core";
 import { formatOpenAILikeMultimodalPrompt, OpenAIPromptFormatterOptions } from "@llumiverse/core/formatters";
-import { FetchClient } from "api-fetch-client";
+import { FetchClient } from "@vertesia/api-fetch-client";
 import OpenAI from "openai";
 import { BaseOpenAIDriver } from "../openai/index.js";
 
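The watsonx and xai hunks above, like the groq, mistral, huggingface_ie, and togetherai changes in the file list, are the same one-line dependency move from api-fetch-client to the scoped @vertesia/api-fetch-client package. Downstream code that imported FetchClient directly needs only the import swap; a sketch, assuming the scoped package preserves the FetchClient API (which the otherwise unchanged call sites in this diff suggest):

    // before (0.18.0 dependency)
    // import { FetchClient } from "api-fetch-client";

    // after (0.19.0 dependency)
    import { FetchClient } from "@vertesia/api-fetch-client";

    // Constructor shape assumed unchanged from api-fetch-client; the URL is illustrative.
    const client = new FetchClient("https://example.com/api");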