graphlit-client 1.0.20250613009 → 1.0.20250615002
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +121 -21
- package/dist/client.d.ts +91 -1
- package/dist/client.js +473 -24
- package/dist/generated/graphql-documents.js +7 -0
- package/dist/generated/graphql-types.d.ts +7 -0
- package/dist/model-mapping.js +96 -1
- package/dist/streaming/llm-formatters.d.ts +72 -0
- package/dist/streaming/llm-formatters.js +205 -0
- package/dist/streaming/providers.d.ts +31 -1
- package/dist/streaming/providers.js +678 -1
- package/package.json +17 -1
@@ -27,6 +27,15 @@ function cleanSchemaForGoogle(schema) {
|
|
27
27
|
if (key === "$schema" || key === "additionalProperties") {
|
28
28
|
continue;
|
29
29
|
}
|
30
|
+
// Handle format field for string types - Google only supports 'enum' and 'date-time'
|
31
|
+
if (key === "format" && typeof value === "string") {
|
32
|
+
// Only keep supported formats
|
33
|
+
if (value === "enum" || value === "date-time") {
|
34
|
+
cleaned[key] = value;
|
35
|
+
}
|
36
|
+
// Skip unsupported formats like "date", "time", "email", etc.
|
37
|
+
continue;
|
38
|
+
}
|
30
39
|
// Recursively clean nested objects
|
31
40
|
cleaned[key] = cleanSchemaForGoogle(value);
|
32
41
|
}
|
@@ -61,7 +70,7 @@ onEvent, onComplete) {
|
|
61
70
|
try {
|
62
71
|
const modelName = getModelName(specification);
|
63
72
|
if (!modelName) {
|
64
|
-
throw new Error(`No model name found for
|
73
|
+
throw new Error(`No model name found for specification: ${specification.name} (service: ${specification.serviceType})`);
|
65
74
|
}
|
66
75
|
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
67
76
|
console.log(`🤖 [OpenAI] Model Config: Service=OpenAI | Model=${modelName} | Temperature=${specification.openAI?.temperature} | MaxTokens=${specification.openAI?.completionTokenLimit || "null"} | Tools=${tools?.length || 0} | Spec="${specification.name}"`);
|
@@ -979,3 +988,671 @@ onEvent, onComplete) {
|
|
979
988
|
throw error;
|
980
989
|
}
|
981
990
|
}
|
991
|
+
/**
 * Stream a chat completion through the Groq SDK.
 *
 * Groq exposes an OpenAI-compatible chat completions API, so this function
 * delegates directly to the OpenAI streaming implementation, passing the
 * Groq client in place of an OpenAI client.
 *
 * @param specification - Graphlit specification describing the model/service
 * @param messages - Conversation messages (OpenAI chat format)
 * @param tools - Optional tool definitions to expose to the model
 * @param groqClient - Groq client instance (OpenAI-compatible)
 * @param onEvent - Callback invoked for each streaming event
 * @param onComplete - Callback invoked with the final message and tool calls
 */
export async function streamWithGroq(specification, messages, tools, groqClient, // Groq client instance (OpenAI-compatible)
onEvent, onComplete) {
    // Pure delegation: Groq's wire protocol matches OpenAI's streaming API.
    return await streamWithOpenAI(specification, messages, tools, groqClient, onEvent, onComplete);
}
|
999
|
+
/**
 * Stream a chat completion through the Cerebras SDK.
 *
 * Cerebras exposes an OpenAI-compatible chat completions API, so this
 * function delegates directly to the OpenAI streaming implementation with
 * the Cerebras-configured client.
 *
 * @param specification - Graphlit specification describing the model/service
 * @param messages - Conversation messages (OpenAI chat format)
 * @param tools - Optional tool definitions to expose to the model
 * @param cerebrasClient - OpenAI client instance configured for Cerebras
 * @param onEvent - Callback invoked for each streaming event
 * @param onComplete - Callback invoked with the final message and tool calls
 */
export async function streamWithCerebras(specification, messages, tools, cerebrasClient, // OpenAI client instance configured for Cerebras
onEvent, onComplete) {
    // Pure delegation: Cerebras' wire protocol matches OpenAI's streaming API.
    return await streamWithOpenAI(specification, messages, tools, cerebrasClient, onEvent, onComplete);
}
|
1007
|
+
/**
 * Stream with Deepseek SDK (OpenAI-compatible).
 *
 * Consumes the Deepseek chat-completions stream, emitting incremental
 * events while accumulating the full message text and any tool calls.
 * Tool calls whose accumulated `arguments` are not valid JSON (i.e. the
 * stream ended mid-arguments) are filtered out before `onComplete`.
 *
 * Events emitted via `onEvent`:
 *   - "message"          cumulative message text so far
 *   - "tool_call_parsed" when a new tool call block first appears
 *   - "tool_call_delta"  per-chunk tool argument fragments
 *   - "complete"         once the stream finishes, with total token count
 *   - "error"            on failure (the error is also rethrown)
 *
 * @param specification - Graphlit specification (uses `deepseek` settings)
 * @param messages - Conversation messages (OpenAI chat format)
 * @param tools - Optional tool definitions (JSON-schema strings in `schema`)
 * @param deepseekClient - OpenAI client instance configured for Deepseek
 * @param onEvent - Callback invoked for each streaming event
 * @param onComplete - Callback invoked with (fullMessage, validToolCalls)
 * @throws when no model name can be resolved, or on any streaming error
 */
export async function streamWithDeepseek(specification, messages, tools, deepseekClient, // OpenAI client instance configured for Deepseek
onEvent, onComplete) {
    let fullMessage = "";
    let toolCalls = [];
    // Performance metrics
    const startTime = Date.now();
    let firstTokenTime = 0;
    let firstMeaningfulContentTime = 0;
    let tokenCount = 0;
    let toolArgumentTokens = 0; // counts argument delta chunks, not true tokens
    let lastEventTime = 0;
    const interTokenDelays = [];
    // Tool calling metrics.
    // NOTE(review): this structure is currently write-only in this function
    // (nothing reads it back); kept for parity with other providers — confirm
    // whether downstream tooling inspects it before removing.
    const toolMetrics = {
        totalTools: 0,
        successfulTools: 0,
        failedTools: 0,
        toolTimes: [],
        currentToolStart: 0,
        roundStartTime: startTime,
        rounds: [],
        currentRound: 1,
    };
    try {
        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
            console.log(`🔍 [Deepseek] Specification object:`, {
                name: specification.name,
                serviceType: specification.serviceType,
                deepseek: specification.deepseek,
                hasDeepseekModel: !!specification.deepseek?.model,
                deepseekModelValue: specification.deepseek?.model
            });
        }
        const modelName = getModelName(specification);
        if (!modelName) {
            console.error(`❌ [Deepseek] Model resolution failed:`, {
                name: specification.name,
                serviceType: specification.serviceType,
                deepseek: specification.deepseek,
                hasCustomModelName: !!specification.deepseek?.modelName
            });
            throw new Error(`No model name found for specification: ${specification.name} (service: ${specification.serviceType})`);
        }
        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
            console.log(`🤖 [Deepseek] Model Config: Service=Deepseek | Model=${modelName} | Temperature=${specification.deepseek?.temperature} | MaxTokens=${specification.deepseek?.completionTokenLimit || "null"} | Tools=${tools?.length || 0} | Spec="${specification.name}"`);
        }
        const streamConfig = {
            model: modelName,
            messages,
            stream: true,
            temperature: specification.deepseek?.temperature,
        };
        // Only add max_completion_tokens if it's defined
        if (specification.deepseek?.completionTokenLimit) {
            streamConfig.max_completion_tokens =
                specification.deepseek.completionTokenLimit;
        }
        // Add tools if provided (OpenAI function-calling shape)
        if (tools && tools.length > 0) {
            streamConfig.tools = tools.map((tool) => ({
                type: "function",
                function: {
                    name: tool.name,
                    description: tool.description,
                    parameters: tool.schema ? JSON.parse(tool.schema) : {},
                },
            }));
        }
        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
            console.log(`⏱️ [Deepseek] Starting LLM call at: ${new Date().toISOString()}`);
        }
        const stream = await deepseekClient.chat.completions.create(streamConfig);
        for await (const chunk of stream) {
            const delta = chunk.choices[0]?.delta;
            if (!delta)
                continue;
            const currentTime = Date.now();
            // Track first token time
            if (firstTokenTime === 0) {
                firstTokenTime = currentTime - startTime;
            }
            // Track inter-token delays
            if (lastEventTime > 0) {
                const delay = currentTime - lastEventTime;
                interTokenDelays.push(delay);
            }
            lastEventTime = currentTime;
            // Handle message content
            if (delta.content) {
                tokenCount++;
                fullMessage += delta.content;
                // Track first meaningful (non-whitespace) content
                if (firstMeaningfulContentTime === 0 && fullMessage.trim().length > 0) {
                    firstMeaningfulContentTime = currentTime - startTime;
                }
                onEvent({
                    type: "message",
                    message: fullMessage,
                });
                // (Removed dead code: a per-10-token block here computed
                // tokens-per-second and the average inter-token delay —
                // including an O(n) reduce over interTokenDelays — and then
                // discarded all three values without using them.)
            }
            // Handle tool calls
            if (delta.tool_calls) {
                for (const toolCall of delta.tool_calls) {
                    const index = toolCall.index;
                    // Initialize tool call if it doesn't exist
                    if (!toolCalls[index]) {
                        toolCalls[index] = {
                            id: toolCall.id || `tool_${index}`,
                            name: toolCall.function?.name || "",
                            arguments: "",
                        };
                        // Start tool timing
                        toolMetrics.totalTools++;
                        toolMetrics.currentToolStart = currentTime;
                        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
                            console.log(`🔧 [Deepseek] Tool call started: ${toolCalls[index].name}`);
                        }
                        onEvent({
                            type: "tool_call_parsed",
                            toolCall: { ...toolCalls[index] },
                        });
                    }
                    // Update tool call name if provided
                    if (toolCall.function?.name) {
                        toolCalls[index].name = toolCall.function.name;
                    }
                    // Accumulate arguments
                    if (toolCall.function?.arguments) {
                        toolCalls[index].arguments += toolCall.function.arguments;
                        toolArgumentTokens++;
                    }
                    // Update with current state
                    onEvent({
                        type: "tool_call_delta",
                        toolCallId: toolCalls[index].id,
                        argumentDelta: toolCall.function?.arguments || "",
                    });
                }
            }
        }
        // Process completed tool calls: drop any whose arguments never
        // became valid JSON (stream was truncated mid-arguments).
        const validToolCalls = toolCalls.filter((tc, idx) => {
            if (!isValidJSON(tc.arguments)) {
                console.warn(`[Deepseek] Filtering out incomplete tool call ${idx} (${tc.name}) with INVALID JSON (${tc.arguments.length} chars)`);
                return false;
            }
            return true;
        });
        if (toolCalls.length !== validToolCalls.length) {
            console.log(`[Deepseek] Filtered out ${toolCalls.length - validToolCalls.length} incomplete tool calls`);
        }
        // Final performance metrics
        const totalTime = Date.now() - startTime;
        const totalTokens = tokenCount + toolArgumentTokens;
        const tokensPerSecond = totalTokens > 0 ? totalTokens / (totalTime / 1000) : 0;
        if (process.env.DEBUG_GRAPHLIT_SDK_METRICS) {
            const metricsData = {
                totalTime: `${totalTime}ms`,
                ttft: `${firstTokenTime}ms`,
                ttfmc: firstMeaningfulContentTime > 0
                    ? `${firstMeaningfulContentTime}ms`
                    : null,
                contentTokens: tokenCount,
                toolTokens: toolArgumentTokens,
                totalTokens: totalTokens,
                tps: tokensPerSecond.toFixed(2),
            };
            console.log(`📊 [Deepseek] Performance: Total=${metricsData.totalTime} | TTFT=${metricsData.ttft}${metricsData.ttfmc ? ` | TTFMC=${metricsData.ttfmc}` : ""} | Tokens(content/tool/total)=${metricsData.contentTokens}/${metricsData.toolTokens}/${metricsData.totalTokens} | TPS=${metricsData.tps}`);
        }
        // Send completion event
        onEvent({
            type: "complete",
            tokens: totalTokens,
        });
        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
            console.log(`✅ [Deepseek] Stream completed: ${fullMessage.length} chars, ${validToolCalls.length} tools`);
        }
        onComplete(fullMessage, validToolCalls);
    }
    catch (error) {
        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
            console.error(`❌ [Deepseek] Stream error:`, error);
        }
        onEvent({
            type: "error",
            error: `Deepseek streaming error: ${error}`,
        });
        throw error;
    }
}
|
1208
|
+
/**
 * Stream with Cohere SDK.
 *
 * Consumes a Cohere chat stream via `cohereClient.chatStream`, emitting
 * "token" events for text chunks and "tool_call_start"/"tool_call_parsed"
 * events for tool calls, then invokes `onComplete` with the accumulated
 * message text and all tool calls.
 *
 * Note: tool definitions are translated from JSON Schema into Cohere's
 * `parameter_definitions` shape, and Cohere delivers tool calls already
 * fully parsed (no argument deltas), so each tool call is emitted as
 * start + parsed back-to-back.
 *
 * @param specification - Graphlit specification (uses `cohere` settings)
 * @param messages - Conversation messages in chronological order
 * @param tools - Optional tool definitions (JSON-schema strings in `schema`)
 * @param cohereClient - CohereClient instance
 * @param onEvent - Callback invoked for each streaming event
 * @param onComplete - Callback invoked with (fullMessage, toolCalls)
 * @throws when no model name resolves, messages are empty, or on stream error
 */
export async function streamWithCohere(specification, messages, tools, cohereClient, // CohereClient instance
onEvent, onComplete) {
    let fullMessage = "";
    let toolCalls = [];
    // Performance metrics
    const startTime = Date.now();
    let firstTokenTime = 0;
    let tokenCount = 0;
    try {
        const modelName = getModelName(specification);
        if (!modelName) {
            throw new Error(`No model name found for Cohere specification: ${specification.name}`);
        }
        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
            console.log(`🤖 [Cohere] Model Config: Service=Cohere | Model=${modelName} | Temperature=${specification.cohere?.temperature} | Tools=${tools?.length || 0} | Spec="${specification.name}"`);
        }
        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
            console.log(`🔍 [Cohere] Messages array length: ${messages.length}`);
            console.log(`🔍 [Cohere] All messages:`, JSON.stringify(messages, null, 2));
        }
        if (messages.length === 0) {
            throw new Error("No messages found for Cohere streaming");
        }
        const streamConfig = {
            model: modelName,
            messages: messages, // All messages in chronological order
        };
        // Only add temperature if it's defined (0 is a valid temperature)
        if (specification.cohere?.temperature !== undefined) {
            streamConfig.temperature = specification.cohere.temperature;
        }
        // Add tools if provided, converting each JSON Schema into
        // Cohere's parameter_definitions format.
        if (tools && tools.length > 0) {
            streamConfig.tools = tools.map((tool) => {
                // No schema: expose the tool with no parameters.
                if (!tool.schema) {
                    return {
                        name: tool.name,
                        description: tool.description,
                        parameter_definitions: {},
                    };
                }
                // Parse the JSON schema
                // NOTE(review): JSON.parse will throw here on a malformed
                // schema string and abort the whole stream setup.
                const schema = JSON.parse(tool.schema);
                // Convert JSON Schema to Cohere's expected format
                const parameter_definitions = {};
                if (schema.properties) {
                    for (const [key, value] of Object.entries(schema.properties)) {
                        const prop = value;
                        const paramDef = {
                            type: prop.type || "string",
                            description: prop.description || "",
                            required: schema.required?.includes(key) || false,
                        };
                        // Add additional properties that Cohere might expect
                        if (prop.enum) {
                            paramDef.options = prop.enum;
                        }
                        if (prop.default !== undefined) {
                            paramDef.default = prop.default;
                        }
                        if (prop.items) {
                            paramDef.items = prop.items;
                        }
                        parameter_definitions[key] = paramDef;
                    }
                }
                return {
                    name: tool.name,
                    description: tool.description,
                    parameter_definitions, // Use snake_case as expected by Cohere API
                };
            });
        }
        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
            console.log(`🔍 [Cohere] Final stream config:`, JSON.stringify(streamConfig, null, 2));
            console.log(`🔍 [Cohere] Cohere client methods available:`, Object.getOwnPropertyNames(cohereClient));
            console.log(`🔍 [Cohere] Has chatStream method:`, typeof cohereClient.chatStream === 'function');
            console.log(`🔍 [Cohere] Has chat property:`, !!cohereClient.chat);
            if (cohereClient.chat) {
                console.log(`🔍 [Cohere] Chat methods:`, Object.getOwnPropertyNames(cohereClient.chat));
            }
            console.log(`⏱️ [Cohere] Starting stream request at: ${new Date().toISOString()}`);
        }
        let stream;
        try {
            stream = await cohereClient.chatStream(streamConfig);
        }
        catch (streamError) {
            // Stream creation failed; surface as much response detail as
            // possible (the Cohere SDK error shape varies), then rethrow.
            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
                console.error(`❌ [Cohere] Stream creation failed:`, streamError);
                if (streamError.response) {
                    console.error(`❌ [Cohere] Stream response status: ${streamError.response.status}`);
                    console.error(`❌ [Cohere] Stream response data:`, streamError.response.data);
                }
                if (streamError.status) {
                    console.error(`❌ [Cohere] Direct status: ${streamError.status}`);
                }
                if (streamError.body) {
                    console.error(`❌ [Cohere] Response body:`, streamError.body);
                }
            }
            throw streamError;
        }
        // Consume stream chunks; eventType discriminates text vs tool calls.
        for await (const chunk of stream) {
            if (chunk.eventType === "text-generation") {
                const text = chunk.text;
                if (text) {
                    fullMessage += text;
                    tokenCount++;
                    if (firstTokenTime === 0) {
                        firstTokenTime = Date.now() - startTime;
                        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
                            console.log(`⚡ [Cohere] Time to First Token: ${firstTokenTime}ms`);
                        }
                    }
                    onEvent({
                        type: "token",
                        token: text,
                    });
                }
            }
            else if (chunk.eventType === "tool-calls-generation") {
                // Handle tool calls — Cohere delivers them fully formed,
                // with parameters already parsed (no incremental deltas).
                if (chunk.toolCalls) {
                    for (const toolCall of chunk.toolCalls) {
                        // Cohere does not supply an id; synthesize one.
                        const id = `tool_${Date.now()}_${toolCalls.length}`;
                        const formattedToolCall = {
                            id,
                            name: toolCall.name,
                            arguments: JSON.stringify(toolCall.parameters),
                        };
                        toolCalls.push(formattedToolCall);
                        onEvent({
                            type: "tool_call_start",
                            toolCall: { id, name: toolCall.name },
                        });
                        onEvent({
                            type: "tool_call_parsed",
                            toolCall: formattedToolCall,
                        });
                    }
                }
            }
        }
        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
            console.log(`✅ [Cohere] Complete. Total tokens: ${tokenCount} | Message length: ${fullMessage.length}`);
        }
        onComplete(fullMessage, toolCalls);
    }
    catch (error) {
        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
            console.error(`❌ [Cohere] Stream error:`, error);
            if (error instanceof Error) {
                console.error(`❌ [Cohere] Error message: ${error.message}`);
                console.error(`❌ [Cohere] Error stack: ${error.stack}`);
            }
            // Log additional error details if available
            if (error.response) {
                console.error(`❌ [Cohere] Response status: ${error.response.status}`);
                console.error(`❌ [Cohere] Response data:`, error.response.data);
            }
        }
        throw error;
    }
}
|
1376
|
+
/**
 * Stream with Mistral SDK.
 *
 * Consumes a Mistral chat stream via `mistralClient.chat.stream`, emitting
 * "token" events for text, "tool_call_start"/"tool_call_delta" events while
 * tool arguments accumulate, and "tool_call_parsed" once a tool call's
 * arguments form valid JSON. Finally invokes `onComplete` with the
 * accumulated message and all tool calls.
 *
 * NOTE(review): unlike the Deepseek provider, tool calls with invalid JSON
 * are not filtered out of the `onComplete` list (they just never get a
 * "tool_call_parsed" event) — confirm whether callers rely on that.
 *
 * @param specification - Graphlit specification (uses `mistral` settings)
 * @param messages - Conversation messages
 * @param tools - Optional tool definitions (JSON-schema strings in `schema`)
 * @param mistralClient - Mistral client instance
 * @param onEvent - Callback invoked for each streaming event
 * @param onComplete - Callback invoked with (fullMessage, toolCalls)
 * @throws when no model name resolves, or on any streaming error
 */
export async function streamWithMistral(specification, messages, tools, mistralClient, // Mistral client instance
onEvent, onComplete) {
    let fullMessage = "";
    let toolCalls = [];
    // Performance metrics
    const startTime = Date.now();
    let firstTokenTime = 0;
    let tokenCount = 0;
    try {
        const modelName = getModelName(specification);
        if (!modelName) {
            throw new Error(`No model name found for Mistral specification: ${specification.name}`);
        }
        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
            console.log(`🤖 [Mistral] Model Config: Service=Mistral | Model=${modelName} | Temperature=${specification.mistral?.temperature} | Tools=${tools?.length || 0} | Spec="${specification.name}"`);
        }
        const streamConfig = {
            model: modelName,
            messages,
            temperature: specification.mistral?.temperature,
        };
        // Add tools if provided (OpenAI-style function-calling shape)
        if (tools && tools.length > 0) {
            streamConfig.tools = tools.map((tool) => ({
                type: "function",
                function: {
                    name: tool.name,
                    description: tool.description,
                    parameters: tool.schema ? JSON.parse(tool.schema) : {},
                },
            }));
        }
        const stream = await mistralClient.chat.stream(streamConfig);
        for await (const chunk of stream) {
            // Mistral wraps choices under chunk.data
            const delta = chunk.data.choices[0]?.delta;
            if (delta?.content) {
                fullMessage += delta.content;
                tokenCount++;
                if (firstTokenTime === 0) {
                    firstTokenTime = Date.now() - startTime;
                    if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
                        console.log(`⚡ [Mistral] Time to First Token: ${firstTokenTime}ms`);
                    }
                }
                onEvent({
                    type: "token",
                    token: delta.content,
                });
            }
            // Handle tool calls
            if (delta?.tool_calls) {
                for (const toolCallDelta of delta.tool_calls) {
                    // Use ?? so only a missing index falls back to 0
                    // (|| would also coerce a legitimate falsy value).
                    const index = toolCallDelta.index ?? 0;
                    if (!toolCalls[index]) {
                        toolCalls[index] = {
                            id: toolCallDelta.id || `tool_${Date.now()}_${index}`,
                            name: "",
                            arguments: "",
                        };
                        onEvent({
                            type: "tool_call_start",
                            toolCall: {
                                id: toolCalls[index].id,
                                name: toolCallDelta.function?.name || "",
                            },
                        });
                    }
                    if (toolCallDelta.function?.name) {
                        toolCalls[index].name = toolCallDelta.function.name;
                    }
                    if (toolCallDelta.function?.arguments) {
                        toolCalls[index].arguments += toolCallDelta.function.arguments;
                        onEvent({
                            type: "tool_call_delta",
                            toolCallId: toolCalls[index].id,
                            argumentDelta: toolCallDelta.function.arguments,
                        });
                    }
                }
            }
        }
        // Emit complete events for tool calls whose arguments parsed cleanly
        for (const toolCall of toolCalls) {
            if (isValidJSON(toolCall.arguments)) {
                onEvent({
                    type: "tool_call_parsed",
                    toolCall,
                });
            }
        }
        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
            console.log(`✅ [Mistral] Complete. Total tokens: ${tokenCount} | Message length: ${fullMessage.length}`);
        }
        onComplete(fullMessage, toolCalls);
    }
    catch (error) {
        // Consistent with the Deepseek/Bedrock providers: log under the
        // debug flag and surface an "error" event before rethrowing
        // (previously this catch rethrew without any handling).
        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
            console.error(`❌ [Mistral] Stream error:`, error);
        }
        onEvent({
            type: "error",
            error: `Mistral streaming error: ${error}`,
        });
        throw error;
    }
}
|
1478
|
+
/**
 * Stream with Bedrock SDK (for Claude models).
 *
 * Uses the AWS Bedrock Converse streaming API (ConverseStreamCommand,
 * imported dynamically) to stream a response. Text deltas are emitted as
 * both "token" and cumulative "message" events; tool-use blocks are
 * correlated across stream events by `contentBlockIndex` (start → deltas
 * → stop), with "tool_call_start" on block start and "tool_call_parsed"
 * on block stop. Finishes with a "complete" event and `onComplete`.
 *
 * @param specification - Graphlit specification (uses `bedrock` settings)
 * @param messages - Conversation messages ({ role, content } objects)
 * @param systemPrompt - Optional system prompt (sent via `system` field)
 * @param tools - Optional tool definitions (JSON-schema strings in `schema`)
 * @param bedrockClient - BedrockRuntimeClient instance
 * @param onEvent - Callback invoked for each streaming event
 * @param onComplete - Callback invoked with (fullMessage, toolCalls)
 * @throws when no model name resolves, or on any streaming error
 */
export async function streamWithBedrock(specification, messages, systemPrompt, tools, bedrockClient, // BedrockRuntimeClient instance
onEvent, onComplete) {
    let fullMessage = "";
    let toolCalls = [];
    // Map contentBlockIndex to tool calls for proper correlation
    const toolCallsByIndex = new Map();
    // Performance metrics
    const startTime = Date.now();
    let firstTokenTime = 0;
    let tokenCount = 0;
    try {
        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
            console.log(`🔍 [Bedrock] Specification object:`, JSON.stringify(specification, null, 2));
        }
        const modelName = getModelName(specification);
        if (!modelName) {
            console.error(`❌ [Bedrock] Model resolution failed for specification:`, {
                name: specification.name,
                serviceType: specification.serviceType,
                bedrock: specification.bedrock,
                hasCustomModelName: !!specification.bedrock?.modelName
            });
            throw new Error(`No model name found for Bedrock specification: ${specification.name} (service: ${specification.serviceType}, bedrock.model: ${specification.bedrock?.model})`);
        }
        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
            console.log(`🤖 [Bedrock] Model Config: Service=Bedrock | Model=${modelName} | Temperature=${specification.bedrock?.temperature} | Tools=${tools?.length || 0} | Spec="${specification.name}"`);
        }
        // Import the ConverseStreamCommand for unified API
        const { ConverseStreamCommand } = await import("@aws-sdk/client-bedrock-runtime");
        // Convert messages to Bedrock Converse format
        // The AWS SDK expects content as an array of content blocks
        const converseMessages = messages.map((msg) => ({
            role: msg.role,
            content: [{
                    text: typeof msg.content === 'string' ? msg.content : msg.content.toString()
                }]
        }));
        // Prepare the request using Converse API format
        // Using 'any' type because:
        // 1. We're dynamically importing the SDK (can't import types at compile time)
        // 2. The ConverseStreamCommandInput type has complex union types for system/toolConfig
        // 3. The structure matches the AWS SDK expectations
        const request = {
            modelId: modelName,
            inferenceConfig: {
                temperature: specification.bedrock?.temperature ?? undefined,
                topP: specification.bedrock?.probability ?? undefined,
                // NOTE(review): || means a completionTokenLimit of 0 also
                // falls back to 1000 — presumably intentional; confirm.
                maxTokens: specification.bedrock?.completionTokenLimit || 1000,
            },
        };
        // Add system prompt if provided
        if (systemPrompt) {
            request.system = [{ text: systemPrompt }];
        }
        // Add tools if provided (Converse toolSpec shape)
        if (tools && tools.length > 0) {
            request.toolConfig = {
                tools: tools.map((tool) => ({
                    toolSpec: {
                        name: tool.name,
                        description: tool.description,
                        inputSchema: {
                            json: tool.schema ? JSON.parse(tool.schema) : {},
                        },
                    },
                })),
            };
        }
        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
            console.log(`🔍 [Bedrock] Converse request:`, JSON.stringify(request, null, 2));
        }
        const command = new ConverseStreamCommand(request);
        const response = await bedrockClient.send(command);
        if (response.stream) {
            for await (const event of response.stream) {
                if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
                    console.log(`🔍 [Bedrock] Stream event:`, JSON.stringify(event));
                }
                // Handle different event types from Converse API
                if (event.contentBlockDelta) {
                    const delta = event.contentBlockDelta.delta;
                    const contentIndex = event.contentBlockDelta.contentBlockIndex;
                    if (delta?.text) {
                        const text = delta.text;
                        fullMessage += text;
                        tokenCount++;
                        if (firstTokenTime === 0) {
                            firstTokenTime = Date.now() - startTime;
                            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
                                console.log(`⚡ [Bedrock] Time to First Token: ${firstTokenTime}ms`);
                            }
                        }
                        onEvent({
                            type: "token",
                            token: text,
                        });
                        onEvent({
                            type: "message",
                            message: fullMessage,
                        });
                    }
                    else if (delta?.toolUse) {
                        // Handle tool use input delta
                        if (delta.toolUse.input && contentIndex !== undefined) {
                            // Find the corresponding tool call by index
                            // Bedrock uses contentBlockIndex to correlate deltas with their starts
                            const toolCall = toolCallsByIndex.get(contentIndex);
                            if (toolCall) {
                                toolCall.arguments += delta.toolUse.input;
                            }
                        }
                    }
                }
                else if (event.contentBlockStart) {
                    // Handle tool use start
                    const start = event.contentBlockStart.start;
                    const startIndex = event.contentBlockStart.contentBlockIndex;
                    if (start?.toolUse && startIndex !== undefined) {
                        const toolUse = start.toolUse;
                        // Fall back to a synthesized id if Bedrock omits one
                        const id = toolUse.toolUseId || `tool_${Date.now()}_${toolCalls.length}`;
                        // Initialize the tool call
                        const toolCall = {
                            id,
                            name: toolUse.name || "",
                            arguments: "",
                        };
                        // Store in both array and map
                        toolCalls.push(toolCall);
                        toolCallsByIndex.set(startIndex, toolCall);
                        onEvent({
                            type: "tool_call_start",
                            toolCall: { id, name: toolUse.name || "" },
                        });
                    }
                }
                else if (event.contentBlockStop) {
                    // Handle tool use completion
                    const stopIndex = event.contentBlockStop.contentBlockIndex;
                    if (stopIndex !== undefined) {
                        const toolCall = toolCallsByIndex.get(stopIndex);
                        if (toolCall) {
                            // Emit tool_call_parsed event when tool arguments are complete
                            onEvent({
                                type: "tool_call_parsed",
                                toolCall: toolCall,
                            });
                        }
                    }
                }
                else if (event.metadata) {
                    // Metadata events contain usage information
                    if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
                        console.log(`📊 [Bedrock] Metadata:`, event.metadata);
                    }
                }
            }
        }
        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
            console.log(`✅ [Bedrock] Complete. Total tokens: ${tokenCount} | Message length: ${fullMessage.length}`);
        }
        onEvent({
            type: "complete",
            tokens: tokenCount,
        });
        onComplete(fullMessage, toolCalls);
    }
    catch (error) {
        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
            console.error(`❌ [Bedrock] Stream error:`, error);
        }
        onEvent({
            type: "error",
            error: `Bedrock streaming error: ${error}`,
        });
        throw error;
    }
}
|