@xalia/agent 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. package/dist/agent/src/agent/agent.js +103 -54
  2. package/dist/agent/src/agent/agentUtils.js +22 -21
  3. package/dist/agent/src/agent/compressingContextManager.js +3 -2
  4. package/dist/agent/src/agent/dummyLLM.js +1 -3
  5. package/dist/agent/src/agent/imageGenLLM.js +67 -0
  6. package/dist/agent/src/agent/imageGenerator.js +43 -0
  7. package/dist/agent/src/agent/llm.js +27 -0
  8. package/dist/agent/src/agent/mcpServerManager.js +18 -6
  9. package/dist/agent/src/agent/nullAgentEventHandler.js +6 -0
  10. package/dist/agent/src/agent/openAILLM.js +3 -3
  11. package/dist/agent/src/agent/openAILLMStreaming.js +41 -6
  12. package/dist/agent/src/chat/client/chatClient.js +84 -13
  13. package/dist/agent/src/chat/client/sessionClient.js +47 -6
  14. package/dist/agent/src/chat/client/sessionFiles.js +102 -0
  15. package/dist/agent/src/chat/data/apiKeyManager.js +38 -7
  16. package/dist/agent/src/chat/data/database.js +83 -70
  17. package/dist/agent/src/chat/data/dbSessionFileModels.js +49 -0
  18. package/dist/agent/src/chat/data/dbSessionFiles.js +76 -0
  19. package/dist/agent/src/chat/data/dbSessionMessages.js +57 -0
  20. package/dist/agent/src/chat/data/mimeTypes.js +44 -0
  21. package/dist/agent/src/chat/protocol/messages.js +21 -0
  22. package/dist/agent/src/chat/server/chatContextManager.js +14 -7
  23. package/dist/agent/src/chat/server/connectionManager.js +14 -36
  24. package/dist/agent/src/chat/server/connectionManager.test.js +2 -16
  25. package/dist/agent/src/chat/server/conversation.js +69 -45
  26. package/dist/agent/src/chat/server/imageGeneratorTools.js +111 -0
  27. package/dist/agent/src/chat/server/openSession.js +205 -43
  28. package/dist/agent/src/chat/server/server.js +5 -8
  29. package/dist/agent/src/chat/server/sessionFileManager.js +171 -38
  30. package/dist/agent/src/chat/server/sessionRegistry.js +199 -32
  31. package/dist/agent/src/chat/server/test-utils/mockFactories.js +12 -11
  32. package/dist/agent/src/chat/server/tools.js +27 -6
  33. package/dist/agent/src/chat/utils/multiAsyncQueue.js +9 -1
  34. package/dist/agent/src/test/agent.test.js +15 -11
  35. package/dist/agent/src/test/chatContextManager.test.js +4 -0
  36. package/dist/agent/src/test/clientServerConnection.test.js +2 -2
  37. package/dist/agent/src/test/db.test.js +33 -70
  38. package/dist/agent/src/test/dbSessionFiles.test.js +179 -0
  39. package/dist/agent/src/test/dbSessionMessages.test.js +67 -0
  40. package/dist/agent/src/test/dbTestTools.js +6 -5
  41. package/dist/agent/src/test/imageLoad.test.js +1 -1
  42. package/dist/agent/src/test/mcpServerManager.test.js +1 -1
  43. package/dist/agent/src/test/multiAsyncQueue.test.js +50 -0
  44. package/dist/agent/src/test/testTools.js +12 -0
  45. package/dist/agent/src/tool/agentChat.js +25 -6
  46. package/dist/agent/src/tool/agentMain.js +1 -1
  47. package/dist/agent/src/tool/chatMain.js +113 -4
  48. package/dist/agent/src/tool/commandPrompt.js +7 -3
  49. package/dist/agent/src/tool/files.js +23 -15
  50. package/dist/agent/src/tool/options.js +2 -2
  51. package/package.json +1 -1
  52. package/scripts/test_chat +124 -66
  53. package/src/agent/agent.ts +145 -38
  54. package/src/agent/agentUtils.ts +27 -21
  55. package/src/agent/compressingContextManager.ts +5 -4
  56. package/src/agent/context.ts +1 -1
  57. package/src/agent/dummyLLM.ts +1 -3
  58. package/src/agent/iAgentEventHandler.ts +15 -2
  59. package/src/agent/imageGenLLM.ts +99 -0
  60. package/src/agent/imageGenerator.ts +60 -0
  61. package/src/agent/llm.ts +128 -4
  62. package/src/agent/mcpServerManager.ts +26 -7
  63. package/src/agent/nullAgentEventHandler.ts +6 -0
  64. package/src/agent/openAILLM.ts +3 -8
  65. package/src/agent/openAILLMStreaming.ts +60 -14
  66. package/src/chat/client/chatClient.ts +119 -14
  67. package/src/chat/client/sessionClient.ts +75 -9
  68. package/src/chat/client/sessionFiles.ts +145 -0
  69. package/src/chat/data/apiKeyManager.ts +55 -7
  70. package/src/chat/data/dataModels.ts +16 -7
  71. package/src/chat/data/database.ts +107 -92
  72. package/src/chat/data/dbSessionFileModels.ts +91 -0
  73. package/src/chat/data/dbSessionFiles.ts +99 -0
  74. package/src/chat/data/dbSessionMessages.ts +68 -0
  75. package/src/chat/data/mimeTypes.ts +58 -0
  76. package/src/chat/protocol/messages.ts +127 -13
  77. package/src/chat/server/chatContextManager.ts +36 -13
  78. package/src/chat/server/connectionManager.test.ts +1 -22
  79. package/src/chat/server/connectionManager.ts +18 -53
  80. package/src/chat/server/conversation.ts +96 -57
  81. package/src/chat/server/imageGeneratorTools.ts +138 -0
  82. package/src/chat/server/openSession.ts +287 -49
  83. package/src/chat/server/server.ts +5 -11
  84. package/src/chat/server/sessionFileManager.ts +223 -63
  85. package/src/chat/server/sessionRegistry.ts +285 -41
  86. package/src/chat/server/test-utils/mockFactories.ts +13 -13
  87. package/src/chat/server/tools.ts +43 -8
  88. package/src/chat/utils/agentSessionMap.ts +2 -2
  89. package/src/chat/utils/multiAsyncQueue.ts +11 -1
  90. package/src/test/agent.test.ts +23 -14
  91. package/src/test/chatContextManager.test.ts +7 -2
  92. package/src/test/clientServerConnection.test.ts +3 -3
  93. package/src/test/compressingContextManager.test.ts +1 -1
  94. package/src/test/context.test.ts +2 -1
  95. package/src/test/conversation.test.ts +1 -1
  96. package/src/test/db.test.ts +41 -83
  97. package/src/test/dbSessionFiles.test.ts +258 -0
  98. package/src/test/dbSessionMessages.test.ts +85 -0
  99. package/src/test/dbTestTools.ts +9 -5
  100. package/src/test/imageLoad.test.ts +2 -2
  101. package/src/test/mcpServerManager.test.ts +3 -1
  102. package/src/test/multiAsyncQueue.test.ts +58 -0
  103. package/src/test/testTools.ts +15 -1
  104. package/src/tool/agentChat.ts +35 -7
  105. package/src/tool/agentMain.ts +7 -7
  106. package/src/tool/chatMain.ts +126 -5
  107. package/src/tool/commandPrompt.ts +10 -5
  108. package/src/tool/files.ts +30 -13
  109. package/src/tool/options.ts +1 -1
  110. package/test_data/dummyllm_script_image_gen.json +19 -0
  111. package/test_data/dummyllm_script_invoke_image_gen_tool.json +30 -0
  112. package/test_data/image_gen_test_profile.json +5 -0
@@ -1,49 +1,16 @@
1
1
  "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || (function () {
19
- var ownKeys = function(o) {
20
- ownKeys = Object.getOwnPropertyNames || function (o) {
21
- var ar = [];
22
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
- return ar;
24
- };
25
- return ownKeys(o);
26
- };
27
- return function (mod) {
28
- if (mod && mod.__esModule) return mod;
29
- var result = {};
30
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
- __setModuleDefault(result, mod);
32
- return result;
33
- };
34
- })();
35
2
  Object.defineProperty(exports, "__esModule", { value: true });
36
- exports.Agent = exports.AgentProfile = void 0;
3
+ exports.Agent = exports.DEFAULT_LLM_URL = exports.AgentProfile = void 0;
37
4
  exports.createUserMessage = createUserMessage;
38
5
  exports.createUserMessageEnsure = createUserMessageEnsure;
39
- const dotenv = __importStar(require("dotenv"));
6
+ exports.completionToAssistantMessageParam = completionToAssistantMessageParam;
40
7
  const mcpServerManager_1 = require("./mcpServerManager");
41
8
  const assert_1 = require("assert");
42
9
  const sdk_1 = require("@xalia/xmcp/sdk");
43
10
  var sdk_2 = require("@xalia/xmcp/sdk");
44
11
  Object.defineProperty(exports, "AgentProfile", { enumerable: true, get: function () { return sdk_2.AgentProfile; } });
12
+ exports.DEFAULT_LLM_URL = "http://localhost:5001/v1";
45
13
  const MAX_TOOL_CALL_RESPONSE_LENGTH = 4000;
46
- dotenv.config();
47
14
  const logger = (0, sdk_1.getLogger)();
48
15
  class Agent {
49
16
  constructor(eventHandler, mcpServerManager, llm, contextManager) {
@@ -88,13 +55,24 @@ class Agent {
88
55
  return this.userMessagesRaw([userMessage]);
89
56
  }
90
57
  async userMessagesRaw(userMessages) {
58
+ // Image and audio handling
59
+ //
60
+ // `ChatCompletions` (responses from the LLM) can contain `audio` and
61
+ // `images` tags. However, the `ChatCompletionMessageParam` type does not
62
+ // allow for "assistant" messages with images / audio.
63
+ //
64
+ // As such, our current approach is to extract all assistant-generated
65
+ // media and return it separately.
91
66
  // Note: `getLLMContext` returns a copy to we can mutate this array
92
67
  const context = this.contextManager.getLLMContext();
93
68
  const newMessagesIdx = context.length;
94
69
  // Add the new user messages
95
70
  context.push(...userMessages);
71
+ const images = [];
72
+ // We convert the `ChatCompletionsMessage` into a
73
+ // `ChatCompletionAssistantMessageParam` and extract image data.
96
74
  let completion = await this.chatCompletion(context);
97
- let message = completion.choices[0].message;
75
+ let message = this.processCompletion(completion, images);
98
76
  context.push(message);
99
77
  // While there are tool calls to make, invoke them and loop
100
78
  while (message.tool_calls && message.tool_calls.length > 0) {
@@ -111,6 +89,7 @@ class Agent {
111
89
  role: "tool",
112
90
  tool_call_id: toolCall.id,
113
91
  content: result.response,
92
+ metadata: result.metadata,
114
93
  });
115
94
  // If the tool call requested that its args be redacted, this can be
116
95
  // done now - before the next LLM invocation.
@@ -130,15 +109,15 @@ class Agent {
130
109
  // view of the conversation state.
131
110
  try {
132
111
  completion = await this.chatCompletion(context); // CAN THROW
133
- message = completion.choices[0].message;
112
+ message = this.processCompletion(completion, images);
134
113
  context.push(message);
135
114
  }
136
115
  finally {
137
116
  // Now that the tool call results have been passed to the LLM, perform
138
117
  // any updates on them. Pass the (updated) tool-call-result LLM
139
- // messages to the event handler - note, we want to do this even if
140
- // the an error occured, so that the caller has an up-to-date picture
141
- // of the context state when the error occured.
118
+ // messages to the event handler - note, we want to do this even if an
119
+ // error occured, so that the caller has an up-to-date picture of the
120
+ // context state when the error occured.
142
121
  toolCallResults.forEach(([indexInContext, tcr]) => {
143
122
  const ctxMsg = context[indexInContext];
144
123
  if (tcr.overwriteResponse) {
@@ -155,7 +134,7 @@ class Agent {
155
134
  this.eventHandler.onCompletion(message);
156
135
  // Add all new new messages to the context
157
136
  this.contextManager.addMessages(context.slice(newMessagesIdx));
158
- return completion.choices[0].message;
137
+ return { message, images: images.length === 0 ? undefined : images };
159
138
  }
160
139
  userMessage(msg, imageB64) {
161
140
  void this.userMessageEx(msg, imageB64);
@@ -180,11 +159,13 @@ class Agent {
180
159
  // Compute the full list of available tools
181
160
  let tools;
182
161
  const mcpTools = this.mcpServerManager.getOpenAITools();
162
+ logger.debug(`[chatCompletion] mcpTools: ${JSON.stringify(mcpTools)}`);
183
163
  const enabledTools = this.tools.concat(mcpTools);
184
164
  if (enabledTools.length > 0) {
185
165
  tools = enabledTools;
186
166
  }
187
- const completion = await this.llm.getConversationResponse(context, tools, this.eventHandler.onAgentMessage.bind(this.eventHandler));
167
+ logger.debug(`[chatCompletion] tools: ${JSON.stringify(tools)}`);
168
+ const completion = await this.llm.getConversationResponse(context, tools, this.eventHandler.onAgentMessage.bind(this.eventHandler), this.eventHandler.onReasoning.bind(this.eventHandler));
188
169
  logger.debug(`Received chat completion ${JSON.stringify(completion)}`);
189
170
  return completion;
190
171
  }
@@ -234,21 +215,27 @@ class Agent {
234
215
  const toolName = toolCall.function.name;
235
216
  const agentTool = this.agentTools.get(toolName);
236
217
  const isAgentTool = !!agentTool;
237
- const approve = await this.eventHandler.onToolCall(toolCall, isAgentTool);
238
- if (!approve) {
239
- result = { response: "User denied tool request." };
240
- }
241
- else if (isAgentTool) {
218
+ if (isAgentTool) {
242
219
  // Internal (agent) tool
243
- const args = JSON.parse(toolCall.function.arguments);
244
- result = await agentTool.handler(this, args);
220
+ if (!(await this.eventHandler.onToolCall(toolCall, true))) {
221
+ result = { response: "User denied tool request." };
222
+ }
223
+ else {
224
+ const args = JSON.parse(toolCall.function.arguments);
225
+ result = await agentTool.handler(this, args);
226
+ }
245
227
  }
246
228
  else {
247
- // McpServer tool call (agentTool === undefined)
229
+ // McpServer tool call (agentTool === undefined). Sanity check the
230
+ // tool call data, get approval, and then invoke.
248
231
  const args = JSON.parse(toolCall.function.arguments);
249
- result = {
250
- response: await this.mcpServerManager.invoke(toolName, args),
251
- };
232
+ const tc = this.mcpServerManager.verifyToolCall(toolName, args);
233
+ if (!(await this.eventHandler.onToolCall(toolCall, false))) {
234
+ result = { response: "User denied tool request." };
235
+ }
236
+ else {
237
+ result = { response: await this.mcpServerManager.invoke(tc) };
238
+ }
252
239
  logger.debug(`tool call result ${JSON.stringify(result)}`);
253
240
  }
254
241
  }
@@ -278,6 +265,17 @@ class Agent {
278
265
  }
279
266
  return result;
280
267
  }
268
+ processCompletion(completion, images) {
269
+ // Add any images into the list, and call the event handler
270
+ const compMessage = completion.choices[0].message;
271
+ if (compMessage.images) {
272
+ for (const image of compMessage.images) {
273
+ this.eventHandler.onImage(image);
274
+ images.push(image);
275
+ }
276
+ }
277
+ return completionToAssistantMessageParam(compMessage);
278
+ }
281
279
  }
282
280
  exports.Agent = Agent;
283
281
  /**
@@ -324,3 +322,54 @@ function createUserMessageEnsure(msg, imageB64, name) {
324
322
  (0, assert_1.strict)(userMsg);
325
323
  return userMsg;
326
324
  }
325
+ function completionToAssistantMessageParam(compMessage) {
326
+ // Strip down the `ChatCompletionMessage` to a
327
+ // `ChatCompletionAssistantMessageParam`, only including the non-null
328
+ // elements. For reference:
329
+ //
330
+ // Response from the LLM:
331
+ //
332
+ // export interface ChatCompletionMessage {
333
+ // role: 'assistant';
334
+ // audio?: ChatCompletionAudio | null;
335
+ // content: string | null;
336
+ // refusal: string | null;
337
+ // tool_calls?: Array<ChatCompletionMessageToolCall>;
338
+ //
339
+ // annotations?: Array<ChatCompletionMessage.Annotation>;
340
+ // // openrouter
341
+ // images?: Array<ChatCompletionContentPartImage>
342
+ // }
343
+ //
344
+ // Input to the LLM
345
+ //
346
+ // export interface ChatCompletionAssistantMessageParam {
347
+ // role: "assistant";
348
+ // audio?: ChatCompletionAssistantMessageParam.Audio | null;
349
+ // content?:
350
+ // | string
351
+ // | Array<ChatCompletionContentPartText |
352
+ // ChatCompletionContentPartRefusal>
353
+ // | null;
354
+ // refusal?: string | null;
355
+ // tool_calls?: Array<ChatCompletionMessageToolCall>;
356
+ //
357
+ // name?: string;
358
+ // }
359
+ const message = {
360
+ role: "assistant",
361
+ };
362
+ if (compMessage.audio) {
363
+ message.audio = compMessage.audio;
364
+ }
365
+ if (compMessage.content) {
366
+ message.content = compMessage.content;
367
+ }
368
+ if (compMessage.refusal) {
369
+ message.refusal = compMessage.refusal;
370
+ }
371
+ if (compMessage.tool_calls) {
372
+ message.tool_calls = compMessage.tool_calls;
373
+ }
374
+ return message;
375
+ }
@@ -1,6 +1,5 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.XALIA_APP_HEADER = exports.DEFAULT_LLM_MODEL = exports.DEFAULT_LLM_URL = void 0;
4
3
  exports.createAgentWithoutSkills = createAgentWithoutSkills;
5
4
  exports.createAgentWithSkills = createAgentWithSkills;
6
5
  exports.createAgentFromSkillManager = createAgentFromSkillManager;
@@ -16,15 +15,9 @@ const dummyLLM_1 = require("./dummyLLM");
16
15
  const assert_1 = require("assert");
17
16
  const repeatLLM_1 = require("./repeatLLM");
18
17
  const context_1 = require("./context");
18
+ const imageGenLLM_1 = require("./imageGenLLM");
19
19
  const logger = (0, sdk_1.getLogger)();
20
- exports.DEFAULT_LLM_URL = "http://localhost:5001/v1";
21
- // uses openrouter
22
- exports.DEFAULT_LLM_MODEL = process.env["DEFAULT_LLM_MODEL"] || "openai/gpt-4o";
23
- exports.XALIA_APP_HEADER = {
24
- "HTTP-Referer": "xalia.ai",
25
- "X-Title": "Xalia",
26
- };
27
- async function createAgentWithoutSkills(llmUrl, agentProfile, eventHandler, platform, contextManager, llmApiKey, sudomcpConfig, authorizedUrl, stream = false) {
20
+ async function createAgentWithoutSkills(llmUrl, agentProfile, defaultModel, eventHandler, platform, contextManager, llmApiKey, sudomcpConfig, authorizedUrl, stream = false) {
28
21
  // Init SudoMcpServerManager
29
22
  logger.debug("[createAgentWithSkills] creating SudoMcpServerManager.");
30
23
  const sudoMcpServerManager = await sudoMcpServerManager_1.SkillManager.initialize((url, authResultP, displayName) => {
@@ -33,7 +26,7 @@ async function createAgentWithoutSkills(llmUrl, agentProfile, eventHandler, plat
33
26
  logger.debug("[createAgentWithoutSkills] restore mcp settings:" +
34
27
  JSON.stringify(agentProfile.mcp_settings));
35
28
  // Create agent using the event handler
36
- const agent = await createAgentFromSkillManager(llmUrl, agentProfile, eventHandler, platform, contextManager, llmApiKey, sudoMcpServerManager, stream);
29
+ const agent = await createAgentFromSkillManager(llmUrl, agentProfile, defaultModel, eventHandler, platform, contextManager, llmApiKey, sudoMcpServerManager, stream);
37
30
  return [agent, sudoMcpServerManager];
38
31
  }
39
32
  /**
@@ -41,16 +34,16 @@ async function createAgentWithoutSkills(llmUrl, agentProfile, eventHandler, plat
41
34
  * IAgentEventHandler interface. This is the preferred way to create
42
35
  * agents.
43
36
  */
44
- async function createAgentWithSkills(llmUrl, agentProfile, eventHandler, platform, contextManager, llmApiKey, sudomcpConfig, authorizedUrl, stream = false) {
45
- const [agent, sudoMcpServerManager] = await createAgentWithoutSkills(llmUrl, agentProfile, eventHandler, platform, contextManager, llmApiKey, sudomcpConfig, authorizedUrl, stream);
37
+ async function createAgentWithSkills(llmUrl, agentProfile, defaultModel, eventHandler, platform, contextManager, llmApiKey, sudomcpConfig, authorizedUrl, stream = false) {
38
+ const [agent, sudoMcpServerManager] = await createAgentWithoutSkills(llmUrl, agentProfile, defaultModel, eventHandler, platform, contextManager, llmApiKey, sudomcpConfig, authorizedUrl, stream);
46
39
  logger.debug("[createAgentWithSkills] restoring skills");
47
40
  await sudoMcpServerManager.restoreMcpSettings(agentProfile.mcp_settings);
48
41
  return [agent, sudoMcpServerManager];
49
42
  }
50
- async function createAgentFromSkillManager(llmUrl, agentProfile, eventHandler, platform, contextManager, llmApiKey, skillManager, stream = false) {
43
+ async function createAgentFromSkillManager(llmUrl, agentProfile, defaultModel, eventHandler, platform, contextManager, llmApiKey, skillManager, stream = false) {
51
44
  // Create agent
52
45
  logger.debug("[createAgentFromSkillManager] creating agent ...");
53
- const llm = await createLLM(llmUrl, llmApiKey, agentProfile.model, stream, platform);
46
+ const llm = await createLLM(llmUrl, llmApiKey, agentProfile.model || defaultModel, stream, platform);
54
47
  contextManager.setAgentPrompt(agentProfile.system_prompt);
55
48
  const agent = agent_1.Agent.initializeWithLLM(eventHandler, llm, contextManager, skillManager);
56
49
  logger.debug("[createAgentFromSkillManager] done");
@@ -64,12 +57,16 @@ async function createLLM(llmUrl, llmApiKey, model, stream = false, platform) {
64
57
  else if (model === "repeat") {
65
58
  llm = new repeatLLM_1.RepeatLLM();
66
59
  }
60
+ else if (model == imageGenLLM_1.DEFAULT_IMAGE_GEN_MODEL) {
61
+ logger.info("ImageGenLLM");
62
+ llm = new imageGenLLM_1.ImageGenLLM(llmApiKey, llmUrl, model);
63
+ }
67
64
  else {
68
65
  // Regular Agent
69
66
  if (!llmApiKey) {
70
67
  throw new Error("Missing OpenAI API Key");
71
68
  }
72
- logger.debug(`Initializing Agent: ${llmUrl ?? "unknown"} - ${model ?? "unknown"}`);
69
+ logger.debug(`Initializing Agent: ${llmUrl ?? "unknown"} - ${model}`);
73
70
  if (stream) {
74
71
  llm = new openAILLMStreaming_1.OpenAILLMStreaming(llmApiKey, llmUrl, model);
75
72
  }
@@ -86,11 +83,13 @@ async function createLLM(llmUrl, llmApiKey, model, stream = false, platform) {
86
83
  * messages are not used by the caller, the user does not need to approve tool
87
84
  * calls, etc).
88
85
  */
89
- async function createNonInteractiveAgent(url, agentProfile, conversation, platform, openaiApiKey, sudomcpConfig, approveToolsUpTo) {
86
+ async function createNonInteractiveAgent(url, agentProfile, defaultModel, conversation, platform, openaiApiKey, sudomcpConfig, approveToolsUpTo) {
90
87
  let remainingToolCalls = approveToolsUpTo;
91
88
  const eventHandler = {
92
89
  onCompletion: () => { },
90
+ onImage: () => { },
93
91
  onAgentMessage: async () => { },
92
+ onReasoning: async () => { },
94
93
  // eslint-disable-next-line @typescript-eslint/require-await
95
94
  onToolCall: async () => {
96
95
  if (remainingToolCalls !== 0) {
@@ -102,25 +101,26 @@ async function createNonInteractiveAgent(url, agentProfile, conversation, platfo
102
101
  onToolCallResult: () => { },
103
102
  };
104
103
  const contextManager = new context_1.ContextManager(agentProfile.system_prompt, conversation || []);
105
- const [agent, _] = await createAgentWithSkills(url, agentProfile, eventHandler, platform, contextManager, openaiApiKey, sudomcpConfig, undefined);
104
+ const [agent, _] = await createAgentWithSkills(url, agentProfile, defaultModel, eventHandler, platform, contextManager, openaiApiKey, sudomcpConfig, undefined);
106
105
  return agent;
107
106
  }
108
107
  /**
109
108
  * Create an Agent (from the AgentProfile), pass it a single prompt and output
110
109
  * the response.
111
110
  */
112
- async function runOneShot(url, agentProfile, conversation, platform, prompt, image, llmApiKey, sudomcpConfig, approveToolsUpTo) {
111
+ async function runOneShot(url, agentProfile, defaultModel, conversation, platform, prompt, image, llmApiKey, sudomcpConfig, approveToolsUpTo) {
113
112
  logger.debug("[runOneShot]: start");
114
113
  // Create a non-interactive agent and pass any prompt/ image to it. Return
115
114
  // the first answer.
116
- const agent = await createNonInteractiveAgent(url, agentProfile, conversation, platform, llmApiKey, sudomcpConfig, approveToolsUpTo);
117
- const response = await agent.userMessageEx(prompt, image);
115
+ const agent = await createNonInteractiveAgent(url, agentProfile, defaultModel, conversation, platform, llmApiKey, sudomcpConfig, approveToolsUpTo);
116
+ const agentResponse = await agent.userMessageEx(prompt, image);
118
117
  await agent.shutdown();
119
118
  logger.debug("[runOneShot]: shutdown done");
120
- if (!response) {
119
+ if (!agentResponse) {
121
120
  throw new Error("No message returned from agent");
122
121
  }
123
122
  // Handle different content types
123
+ const response = agentResponse.message;
124
124
  let responseText = "";
125
125
  if (typeof response.content === "string") {
126
126
  responseText = response.content;
@@ -146,5 +146,6 @@ async function runOneShot(url, agentProfile, conversation, platform, prompt, ima
146
146
  return {
147
147
  response: responseText,
148
148
  conversation: agent.getConversation(),
149
+ images: agentResponse.images,
149
150
  };
150
151
  }
@@ -36,10 +36,11 @@ async function createCompressionAgent(compressionAgentUrl, compressionAgentModel
36
36
  }
37
37
  async function createSummary(compressionAgentUrl, compressionAgentModel, compressionAgentApiKey, conversation) {
38
38
  const agent = await createCompressionAgent(compressionAgentUrl, compressionAgentModel, compressionAgentApiKey);
39
- const resp = await agent.userMessageEx(JSON.stringify(conversation));
40
- if (!resp) {
39
+ const agentResp = await agent.userMessageEx(JSON.stringify(conversation));
40
+ if (!agentResp) {
41
41
  throw new Error("compression agent returned null");
42
42
  }
43
+ const resp = agentResp.message;
43
44
  (0, assert_1.strict)(resp.role === "assistant");
44
45
  (0, assert_1.strict)(typeof resp.content === "string", "expected string content from compression agent");
45
46
  return resp.content;
@@ -57,9 +57,7 @@ class DummyLLM {
57
57
  }
58
58
  if (onMessage) {
59
59
  const message = response.message;
60
- if (message.content) {
61
- void onMessage(message.content, true);
62
- }
60
+ void onMessage(message.content || "", true);
63
61
  }
64
62
  return {
65
63
  id: String(this.idx),
@@ -0,0 +1,67 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.ImageGenLLM = exports.DEFAULT_IMAGE_GEN_MODEL = void 0;
4
+ const openai_1 = require("openai");
5
+ const assert_1 = require("assert");
6
+ const fs_1 = require("fs");
7
+ const sdk_1 = require("@xalia/xmcp/sdk");
8
+ const llm_1 = require("./llm");
9
+ const logger = (0, sdk_1.getLogger)();
10
+ exports.DEFAULT_IMAGE_GEN_MODEL = "google/gemini-2.5-flash-image-preview";
11
+ class ImageGenLLM {
12
+ constructor(apiKey, apiUrl, model) {
13
+ logger.debug(`here`);
14
+ this.openai = new openai_1.OpenAI({
15
+ apiKey,
16
+ baseURL: apiUrl,
17
+ dangerouslyAllowBrowser: true,
18
+ defaultHeaders: llm_1.XALIA_APP_HEADER,
19
+ });
20
+ this.model = model || exports.DEFAULT_IMAGE_GEN_MODEL;
21
+ }
22
+ setModel(model) {
23
+ this.model = model;
24
+ }
25
+ getModel() {
26
+ return this.model;
27
+ }
28
+ getUrl() {
29
+ return this.openai.baseURL;
30
+ }
31
+ async getConversationResponse(messages, tools, onMessage) {
32
+ (0, assert_1.strict)(!tools || tools.length === 0, "tools not supported in ImageGenLLM");
33
+ // Designed for image generation using openrouter, which tweaks the Create
34
+ const params = {
35
+ model: this.model,
36
+ messages,
37
+ tools,
38
+ modalities: ["image", "text"],
39
+ };
40
+ logger.info(`[ImageGenLLM] params; ${JSON.stringify(params)}`);
41
+ const completion = (await this.openai.chat.completions.create(params));
42
+ // const completion = {} as unknown as ChatCompletion;
43
+ const filePath = "./completion.json";
44
+ logger.info(`[ImageGenLLM] writing ${filePath}`);
45
+ (0, fs_1.writeFileSync)(filePath, JSON.stringify(completion), "utf-8");
46
+ logger.info(`[ImageGenLLM] written`);
47
+ // logger.debug(
48
+ // `[ImageGenLLM.getConversationResponse] completion:
49
+ // ${JSON.stringify(completion)}`
50
+ // );
51
+ if (onMessage) {
52
+ const message = completion.choices[0].message;
53
+ if (message.content) {
54
+ await onMessage(message.content, true);
55
+ }
56
+ if (message.images) {
57
+ message.images.forEach((image, index) => {
58
+ const imageUrl = image.image_url.url; // Base64 data URL
59
+ const truncated = imageUrl.substring(0, 50);
60
+ logger.info(`[ImageGenLLM] ${String(index + 1)}: ${truncated}...`);
61
+ });
62
+ }
63
+ }
64
+ return completion;
65
+ }
66
+ }
67
+ exports.ImageGenLLM = ImageGenLLM;
@@ -0,0 +1,43 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.ImageGenerator = void 0;
4
+ const agent_1 = require("./agent");
5
+ const agentUtils_1 = require("./agentUtils");
6
+ const context_1 = require("./context");
7
+ const nullAgentEventHandler_1 = require("./nullAgentEventHandler");
8
+ const nullPlatform_1 = require("./nullPlatform");
9
+ const nodePlatform_1 = require("../tool/nodePlatform");
10
+ const imageGenLLM_1 = require("./imageGenLLM");
11
+ const IMAGE_GEN_SYSTEM_PROMPT = "You are an image generator";
12
+ class ImageGenerator {
13
+ constructor(agent, contextManager) {
14
+ this.agent = agent;
15
+ this.contextManager = contextManager;
16
+ }
17
+ static async init(llmUrl, llmApiKey, model) {
18
+ const development = !!process.env.DEVELOPMENT;
19
+ const llm = await (0, agentUtils_1.createLLM)(llmUrl, llmApiKey, model || imageGenLLM_1.DEFAULT_IMAGE_GEN_MODEL, false /* stream */, development ? nodePlatform_1.NODE_PLATFORM : nullPlatform_1.NULL_PLATFORM // allow file loading
20
+ );
21
+ const contextManager = new context_1.ContextManager(IMAGE_GEN_SYSTEM_PROMPT, []);
22
+ const agent = agent_1.Agent.initializeWithLLM(nullAgentEventHandler_1.NULL_AGENT_EVENT_HANDLER, llm, contextManager);
23
+ return new ImageGenerator(agent, contextManager);
24
+ }
25
+ async generate(prompt, image) {
26
+ const userMessage = (0, agent_1.createUserMessage)(prompt, image);
27
+ if (!userMessage) {
28
+ throw new Error("invalid user message / input image");
29
+ }
30
+ const agentResponse = await this.agent.userMessageRaw(userMessage);
31
+ if (!agentResponse) {
32
+ throw new Error("invalid response from image gen agent");
33
+ }
34
+ if (!agentResponse.images || agentResponse.images.length === 0) {
35
+ throw new Error("invalid response from image gen agent");
36
+ }
37
+ // Clear the context
38
+ while (this.contextManager.popMessage())
39
+ ;
40
+ return agentResponse.images[0].image_url.url;
41
+ }
42
+ }
43
+ exports.ImageGenerator = ImageGenerator;
@@ -1,2 +1,29 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.XALIA_APP_HEADER = void 0;
4
+ exports.choiceDeltaExtractReasoning = choiceDeltaExtractReasoning;
5
+ exports.XALIA_APP_HEADER = {
6
+ "HTTP-Referer": "xalia.ai",
7
+ "X-Title": "Xalia",
8
+ };
9
+ function choiceDeltaExtractReasoning(delta) {
10
+ if (delta.reasoning) {
11
+ return delta.reasoning;
12
+ }
13
+ if (delta.reasoning_details) {
14
+ let reasoning = "";
15
+ for (const details of delta.reasoning_details) {
16
+ if (details.type !== "reasoning.text") {
17
+ throw new Error(`unexpected details.type: ${details.type}`);
18
+ }
19
+ if (details.text) {
20
+ if (typeof details.text !== "string") {
21
+ throw new Error(`unexpected typeof details.text: ${typeof details.text}`);
22
+ }
23
+ reasoning += details.text;
24
+ }
25
+ }
26
+ return reasoning;
27
+ }
28
+ return undefined;
29
+ }
@@ -203,11 +203,7 @@ class McpServerManager {
203
203
  }
204
204
  return this.enabledOpenAITools;
205
205
  }
206
- /**
207
- * Note the `qualifiedToolName` is the full `{mcpServerName}/{toolName}` as
208
- * in the openai spec.
209
- */
210
- async invoke(qualifiedToolName, args) {
206
+ verifyToolCall(qualifiedToolName, args) {
211
207
  const [mcpServerName, toolName] = splitQualifiedName(qualifiedToolName);
212
208
  logger.debug(`invoke: qualified: ${qualifiedToolName}`);
213
209
  logger.debug(`invoke: mcpServerName: ${mcpServerName}, toolName: ${toolName}`);
@@ -217,7 +213,23 @@ class McpServerManager {
217
213
  if (!cb) {
218
214
  throw new Error(`Unknown tool ${qualifiedToolName}`);
219
215
  }
220
- return cb(JSON.stringify(args));
216
+ return {
217
+ mcpServerName,
218
+ toolName,
219
+ args,
220
+ };
221
+ }
222
+ /**
223
+ * Note the `qualifiedToolName` is the full `{mcpServerName}/{toolName}` as
224
+ * in the openai spec.
225
+ */
226
+ async invoke(toolCall) {
227
+ const server = this.getMcpServerInternal(toolCall.mcpServerName);
228
+ const cb = server.getCallback(toolCall.toolName);
229
+ if (!cb) {
230
+ throw new Error(`Unknown tool ${toolCall.toolName}`);
231
+ }
232
+ return cb(JSON.stringify(toolCall.args));
221
233
  }
222
234
  /**
223
235
  * "Settings" refers to the set of added servers and enabled tools.
@@ -7,12 +7,18 @@ exports.NULL_AGENT_EVENT_HANDLER = void 0;
7
7
  */
8
8
  exports.NULL_AGENT_EVENT_HANDLER = {
9
9
  onCompletion: () => { },
10
+ onImage: () => { },
10
11
  onToolCallResult: () => { },
11
12
  onAgentMessage: () => {
12
13
  return new Promise((r) => {
13
14
  r();
14
15
  });
15
16
  },
17
+ onReasoning: () => {
18
+ return new Promise((r) => {
19
+ r();
20
+ });
21
+ },
16
22
  onToolCall: () => {
17
23
  return new Promise((r) => {
18
24
  r(false);
@@ -1,7 +1,7 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.OpenAILLM = void 0;
4
- const agentUtils_1 = require("./agentUtils");
4
+ const llm_1 = require("./llm");
5
5
  const openai_1 = require("openai");
6
6
  class OpenAILLM {
7
7
  constructor(apiKey, apiUrl, model) {
@@ -9,9 +9,9 @@ class OpenAILLM {
9
9
  apiKey,
10
10
  baseURL: apiUrl,
11
11
  dangerouslyAllowBrowser: true,
12
- defaultHeaders: agentUtils_1.XALIA_APP_HEADER,
12
+ defaultHeaders: llm_1.XALIA_APP_HEADER,
13
13
  });
14
- this.model = model || agentUtils_1.DEFAULT_LLM_MODEL;
14
+ this.model = model;
15
15
  }
16
16
  setModel(model) {
17
17
  this.model = model;