@xalia/agent 0.6.1 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. package/dist/agent/src/agent/agent.js +109 -57
  2. package/dist/agent/src/agent/agentUtils.js +24 -26
  3. package/dist/agent/src/agent/compressingContextManager.js +3 -2
  4. package/dist/agent/src/agent/dummyLLM.js +1 -3
  5. package/dist/agent/src/agent/imageGenLLM.js +67 -0
  6. package/dist/agent/src/agent/imageGenerator.js +43 -0
  7. package/dist/agent/src/agent/llm.js +27 -0
  8. package/dist/agent/src/agent/mcpServerManager.js +18 -6
  9. package/dist/agent/src/agent/nullAgentEventHandler.js +6 -0
  10. package/dist/agent/src/agent/openAILLM.js +3 -3
  11. package/dist/agent/src/agent/openAILLMStreaming.js +41 -6
  12. package/dist/agent/src/chat/client/chatClient.js +154 -235
  13. package/dist/agent/src/chat/client/constants.js +1 -2
  14. package/dist/agent/src/chat/client/sessionClient.js +47 -15
  15. package/dist/agent/src/chat/client/sessionFiles.js +102 -0
  16. package/dist/agent/src/chat/data/apiKeyManager.js +38 -7
  17. package/dist/agent/src/chat/data/database.js +83 -70
  18. package/dist/agent/src/chat/data/dbSessionFileModels.js +49 -0
  19. package/dist/agent/src/chat/data/dbSessionFiles.js +76 -0
  20. package/dist/agent/src/chat/data/dbSessionMessages.js +57 -0
  21. package/dist/agent/src/chat/data/mimeTypes.js +44 -0
  22. package/dist/agent/src/chat/protocol/messages.js +21 -1
  23. package/dist/agent/src/chat/server/chatContextManager.js +19 -16
  24. package/dist/agent/src/chat/server/connectionManager.js +14 -36
  25. package/dist/agent/src/chat/server/connectionManager.test.js +3 -16
  26. package/dist/agent/src/chat/server/conversation.js +73 -44
  27. package/dist/agent/src/chat/server/imageGeneratorTools.js +111 -0
  28. package/dist/agent/src/chat/server/openSession.js +398 -233
  29. package/dist/agent/src/chat/server/openSessionMessageSender.js +2 -0
  30. package/dist/agent/src/chat/server/server.js +5 -8
  31. package/dist/agent/src/chat/server/sessionFileManager.js +171 -38
  32. package/dist/agent/src/chat/server/sessionRegistry.js +214 -42
  33. package/dist/agent/src/chat/server/test-utils/mockFactories.js +12 -11
  34. package/dist/agent/src/chat/server/tools.js +27 -6
  35. package/dist/agent/src/chat/utils/approvalManager.js +82 -64
  36. package/dist/agent/src/chat/utils/multiAsyncQueue.js +9 -1
  37. package/dist/agent/src/chat/{client/responseHandler.js → utils/responseAwaiter.js} +41 -18
  38. package/dist/agent/src/test/agent.test.js +104 -63
  39. package/dist/agent/src/test/approvalManager.test.js +79 -35
  40. package/dist/agent/src/test/chatContextManager.test.js +16 -17
  41. package/dist/agent/src/test/clientServerConnection.test.js +2 -2
  42. package/dist/agent/src/test/db.test.js +33 -70
  43. package/dist/agent/src/test/dbSessionFiles.test.js +179 -0
  44. package/dist/agent/src/test/dbSessionMessages.test.js +67 -0
  45. package/dist/agent/src/test/dbTestTools.js +6 -5
  46. package/dist/agent/src/test/imageLoad.test.js +1 -1
  47. package/dist/agent/src/test/mcpServerManager.test.js +1 -1
  48. package/dist/agent/src/test/multiAsyncQueue.test.js +50 -0
  49. package/dist/agent/src/test/responseAwaiter.test.js +74 -0
  50. package/dist/agent/src/test/testTools.js +12 -0
  51. package/dist/agent/src/tool/agentChat.js +25 -6
  52. package/dist/agent/src/tool/agentMain.js +1 -1
  53. package/dist/agent/src/tool/chatMain.js +115 -6
  54. package/dist/agent/src/tool/commandPrompt.js +7 -3
  55. package/dist/agent/src/tool/files.js +23 -15
  56. package/dist/agent/src/tool/options.js +2 -2
  57. package/package.json +1 -1
  58. package/scripts/setup_chat +2 -2
  59. package/scripts/test_chat +95 -36
  60. package/src/agent/agent.ts +152 -41
  61. package/src/agent/agentUtils.ts +34 -41
  62. package/src/agent/compressingContextManager.ts +5 -4
  63. package/src/agent/context.ts +1 -1
  64. package/src/agent/dummyLLM.ts +1 -3
  65. package/src/agent/iAgentEventHandler.ts +15 -2
  66. package/src/agent/imageGenLLM.ts +99 -0
  67. package/src/agent/imageGenerator.ts +60 -0
  68. package/src/agent/llm.ts +128 -4
  69. package/src/agent/mcpServerManager.ts +26 -7
  70. package/src/agent/nullAgentEventHandler.ts +6 -0
  71. package/src/agent/openAILLM.ts +3 -8
  72. package/src/agent/openAILLMStreaming.ts +60 -14
  73. package/src/chat/client/chatClient.ts +262 -286
  74. package/src/chat/client/constants.ts +0 -2
  75. package/src/chat/client/sessionClient.ts +82 -20
  76. package/src/chat/client/sessionFiles.ts +142 -0
  77. package/src/chat/data/apiKeyManager.ts +55 -7
  78. package/src/chat/data/dataModels.ts +17 -7
  79. package/src/chat/data/database.ts +107 -92
  80. package/src/chat/data/dbSessionFileModels.ts +91 -0
  81. package/src/chat/data/dbSessionFiles.ts +99 -0
  82. package/src/chat/data/dbSessionMessages.ts +68 -0
  83. package/src/chat/data/mimeTypes.ts +58 -0
  84. package/src/chat/protocol/messages.ts +136 -25
  85. package/src/chat/server/chatContextManager.ts +42 -24
  86. package/src/chat/server/connectionManager.test.ts +2 -22
  87. package/src/chat/server/connectionManager.ts +18 -53
  88. package/src/chat/server/conversation.ts +106 -59
  89. package/src/chat/server/imageGeneratorTools.ts +138 -0
  90. package/src/chat/server/openSession.ts +606 -325
  91. package/src/chat/server/openSessionMessageSender.ts +4 -0
  92. package/src/chat/server/server.ts +5 -11
  93. package/src/chat/server/sessionFileManager.ts +223 -63
  94. package/src/chat/server/sessionRegistry.ts +317 -52
  95. package/src/chat/server/test-utils/mockFactories.ts +13 -13
  96. package/src/chat/server/tools.ts +43 -8
  97. package/src/chat/utils/agentSessionMap.ts +2 -2
  98. package/src/chat/utils/approvalManager.ts +153 -81
  99. package/src/chat/utils/multiAsyncQueue.ts +11 -1
  100. package/src/chat/{client/responseHandler.ts → utils/responseAwaiter.ts} +73 -23
  101. package/src/test/agent.test.ts +152 -75
  102. package/src/test/approvalManager.test.ts +108 -40
  103. package/src/test/chatContextManager.test.ts +26 -22
  104. package/src/test/clientServerConnection.test.ts +3 -3
  105. package/src/test/compressingContextManager.test.ts +1 -1
  106. package/src/test/context.test.ts +2 -1
  107. package/src/test/conversation.test.ts +1 -1
  108. package/src/test/db.test.ts +41 -83
  109. package/src/test/dbSessionFiles.test.ts +258 -0
  110. package/src/test/dbSessionMessages.test.ts +85 -0
  111. package/src/test/dbTestTools.ts +9 -5
  112. package/src/test/imageLoad.test.ts +2 -2
  113. package/src/test/mcpServerManager.test.ts +3 -1
  114. package/src/test/multiAsyncQueue.test.ts +58 -0
  115. package/src/test/responseAwaiter.test.ts +103 -0
  116. package/src/test/testTools.ts +15 -1
  117. package/src/tool/agentChat.ts +36 -8
  118. package/src/tool/agentMain.ts +7 -7
  119. package/src/tool/chatMain.ts +128 -7
  120. package/src/tool/commandPrompt.ts +10 -5
  121. package/src/tool/files.ts +30 -13
  122. package/src/tool/options.ts +1 -1
  123. package/test_data/dummyllm_script_image_gen.json +19 -0
  124. package/test_data/dummyllm_script_invoke_image_gen_tool.json +30 -0
  125. package/test_data/image_gen_test_profile.json +5 -0
  126. package/dist/agent/src/test/responseHandler.test.js +0 -61
  127. package/src/test/responseHandler.test.ts +0 -78
@@ -1,49 +1,16 @@
1
1
  "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || (function () {
19
- var ownKeys = function(o) {
20
- ownKeys = Object.getOwnPropertyNames || function (o) {
21
- var ar = [];
22
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
- return ar;
24
- };
25
- return ownKeys(o);
26
- };
27
- return function (mod) {
28
- if (mod && mod.__esModule) return mod;
29
- var result = {};
30
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
- __setModuleDefault(result, mod);
32
- return result;
33
- };
34
- })();
35
2
  Object.defineProperty(exports, "__esModule", { value: true });
36
- exports.Agent = exports.AgentProfile = void 0;
3
+ exports.Agent = exports.DEFAULT_LLM_URL = exports.AgentProfile = void 0;
37
4
  exports.createUserMessage = createUserMessage;
38
5
  exports.createUserMessageEnsure = createUserMessageEnsure;
39
- const dotenv = __importStar(require("dotenv"));
6
+ exports.completionToAssistantMessageParam = completionToAssistantMessageParam;
40
7
  const mcpServerManager_1 = require("./mcpServerManager");
41
8
  const assert_1 = require("assert");
42
9
  const sdk_1 = require("@xalia/xmcp/sdk");
43
10
  var sdk_2 = require("@xalia/xmcp/sdk");
44
11
  Object.defineProperty(exports, "AgentProfile", { enumerable: true, get: function () { return sdk_2.AgentProfile; } });
12
+ exports.DEFAULT_LLM_URL = "http://localhost:5001/v1";
45
13
  const MAX_TOOL_CALL_RESPONSE_LENGTH = 4000;
46
- dotenv.config();
47
14
  const logger = (0, sdk_1.getLogger)();
48
15
  class Agent {
49
16
  constructor(eventHandler, mcpServerManager, llm, contextManager) {
@@ -88,13 +55,24 @@ class Agent {
88
55
  return this.userMessagesRaw([userMessage]);
89
56
  }
90
57
  async userMessagesRaw(userMessages) {
58
+ // Image and audio handling
59
+ //
60
+ // `ChatCompletions` (responses from the LLM) can contain `audio` and
61
+ // `images` tags. However, the `ChatCompletionMessageParam` type does not
62
+ // allow for "assistant" messages with images / audio.
63
+ //
64
+ // As such, our current approach is to extract all assistant-generated
65
+ // media and return it separately.
91
66
  // Note: `getLLMContext` returns a copy so we can mutate this array
92
67
  const context = this.contextManager.getLLMContext();
93
68
  const newMessagesIdx = context.length;
94
69
  // Add the new user messages
95
70
  context.push(...userMessages);
71
+ const images = [];
72
+ // We convert the `ChatCompletionsMessage` into a
73
+ // `ChatCompletionAssistantMessageParam` and extract image data.
96
74
  let completion = await this.chatCompletion(context);
97
- let message = completion.choices[0].message;
75
+ let message = this.processCompletion(completion, images);
98
76
  context.push(message);
99
77
  // While there are tool calls to make, invoke them and loop
100
78
  while (message.tool_calls && message.tool_calls.length > 0) {
@@ -107,11 +85,15 @@ class Agent {
107
85
  // `toolCallResults`.
108
86
  const result = await this.doToolCall(toolCall);
109
87
  toolCallResults.push([context.length, result]);
110
- context.push({
88
+ const toolResult = {
111
89
  role: "tool",
112
90
  tool_call_id: toolCall.id,
113
91
  content: result.response,
114
- });
92
+ };
93
+ if (result.metadata) {
94
+ toolResult.metadata = result.metadata;
95
+ }
96
+ context.push(toolResult);
115
97
  // If the tool call requested that its args be redacted, this can be
116
98
  // done now - before the next LLM invocation.
117
99
  if (result.overwriteArgs) {
@@ -130,15 +112,15 @@ class Agent {
130
112
  // view of the conversation state.
131
113
  try {
132
114
  completion = await this.chatCompletion(context); // CAN THROW
133
- message = completion.choices[0].message;
115
+ message = this.processCompletion(completion, images);
134
116
  context.push(message);
135
117
  }
136
118
  finally {
137
119
  // Now that the tool call results have been passed to the LLM, perform
138
120
  // any updates on them. Pass the (updated) tool-call-result LLM
139
- // messages to the event handler - note, we want to do this even if
140
- // the an error occured, so that the caller has an up-to-date picture
141
- // of the context state when the error occured.
121
+ // messages to the event handler - note, we want to do this even if an
122
+ // error occurred, so that the caller has an up-to-date picture of the
123
+ // context state when the error occurred.
142
124
  toolCallResults.forEach(([indexInContext, tcr]) => {
143
125
  const ctxMsg = context[indexInContext];
144
126
  if (tcr.overwriteResponse) {
@@ -155,7 +137,7 @@ class Agent {
155
137
  this.eventHandler.onCompletion(message);
156
138
  // Add all new messages to the context
157
139
  this.contextManager.addMessages(context.slice(newMessagesIdx));
158
- return completion.choices[0].message;
140
+ return { message, images: images.length === 0 ? undefined : images };
159
141
  }
160
142
  userMessage(msg, imageB64) {
161
143
  void this.userMessageEx(msg, imageB64);
@@ -180,11 +162,13 @@ class Agent {
180
162
  // Compute the full list of available tools
181
163
  let tools;
182
164
  const mcpTools = this.mcpServerManager.getOpenAITools();
165
+ logger.debug(`[chatCompletion] mcpTools: ${JSON.stringify(mcpTools)}`);
183
166
  const enabledTools = this.tools.concat(mcpTools);
184
167
  if (enabledTools.length > 0) {
185
168
  tools = enabledTools;
186
169
  }
187
- const completion = await this.llm.getConversationResponse(context, tools, this.eventHandler.onAgentMessage.bind(this.eventHandler));
170
+ logger.debug(`[chatCompletion] tools: ${JSON.stringify(tools)}`);
171
+ const completion = await this.llm.getConversationResponse(context, tools, this.eventHandler.onAgentMessage.bind(this.eventHandler), this.eventHandler.onReasoning.bind(this.eventHandler));
188
172
  logger.debug(`Received chat completion ${JSON.stringify(completion)}`);
189
173
  return completion;
190
174
  }
@@ -234,21 +218,27 @@ class Agent {
234
218
  const toolName = toolCall.function.name;
235
219
  const agentTool = this.agentTools.get(toolName);
236
220
  const isAgentTool = !!agentTool;
237
- const approve = await this.eventHandler.onToolCall(toolCall, isAgentTool);
238
- if (!approve) {
239
- result = { response: "User denied tool request." };
240
- }
241
- else if (isAgentTool) {
221
+ if (isAgentTool) {
242
222
  // Internal (agent) tool
243
- const args = JSON.parse(toolCall.function.arguments);
244
- result = await agentTool.handler(this, args);
223
+ if (!(await this.eventHandler.onToolCall(toolCall, true))) {
224
+ result = { response: "User denied tool request." };
225
+ }
226
+ else {
227
+ const args = JSON.parse(toolCall.function.arguments || "{}");
228
+ result = await agentTool.handler(this, args);
229
+ }
245
230
  }
246
231
  else {
247
- // McpServer tool call (agentTool === undefined)
248
- const args = JSON.parse(toolCall.function.arguments);
249
- result = {
250
- response: await this.mcpServerManager.invoke(toolName, args),
251
- };
232
+ // McpServer tool call (agentTool === undefined). Sanity check the
233
+ // tool call data, get approval, and then invoke.
234
+ const args = JSON.parse(toolCall.function.arguments || "{}");
235
+ const tc = this.mcpServerManager.verifyToolCall(toolName, args);
236
+ if (!(await this.eventHandler.onToolCall(toolCall, false))) {
237
+ result = { response: "User denied tool request." };
238
+ }
239
+ else {
240
+ result = { response: await this.mcpServerManager.invoke(tc) };
241
+ }
252
242
  logger.debug(`tool call result ${JSON.stringify(result)}`);
253
243
  }
254
244
  }
@@ -278,6 +268,17 @@ class Agent {
278
268
  }
279
269
  return result;
280
270
  }
271
+ processCompletion(completion, images) {
272
+ // Add any images into the list, and call the event handler
273
+ const compMessage = completion.choices[0].message;
274
+ if (compMessage.images) {
275
+ for (const image of compMessage.images) {
276
+ this.eventHandler.onImage(image);
277
+ images.push(image);
278
+ }
279
+ }
280
+ return completionToAssistantMessageParam(compMessage);
281
+ }
281
282
  }
282
283
  exports.Agent = Agent;
283
284
  /**
@@ -324,3 +325,54 @@ function createUserMessageEnsure(msg, imageB64, name) {
324
325
  (0, assert_1.strict)(userMsg);
325
326
  return userMsg;
326
327
  }
/**
 * Strip a `ChatCompletionMessage` (a response from the LLM) down to a
 * `ChatCompletionAssistantMessageParam` (an input to the LLM).
 *
 * Only `audio`, `content`, `refusal` and `tool_calls` are carried over, and
 * only when they hold a value. Everything else on the completion message
 * (e.g. `annotations`, or the OpenRouter-specific `images`) is dropped.
 *
 * @param compMessage - The `message` of a chat completion choice.
 * @returns A new `{ role: "assistant", ... }` object; `compMessage` is not
 *   modified.
 */
function completionToAssistantMessageParam(compMessage) {
    // For reference:
    //
    // Response from the LLM:
    //
    //   export interface ChatCompletionMessage {
    //     role: 'assistant';
    //     audio?: ChatCompletionAudio | null;
    //     content: string | null;
    //     refusal: string | null;
    //     tool_calls?: Array<ChatCompletionMessageToolCall>;
    //
    //     annotations?: Array<ChatCompletionMessage.Annotation>;
    //     // openrouter
    //     images?: Array<ChatCompletionContentPartImage>
    //   }
    //
    // Input to the LLM:
    //
    //   export interface ChatCompletionAssistantMessageParam {
    //     role: "assistant";
    //     audio?: ChatCompletionAssistantMessageParam.Audio | null;
    //     content?:
    //       | string
    //       | Array<ChatCompletionContentPartText |
    //               ChatCompletionContentPartRefusal>
    //       | null;
    //     refusal?: string | null;
    //     tool_calls?: Array<ChatCompletionMessageToolCall>;
    //
    //     name?: string;
    //   }
    const message = {
        role: "assistant",
    };
    if (compMessage.audio) {
        message.audio = compMessage.audio;
    }
    if (compMessage.content) {
        message.content = compMessage.content;
    }
    if (compMessage.refusal) {
        message.refusal = compMessage.refusal;
    }
    // An empty array is truthy, so test the length explicitly: this message is
    // sent back to the LLM on the next turn, and an empty `tool_calls` list on
    // an assistant message carries no information and may be rejected.
    if (compMessage.tool_calls && compMessage.tool_calls.length > 0) {
        message.tool_calls = compMessage.tool_calls;
    }
    return message;
}
@@ -1,6 +1,5 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.XALIA_APP_HEADER = exports.DEFAULT_LLM_MODEL = exports.DEFAULT_LLM_URL = void 0;
4
3
  exports.createAgentWithoutSkills = createAgentWithoutSkills;
5
4
  exports.createAgentWithSkills = createAgentWithSkills;
6
5
  exports.createAgentFromSkillManager = createAgentFromSkillManager;
@@ -16,24 +15,16 @@ const dummyLLM_1 = require("./dummyLLM");
16
15
  const assert_1 = require("assert");
17
16
  const repeatLLM_1 = require("./repeatLLM");
18
17
  const context_1 = require("./context");
18
+ const imageGenLLM_1 = require("./imageGenLLM");
19
19
  const logger = (0, sdk_1.getLogger)();
20
- exports.DEFAULT_LLM_URL = "http://localhost:5001/v1";
21
- // uses openrouter
22
- exports.DEFAULT_LLM_MODEL = process.env["DEFAULT_LLM_MODEL"] || "openai/gpt-4o";
23
- exports.XALIA_APP_HEADER = {
24
- "HTTP-Referer": "xalia.ai",
25
- "X-Title": "Xalia",
26
- };
27
- async function createAgentWithoutSkills(llmUrl, agentProfile, eventHandler, platform, contextManager, llmApiKey, sudomcpConfig, authorizedUrl, stream = false) {
20
+ async function createAgentWithoutSkills(llmUrl, model, eventHandler, platform, contextManager, llmApiKey, sudomcpConfig, authorizedUrl, stream = false) {
28
21
  // Init SudoMcpServerManager
29
22
  logger.debug("[createAgentWithSkills] creating SudoMcpServerManager.");
30
23
  const sudoMcpServerManager = await sudoMcpServerManager_1.SkillManager.initialize((url, authResultP, displayName) => {
31
24
  platform.openUrl(url, authResultP, displayName);
32
25
  }, sudomcpConfig.backend_url, sudomcpConfig.api_key, authorizedUrl);
33
- logger.debug("[createAgentWithoutSkills] restore mcp settings:" +
34
- JSON.stringify(agentProfile.mcp_settings));
35
26
  // Create agent using the event handler
36
- const agent = await createAgentFromSkillManager(llmUrl, agentProfile, eventHandler, platform, contextManager, llmApiKey, sudoMcpServerManager, stream);
27
+ const agent = await createAgentFromSkillManager(llmUrl, model, eventHandler, platform, contextManager, llmApiKey, sudoMcpServerManager, stream);
37
28
  return [agent, sudoMcpServerManager];
38
29
  }
39
30
  /**
@@ -41,17 +32,16 @@ async function createAgentWithoutSkills(llmUrl, agentProfile, eventHandler, plat
41
32
  * IAgentEventHandler interface. This is the preferred way to create
42
33
  * agents.
43
34
  */
44
- async function createAgentWithSkills(llmUrl, agentProfile, eventHandler, platform, contextManager, llmApiKey, sudomcpConfig, authorizedUrl, stream = false) {
45
- const [agent, sudoMcpServerManager] = await createAgentWithoutSkills(llmUrl, agentProfile, eventHandler, platform, contextManager, llmApiKey, sudomcpConfig, authorizedUrl, stream);
46
- logger.debug("[createAgentWithSkills] restoring skills");
47
- await sudoMcpServerManager.restoreMcpSettings(agentProfile.mcp_settings);
35
+ async function createAgentWithSkills(llmUrl, model, eventHandler, platform, contextManager, llmApiKey, sudomcpConfig, mcpSettings, authorizedUrl, stream = false) {
36
+ const [agent, sudoMcpServerManager] = await createAgentWithoutSkills(llmUrl, model, eventHandler, platform, contextManager, llmApiKey, sudomcpConfig, authorizedUrl, stream);
37
+ logger.debug(`[createAgentWithSkills] skilles: ${JSON.stringify(mcpSettings)}`);
38
+ await sudoMcpServerManager.restoreMcpSettings(mcpSettings);
48
39
  return [agent, sudoMcpServerManager];
49
40
  }
50
- async function createAgentFromSkillManager(llmUrl, agentProfile, eventHandler, platform, contextManager, llmApiKey, skillManager, stream = false) {
41
+ async function createAgentFromSkillManager(llmUrl, model, eventHandler, platform, contextManager, llmApiKey, skillManager, stream = false) {
51
42
  // Create agent
52
43
  logger.debug("[createAgentFromSkillManager] creating agent ...");
53
- const llm = await createLLM(llmUrl, llmApiKey, agentProfile.model, stream, platform);
54
- contextManager.setAgentPrompt(agentProfile.system_prompt);
44
+ const llm = await createLLM(llmUrl, llmApiKey, model, stream, platform);
55
45
  const agent = agent_1.Agent.initializeWithLLM(eventHandler, llm, contextManager, skillManager);
56
46
  logger.debug("[createAgentFromSkillManager] done");
57
47
  return agent;
@@ -64,12 +54,16 @@ async function createLLM(llmUrl, llmApiKey, model, stream = false, platform) {
64
54
  else if (model === "repeat") {
65
55
  llm = new repeatLLM_1.RepeatLLM();
66
56
  }
57
+ else if (model == imageGenLLM_1.DEFAULT_IMAGE_GEN_MODEL) {
58
+ logger.info("ImageGenLLM");
59
+ llm = new imageGenLLM_1.ImageGenLLM(llmApiKey, llmUrl, model);
60
+ }
67
61
  else {
68
62
  // Regular Agent
69
63
  if (!llmApiKey) {
70
64
  throw new Error("Missing OpenAI API Key");
71
65
  }
72
- logger.debug(`Initializing Agent: ${llmUrl ?? "unknown"} - ${model ?? "unknown"}`);
66
+ logger.debug(`Initializing Agent: ${llmUrl ?? "unknown"} - ${model}`);
73
67
  if (stream) {
74
68
  llm = new openAILLMStreaming_1.OpenAILLMStreaming(llmApiKey, llmUrl, model);
75
69
  }
@@ -86,11 +80,13 @@ async function createLLM(llmUrl, llmApiKey, model, stream = false, platform) {
86
80
  * messages are not used by the caller, the user does not need to approve tool
87
81
  * calls, etc).
88
82
  */
89
- async function createNonInteractiveAgent(url, agentProfile, conversation, platform, openaiApiKey, sudomcpConfig, approveToolsUpTo) {
83
+ async function createNonInteractiveAgent(url, agentProfile, defaultModel, conversation, platform, openaiApiKey, sudomcpConfig, approveToolsUpTo) {
90
84
  let remainingToolCalls = approveToolsUpTo;
91
85
  const eventHandler = {
92
86
  onCompletion: () => { },
87
+ onImage: () => { },
93
88
  onAgentMessage: async () => { },
89
+ onReasoning: async () => { },
94
90
  // eslint-disable-next-line @typescript-eslint/require-await
95
91
  onToolCall: async () => {
96
92
  if (remainingToolCalls !== 0) {
@@ -102,25 +98,26 @@ async function createNonInteractiveAgent(url, agentProfile, conversation, platfo
102
98
  onToolCallResult: () => { },
103
99
  };
104
100
  const contextManager = new context_1.ContextManager(agentProfile.system_prompt, conversation || []);
105
- const [agent, _] = await createAgentWithSkills(url, agentProfile, eventHandler, platform, contextManager, openaiApiKey, sudomcpConfig, undefined);
101
+ const [agent, _] = await createAgentWithSkills(url, agentProfile.model || defaultModel, eventHandler, platform, contextManager, openaiApiKey, sudomcpConfig, agentProfile.mcp_settings, undefined);
106
102
  return agent;
107
103
  }
108
104
  /**
109
105
  * Create an Agent (from the AgentProfile), pass it a single prompt and output
110
106
  * the response.
111
107
  */
112
- async function runOneShot(url, agentProfile, conversation, platform, prompt, image, llmApiKey, sudomcpConfig, approveToolsUpTo) {
108
+ async function runOneShot(url, agentProfile, defaultModel, conversation, platform, prompt, image, llmApiKey, sudomcpConfig, approveToolsUpTo) {
113
109
  logger.debug("[runOneShot]: start");
114
110
  // Create a non-interactive agent and pass any prompt/ image to it. Return
115
111
  // the first answer.
116
- const agent = await createNonInteractiveAgent(url, agentProfile, conversation, platform, llmApiKey, sudomcpConfig, approveToolsUpTo);
117
- const response = await agent.userMessageEx(prompt, image);
112
+ const agent = await createNonInteractiveAgent(url, agentProfile, defaultModel, conversation, platform, llmApiKey, sudomcpConfig, approveToolsUpTo);
113
+ const agentResponse = await agent.userMessageEx(prompt, image);
118
114
  await agent.shutdown();
119
115
  logger.debug("[runOneShot]: shutdown done");
120
- if (!response) {
116
+ if (!agentResponse) {
121
117
  throw new Error("No message returned from agent");
122
118
  }
123
119
  // Handle different content types
120
+ const response = agentResponse.message;
124
121
  let responseText = "";
125
122
  if (typeof response.content === "string") {
126
123
  responseText = response.content;
@@ -146,5 +143,6 @@ async function runOneShot(url, agentProfile, conversation, platform, prompt, ima
146
143
  return {
147
144
  response: responseText,
148
145
  conversation: agent.getConversation(),
146
+ images: agentResponse.images,
149
147
  };
150
148
  }
@@ -36,10 +36,11 @@ async function createCompressionAgent(compressionAgentUrl, compressionAgentModel
36
36
  }
37
37
  async function createSummary(compressionAgentUrl, compressionAgentModel, compressionAgentApiKey, conversation) {
38
38
  const agent = await createCompressionAgent(compressionAgentUrl, compressionAgentModel, compressionAgentApiKey);
39
- const resp = await agent.userMessageEx(JSON.stringify(conversation));
40
- if (!resp) {
39
+ const agentResp = await agent.userMessageEx(JSON.stringify(conversation));
40
+ if (!agentResp) {
41
41
  throw new Error("compression agent returned null");
42
42
  }
43
+ const resp = agentResp.message;
43
44
  (0, assert_1.strict)(resp.role === "assistant");
44
45
  (0, assert_1.strict)(typeof resp.content === "string", "expected string content from compression agent");
45
46
  return resp.content;
@@ -57,9 +57,7 @@ class DummyLLM {
57
57
  }
58
58
  if (onMessage) {
59
59
  const message = response.message;
60
- if (message.content) {
61
- void onMessage(message.content, true);
62
- }
60
+ void onMessage(message.content || "", true);
63
61
  }
64
62
  return {
65
63
  id: String(this.idx),
@@ -0,0 +1,67 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.ImageGenLLM = exports.DEFAULT_IMAGE_GEN_MODEL = void 0;
4
+ const openai_1 = require("openai");
5
+ const assert_1 = require("assert");
6
+ const fs_1 = require("fs");
7
+ const sdk_1 = require("@xalia/xmcp/sdk");
8
+ const llm_1 = require("./llm");
9
+ const logger = (0, sdk_1.getLogger)();
10
+ exports.DEFAULT_IMAGE_GEN_MODEL = "google/gemini-2.5-flash-image-preview";
/**
 * LLM wrapper for image-generating models served through an OpenAI-compatible
 * chat completions endpoint.
 *
 * Requests are sent with `modalities: ["image", "text"]`; generated images are
 * expected on the `images` field of the returned completion message. Tool
 * calling is not supported.
 */
class ImageGenLLM {
    /**
     * @param apiKey - API key passed to the OpenAI client.
     * @param apiUrl - Base URL of the chat completions endpoint.
     * @param model - Model name; falls back to `DEFAULT_IMAGE_GEN_MODEL`.
     */
    constructor(apiKey, apiUrl, model) {
        this.openai = new openai_1.OpenAI({
            apiKey,
            baseURL: apiUrl,
            dangerouslyAllowBrowser: true,
            defaultHeaders: llm_1.XALIA_APP_HEADER,
        });
        this.model = model || exports.DEFAULT_IMAGE_GEN_MODEL;
    }
    setModel(model) {
        this.model = model;
    }
    getModel() {
        return this.model;
    }
    getUrl() {
        return this.openai.baseURL;
    }
    /**
     * Send `messages` to the model and return the raw chat completion.
     *
     * @param messages - Conversation to send.
     * @param tools - Must be undefined or empty; an assertion fails otherwise.
     * @param onMessage - Optional callback, awaited once with the text content
     *   of the reply (and `true`) when the reply has non-empty text.
     * @returns The completion as returned by the endpoint (including any
     *   `images` on the message).
     */
    async getConversationResponse(messages, tools, onMessage) {
        (0, assert_1.strict)(!tools || tools.length === 0, "tools not supported in ImageGenLLM");
        // Designed for image generation via OpenRouter, which (presumably)
        // extends the chat completion request with a `modalities` field.
        // `tools` is deliberately left out of the request: it is always empty
        // here, and there is no reason to send an empty tool list.
        const params = {
            model: this.model,
            messages,
            modalities: ["image", "text"],
        };
        logger.info(`[ImageGenLLM] params; ${JSON.stringify(params)}`);
        // NOTE: the completion is no longer dumped to `./completion.json`. That
        // was a debugging aid which wrote into the process's working directory
        // on every request.
        const completion = (await this.openai.chat.completions.create(params));
        if (onMessage) {
            // Guard against a reply without choices instead of failing with a
            // TypeError while reporting it.
            const message = completion.choices?.[0]?.message;
            if (message?.content) {
                await onMessage(message.content, true);
            }
            if (message?.images) {
                message.images.forEach((image, index) => {
                    const imageUrl = image.image_url.url; // Base64 data URL
                    const truncated = imageUrl.substring(0, 50);
                    logger.info(`[ImageGenLLM] ${String(index + 1)}: ${truncated}...`);
                });
            }
        }
        return completion;
    }
}
67
+ exports.ImageGenLLM = ImageGenLLM;
@@ -0,0 +1,43 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.ImageGenerator = void 0;
4
+ const agent_1 = require("./agent");
5
+ const agentUtils_1 = require("./agentUtils");
6
+ const context_1 = require("./context");
7
+ const nullAgentEventHandler_1 = require("./nullAgentEventHandler");
8
+ const nullPlatform_1 = require("./nullPlatform");
9
+ const nodePlatform_1 = require("../tool/nodePlatform");
10
+ const imageGenLLM_1 = require("./imageGenLLM");
11
+ const IMAGE_GEN_SYSTEM_PROMPT = "You are an image generator";
/**
 * Generates a single image per request by driving an `Agent` whose LLM is an
 * image-generation model. The agent's context is emptied after every request.
 */
class ImageGenerator {
    constructor(agent, contextManager) {
        this.agent = agent;
        this.contextManager = contextManager;
    }
    /**
     * Build an `ImageGenerator` backed by a fresh agent.
     *
     * @param llmUrl - Base URL of the LLM endpoint.
     * @param llmApiKey - API key for the endpoint.
     * @param model - Model name; defaults to `DEFAULT_IMAGE_GEN_MODEL`.
     */
    static async init(llmUrl, llmApiKey, model) {
        const development = !!process.env.DEVELOPMENT;
        const llm = await (0, agentUtils_1.createLLM)(llmUrl, llmApiKey, model || imageGenLLM_1.DEFAULT_IMAGE_GEN_MODEL, false /* stream */, development ? nodePlatform_1.NODE_PLATFORM : nullPlatform_1.NULL_PLATFORM // allow file loading
        );
        const contextManager = new context_1.ContextManager(IMAGE_GEN_SYSTEM_PROMPT, []);
        const agent = agent_1.Agent.initializeWithLLM(nullAgentEventHandler_1.NULL_AGENT_EVENT_HANDLER, llm, contextManager);
        return new ImageGenerator(agent, contextManager);
    }
    /**
     * Generate an image from `prompt` (and optionally an input `image`).
     *
     * @returns The URL of the first image in the agent's response.
     * @throws Error if the user message cannot be built, or if the agent
     *   returns no response or no images.
     */
    async generate(prompt, image) {
        const userMessage = (0, agent_1.createUserMessage)(prompt, image);
        if (!userMessage) {
            throw new Error("invalid user message / input image");
        }
        try {
            const agentResponse = await this.agent.userMessageRaw(userMessage);
            if (!agentResponse) {
                throw new Error("invalid response from image gen agent");
            }
            if (!agentResponse.images || agentResponse.images.length === 0) {
                throw new Error("invalid response from image gen agent");
            }
            return agentResponse.images[0].image_url.url;
        }
        finally {
            // Clear the context on every path, including failures. Otherwise
            // an exchange that produced no image would stay in the context
            // and be sent along with the next, unrelated request.
            while (this.contextManager.popMessage()) {
                // keep popping until the context reports nothing left
            }
        }
    }
}
43
+ exports.ImageGenerator = ImageGenerator;
@@ -1,2 +1,29 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.XALIA_APP_HEADER = void 0;
4
+ exports.choiceDeltaExtractReasoning = choiceDeltaExtractReasoning;
5
+ exports.XALIA_APP_HEADER = {
6
+ "HTTP-Referer": "xalia.ai",
7
+ "X-Title": "Xalia",
8
+ };
/**
 * Extract the reasoning text carried by a streamed chat completion delta.
 *
 * A non-empty `delta.reasoning` string wins. Otherwise the entries of
 * `delta.reasoning_details` are concatenated:
 *   - `reasoning.text`      contributes its `text`
 *   - `reasoning.summary`   contributes its `summary`
 *   - `reasoning.encrypted` carries no readable text and is skipped
 *
 * @param delta - The `delta` of a streamed choice.
 * @returns The reasoning text (possibly `""` when `reasoning_details` holds
 *   nothing readable), or `undefined` when the delta has no reasoning fields.
 * @throws Error on an unknown `reasoning_details` entry type, or when a text
 *   field is present but not a string.
 */
function choiceDeltaExtractReasoning(delta) {
    if (delta.reasoning) {
        return delta.reasoning;
    }
    if (!delta.reasoning_details) {
        return undefined;
    }
    let reasoning = "";
    for (const details of delta.reasoning_details) {
        // Name of the field holding readable text for this entry type.
        let field;
        switch (details.type) {
            case "reasoning.text":
                field = "text";
                break;
            case "reasoning.summary":
                field = "summary";
                break;
            case "reasoning.encrypted":
                // Opaque payload: nothing to show, but not an error either.
                continue;
            default:
                throw new Error(`unexpected details.type: ${details.type}`);
        }
        const fragment = details[field];
        if (fragment) {
            if (typeof fragment !== "string") {
                throw new Error(`unexpected typeof details.${field}: ${typeof fragment}`);
            }
            reasoning += fragment;
        }
    }
    return reasoning;
}
@@ -203,11 +203,7 @@ class McpServerManager {
203
203
  }
204
204
  return this.enabledOpenAITools;
205
205
  }
206
- /**
207
- * Note the `qualifiedToolName` is the full `{mcpServerName}/{toolName}` as
208
- * in the openai spec.
209
- */
210
- async invoke(qualifiedToolName, args) {
206
+ verifyToolCall(qualifiedToolName, args) {
211
207
  const [mcpServerName, toolName] = splitQualifiedName(qualifiedToolName);
212
208
  logger.debug(`invoke: qualified: ${qualifiedToolName}`);
213
209
  logger.debug(`invoke: mcpServerName: ${mcpServerName}, toolName: ${toolName}`);
@@ -217,7 +213,23 @@ class McpServerManager {
217
213
  if (!cb) {
218
214
  throw new Error(`Unknown tool ${qualifiedToolName}`);
219
215
  }
220
- return cb(JSON.stringify(args));
216
+ return {
217
+ mcpServerName,
218
+ toolName,
219
+ args,
220
+ };
221
+ }
222
+ /**
223
+ * Invoke a tool call returned by `verifyToolCall`, i.e. one whose qualified
224
+ * `{mcpServerName}/{toolName}` name has already been split into its parts.
225
+ */
226
+ async invoke(toolCall) {
227
+ const server = this.getMcpServerInternal(toolCall.mcpServerName);
228
+ const cb = server.getCallback(toolCall.toolName);
229
+ if (!cb) {
230
+ throw new Error(`Unknown tool ${toolCall.toolName}`);
231
+ }
232
+ return cb(JSON.stringify(toolCall.args));
221
233
  }
222
234
  /**
223
235
  * "Settings" refers to the set of added servers and enabled tools.
@@ -7,12 +7,18 @@ exports.NULL_AGENT_EVENT_HANDLER = void 0;
7
7
  */
8
8
  exports.NULL_AGENT_EVENT_HANDLER = {
9
9
  onCompletion: () => { },
10
+ onImage: () => { },
10
11
  onToolCallResult: () => { },
11
12
  onAgentMessage: () => {
12
13
  return new Promise((r) => {
13
14
  r();
14
15
  });
15
16
  },
17
+ onReasoning: () => {
18
+ return new Promise((r) => {
19
+ r();
20
+ });
21
+ },
16
22
  onToolCall: () => {
17
23
  return new Promise((r) => {
18
24
  r(false);
@@ -1,7 +1,7 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.OpenAILLM = void 0;
4
- const agentUtils_1 = require("./agentUtils");
4
+ const llm_1 = require("./llm");
5
5
  const openai_1 = require("openai");
6
6
  class OpenAILLM {
7
7
  constructor(apiKey, apiUrl, model) {
@@ -9,9 +9,9 @@ class OpenAILLM {
9
9
  apiKey,
10
10
  baseURL: apiUrl,
11
11
  dangerouslyAllowBrowser: true,
12
- defaultHeaders: agentUtils_1.XALIA_APP_HEADER,
12
+ defaultHeaders: llm_1.XALIA_APP_HEADER,
13
13
  });
14
- this.model = model || agentUtils_1.DEFAULT_LLM_MODEL;
14
+ this.model = model;
15
15
  }
16
16
  setModel(model) {
17
17
  this.model = model;