@xalia/agent 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. package/dist/agent/src/agent/agent.js +103 -54
  2. package/dist/agent/src/agent/agentUtils.js +22 -21
  3. package/dist/agent/src/agent/compressingContextManager.js +3 -2
  4. package/dist/agent/src/agent/dummyLLM.js +1 -3
  5. package/dist/agent/src/agent/imageGenLLM.js +67 -0
  6. package/dist/agent/src/agent/imageGenerator.js +43 -0
  7. package/dist/agent/src/agent/llm.js +27 -0
  8. package/dist/agent/src/agent/mcpServerManager.js +18 -6
  9. package/dist/agent/src/agent/nullAgentEventHandler.js +6 -0
  10. package/dist/agent/src/agent/openAILLM.js +3 -3
  11. package/dist/agent/src/agent/openAILLMStreaming.js +41 -6
  12. package/dist/agent/src/chat/client/chatClient.js +84 -13
  13. package/dist/agent/src/chat/client/sessionClient.js +47 -6
  14. package/dist/agent/src/chat/client/sessionFiles.js +102 -0
  15. package/dist/agent/src/chat/data/apiKeyManager.js +38 -7
  16. package/dist/agent/src/chat/data/database.js +83 -70
  17. package/dist/agent/src/chat/data/dbSessionFileModels.js +49 -0
  18. package/dist/agent/src/chat/data/dbSessionFiles.js +76 -0
  19. package/dist/agent/src/chat/data/dbSessionMessages.js +57 -0
  20. package/dist/agent/src/chat/data/mimeTypes.js +44 -0
  21. package/dist/agent/src/chat/protocol/messages.js +21 -0
  22. package/dist/agent/src/chat/server/chatContextManager.js +14 -7
  23. package/dist/agent/src/chat/server/connectionManager.js +14 -36
  24. package/dist/agent/src/chat/server/connectionManager.test.js +2 -16
  25. package/dist/agent/src/chat/server/conversation.js +69 -45
  26. package/dist/agent/src/chat/server/imageGeneratorTools.js +111 -0
  27. package/dist/agent/src/chat/server/openSession.js +205 -43
  28. package/dist/agent/src/chat/server/server.js +5 -8
  29. package/dist/agent/src/chat/server/sessionFileManager.js +171 -38
  30. package/dist/agent/src/chat/server/sessionRegistry.js +199 -32
  31. package/dist/agent/src/chat/server/test-utils/mockFactories.js +12 -11
  32. package/dist/agent/src/chat/server/tools.js +27 -6
  33. package/dist/agent/src/chat/utils/multiAsyncQueue.js +9 -1
  34. package/dist/agent/src/test/agent.test.js +15 -11
  35. package/dist/agent/src/test/chatContextManager.test.js +4 -0
  36. package/dist/agent/src/test/clientServerConnection.test.js +2 -2
  37. package/dist/agent/src/test/db.test.js +33 -70
  38. package/dist/agent/src/test/dbSessionFiles.test.js +179 -0
  39. package/dist/agent/src/test/dbSessionMessages.test.js +67 -0
  40. package/dist/agent/src/test/dbTestTools.js +6 -5
  41. package/dist/agent/src/test/imageLoad.test.js +1 -1
  42. package/dist/agent/src/test/mcpServerManager.test.js +1 -1
  43. package/dist/agent/src/test/multiAsyncQueue.test.js +50 -0
  44. package/dist/agent/src/test/testTools.js +12 -0
  45. package/dist/agent/src/tool/agentChat.js +25 -6
  46. package/dist/agent/src/tool/agentMain.js +1 -1
  47. package/dist/agent/src/tool/chatMain.js +113 -4
  48. package/dist/agent/src/tool/commandPrompt.js +7 -3
  49. package/dist/agent/src/tool/files.js +23 -15
  50. package/dist/agent/src/tool/options.js +2 -2
  51. package/package.json +1 -1
  52. package/scripts/test_chat +124 -66
  53. package/src/agent/agent.ts +145 -38
  54. package/src/agent/agentUtils.ts +27 -21
  55. package/src/agent/compressingContextManager.ts +5 -4
  56. package/src/agent/context.ts +1 -1
  57. package/src/agent/dummyLLM.ts +1 -3
  58. package/src/agent/iAgentEventHandler.ts +15 -2
  59. package/src/agent/imageGenLLM.ts +99 -0
  60. package/src/agent/imageGenerator.ts +60 -0
  61. package/src/agent/llm.ts +128 -4
  62. package/src/agent/mcpServerManager.ts +26 -7
  63. package/src/agent/nullAgentEventHandler.ts +6 -0
  64. package/src/agent/openAILLM.ts +3 -8
  65. package/src/agent/openAILLMStreaming.ts +60 -14
  66. package/src/chat/client/chatClient.ts +119 -14
  67. package/src/chat/client/sessionClient.ts +75 -9
  68. package/src/chat/client/sessionFiles.ts +145 -0
  69. package/src/chat/data/apiKeyManager.ts +55 -7
  70. package/src/chat/data/dataModels.ts +16 -7
  71. package/src/chat/data/database.ts +107 -92
  72. package/src/chat/data/dbSessionFileModels.ts +91 -0
  73. package/src/chat/data/dbSessionFiles.ts +99 -0
  74. package/src/chat/data/dbSessionMessages.ts +68 -0
  75. package/src/chat/data/mimeTypes.ts +58 -0
  76. package/src/chat/protocol/messages.ts +127 -13
  77. package/src/chat/server/chatContextManager.ts +36 -13
  78. package/src/chat/server/connectionManager.test.ts +1 -22
  79. package/src/chat/server/connectionManager.ts +18 -53
  80. package/src/chat/server/conversation.ts +96 -57
  81. package/src/chat/server/imageGeneratorTools.ts +138 -0
  82. package/src/chat/server/openSession.ts +287 -49
  83. package/src/chat/server/server.ts +5 -11
  84. package/src/chat/server/sessionFileManager.ts +223 -63
  85. package/src/chat/server/sessionRegistry.ts +285 -41
  86. package/src/chat/server/test-utils/mockFactories.ts +13 -13
  87. package/src/chat/server/tools.ts +43 -8
  88. package/src/chat/utils/agentSessionMap.ts +2 -2
  89. package/src/chat/utils/multiAsyncQueue.ts +11 -1
  90. package/src/test/agent.test.ts +23 -14
  91. package/src/test/chatContextManager.test.ts +7 -2
  92. package/src/test/clientServerConnection.test.ts +3 -3
  93. package/src/test/compressingContextManager.test.ts +1 -1
  94. package/src/test/context.test.ts +2 -1
  95. package/src/test/conversation.test.ts +1 -1
  96. package/src/test/db.test.ts +41 -83
  97. package/src/test/dbSessionFiles.test.ts +258 -0
  98. package/src/test/dbSessionMessages.test.ts +85 -0
  99. package/src/test/dbTestTools.ts +9 -5
  100. package/src/test/imageLoad.test.ts +2 -2
  101. package/src/test/mcpServerManager.test.ts +3 -1
  102. package/src/test/multiAsyncQueue.test.ts +58 -0
  103. package/src/test/testTools.ts +15 -1
  104. package/src/tool/agentChat.ts +35 -7
  105. package/src/tool/agentMain.ts +7 -7
  106. package/src/tool/chatMain.ts +126 -5
  107. package/src/tool/commandPrompt.ts +10 -5
  108. package/src/tool/files.ts +30 -13
  109. package/src/tool/options.ts +1 -1
  110. package/test_data/dummyllm_script_image_gen.json +19 -0
  111. package/test_data/dummyllm_script_invoke_image_gen_tool.json +30 -0
  112. package/test_data/image_gen_test_profile.json +5 -0
@@ -8,26 +8,18 @@ import { Configuration as SudoMcpConfiguration } from "@xalia/xmcp/sdk";
8
8
  import { OpenAILLM } from "./openAILLM";
9
9
  import { OpenAILLMStreaming } from "./openAILLMStreaming";
10
10
  import { DummyLLM } from "./dummyLLM";
11
- import { ILLM } from "./llm";
11
+ import { ChatCompletionMessageParam, ILLM } from "./llm";
12
12
  import { strict as assert } from "assert";
13
13
  import { RepeatLLM } from "./repeatLLM";
14
14
  import { ContextManager, IContextManager } from "./context";
15
+ import { DEFAULT_IMAGE_GEN_MODEL, ImageGenLLM } from "./imageGenLLM";
15
16
 
16
17
  const logger = getLogger();
17
18
 
18
- export const DEFAULT_LLM_URL = "http://localhost:5001/v1";
19
- // uses openrouter
20
- export const DEFAULT_LLM_MODEL =
21
- process.env["DEFAULT_LLM_MODEL"] || "openai/gpt-4o";
22
-
23
- export const XALIA_APP_HEADER = {
24
- "HTTP-Referer": "xalia.ai",
25
- "X-Title": "Xalia",
26
- };
27
-
28
19
  export async function createAgentWithoutSkills(
29
20
  llmUrl: string,
30
21
  agentProfile: AgentProfile,
22
+ defaultModel: string,
31
23
  eventHandler: IAgentEventHandler,
32
24
  platform: IPlatform,
33
25
  contextManager: IContextManager,
@@ -55,6 +47,7 @@ export async function createAgentWithoutSkills(
55
47
  const agent = await createAgentFromSkillManager(
56
48
  llmUrl,
57
49
  agentProfile,
50
+ defaultModel,
58
51
  eventHandler,
59
52
  platform,
60
53
  contextManager,
@@ -74,6 +67,7 @@ export async function createAgentWithoutSkills(
74
67
  export async function createAgentWithSkills(
75
68
  llmUrl: string,
76
69
  agentProfile: AgentProfile,
70
+ defaultModel: string,
77
71
  eventHandler: IAgentEventHandler,
78
72
  platform: IPlatform,
79
73
  contextManager: IContextManager,
@@ -85,6 +79,7 @@ export async function createAgentWithSkills(
85
79
  const [agent, sudoMcpServerManager] = await createAgentWithoutSkills(
86
80
  llmUrl,
87
81
  agentProfile,
82
+ defaultModel,
88
83
  eventHandler,
89
84
  platform,
90
85
  contextManager,
@@ -103,11 +98,12 @@ export async function createAgentWithSkills(
103
98
  export async function createAgentFromSkillManager(
104
99
  llmUrl: string,
105
100
  agentProfile: AgentProfile,
101
+ defaultModel: string,
106
102
  eventHandler: IAgentEventHandler,
107
103
  platform: IPlatform,
108
104
  contextManager: IContextManager,
109
105
  llmApiKey: string | undefined,
110
- skillManager: SkillManager,
106
+ skillManager: SkillManager | undefined,
111
107
  stream: boolean = false
112
108
  ): Promise<Agent> {
113
109
  // Create agent
@@ -115,7 +111,7 @@ export async function createAgentFromSkillManager(
115
111
  const llm = await createLLM(
116
112
  llmUrl,
117
113
  llmApiKey,
118
- agentProfile.model,
114
+ agentProfile.model || defaultModel,
119
115
  stream,
120
116
  platform
121
117
  );
@@ -134,7 +130,7 @@ export async function createAgentFromSkillManager(
134
130
  export async function createLLM(
135
131
  llmUrl: string | undefined,
136
132
  llmApiKey: string | undefined,
137
- model: string | undefined,
133
+ model: string,
138
134
  stream: boolean = false,
139
135
  platform: IPlatform
140
136
  ): Promise<ILLM> {
@@ -144,15 +140,16 @@ export async function createLLM(
144
140
  llm = await DummyLLM.initFromModelUrl(model, platform);
145
141
  } else if (model === "repeat") {
146
142
  llm = new RepeatLLM();
143
+ } else if (model == DEFAULT_IMAGE_GEN_MODEL) {
144
+ logger.info("ImageGenLLM");
145
+ llm = new ImageGenLLM(llmApiKey, llmUrl, model);
147
146
  } else {
148
147
  // Regular Agent
149
148
  if (!llmApiKey) {
150
149
  throw new Error("Missing OpenAI API Key");
151
150
  }
152
151
 
153
- logger.debug(
154
- `Initializing Agent: ${llmUrl ?? "unknown"} - ${model ?? "unknown"}`
155
- );
152
+ logger.debug(`Initializing Agent: ${llmUrl ?? "unknown"} - ${model}`);
156
153
  if (stream) {
157
154
  llm = new OpenAILLMStreaming(llmApiKey, llmUrl, model);
158
155
  } else {
@@ -173,7 +170,8 @@ export async function createLLM(
173
170
  export async function createNonInteractiveAgent(
174
171
  url: string,
175
172
  agentProfile: AgentProfile,
176
- conversation: OpenAI.ChatCompletionMessageParam[] | undefined,
173
+ defaultModel: string,
174
+ conversation: ChatCompletionMessageParam[] | undefined,
177
175
  platform: IPlatform,
178
176
  openaiApiKey: string | undefined,
179
177
  sudomcpConfig: SudoMcpConfiguration,
@@ -182,7 +180,9 @@ export async function createNonInteractiveAgent(
182
180
  let remainingToolCalls = approveToolsUpTo;
183
181
  const eventHandler: IAgentEventHandler = {
184
182
  onCompletion: () => {},
183
+ onImage: () => {},
185
184
  onAgentMessage: async () => {},
185
+ onReasoning: async () => {},
186
186
  // eslint-disable-next-line @typescript-eslint/require-await
187
187
  onToolCall: async () => {
188
188
  if (remainingToolCalls !== 0) {
@@ -201,6 +201,7 @@ export async function createNonInteractiveAgent(
201
201
  const [agent, _] = await createAgentWithSkills(
202
202
  url,
203
203
  agentProfile,
204
+ defaultModel,
204
205
  eventHandler,
205
206
  platform,
206
207
  contextManager,
@@ -219,7 +220,8 @@ export async function createNonInteractiveAgent(
219
220
  export async function runOneShot(
220
221
  url: string,
221
222
  agentProfile: AgentProfile,
222
- conversation: OpenAI.ChatCompletionMessageParam[] | undefined,
223
+ defaultModel: string,
224
+ conversation: ChatCompletionMessageParam[] | undefined,
223
225
  platform: IPlatform,
224
226
  prompt: string,
225
227
  image: string | undefined,
@@ -229,6 +231,7 @@ export async function runOneShot(
229
231
  ): Promise<{
230
232
  response: string;
231
233
  conversation: OpenAI.ChatCompletionMessageParam[];
234
+ images: OpenAI.Chat.Completions.ChatCompletionContentPartImage[] | undefined;
232
235
  }> {
233
236
  logger.debug("[runOneShot]: start");
234
237
 
@@ -238,6 +241,7 @@ export async function runOneShot(
238
241
  const agent = await createNonInteractiveAgent(
239
242
  url,
240
243
  agentProfile,
244
+ defaultModel,
241
245
  conversation,
242
246
  platform,
243
247
  llmApiKey,
@@ -245,15 +249,16 @@ export async function runOneShot(
245
249
  approveToolsUpTo
246
250
  );
247
251
 
248
- const response = await agent.userMessageEx(prompt, image);
252
+ const agentResponse = await agent.userMessageEx(prompt, image);
249
253
  await agent.shutdown();
250
254
  logger.debug("[runOneShot]: shutdown done");
251
255
 
252
- if (!response) {
256
+ if (!agentResponse) {
253
257
  throw new Error("No message returned from agent");
254
258
  }
255
259
 
256
260
  // Handle different content types
261
+ const response = agentResponse.message;
257
262
  let responseText = "";
258
263
  if (typeof response.content === "string") {
259
264
  responseText = response.content;
@@ -277,5 +282,6 @@ export async function runOneShot(
277
282
  return {
278
283
  response: responseText,
279
284
  conversation: agent.getConversation(),
285
+ images: agentResponse.images,
280
286
  };
281
287
  }
@@ -1,11 +1,11 @@
1
1
  import { strict as assert } from "assert";
2
2
  import { getLogger } from "@xalia/xmcp/sdk";
3
3
 
4
+ import { Agent } from "./agent";
4
5
  import {
5
- Agent,
6
6
  ChatCompletionUserMessageParam,
7
7
  ChatCompletionMessageParam,
8
- } from "./agent";
8
+ } from "./llm";
9
9
  import { NULL_PLATFORM } from "./nullPlatform";
10
10
  import { createLLM } from "./agentUtils";
11
11
  import { ContextManager, ContextManagerWithCommit } from "./context";
@@ -68,11 +68,12 @@ export async function createSummary(
68
68
  compressionAgentApiKey
69
69
  );
70
70
 
71
- const resp = await agent.userMessageEx(JSON.stringify(conversation));
72
- if (!resp) {
71
+ const agentResp = await agent.userMessageEx(JSON.stringify(conversation));
72
+ if (!agentResp) {
73
73
  throw new Error("compression agent returned null");
74
74
  }
75
75
 
76
+ const resp = agentResp.message;
76
77
  assert(resp.role === "assistant");
77
78
  assert(
78
79
  typeof resp.content === "string",
@@ -2,7 +2,7 @@ import { strict as assert } from "assert";
2
2
  import {
3
3
  ChatCompletionMessageParam,
4
4
  ChatCompletionUserMessageParam,
5
- } from "./agent";
5
+ } from "./llm";
6
6
  import { SystemPromptProvider } from "./promptProvider";
7
7
 
8
8
  /**
@@ -92,9 +92,7 @@ export class DummyLLM implements ILLM {
92
92
 
93
93
  if (onMessage) {
94
94
  const message = response.message;
95
- if (message.content) {
96
- void onMessage(message.content, true);
97
- }
95
+ void onMessage(message.content || "", true);
98
96
  }
99
97
 
100
98
  return {
@@ -2,7 +2,8 @@ import { OpenAI } from "openai";
2
2
  import {
3
3
  ChatCompletionAssistantMessageParam,
4
4
  ChatCompletionMessageToolCall,
5
- } from "./agent";
5
+ ChatCompletionToolMessageParam,
6
+ } from "./llm";
6
7
 
7
8
  /**
8
9
  * Interface for handling events from the Agent.
@@ -18,11 +19,18 @@ export interface IAgentEventHandler {
18
19
  */
19
20
  onCompletion(result: ChatCompletionAssistantMessageParam): void;
20
21
 
22
+ /**
23
+ * Images do not appear as part of `ChatCompletionAssistantMessageParam`,
24
+ * although they do appear in the final returned result from the Agent.
25
+ * This callback gives clients the chance to see them earlier.
26
+ */
27
+ onImage(image: OpenAI.Chat.Completions.ChatCompletionContentPartImage): void;
28
+
21
29
  /**
22
30
  * Called when a tool call execution completes (success, error, or denial).
23
31
  * These messages are inserted into the LLM context.
24
32
  */
25
- onToolCallResult(result: OpenAI.ChatCompletionToolMessageParam): void;
33
+ onToolCallResult(result: ChatCompletionToolMessageParam): void;
26
34
 
27
35
  /**
28
36
  * Called when the agent produces a message chunk (streaming). Calls here
@@ -33,6 +41,11 @@ export interface IAgentEventHandler {
33
41
  */
34
42
  onAgentMessage(chunk: string, isEnd: boolean): Promise<void>;
35
43
 
44
+ /**
45
+ * Called when the agent produces reasoning tokens
46
+ */
47
+ onReasoning(chunk: string): Promise<void>;
48
+
36
49
  /**
37
50
  * Called when the agent wants to execute a tool call. Calls do NOT
38
51
  * correspond to context entries. (A completion from the LLM may correspond
@@ -0,0 +1,99 @@
1
+ import { OpenAI } from "openai";
2
+ import { strict as assert } from "assert";
3
+ import { writeFileSync } from "fs";
4
+
5
+ import { getLogger } from "@xalia/xmcp/sdk";
6
+
7
+ import { ILLM, ChatCompletion, XALIA_APP_HEADER } from "./llm";
8
+
9
+ const logger = getLogger();
10
+
11
+ export const DEFAULT_IMAGE_GEN_MODEL = "google/gemini-2.5-flash-image-preview";
12
+
13
+ type ChatCompletionCreateParams = Omit<
14
+ OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming,
15
+ "modalities"
16
+ > & {
17
+ modalities: Array<"text" | "audio" | "image">;
18
+ };
19
+
20
+ export class ImageGenLLM implements ILLM {
21
+ private readonly openai: OpenAI;
22
+ private model: string;
23
+
24
+ constructor(
25
+ apiKey: string | undefined,
26
+ apiUrl: string | undefined,
27
+ model: string | undefined
28
+ ) {
29
+ logger.debug(`here`);
30
+ this.openai = new OpenAI({
31
+ apiKey,
32
+ baseURL: apiUrl,
33
+ dangerouslyAllowBrowser: true,
34
+ defaultHeaders: XALIA_APP_HEADER,
35
+ });
36
+ this.model = model || DEFAULT_IMAGE_GEN_MODEL;
37
+ }
38
+
39
+ public setModel(model: string) {
40
+ this.model = model;
41
+ }
42
+
43
+ getModel(): string {
44
+ return this.model;
45
+ }
46
+
47
+ getUrl(): string {
48
+ return this.openai.baseURL;
49
+ }
50
+
51
+ public async getConversationResponse(
52
+ messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[],
53
+ tools?: OpenAI.Chat.Completions.ChatCompletionTool[],
54
+ onMessage?: (msg: string, end: boolean) => Promise<void>
55
+ ): Promise<ChatCompletion> {
56
+ assert(!tools || tools.length === 0, "tools not supported in ImageGenLLM");
57
+
58
  // Designed for image generation using openrouter, which tweaks the Create
  // params type to allow an "image" modality.
59
+ const params: ChatCompletionCreateParams = {
60
+ model: this.model,
61
+ messages,
62
+ tools,
63
+ modalities: ["image", "text"],
64
+ };
65
+
66
+ logger.info(`[ImageGenLLM] params; ${JSON.stringify(params)}`);
67
+
68
+ const completion = (await this.openai.chat.completions.create(
69
+ params as OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming
70
+ )) as ChatCompletion;
71
+
72
+ // const completion = {} as unknown as ChatCompletion;
73
+
74
+ const filePath: string = "./completion.json";
75
+ logger.info(`[ImageGenLLM] writing ${filePath}`);
76
+ writeFileSync(filePath, JSON.stringify(completion), "utf-8");
77
+ logger.info(`[ImageGenLLM] written`);
78
+
79
+ // logger.debug(
80
+ // `[ImageGenLLM.getConversationResponse] completion:
81
+ // ${JSON.stringify(completion)}`
82
+ // );
83
+
84
+ if (onMessage) {
85
+ const message = completion.choices[0].message;
86
+ if (message.content) {
87
+ await onMessage(message.content, true);
88
+ }
89
+ if (message.images) {
90
+ message.images.forEach((image, index) => {
91
+ const imageUrl = image.image_url.url; // Base64 data URL
92
+ const truncated = imageUrl.substring(0, 50);
93
+ logger.info(`[ImageGenLLM] ${String(index + 1)}: ${truncated}...`);
94
+ });
95
+ }
96
+ }
97
+ return completion;
98
+ }
99
+ }
@@ -0,0 +1,60 @@
1
+ import { Agent, createUserMessage } from "./agent";
2
+ import { createLLM } from "./agentUtils";
3
+ import { ContextManager } from "./context";
4
+ import { NULL_AGENT_EVENT_HANDLER } from "./nullAgentEventHandler";
5
+ import { NULL_PLATFORM } from "./nullPlatform";
6
+ import { NODE_PLATFORM } from "../tool/nodePlatform";
7
+ import { DEFAULT_IMAGE_GEN_MODEL } from "./imageGenLLM";
8
+
9
+ const IMAGE_GEN_SYSTEM_PROMPT = "You are an image generator";
10
+
11
+ export class ImageGenerator {
12
+ readonly agent: Agent;
13
+ readonly contextManager: ContextManager;
14
+
15
+ constructor(agent: Agent, contextManager: ContextManager) {
16
+ this.agent = agent;
17
+ this.contextManager = contextManager;
18
+ }
19
+
20
+ public static async init(
21
+ llmUrl: string,
22
+ llmApiKey: string,
23
+ model?: string
24
+ ): Promise<ImageGenerator> {
25
+ const development = !!process.env.DEVELOPMENT;
26
+ const llm = await createLLM(
27
+ llmUrl,
28
+ llmApiKey,
29
+ model || DEFAULT_IMAGE_GEN_MODEL,
30
+ false /* stream */,
31
+ development ? NODE_PLATFORM : NULL_PLATFORM // allow file loading
32
+ );
33
+ const contextManager = new ContextManager(IMAGE_GEN_SYSTEM_PROMPT, []);
34
+ const agent = Agent.initializeWithLLM(
35
+ NULL_AGENT_EVENT_HANDLER,
36
+ llm,
37
+ contextManager
38
+ );
39
+ return new ImageGenerator(agent, contextManager);
40
+ }
41
+
42
+ public async generate(prompt: string, image?: string): Promise<string> {
43
+ const userMessage = createUserMessage(prompt, image);
44
+ if (!userMessage) {
45
+ throw new Error("invalid user message / input image");
46
+ }
47
+ const agentResponse = await this.agent.userMessageRaw(userMessage);
48
+ if (!agentResponse) {
49
+ throw new Error("invalid response from image gen agent");
50
+ }
51
+ if (!agentResponse.images || agentResponse.images.length === 0) {
52
+ throw new Error("invalid response from image gen agent");
53
+ }
54
+
55
+ // Clear the context
56
+ while (this.contextManager.popMessage());
57
+
58
+ return agentResponse.images[0].image_url.url;
59
+ }
60
+ }
package/src/agent/llm.ts CHANGED
@@ -1,15 +1,139 @@
1
1
  import { OpenAI } from "openai";
2
2
 
3
+ export const XALIA_APP_HEADER = {
4
+ "HTTP-Referer": "xalia.ai",
5
+ "X-Title": "Xalia",
6
+ };
7
+
8
+ // Extend the ChatCompletionMessage type with an `images` value, compatible
9
+ // with google/gemini-2.5-flash-image-preview.
10
+
11
+ export type ChatCompletionMessage =
12
+ OpenAI.Chat.Completions.ChatCompletionMessage & {
13
+ images?: OpenAI.Chat.Completions.ChatCompletionContentPartImage[];
14
+ };
15
+
16
+ export type ChatCompletionChoice = Omit<
17
+ OpenAI.Chat.Completions.ChatCompletion.Choice,
18
+ "message"
19
+ > & { message: ChatCompletionMessage };
20
+
21
+ export type ChatCompletion = Omit<
22
+ OpenAI.Chat.Completions.ChatCompletion,
23
+ "choices"
24
+ > & { choices: Array<ChatCompletionChoice> };
25
+
26
+ // Shortcuts to other useful OpenAI types.
27
+
28
+ export type ChatCompletionMessageToolCall =
29
+ OpenAI.ChatCompletionMessageToolCall;
30
+
31
+ export type ChatCompletionAssistantMessageParam =
32
+ OpenAI.ChatCompletionAssistantMessageParam;
33
+
34
+ export type ChatCompletionUserMessageParam =
35
+ OpenAI.ChatCompletionUserMessageParam;
36
+
37
+ export type ChatCompletionToolMessageParam =
38
+ OpenAI.ChatCompletionToolMessageParam & {
39
+ metadata?: Record<string, string>;
40
+ };
41
+
42
+ export type ChatCompletionMessageParam =
43
+ | OpenAI.Chat.Completions.ChatCompletionSystemMessageParam
44
+ | ChatCompletionAssistantMessageParam
45
+ | ChatCompletionUserMessageParam
46
+ | ChatCompletionToolMessageParam;
47
+
48
+ // The tool description type
49
+
50
+ export type ChatCompletionTool = OpenAI.Chat.Completions.ChatCompletionTool;
51
+
52
+ // CompletionCreate params
53
+
54
+ // openrouter reasoning type
55
+
56
+ export type ReasoningEffort = {
57
+ effort?: OpenAI.ReasoningEffort;
58
+ max_tokens?: never;
59
+ };
60
+
61
+ export type ReasoningMaxTokens = { effort?: never; max_tokens?: number };
62
+
63
+ export type ReasoningExclude = { exclude?: boolean; enabled?: never };
64
+
65
+ export type ReasoningEnabled = {
66
+ exclude?: never;
67
+ enabled?: boolean;
68
+ };
69
+
70
+ export type Reasoning = (ReasoningEffort | ReasoningMaxTokens) &
71
+ (ReasoningExclude | ReasoningEnabled);
72
+
73
+ export type ReasoningDetails = {
74
+ type: "reasoning.text" | "<unknown>";
75
+ text?: string;
76
+ signature?: string;
77
+ format?: string;
78
+ index?: number;
79
+ };
80
+
81
+ /**
82
  * An (openrouter-specific) stream chunk possibly containing reasoning tokens.
83
+ */
84
+ export type ChatCompletionChunkChoiceDeltaWithReasoning =
85
+ OpenAI.Chat.Completions.ChatCompletionChunk.Choice.Delta & {
86
+ reasoning?: string;
87
+ reasoning_details?: ReasoningDetails[];
88
+ };
89
+
90
+ export function choiceDeltaExtractReasoning(
91
+ delta: ChatCompletionChunkChoiceDeltaWithReasoning
92
+ ): string | undefined {
93
+ if (delta.reasoning) {
94
+ return delta.reasoning;
95
+ }
96
+
97
+ if (delta.reasoning_details) {
98
+ let reasoning = "";
99
+ for (const details of delta.reasoning_details) {
100
+ if (details.type !== "reasoning.text") {
101
+ throw new Error(`unexpected details.type: ${details.type}`);
102
+ }
103
+ if (details.text) {
104
+ if (typeof details.text !== "string") {
105
+ throw new Error(
106
+ `unexpected typeof details.text: ${typeof details.text}`
107
+ );
108
+ }
109
+ reasoning += details.text;
110
+ }
111
+ }
112
+ return reasoning;
113
+ }
114
+
115
+ return undefined;
116
+ }
117
+
118
+ /**
119
+ * A chat completion message with extra reasoning tokens.
120
+ */
121
+ export type ChatCompletionMessageWithReasoning =
122
+ OpenAI.Chat.Completions.ChatCompletionMessage & {
123
+ reasoning?: string;
124
+ };
125
+
3
126
  export interface ILLM {
4
127
  getModel(): string;
5
128
 
6
129
  getUrl(): string;
7
130
 
8
131
  getConversationResponse(
9
- messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[],
10
- tools?: OpenAI.Chat.Completions.ChatCompletionTool[],
11
- onMessage?: (msg: string, end: boolean) => Promise<void>
12
- ): Promise<OpenAI.Chat.Completions.ChatCompletion>;
132
+ messages: ChatCompletionMessageParam[],
133
+ tools?: ChatCompletionTool[],
134
+ onMessage?: (msg: string, end: boolean) => Promise<void>,
135
+ onReasoning?: (reasoning: string) => Promise<void>
136
+ ): Promise<ChatCompletion>;
13
137
 
14
138
  setModel(model: string): void;
15
139
  }
@@ -12,6 +12,12 @@ export type { McpServerSettings } from "@xalia/xmcp/sdk";
12
12
 
13
13
  const logger = getLogger();
14
14
 
15
+ export type VerifiedMcpToolCall = {
16
+ mcpServerName: string;
17
+ toolName: string;
18
+ args: unknown;
19
+ };
20
+
15
21
  /// Callback into an Mcp server
16
22
  export type McpCallback = { (args: string): Promise<string> };
17
23
 
@@ -292,14 +298,10 @@ export class McpServerManager implements IMcpServerManager {
292
298
  return this.enabledOpenAITools;
293
299
  }
294
300
 
295
- /**
296
- * Note the `qualifiedToolName` is the full `{mcpServerName}/{toolName}` as
297
- * in the openai spec.
298
- */
299
- public async invoke(
301
+ public verifyToolCall(
300
302
  qualifiedToolName: string,
301
303
  args: unknown
302
- ): Promise<string> {
304
+ ): VerifiedMcpToolCall {
303
305
  const [mcpServerName, toolName] = splitQualifiedName(qualifiedToolName);
304
306
  logger.debug(`invoke: qualified: ${qualifiedToolName}`);
305
307
  logger.debug(
@@ -312,8 +314,25 @@ export class McpServerManager implements IMcpServerManager {
312
314
  if (!cb) {
313
315
  throw new Error(`Unknown tool ${qualifiedToolName}`);
314
316
  }
317
+ return {
318
+ mcpServerName,
319
+ toolName,
320
+ args,
321
+ };
322
+ }
323
+
324
+ /**
325
+ * Note the `qualifiedToolName` is the full `{mcpServerName}/{toolName}` as
326
+ * in the openai spec.
327
+ */
328
+ public async invoke(toolCall: VerifiedMcpToolCall): Promise<string> {
329
+ const server = this.getMcpServerInternal(toolCall.mcpServerName);
330
+ const cb = server.getCallback(toolCall.toolName);
331
+ if (!cb) {
332
+ throw new Error(`Unknown tool ${toolCall.toolName}`);
333
+ }
315
334
 
316
- return cb(JSON.stringify(args));
335
+ return cb(JSON.stringify(toolCall.args));
317
336
  }
318
337
 
319
338
  /**
@@ -6,12 +6,18 @@ import { IAgentEventHandler } from "./iAgentEventHandler";
6
6
  */
7
7
  export const NULL_AGENT_EVENT_HANDLER: IAgentEventHandler = {
8
8
  onCompletion: (): void => {},
9
+ onImage: (): void => {},
9
10
  onToolCallResult: (): void => {},
10
11
  onAgentMessage: (): Promise<void> => {
11
12
  return new Promise<void>((r) => {
12
13
  r();
13
14
  });
14
15
  },
16
+ onReasoning: (): Promise<void> => {
17
+ return new Promise<void>((r) => {
18
+ r();
19
+ });
20
+ },
15
21
  onToolCall: (): Promise<boolean> => {
16
22
  return new Promise((r) => {
17
23
  r(false);
@@ -1,23 +1,18 @@
1
- import { DEFAULT_LLM_MODEL, XALIA_APP_HEADER } from "./agentUtils";
2
- import { ILLM } from "./llm";
1
+ import { ILLM, XALIA_APP_HEADER } from "./llm";
3
2
  import { OpenAI } from "openai";
4
3
 
5
4
  export class OpenAILLM implements ILLM {
6
5
  private readonly openai: OpenAI;
7
6
  private model: string;
8
7
 
9
- constructor(
10
- apiKey: string,
11
- apiUrl: string | undefined,
12
- model: string | undefined
13
- ) {
8
+ constructor(apiKey: string, apiUrl: string | undefined, model: string) {
14
9
  this.openai = new OpenAI({
15
10
  apiKey,
16
11
  baseURL: apiUrl,
17
12
  dangerouslyAllowBrowser: true,
18
13
  defaultHeaders: XALIA_APP_HEADER,
19
14
  });
20
- this.model = model || DEFAULT_LLM_MODEL;
15
+ this.model = model;
21
16
  }
22
17
 
23
18
  public setModel(model: string) {