@xalia/agent 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/src/agent/agent.js +103 -54
- package/dist/agent/src/agent/agentUtils.js +22 -21
- package/dist/agent/src/agent/compressingContextManager.js +3 -2
- package/dist/agent/src/agent/dummyLLM.js +1 -3
- package/dist/agent/src/agent/imageGenLLM.js +67 -0
- package/dist/agent/src/agent/imageGenerator.js +43 -0
- package/dist/agent/src/agent/llm.js +27 -0
- package/dist/agent/src/agent/mcpServerManager.js +18 -6
- package/dist/agent/src/agent/nullAgentEventHandler.js +6 -0
- package/dist/agent/src/agent/openAILLM.js +3 -3
- package/dist/agent/src/agent/openAILLMStreaming.js +41 -6
- package/dist/agent/src/chat/client/chatClient.js +84 -13
- package/dist/agent/src/chat/client/sessionClient.js +47 -6
- package/dist/agent/src/chat/client/sessionFiles.js +102 -0
- package/dist/agent/src/chat/data/apiKeyManager.js +38 -7
- package/dist/agent/src/chat/data/database.js +83 -70
- package/dist/agent/src/chat/data/dbSessionFileModels.js +49 -0
- package/dist/agent/src/chat/data/dbSessionFiles.js +76 -0
- package/dist/agent/src/chat/data/dbSessionMessages.js +57 -0
- package/dist/agent/src/chat/data/mimeTypes.js +44 -0
- package/dist/agent/src/chat/protocol/messages.js +21 -0
- package/dist/agent/src/chat/server/chatContextManager.js +14 -7
- package/dist/agent/src/chat/server/connectionManager.js +14 -36
- package/dist/agent/src/chat/server/connectionManager.test.js +2 -16
- package/dist/agent/src/chat/server/conversation.js +69 -45
- package/dist/agent/src/chat/server/imageGeneratorTools.js +111 -0
- package/dist/agent/src/chat/server/openSession.js +205 -43
- package/dist/agent/src/chat/server/server.js +5 -8
- package/dist/agent/src/chat/server/sessionFileManager.js +171 -38
- package/dist/agent/src/chat/server/sessionRegistry.js +199 -32
- package/dist/agent/src/chat/server/test-utils/mockFactories.js +12 -11
- package/dist/agent/src/chat/server/tools.js +27 -6
- package/dist/agent/src/chat/utils/multiAsyncQueue.js +9 -1
- package/dist/agent/src/test/agent.test.js +15 -11
- package/dist/agent/src/test/chatContextManager.test.js +4 -0
- package/dist/agent/src/test/clientServerConnection.test.js +2 -2
- package/dist/agent/src/test/db.test.js +33 -70
- package/dist/agent/src/test/dbSessionFiles.test.js +179 -0
- package/dist/agent/src/test/dbSessionMessages.test.js +67 -0
- package/dist/agent/src/test/dbTestTools.js +6 -5
- package/dist/agent/src/test/imageLoad.test.js +1 -1
- package/dist/agent/src/test/mcpServerManager.test.js +1 -1
- package/dist/agent/src/test/multiAsyncQueue.test.js +50 -0
- package/dist/agent/src/test/testTools.js +12 -0
- package/dist/agent/src/tool/agentChat.js +25 -6
- package/dist/agent/src/tool/agentMain.js +1 -1
- package/dist/agent/src/tool/chatMain.js +113 -4
- package/dist/agent/src/tool/commandPrompt.js +7 -3
- package/dist/agent/src/tool/files.js +23 -15
- package/dist/agent/src/tool/options.js +2 -2
- package/package.json +1 -1
- package/scripts/test_chat +124 -66
- package/src/agent/agent.ts +145 -38
- package/src/agent/agentUtils.ts +27 -21
- package/src/agent/compressingContextManager.ts +5 -4
- package/src/agent/context.ts +1 -1
- package/src/agent/dummyLLM.ts +1 -3
- package/src/agent/iAgentEventHandler.ts +15 -2
- package/src/agent/imageGenLLM.ts +99 -0
- package/src/agent/imageGenerator.ts +60 -0
- package/src/agent/llm.ts +128 -4
- package/src/agent/mcpServerManager.ts +26 -7
- package/src/agent/nullAgentEventHandler.ts +6 -0
- package/src/agent/openAILLM.ts +3 -8
- package/src/agent/openAILLMStreaming.ts +60 -14
- package/src/chat/client/chatClient.ts +119 -14
- package/src/chat/client/sessionClient.ts +75 -9
- package/src/chat/client/sessionFiles.ts +145 -0
- package/src/chat/data/apiKeyManager.ts +55 -7
- package/src/chat/data/dataModels.ts +16 -7
- package/src/chat/data/database.ts +107 -92
- package/src/chat/data/dbSessionFileModels.ts +91 -0
- package/src/chat/data/dbSessionFiles.ts +99 -0
- package/src/chat/data/dbSessionMessages.ts +68 -0
- package/src/chat/data/mimeTypes.ts +58 -0
- package/src/chat/protocol/messages.ts +127 -13
- package/src/chat/server/chatContextManager.ts +36 -13
- package/src/chat/server/connectionManager.test.ts +1 -22
- package/src/chat/server/connectionManager.ts +18 -53
- package/src/chat/server/conversation.ts +96 -57
- package/src/chat/server/imageGeneratorTools.ts +138 -0
- package/src/chat/server/openSession.ts +287 -49
- package/src/chat/server/server.ts +5 -11
- package/src/chat/server/sessionFileManager.ts +223 -63
- package/src/chat/server/sessionRegistry.ts +285 -41
- package/src/chat/server/test-utils/mockFactories.ts +13 -13
- package/src/chat/server/tools.ts +43 -8
- package/src/chat/utils/agentSessionMap.ts +2 -2
- package/src/chat/utils/multiAsyncQueue.ts +11 -1
- package/src/test/agent.test.ts +23 -14
- package/src/test/chatContextManager.test.ts +7 -2
- package/src/test/clientServerConnection.test.ts +3 -3
- package/src/test/compressingContextManager.test.ts +1 -1
- package/src/test/context.test.ts +2 -1
- package/src/test/conversation.test.ts +1 -1
- package/src/test/db.test.ts +41 -83
- package/src/test/dbSessionFiles.test.ts +258 -0
- package/src/test/dbSessionMessages.test.ts +85 -0
- package/src/test/dbTestTools.ts +9 -5
- package/src/test/imageLoad.test.ts +2 -2
- package/src/test/mcpServerManager.test.ts +3 -1
- package/src/test/multiAsyncQueue.test.ts +58 -0
- package/src/test/testTools.ts +15 -1
- package/src/tool/agentChat.ts +35 -7
- package/src/tool/agentMain.ts +7 -7
- package/src/tool/chatMain.ts +126 -5
- package/src/tool/commandPrompt.ts +10 -5
- package/src/tool/files.ts +30 -13
- package/src/tool/options.ts +1 -1
- package/test_data/dummyllm_script_image_gen.json +19 -0
- package/test_data/dummyllm_script_invoke_image_gen_tool.json +30 -0
- package/test_data/image_gen_test_profile.json +5 -0
package/src/agent/agentUtils.ts
CHANGED
|
@@ -8,26 +8,18 @@ import { Configuration as SudoMcpConfiguration } from "@xalia/xmcp/sdk";
|
|
|
8
8
|
import { OpenAILLM } from "./openAILLM";
|
|
9
9
|
import { OpenAILLMStreaming } from "./openAILLMStreaming";
|
|
10
10
|
import { DummyLLM } from "./dummyLLM";
|
|
11
|
-
import { ILLM } from "./llm";
|
|
11
|
+
import { ChatCompletionMessageParam, ILLM } from "./llm";
|
|
12
12
|
import { strict as assert } from "assert";
|
|
13
13
|
import { RepeatLLM } from "./repeatLLM";
|
|
14
14
|
import { ContextManager, IContextManager } from "./context";
|
|
15
|
+
import { DEFAULT_IMAGE_GEN_MODEL, ImageGenLLM } from "./imageGenLLM";
|
|
15
16
|
|
|
16
17
|
const logger = getLogger();
|
|
17
18
|
|
|
18
|
-
export const DEFAULT_LLM_URL = "http://localhost:5001/v1";
|
|
19
|
-
// uses openrouter
|
|
20
|
-
export const DEFAULT_LLM_MODEL =
|
|
21
|
-
process.env["DEFAULT_LLM_MODEL"] || "openai/gpt-4o";
|
|
22
|
-
|
|
23
|
-
export const XALIA_APP_HEADER = {
|
|
24
|
-
"HTTP-Referer": "xalia.ai",
|
|
25
|
-
"X-Title": "Xalia",
|
|
26
|
-
};
|
|
27
|
-
|
|
28
19
|
export async function createAgentWithoutSkills(
|
|
29
20
|
llmUrl: string,
|
|
30
21
|
agentProfile: AgentProfile,
|
|
22
|
+
defaultModel: string,
|
|
31
23
|
eventHandler: IAgentEventHandler,
|
|
32
24
|
platform: IPlatform,
|
|
33
25
|
contextManager: IContextManager,
|
|
@@ -55,6 +47,7 @@ export async function createAgentWithoutSkills(
|
|
|
55
47
|
const agent = await createAgentFromSkillManager(
|
|
56
48
|
llmUrl,
|
|
57
49
|
agentProfile,
|
|
50
|
+
defaultModel,
|
|
58
51
|
eventHandler,
|
|
59
52
|
platform,
|
|
60
53
|
contextManager,
|
|
@@ -74,6 +67,7 @@ export async function createAgentWithoutSkills(
|
|
|
74
67
|
export async function createAgentWithSkills(
|
|
75
68
|
llmUrl: string,
|
|
76
69
|
agentProfile: AgentProfile,
|
|
70
|
+
defaultModel: string,
|
|
77
71
|
eventHandler: IAgentEventHandler,
|
|
78
72
|
platform: IPlatform,
|
|
79
73
|
contextManager: IContextManager,
|
|
@@ -85,6 +79,7 @@ export async function createAgentWithSkills(
|
|
|
85
79
|
const [agent, sudoMcpServerManager] = await createAgentWithoutSkills(
|
|
86
80
|
llmUrl,
|
|
87
81
|
agentProfile,
|
|
82
|
+
defaultModel,
|
|
88
83
|
eventHandler,
|
|
89
84
|
platform,
|
|
90
85
|
contextManager,
|
|
@@ -103,11 +98,12 @@ export async function createAgentWithSkills(
|
|
|
103
98
|
export async function createAgentFromSkillManager(
|
|
104
99
|
llmUrl: string,
|
|
105
100
|
agentProfile: AgentProfile,
|
|
101
|
+
defaultModel: string,
|
|
106
102
|
eventHandler: IAgentEventHandler,
|
|
107
103
|
platform: IPlatform,
|
|
108
104
|
contextManager: IContextManager,
|
|
109
105
|
llmApiKey: string | undefined,
|
|
110
|
-
skillManager: SkillManager,
|
|
106
|
+
skillManager: SkillManager | undefined,
|
|
111
107
|
stream: boolean = false
|
|
112
108
|
): Promise<Agent> {
|
|
113
109
|
// Create agent
|
|
@@ -115,7 +111,7 @@ export async function createAgentFromSkillManager(
|
|
|
115
111
|
const llm = await createLLM(
|
|
116
112
|
llmUrl,
|
|
117
113
|
llmApiKey,
|
|
118
|
-
agentProfile.model,
|
|
114
|
+
agentProfile.model || defaultModel,
|
|
119
115
|
stream,
|
|
120
116
|
platform
|
|
121
117
|
);
|
|
@@ -134,7 +130,7 @@ export async function createAgentFromSkillManager(
|
|
|
134
130
|
export async function createLLM(
|
|
135
131
|
llmUrl: string | undefined,
|
|
136
132
|
llmApiKey: string | undefined,
|
|
137
|
-
model: string
|
|
133
|
+
model: string,
|
|
138
134
|
stream: boolean = false,
|
|
139
135
|
platform: IPlatform
|
|
140
136
|
): Promise<ILLM> {
|
|
@@ -144,15 +140,16 @@ export async function createLLM(
|
|
|
144
140
|
llm = await DummyLLM.initFromModelUrl(model, platform);
|
|
145
141
|
} else if (model === "repeat") {
|
|
146
142
|
llm = new RepeatLLM();
|
|
143
|
+
} else if (model == DEFAULT_IMAGE_GEN_MODEL) {
|
|
144
|
+
logger.info("ImageGenLLM");
|
|
145
|
+
llm = new ImageGenLLM(llmApiKey, llmUrl, model);
|
|
147
146
|
} else {
|
|
148
147
|
// Regular Agent
|
|
149
148
|
if (!llmApiKey) {
|
|
150
149
|
throw new Error("Missing OpenAI API Key");
|
|
151
150
|
}
|
|
152
151
|
|
|
153
|
-
logger.debug(
|
|
154
|
-
`Initializing Agent: ${llmUrl ?? "unknown"} - ${model ?? "unknown"}`
|
|
155
|
-
);
|
|
152
|
+
logger.debug(`Initializing Agent: ${llmUrl ?? "unknown"} - ${model}`);
|
|
156
153
|
if (stream) {
|
|
157
154
|
llm = new OpenAILLMStreaming(llmApiKey, llmUrl, model);
|
|
158
155
|
} else {
|
|
@@ -173,7 +170,8 @@ export async function createLLM(
|
|
|
173
170
|
export async function createNonInteractiveAgent(
|
|
174
171
|
url: string,
|
|
175
172
|
agentProfile: AgentProfile,
|
|
176
|
-
|
|
173
|
+
defaultModel: string,
|
|
174
|
+
conversation: ChatCompletionMessageParam[] | undefined,
|
|
177
175
|
platform: IPlatform,
|
|
178
176
|
openaiApiKey: string | undefined,
|
|
179
177
|
sudomcpConfig: SudoMcpConfiguration,
|
|
@@ -182,7 +180,9 @@ export async function createNonInteractiveAgent(
|
|
|
182
180
|
let remainingToolCalls = approveToolsUpTo;
|
|
183
181
|
const eventHandler: IAgentEventHandler = {
|
|
184
182
|
onCompletion: () => {},
|
|
183
|
+
onImage: () => {},
|
|
185
184
|
onAgentMessage: async () => {},
|
|
185
|
+
onReasoning: async () => {},
|
|
186
186
|
// eslint-disable-next-line @typescript-eslint/require-await
|
|
187
187
|
onToolCall: async () => {
|
|
188
188
|
if (remainingToolCalls !== 0) {
|
|
@@ -201,6 +201,7 @@ export async function createNonInteractiveAgent(
|
|
|
201
201
|
const [agent, _] = await createAgentWithSkills(
|
|
202
202
|
url,
|
|
203
203
|
agentProfile,
|
|
204
|
+
defaultModel,
|
|
204
205
|
eventHandler,
|
|
205
206
|
platform,
|
|
206
207
|
contextManager,
|
|
@@ -219,7 +220,8 @@ export async function createNonInteractiveAgent(
|
|
|
219
220
|
export async function runOneShot(
|
|
220
221
|
url: string,
|
|
221
222
|
agentProfile: AgentProfile,
|
|
222
|
-
|
|
223
|
+
defaultModel: string,
|
|
224
|
+
conversation: ChatCompletionMessageParam[] | undefined,
|
|
223
225
|
platform: IPlatform,
|
|
224
226
|
prompt: string,
|
|
225
227
|
image: string | undefined,
|
|
@@ -229,6 +231,7 @@ export async function runOneShot(
|
|
|
229
231
|
): Promise<{
|
|
230
232
|
response: string;
|
|
231
233
|
conversation: OpenAI.ChatCompletionMessageParam[];
|
|
234
|
+
images: OpenAI.Chat.Completions.ChatCompletionContentPartImage[] | undefined;
|
|
232
235
|
}> {
|
|
233
236
|
logger.debug("[runOneShot]: start");
|
|
234
237
|
|
|
@@ -238,6 +241,7 @@ export async function runOneShot(
|
|
|
238
241
|
const agent = await createNonInteractiveAgent(
|
|
239
242
|
url,
|
|
240
243
|
agentProfile,
|
|
244
|
+
defaultModel,
|
|
241
245
|
conversation,
|
|
242
246
|
platform,
|
|
243
247
|
llmApiKey,
|
|
@@ -245,15 +249,16 @@ export async function runOneShot(
|
|
|
245
249
|
approveToolsUpTo
|
|
246
250
|
);
|
|
247
251
|
|
|
248
|
-
const
|
|
252
|
+
const agentResponse = await agent.userMessageEx(prompt, image);
|
|
249
253
|
await agent.shutdown();
|
|
250
254
|
logger.debug("[runOneShot]: shutdown done");
|
|
251
255
|
|
|
252
|
-
if (!
|
|
256
|
+
if (!agentResponse) {
|
|
253
257
|
throw new Error("No message returned from agent");
|
|
254
258
|
}
|
|
255
259
|
|
|
256
260
|
// Handle different content types
|
|
261
|
+
const response = agentResponse.message;
|
|
257
262
|
let responseText = "";
|
|
258
263
|
if (typeof response.content === "string") {
|
|
259
264
|
responseText = response.content;
|
|
@@ -277,5 +282,6 @@ export async function runOneShot(
|
|
|
277
282
|
return {
|
|
278
283
|
response: responseText,
|
|
279
284
|
conversation: agent.getConversation(),
|
|
285
|
+
images: agentResponse.images,
|
|
280
286
|
};
|
|
281
287
|
}
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import { strict as assert } from "assert";
|
|
2
2
|
import { getLogger } from "@xalia/xmcp/sdk";
|
|
3
3
|
|
|
4
|
+
import { Agent } from "./agent";
|
|
4
5
|
import {
|
|
5
|
-
Agent,
|
|
6
6
|
ChatCompletionUserMessageParam,
|
|
7
7
|
ChatCompletionMessageParam,
|
|
8
|
-
} from "./
|
|
8
|
+
} from "./llm";
|
|
9
9
|
import { NULL_PLATFORM } from "./nullPlatform";
|
|
10
10
|
import { createLLM } from "./agentUtils";
|
|
11
11
|
import { ContextManager, ContextManagerWithCommit } from "./context";
|
|
@@ -68,11 +68,12 @@ export async function createSummary(
|
|
|
68
68
|
compressionAgentApiKey
|
|
69
69
|
);
|
|
70
70
|
|
|
71
|
-
const
|
|
72
|
-
if (!
|
|
71
|
+
const agentResp = await agent.userMessageEx(JSON.stringify(conversation));
|
|
72
|
+
if (!agentResp) {
|
|
73
73
|
throw new Error("compression agent returned null");
|
|
74
74
|
}
|
|
75
75
|
|
|
76
|
+
const resp = agentResp.message;
|
|
76
77
|
assert(resp.role === "assistant");
|
|
77
78
|
assert(
|
|
78
79
|
typeof resp.content === "string",
|
package/src/agent/context.ts
CHANGED
package/src/agent/dummyLLM.ts
CHANGED
|
@@ -2,7 +2,8 @@ import { OpenAI } from "openai";
|
|
|
2
2
|
import {
|
|
3
3
|
ChatCompletionAssistantMessageParam,
|
|
4
4
|
ChatCompletionMessageToolCall,
|
|
5
|
-
|
|
5
|
+
ChatCompletionToolMessageParam,
|
|
6
|
+
} from "./llm";
|
|
6
7
|
|
|
7
8
|
/**
|
|
8
9
|
* Interface for handling events from the Agent.
|
|
@@ -18,11 +19,18 @@ export interface IAgentEventHandler {
|
|
|
18
19
|
*/
|
|
19
20
|
onCompletion(result: ChatCompletionAssistantMessageParam): void;
|
|
20
21
|
|
|
22
|
+
/**
|
|
23
|
+
* Images do not appear as part of `ChatCompletionAssistantMessageParam`,
|
|
24
|
+
* although they do appear in the final returned result from the Agent.
|
|
25
|
+
* This callback gives clients the chance to see them earlier.
|
|
26
|
+
*/
|
|
27
|
+
onImage(image: OpenAI.Chat.Completions.ChatCompletionContentPartImage): void;
|
|
28
|
+
|
|
21
29
|
/**
|
|
22
30
|
* Called when a tool call execution completes (success, error, or denial).
|
|
23
31
|
* These messages are inserted into the LLM context.
|
|
24
32
|
*/
|
|
25
|
-
onToolCallResult(result:
|
|
33
|
+
onToolCallResult(result: ChatCompletionToolMessageParam): void;
|
|
26
34
|
|
|
27
35
|
/**
|
|
28
36
|
* Called when the agent produces a message chunk (streaming). Calls here
|
|
@@ -33,6 +41,11 @@ export interface IAgentEventHandler {
|
|
|
33
41
|
*/
|
|
34
42
|
onAgentMessage(chunk: string, isEnd: boolean): Promise<void>;
|
|
35
43
|
|
|
44
|
+
/**
|
|
45
|
+
* Called when the agent produces reasoning tokens
|
|
46
|
+
*/
|
|
47
|
+
onReasoning(chunk: string): Promise<void>;
|
|
48
|
+
|
|
36
49
|
/**
|
|
37
50
|
* Called when the agent wants to execute a tool call. Calls do NOT
|
|
38
51
|
* correspond to context entries. (A completion from the LLM may correspond
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import { OpenAI } from "openai";
|
|
2
|
+
import { strict as assert } from "assert";
|
|
3
|
+
import { writeFileSync } from "fs";
|
|
4
|
+
|
|
5
|
+
import { getLogger } from "@xalia/xmcp/sdk";
|
|
6
|
+
|
|
7
|
+
import { ILLM, ChatCompletion, XALIA_APP_HEADER } from "./llm";
|
|
8
|
+
|
|
9
|
+
const logger = getLogger();
|
|
10
|
+
|
|
11
|
+
export const DEFAULT_IMAGE_GEN_MODEL = "google/gemini-2.5-flash-image-preview";
|
|
12
|
+
|
|
13
|
+
type ChatCompletionCreateParams = Omit<
|
|
14
|
+
OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming,
|
|
15
|
+
"modalities"
|
|
16
|
+
> & {
|
|
17
|
+
modalities: Array<"text" | "audio" | "image">;
|
|
18
|
+
};
|
|
19
|
+
|
|
20
|
+
/**
 * `ILLM` implementation for image-generating chat models reached through an
 * OpenAI-compatible endpoint.
 *
 * Every request is sent non-streaming with `modalities: ["image", "text"]`.
 * Generated images come back on the (extended) `message.images` field of the
 * returned `ChatCompletion`. Tool calls are not supported.
 */
export class ImageGenLLM implements ILLM {
  private readonly openai: OpenAI;
  private model: string;

  /**
   * @param apiKey API key passed to the OpenAI client.
   * @param apiUrl Base URL of the OpenAI-compatible endpoint.
   * @param model Model name; falls back to `DEFAULT_IMAGE_GEN_MODEL` when
   *   empty or undefined.
   */
  constructor(
    apiKey: string | undefined,
    apiUrl: string | undefined,
    model: string | undefined
  ) {
    this.openai = new OpenAI({
      apiKey,
      baseURL: apiUrl,
      dangerouslyAllowBrowser: true,
      defaultHeaders: XALIA_APP_HEADER,
    });
    this.model = model || DEFAULT_IMAGE_GEN_MODEL;
  }

  public setModel(model: string) {
    this.model = model;
  }

  getModel(): string {
    return this.model;
  }

  getUrl(): string {
    return this.openai.baseURL;
  }

  /**
   * Sends `messages` to the model and returns the raw completion.
   *
   * @param messages Conversation to send.
   * @param tools Must be undefined or empty; anything else fails the assert.
   * @param onMessage If given, called once with the text content of the first
   *   choice (when non-empty) and `end = true`.
   * @returns The completion, whose first choice may carry `message.images`.
   * @throws AssertionError if `tools` is non-empty; any error raised by the
   *   OpenAI client.
   */
  public async getConversationResponse(
    messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[],
    tools?: OpenAI.Chat.Completions.ChatCompletionTool[],
    onMessage?: (msg: string, end: boolean) => Promise<void>
  ): Promise<ChatCompletion> {
    assert(!tools || tools.length === 0, "tools not supported in ImageGenLLM");

    // Designed for image generation using openrouter, which tweaks the create
    // params so that `modalities` also accepts "image" (hence the local
    // `ChatCompletionCreateParams` type and the cast below).
    const params: ChatCompletionCreateParams = {
      model: this.model,
      messages,
      tools,
      modalities: ["image", "text"],
    };

    // Debug level only: `messages` may embed large base64 image data URLs.
    logger.debug(`[ImageGenLLM] params; ${JSON.stringify(params)}`);

    const completion = (await this.openai.chat.completions.create(
      params as OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming
    )) as ChatCompletion;

    // The on-disk dump is a debugging aid. Restrict it to development runs
    // (same DEVELOPMENT switch that ImageGenerator.init reads) instead of
    // writing into the working directory on every request.
    if (typeof process !== "undefined" && process.env.DEVELOPMENT) {
      this.dumpCompletion(completion);
    }

    // A response without choices must not crash the callback handling.
    const message = completion.choices[0]?.message;
    if (onMessage && message) {
      if (message.content) {
        await onMessage(message.content, true);
      }
      if (message.images) {
        message.images.forEach((image, index) => {
          const imageUrl = image.image_url.url; // Base64 data URL
          const truncated = imageUrl.substring(0, 50);
          logger.info(`[ImageGenLLM] ${String(index + 1)}: ${truncated}...`);
        });
      }
    }
    return completion;
  }

  /**
   * Best-effort dump of a completion to `./completion.json`. A failed write
   * (read-only or missing filesystem) is logged and otherwise ignored so it
   * can never fail the request itself.
   */
  private dumpCompletion(completion: ChatCompletion): void {
    const filePath = "./completion.json";
    try {
      logger.info(`[ImageGenLLM] writing ${filePath}`);
      writeFileSync(filePath, JSON.stringify(completion), "utf-8");
      logger.info(`[ImageGenLLM] written`);
    } catch (e: unknown) {
      const reason = e instanceof Error ? e.message : String(e);
      logger.info(`[ImageGenLLM] could not write ${filePath}: ${reason}`);
    }
  }
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import { Agent, createUserMessage } from "./agent";
|
|
2
|
+
import { createLLM } from "./agentUtils";
|
|
3
|
+
import { ContextManager } from "./context";
|
|
4
|
+
import { NULL_AGENT_EVENT_HANDLER } from "./nullAgentEventHandler";
|
|
5
|
+
import { NULL_PLATFORM } from "./nullPlatform";
|
|
6
|
+
import { NODE_PLATFORM } from "../tool/nodePlatform";
|
|
7
|
+
import { DEFAULT_IMAGE_GEN_MODEL } from "./imageGenLLM";
|
|
8
|
+
|
|
9
|
+
const IMAGE_GEN_SYSTEM_PROMPT = "You are an image generator";
|
|
10
|
+
|
|
11
|
+
/**
 * Thin wrapper around an `Agent` that turns a prompt (and optional input
 * image) into a single generated image.
 *
 * The wrapped context manager is drained after every `generate()` call, so
 * calls do not see each other's messages.
 */
export class ImageGenerator {
  readonly agent: Agent;
  readonly contextManager: ContextManager;

  constructor(agent: Agent, contextManager: ContextManager) {
    this.agent = agent;
    this.contextManager = contextManager;
  }

  /**
   * Builds an `ImageGenerator` backed by a non-streaming LLM.
   *
   * @param llmUrl Base URL of the LLM endpoint.
   * @param llmApiKey API key for the endpoint.
   * @param model Model name; defaults to `DEFAULT_IMAGE_GEN_MODEL`.
   *
   * NOTE(review): `createLLM` appears to select `ImageGenLLM` only when the
   * model equals `DEFAULT_IMAGE_GEN_MODEL`; confirm that a custom `model`
   * still yields an image-capable LLM.
   */
  public static async init(
    llmUrl: string,
    llmApiKey: string,
    model?: string
  ): Promise<ImageGenerator> {
    const development = !!process.env.DEVELOPMENT;
    const llm = await createLLM(
      llmUrl,
      llmApiKey,
      model || DEFAULT_IMAGE_GEN_MODEL,
      false /* stream */,
      development ? NODE_PLATFORM : NULL_PLATFORM // allow file loading
    );
    const contextManager = new ContextManager(IMAGE_GEN_SYSTEM_PROMPT, []);
    const agent = Agent.initializeWithLLM(
      NULL_AGENT_EVENT_HANDLER,
      llm,
      contextManager
    );
    return new ImageGenerator(agent, contextManager);
  }

  /**
   * Generates one image.
   *
   * @param prompt Text prompt.
   * @param image Optional input image, forwarded to `createUserMessage`.
   * @returns The `image_url.url` of the first image in the agent response.
   * @throws Error if the user message cannot be built, the agent returns no
   *   response, or the response contains no images.
   */
  public async generate(prompt: string, image?: string): Promise<string> {
    const userMessage = createUserMessage(prompt, image);
    if (!userMessage) {
      throw new Error("invalid user message / input image");
    }

    try {
      const agentResponse = await this.agent.userMessageRaw(userMessage);
      if (!agentResponse) {
        throw new Error("invalid response from image gen agent");
      }
      const images = agentResponse.images;
      if (!images || images.length === 0) {
        throw new Error("invalid response from image gen agent");
      }
      return images[0].image_url.url;
    } finally {
      // Clear the context on every exit path. Previously a failed generation
      // skipped this step and left its messages in the context for the next
      // call. The loop stops once popMessage() returns a falsy value.
      while (this.contextManager.popMessage()) {
        /* keep popping */
      }
    }
  }
}
|
package/src/agent/llm.ts
CHANGED
|
@@ -1,15 +1,139 @@
|
|
|
1
1
|
import { OpenAI } from "openai";
|
|
2
2
|
|
|
3
|
+
export const XALIA_APP_HEADER = {
|
|
4
|
+
"HTTP-Referer": "xalia.ai",
|
|
5
|
+
"X-Title": "Xalia",
|
|
6
|
+
};
|
|
7
|
+
|
|
8
|
+
// Extend the ChatCompletionMessage type with an `images` value, compatible
|
|
9
|
+
// with google/gemini-2.5-flash-image-preview.
|
|
10
|
+
|
|
11
|
+
export type ChatCompletionMessage =
|
|
12
|
+
OpenAI.Chat.Completions.ChatCompletionMessage & {
|
|
13
|
+
images?: OpenAI.Chat.Completions.ChatCompletionContentPartImage[];
|
|
14
|
+
};
|
|
15
|
+
|
|
16
|
+
export type ChatCompletionChoice = Omit<
|
|
17
|
+
OpenAI.Chat.Completions.ChatCompletion.Choice,
|
|
18
|
+
"message"
|
|
19
|
+
> & { message: ChatCompletionMessage };
|
|
20
|
+
|
|
21
|
+
export type ChatCompletion = Omit<
|
|
22
|
+
OpenAI.Chat.Completions.ChatCompletion,
|
|
23
|
+
"choices"
|
|
24
|
+
> & { choices: Array<ChatCompletionChoice> };
|
|
25
|
+
|
|
26
|
+
// Shortcuts to other useful OpenAI types.
|
|
27
|
+
|
|
28
|
+
export type ChatCompletionMessageToolCall =
|
|
29
|
+
OpenAI.ChatCompletionMessageToolCall;
|
|
30
|
+
|
|
31
|
+
export type ChatCompletionAssistantMessageParam =
|
|
32
|
+
OpenAI.ChatCompletionAssistantMessageParam;
|
|
33
|
+
|
|
34
|
+
export type ChatCompletionUserMessageParam =
|
|
35
|
+
OpenAI.ChatCompletionUserMessageParam;
|
|
36
|
+
|
|
37
|
+
export type ChatCompletionToolMessageParam =
|
|
38
|
+
OpenAI.ChatCompletionToolMessageParam & {
|
|
39
|
+
metadata?: Record<string, string>;
|
|
40
|
+
};
|
|
41
|
+
|
|
42
|
+
export type ChatCompletionMessageParam =
|
|
43
|
+
| OpenAI.Chat.Completions.ChatCompletionSystemMessageParam
|
|
44
|
+
| ChatCompletionAssistantMessageParam
|
|
45
|
+
| ChatCompletionUserMessageParam
|
|
46
|
+
| ChatCompletionToolMessageParam;
|
|
47
|
+
|
|
48
|
+
// The tool description type
|
|
49
|
+
|
|
50
|
+
export type ChatCompletionTool = OpenAI.Chat.Completions.ChatCompletionTool;
|
|
51
|
+
|
|
52
|
+
// CompletionCreate params
|
|
53
|
+
|
|
54
|
+
// openrouter reasoning type
|
|
55
|
+
|
|
56
|
+
export type ReasoningEffort = {
|
|
57
|
+
effort?: OpenAI.ReasoningEffort;
|
|
58
|
+
max_tokens?: never;
|
|
59
|
+
};
|
|
60
|
+
|
|
61
|
+
export type ReasoningMaxTokens = { effort?: never; max_tokens?: number };
|
|
62
|
+
|
|
63
|
+
export type ReasoningExclude = { exclude?: boolean; enabled?: never };
|
|
64
|
+
|
|
65
|
+
export type ReasoningEnabled = {
|
|
66
|
+
exclude?: never;
|
|
67
|
+
enabled?: boolean;
|
|
68
|
+
};
|
|
69
|
+
|
|
70
|
+
export type Reasoning = (ReasoningEffort | ReasoningMaxTokens) &
|
|
71
|
+
(ReasoningExclude | ReasoningEnabled);
|
|
72
|
+
|
|
73
|
+
export type ReasoningDetails = {
|
|
74
|
+
type: "reasoning.text" | "<unknown>";
|
|
75
|
+
text?: string;
|
|
76
|
+
signature?: string;
|
|
77
|
+
format?: string;
|
|
78
|
+
index?: number;
|
|
79
|
+
};
|
|
80
|
+
|
|
81
|
+
/**
|
|
82
|
+
* A (openrouter-specific) stream chunk possibly containing reasoning tokens.
|
|
83
|
+
*/
|
|
84
|
+
export type ChatCompletionChunkChoiceDeltaWithReasoning =
|
|
85
|
+
OpenAI.Chat.Completions.ChatCompletionChunk.Choice.Delta & {
|
|
86
|
+
reasoning?: string;
|
|
87
|
+
reasoning_details?: ReasoningDetails[];
|
|
88
|
+
};
|
|
89
|
+
|
|
90
|
+
/**
 * Extracts the reasoning text carried by one streamed choice delta.
 *
 * A non-empty `delta.reasoning` string takes precedence. Otherwise the `text`
 * of every `reasoning.text` entry in `delta.reasoning_details` is
 * concatenated in order.
 *
 * Entries of any other type are skipped rather than treated as fatal, so a
 * single non-text entry cannot abort an in-flight stream.
 *
 * @param delta The (openrouter-style) chunk delta.
 * @returns The reasoning text; `""` when `reasoning_details` is present but
 *   holds no text; `undefined` when the delta carries no reasoning fields.
 * @throws Error if a `reasoning.text` entry has a non-string truthy `text`.
 */
export function choiceDeltaExtractReasoning(
  delta: ChatCompletionChunkChoiceDeltaWithReasoning
): string | undefined {
  if (delta.reasoning) {
    return delta.reasoning;
  }

  if (delta.reasoning_details) {
    let reasoning = "";
    for (const details of delta.reasoning_details) {
      if (details.type !== "reasoning.text") {
        // No plain text to surface for this kind of entry.
        continue;
      }
      if (details.text) {
        // Runtime guard: the payload comes off the wire, not from TS.
        if (typeof details.text !== "string") {
          throw new Error(
            `unexpected typeof details.text: ${typeof details.text}`
          );
        }
        reasoning += details.text;
      }
    }
    return reasoning;
  }

  return undefined;
}
|
|
117
|
+
|
|
118
|
+
/**
|
|
119
|
+
* A chat completion message with extra reasoning tokens.
|
|
120
|
+
*/
|
|
121
|
+
export type ChatCompletionMessageWithReasoning =
|
|
122
|
+
OpenAI.Chat.Completions.ChatCompletionMessage & {
|
|
123
|
+
reasoning?: string;
|
|
124
|
+
};
|
|
125
|
+
|
|
3
126
|
export interface ILLM {
|
|
4
127
|
getModel(): string;
|
|
5
128
|
|
|
6
129
|
getUrl(): string;
|
|
7
130
|
|
|
8
131
|
getConversationResponse(
|
|
9
|
-
messages:
|
|
10
|
-
tools?:
|
|
11
|
-
onMessage?: (msg: string, end: boolean) => Promise<void
|
|
12
|
-
|
|
132
|
+
messages: ChatCompletionMessageParam[],
|
|
133
|
+
tools?: ChatCompletionTool[],
|
|
134
|
+
onMessage?: (msg: string, end: boolean) => Promise<void>,
|
|
135
|
+
onReasoning?: (reasoning: string) => Promise<void>
|
|
136
|
+
): Promise<ChatCompletion>;
|
|
13
137
|
|
|
14
138
|
setModel(model: string): void;
|
|
15
139
|
}
|
|
@@ -12,6 +12,12 @@ export type { McpServerSettings } from "@xalia/xmcp/sdk";
|
|
|
12
12
|
|
|
13
13
|
const logger = getLogger();
|
|
14
14
|
|
|
15
|
+
export type VerifiedMcpToolCall = {
|
|
16
|
+
mcpServerName: string;
|
|
17
|
+
toolName: string;
|
|
18
|
+
args: unknown;
|
|
19
|
+
};
|
|
20
|
+
|
|
15
21
|
/// Callback into an Mcp server
|
|
16
22
|
export type McpCallback = { (args: string): Promise<string> };
|
|
17
23
|
|
|
@@ -292,14 +298,10 @@ export class McpServerManager implements IMcpServerManager {
|
|
|
292
298
|
return this.enabledOpenAITools;
|
|
293
299
|
}
|
|
294
300
|
|
|
295
|
-
|
|
296
|
-
* Note the `qualifiedToolName` is the full `{mcpServerName}/{toolName}` as
|
|
297
|
-
* in the openai spec.
|
|
298
|
-
*/
|
|
299
|
-
public async invoke(
|
|
301
|
+
public verifyToolCall(
|
|
300
302
|
qualifiedToolName: string,
|
|
301
303
|
args: unknown
|
|
302
|
-
):
|
|
304
|
+
): VerifiedMcpToolCall {
|
|
303
305
|
const [mcpServerName, toolName] = splitQualifiedName(qualifiedToolName);
|
|
304
306
|
logger.debug(`invoke: qualified: ${qualifiedToolName}`);
|
|
305
307
|
logger.debug(
|
|
@@ -312,8 +314,25 @@ export class McpServerManager implements IMcpServerManager {
|
|
|
312
314
|
if (!cb) {
|
|
313
315
|
throw new Error(`Unknown tool ${qualifiedToolName}`);
|
|
314
316
|
}
|
|
317
|
+
return {
|
|
318
|
+
mcpServerName,
|
|
319
|
+
toolName,
|
|
320
|
+
args,
|
|
321
|
+
};
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
/**
|
|
325
|
+
* Invokes a tool call previously validated by `verifyToolCall`, which splits the
|
|
326
|
+
* qualified `{mcpServerName}/{toolName}` name (as in the openai spec).
|
|
327
|
+
*/
|
|
328
|
+
public async invoke(toolCall: VerifiedMcpToolCall): Promise<string> {
|
|
329
|
+
const server = this.getMcpServerInternal(toolCall.mcpServerName);
|
|
330
|
+
const cb = server.getCallback(toolCall.toolName);
|
|
331
|
+
if (!cb) {
|
|
332
|
+
throw new Error(`Unknown tool ${toolCall.toolName}`);
|
|
333
|
+
}
|
|
315
334
|
|
|
316
|
-
return cb(JSON.stringify(args));
|
|
335
|
+
return cb(JSON.stringify(toolCall.args));
|
|
317
336
|
}
|
|
318
337
|
|
|
319
338
|
/**
|
|
@@ -6,12 +6,18 @@ import { IAgentEventHandler } from "./iAgentEventHandler";
|
|
|
6
6
|
*/
|
|
7
7
|
export const NULL_AGENT_EVENT_HANDLER: IAgentEventHandler = {
|
|
8
8
|
onCompletion: (): void => {},
|
|
9
|
+
onImage: (): void => {},
|
|
9
10
|
onToolCallResult: (): void => {},
|
|
10
11
|
onAgentMessage: (): Promise<void> => {
|
|
11
12
|
return new Promise<void>((r) => {
|
|
12
13
|
r();
|
|
13
14
|
});
|
|
14
15
|
},
|
|
16
|
+
onReasoning: (): Promise<void> => {
|
|
17
|
+
return new Promise<void>((r) => {
|
|
18
|
+
r();
|
|
19
|
+
});
|
|
20
|
+
},
|
|
15
21
|
onToolCall: (): Promise<boolean> => {
|
|
16
22
|
return new Promise((r) => {
|
|
17
23
|
r(false);
|
package/src/agent/openAILLM.ts
CHANGED
|
@@ -1,23 +1,18 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { ILLM } from "./llm";
|
|
1
|
+
import { ILLM, XALIA_APP_HEADER } from "./llm";
|
|
3
2
|
import { OpenAI } from "openai";
|
|
4
3
|
|
|
5
4
|
export class OpenAILLM implements ILLM {
|
|
6
5
|
private readonly openai: OpenAI;
|
|
7
6
|
private model: string;
|
|
8
7
|
|
|
9
|
-
constructor(
|
|
10
|
-
apiKey: string,
|
|
11
|
-
apiUrl: string | undefined,
|
|
12
|
-
model: string | undefined
|
|
13
|
-
) {
|
|
8
|
+
constructor(apiKey: string, apiUrl: string | undefined, model: string) {
|
|
14
9
|
this.openai = new OpenAI({
|
|
15
10
|
apiKey,
|
|
16
11
|
baseURL: apiUrl,
|
|
17
12
|
dangerouslyAllowBrowser: true,
|
|
18
13
|
defaultHeaders: XALIA_APP_HEADER,
|
|
19
14
|
});
|
|
20
|
-
this.model = model
|
|
15
|
+
this.model = model;
|
|
21
16
|
}
|
|
22
17
|
|
|
23
18
|
public setModel(model: string) {
|