@xalia/agent 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. package/dist/agent/src/agent/agent.js +103 -54
  2. package/dist/agent/src/agent/agentUtils.js +22 -21
  3. package/dist/agent/src/agent/compressingContextManager.js +3 -2
  4. package/dist/agent/src/agent/dummyLLM.js +1 -3
  5. package/dist/agent/src/agent/imageGenLLM.js +67 -0
  6. package/dist/agent/src/agent/imageGenerator.js +43 -0
  7. package/dist/agent/src/agent/llm.js +27 -0
  8. package/dist/agent/src/agent/mcpServerManager.js +18 -6
  9. package/dist/agent/src/agent/nullAgentEventHandler.js +6 -0
  10. package/dist/agent/src/agent/openAILLM.js +3 -3
  11. package/dist/agent/src/agent/openAILLMStreaming.js +41 -6
  12. package/dist/agent/src/chat/client/chatClient.js +84 -13
  13. package/dist/agent/src/chat/client/sessionClient.js +47 -6
  14. package/dist/agent/src/chat/client/sessionFiles.js +102 -0
  15. package/dist/agent/src/chat/data/apiKeyManager.js +38 -7
  16. package/dist/agent/src/chat/data/database.js +83 -70
  17. package/dist/agent/src/chat/data/dbSessionFileModels.js +49 -0
  18. package/dist/agent/src/chat/data/dbSessionFiles.js +76 -0
  19. package/dist/agent/src/chat/data/dbSessionMessages.js +57 -0
  20. package/dist/agent/src/chat/data/mimeTypes.js +44 -0
  21. package/dist/agent/src/chat/protocol/messages.js +21 -0
  22. package/dist/agent/src/chat/server/chatContextManager.js +14 -7
  23. package/dist/agent/src/chat/server/connectionManager.js +14 -36
  24. package/dist/agent/src/chat/server/connectionManager.test.js +2 -16
  25. package/dist/agent/src/chat/server/conversation.js +69 -45
  26. package/dist/agent/src/chat/server/imageGeneratorTools.js +111 -0
  27. package/dist/agent/src/chat/server/openSession.js +205 -43
  28. package/dist/agent/src/chat/server/server.js +5 -8
  29. package/dist/agent/src/chat/server/sessionFileManager.js +171 -38
  30. package/dist/agent/src/chat/server/sessionRegistry.js +199 -32
  31. package/dist/agent/src/chat/server/test-utils/mockFactories.js +12 -11
  32. package/dist/agent/src/chat/server/tools.js +27 -6
  33. package/dist/agent/src/chat/utils/multiAsyncQueue.js +9 -1
  34. package/dist/agent/src/test/agent.test.js +15 -11
  35. package/dist/agent/src/test/chatContextManager.test.js +4 -0
  36. package/dist/agent/src/test/clientServerConnection.test.js +2 -2
  37. package/dist/agent/src/test/db.test.js +33 -70
  38. package/dist/agent/src/test/dbSessionFiles.test.js +179 -0
  39. package/dist/agent/src/test/dbSessionMessages.test.js +67 -0
  40. package/dist/agent/src/test/dbTestTools.js +6 -5
  41. package/dist/agent/src/test/imageLoad.test.js +1 -1
  42. package/dist/agent/src/test/mcpServerManager.test.js +1 -1
  43. package/dist/agent/src/test/multiAsyncQueue.test.js +50 -0
  44. package/dist/agent/src/test/testTools.js +12 -0
  45. package/dist/agent/src/tool/agentChat.js +25 -6
  46. package/dist/agent/src/tool/agentMain.js +1 -1
  47. package/dist/agent/src/tool/chatMain.js +113 -4
  48. package/dist/agent/src/tool/commandPrompt.js +7 -3
  49. package/dist/agent/src/tool/files.js +23 -15
  50. package/dist/agent/src/tool/options.js +2 -2
  51. package/package.json +1 -1
  52. package/scripts/test_chat +124 -66
  53. package/src/agent/agent.ts +145 -38
  54. package/src/agent/agentUtils.ts +27 -21
  55. package/src/agent/compressingContextManager.ts +5 -4
  56. package/src/agent/context.ts +1 -1
  57. package/src/agent/dummyLLM.ts +1 -3
  58. package/src/agent/iAgentEventHandler.ts +15 -2
  59. package/src/agent/imageGenLLM.ts +99 -0
  60. package/src/agent/imageGenerator.ts +60 -0
  61. package/src/agent/llm.ts +128 -4
  62. package/src/agent/mcpServerManager.ts +26 -7
  63. package/src/agent/nullAgentEventHandler.ts +6 -0
  64. package/src/agent/openAILLM.ts +3 -8
  65. package/src/agent/openAILLMStreaming.ts +60 -14
  66. package/src/chat/client/chatClient.ts +119 -14
  67. package/src/chat/client/sessionClient.ts +75 -9
  68. package/src/chat/client/sessionFiles.ts +145 -0
  69. package/src/chat/data/apiKeyManager.ts +55 -7
  70. package/src/chat/data/dataModels.ts +16 -7
  71. package/src/chat/data/database.ts +107 -92
  72. package/src/chat/data/dbSessionFileModels.ts +91 -0
  73. package/src/chat/data/dbSessionFiles.ts +99 -0
  74. package/src/chat/data/dbSessionMessages.ts +68 -0
  75. package/src/chat/data/mimeTypes.ts +58 -0
  76. package/src/chat/protocol/messages.ts +127 -13
  77. package/src/chat/server/chatContextManager.ts +36 -13
  78. package/src/chat/server/connectionManager.test.ts +1 -22
  79. package/src/chat/server/connectionManager.ts +18 -53
  80. package/src/chat/server/conversation.ts +96 -57
  81. package/src/chat/server/imageGeneratorTools.ts +138 -0
  82. package/src/chat/server/openSession.ts +287 -49
  83. package/src/chat/server/server.ts +5 -11
  84. package/src/chat/server/sessionFileManager.ts +223 -63
  85. package/src/chat/server/sessionRegistry.ts +285 -41
  86. package/src/chat/server/test-utils/mockFactories.ts +13 -13
  87. package/src/chat/server/tools.ts +43 -8
  88. package/src/chat/utils/agentSessionMap.ts +2 -2
  89. package/src/chat/utils/multiAsyncQueue.ts +11 -1
  90. package/src/test/agent.test.ts +23 -14
  91. package/src/test/chatContextManager.test.ts +7 -2
  92. package/src/test/clientServerConnection.test.ts +3 -3
  93. package/src/test/compressingContextManager.test.ts +1 -1
  94. package/src/test/context.test.ts +2 -1
  95. package/src/test/conversation.test.ts +1 -1
  96. package/src/test/db.test.ts +41 -83
  97. package/src/test/dbSessionFiles.test.ts +258 -0
  98. package/src/test/dbSessionMessages.test.ts +85 -0
  99. package/src/test/dbTestTools.ts +9 -5
  100. package/src/test/imageLoad.test.ts +2 -2
  101. package/src/test/mcpServerManager.test.ts +3 -1
  102. package/src/test/multiAsyncQueue.test.ts +58 -0
  103. package/src/test/testTools.ts +15 -1
  104. package/src/tool/agentChat.ts +35 -7
  105. package/src/tool/agentMain.ts +7 -7
  106. package/src/tool/chatMain.ts +126 -5
  107. package/src/tool/commandPrompt.ts +10 -5
  108. package/src/tool/files.ts +30 -13
  109. package/src/tool/options.ts +1 -1
  110. package/test_data/dummyllm_script_image_gen.json +19 -0
  111. package/test_data/dummyllm_script_invoke_image_gen_tool.json +30 -0
  112. package/test_data/image_gen_test_profile.json +5 -0
package/scripts/test_chat CHANGED
@@ -47,6 +47,7 @@ stop_chat_server
47
47
  # Start the server with full logging and short heartbeat timeout.
48
48
  export DEFAULT_LLM_MODEL=repeat
49
49
  export DEVELOPMENT=1
50
+ export GEN_IMAGE_MODEL=dummy:test_data/dummyllm_script_image_gen.json
50
51
 
51
52
  LOG_LEVEL=debug \
52
53
  COMPRESSION_TRIGGER_NUM_MESSAGES=3 \
@@ -94,87 +95,128 @@ pushd _test_chat
94
95
 
95
96
  # Create an agent for the single-user tests
96
97
 
97
- echo '{"system_prompt":"You are a helpful agent in a multi-user chat session. If you have nothing to contribute and are not being addressed, just say `No comment`.","model":"'${DEFAULT_LLM_MODEL}'","mcp_settings":{}}' > agent_profile.json
98
+ echo '{"system_prompt":"You are a helpful agent in a multi-user chat session. If you have nothing to contribute and are not being addressed, just say `No comment`.","mcp_settings":{}}' > agent_profile.json
98
99
  ${client} agent-profile set profile0 --profile agent_profile.json | jq -r .uuid > agent_profile.uuid
99
100
  agent_profile_id=`cat agent_profile.uuid`
100
101
 
101
- # User 0 tries to join with invalid API key - fails with "invalid api key" error
102
+ # # User 0 tries to join with invalid API key - fails with "invalid api key" error
102
103
 
103
- invalid_apikey0="invalid_api_key"
104
- ${agent} chat client --session-id no_such_session --api-key "${invalid_apikey0}" 2>&1 | tee invalid_apikey_test.log | grep -i "invalid api key" || \
105
- (echo "Expected 'invalid api key' error message"; cat invalid_apikey_test.log; exit 1)
104
+ # invalid_apikey0="invalid_api_key"
105
+ # ${agent} chat client --session-id no_such_session --api-key "${invalid_apikey0}" 2>&1 | tee invalid_apikey_test.log | grep -i "invalid api key" || \
106
+ # (echo "Expected 'invalid api key' error message"; cat invalid_apikey_test.log; exit 1)
106
107
 
107
- # User 0 tries to join non-existant session - fails
108
+ # # User 0 tries to join non-existant session - fails
108
109
 
109
- ${agent} chat clear-sessions
110
- ${agent} chat client --session-id no_such_session && \
111
- (echo "Should not be able to join"; exit 1)
110
+ # ${agent} chat clear-sessions
111
+ # ${agent} chat client --session-id no_such_session && \
112
+ # (echo "Should not be able to join"; exit 1)
112
113
 
113
- # User 0 creates a new session with `crash_agent_profile` and interacts with
114
- # it, exercising LLM error conditions.
114
+ # # User 0 creates a new session with `crash_agent_profile` and interacts with
115
+ # # it, exercising LLM error conditions.
115
116
 
116
- echo ':si crash_session.id' > crash_script
117
- echo 'Hello' >> crash_script
118
- echo 'Hello again' >> crash_script
119
- ${agent} chat client --session-title crash_test_session \
120
- --agent-profile-id ${crash_agent_profile_id} \
121
- --script crash_script
117
+ # echo ':si crash_session.id' > crash_script
118
+ # echo 'Hello' >> crash_script
119
+ # echo 'Hello again' >> crash_script
120
+ # ${agent} chat client --session-title crash_test_session \
121
+ # --agent-profile-id ${crash_agent_profile_id} \
122
+ # --script crash_script
122
123
 
123
- # User 0 creates empty session, which should not appear in the DB.
124
+ # # User 0 creates empty session, which should not appear in the DB.
124
125
 
125
- echo ':si empty_session.id' > empty_session
126
- ${agent} chat client --session-title test_session \
127
- --agent-profile-id ${agent_profile_id} \
128
- --script empty_session
129
- empty_session_id=`cat empty_session.id`
130
- ${agent} chat list-sessions | grep `cat empty_session.id` && \
131
- (echo "Empty session should not be persisted"; exit 1)
126
+ # echo ':si empty_session.id' > empty_session
127
+ # ${agent} chat client --session-title test_session \
128
+ # --agent-profile-id ${agent_profile_id} \
129
+ # --script empty_session
130
+ # empty_session_id=`cat empty_session.id`
131
+ # ${agent} chat list-sessions | grep `cat empty_session.id` && \
132
+ # (echo "Empty session should not be persisted"; exit 1)
132
133
 
133
134
  # User 0 creates a user session, sends message and writes session id to file
134
135
 
135
- echo 'tell me a joke' > init_session
136
+ echo ':pause-agent 1' > init_session
137
+ echo 'tell me a joke' >> init_session
136
138
  echo ':si session.id' >> init_session
139
+ echo ':share-session guest.key' >> init_session
140
+ echo ':pause-agent 0' >> init_session
141
+ echo 'now please' >> init_session
137
142
  ${agent} chat client --session-title test_session \
138
143
  --agent-profile-id ${agent_profile_id} \
139
144
  --script init_session
140
145
  session_id=`cat session.id`
146
+ guest_key=`cat guest.key`
147
+
148
+ # # Check that the session got saved with the updated title
149
+
150
+ # ${agent} chat list-sessions | grep '"title":"tell me a joke"'
151
+
152
+ # # Check that a guest can join with the guest key
141
153
 
142
- # Check that the session got saved with the updated title
154
+ # echo "Hello from the guest" > guest_script
155
+ # ${agent} chat client \
156
+ # --session-id ${session_id} \
157
+ # --api-key ${guest_key} \
158
+ # --script guest_script | tee guest_output.txt
159
+ # grep "joke" guest_output.txt
160
+ # grep "[Guest]" guest_output.txt
143
161
 
144
- ${agent} chat list-sessions | grep '"title":"tell me a joke"'
162
+ # # Dummy session to invoke the image generation
145
163
 
146
- # User 1 tries to join the session. Should be rejected.
164
+ # echo '{"system_prompt":"prompt","model":"dummy:test_data/dummyllm_script_invoke_image_gen_tool.json","mcp_settings":{}}' > invoke_gen_image_profile.json
165
+ # ${client} agent-profile set invoke_gen_image_agent_profile \
166
+ # --profile invoke_gen_image_profile.json \
167
+ # | jq -r .uuid > invoke_gen_image_profile.uuid
168
+ # invoke_gen_image_profile_id=`cat invoke_gen_image_profile.uuid`
147
169
 
148
- ${agent} chat client --session-id ${session_id} --api-key ${apikey1} \
149
- --script init_session > join_unauthorized.log 2>&1 && \
150
- (echo "Should exit with error when joining invalid session"; exit 1)
170
+ # echo "Choose an animal and generate an image of it" > invoke_gen_image_script
171
+ # echo ":list-files" >> invoke_gen_image_script
172
+ # echo ":si invoke_gen_image.id" >> invoke_gen_image_script
151
173
 
152
- grep -i "not authorized" join_unauthorized.log || \
153
- (echo "Should include error message"; exit 1)
174
+ # LOG_LEVEL=debug \
175
+ # ${agent} chat client \
176
+ # --session-title invoke_gen_image_session \
177
+ # --agent-profile-id ${invoke_gen_image_profile_id} \
178
+ # --script invoke_gen_image_script | tee invoke_gen_image_output.txt
179
+ # grep 'frog.png' invoke_gen_image_output.txt
180
+ # grep 'metadata: {"type":"image/png"}' invoke_gen_image_output.txt
181
+
182
+ # # User 1 tries to join the session. Should be rejected.
183
+
184
+ # ${agent} chat client --session-id ${session_id} --api-key ${apikey1} \
185
+ # --script init_session > join_unauthorized.log 2>&1 && \
186
+ # (echo "Should exit with error when joining invalid session"; exit 1)
187
+
188
+ # grep -i "not authorized" join_unauthorized.log || \
189
+ # (echo "Should include error message"; exit 1)
154
190
 
155
191
  # User 0 create a team, and write team id to a file
156
192
 
157
193
  echo ':ct team0' > script_create_team
158
194
  echo ':ci team0.id' >> script_create_team
159
- ${agent} chat client --session-id ${session_id} --script script_create_team
195
+ ${agent} chat client \
196
+ --session-id ${session_id} \
197
+ --script script_create_team
160
198
  team_id=`cat team0.id`
161
199
 
162
- # User 0 create a new team session (including a new agent), and write
163
- # session id to a file.
200
+ # User 0 create a new team session (including a new paused agent), and write
201
+ # session id to a file. Extract the agent_profile UUID.
164
202
 
165
- echo 'tell me a joke' > script_team_chat
203
+ echo ':pause-agent 1' > script_team_chat
204
+ echo 'tell me a joke' >> script_team_chat
166
205
  echo ':si team_session.id' >> script_team_chat
167
206
  ${agent} chat client --session-title test_team_session \
168
207
  --team-id ${team_id} \
169
- --script script_team_chat
208
+ --script script_team_chat | tee team_chat_1
170
209
  team_session_id=`cat team_session.id`
171
210
 
172
- # Extract the agent uuid form the new session
173
-
174
211
  get_profile_id_query='.[] | select(.uuid=="'${team_session_id}'") | .agent_profile_uuid'
175
212
  ${agent} chat list-sessions | jq -r "${get_profile_id_query}" > team_agent.id
176
213
  team_agent_id=`cat team_agent.id`
177
214
 
215
+ # Check that the agent has not said anything
216
+
217
+ grep AGENT team_chat_1 && \
218
+ (echo "Unexpected AGENT messages"; exit 1)
219
+
178
220
  # User 1 tries to join the team session. Should be rejected.
179
221
 
180
222
  ${agent} chat client --session-id ${team_session_id} --api-key ${apikey1} \
@@ -183,36 +225,53 @@ pushd _test_chat
183
225
  grep -i "not authorized" join_unauthorized_team.log || \
184
226
  (echo "Should include error message (no auth team session)"; exit 1)
185
227
 
186
- # User 0 adds user 1 as a participant in team and checks the DB
228
+ # User0 sets up the session
229
+ # add chatuser1 to the team
230
+ # add duckduckgo-search
231
+ # add a session file (md file)
232
+ # add an image to the workspace
233
+ # unpause the agent
234
+
235
+ echo ":ap ${team_id} chatuser1" > script_setup_team_session
236
+ echo ':lp' >> script_setup_team_session
237
+ echo ":as duckduckgo-search" >> script_setup_team_session
238
+ echo ':put-file plan.md data:text/markdown,#PLAN\n##STEP1\n##STEP2\n' \
239
+ >> script_setup_team_session
240
+ echo ':sw file:../test_data/frog.png For context, this image represents our shared workspace' >> script_setup_team_session
241
+ echo ':pause-agent 0' >> script_setup_team_session
242
+ ${agent} chat client \
243
+ --session-id ${team_session_id} \
244
+ --script script_setup_team_session
245
+
246
+ # checks the DB for chatuser1 participant and duckduckgo mcp server
187
247
 
188
- echo ":ap ${team_id} chatuser1" > script_add_chatuser1
189
- echo ':lp' >> script_add_chatuser1
190
- ${agent} chat client --session-id ${team_session_id} --script script_add_chatuser1
191
248
  ${agent} chat list-participants --session ${team_session_id} | grep chatuser1
249
+ ${client} agent-profile get ${team_agent_id} | grep duckduckgo
192
250
 
193
- # Have a participant add duckduckgo-search
194
- # Check that the duckduckgo-search server is available
195
- # Have a participant remove it and check that got reflected in the db
251
+ # Check the state. Clean the workspace, file and mcp servers
196
252
 
197
- echo ":as duckduckgo-search" > script_add_duckduckgo
198
- ${agent} chat client --session-id ${team_session_id} --script script_add_duckduckgo
199
- ${client} agent-profile get ${team_agent_id} | grep duckduckgo
200
- sleep 1
253
+ echo ':sleep 1000' > script_check_state
254
+ echo ':list-files filelist.json' >> script_check_state
255
+ echo 'Tell me about the image in our workspace?' >> script_check_state
256
+ echo ':sw' >> script_check_state
257
+ echo 'How many steps are in plan.md?' >> script_check_state
258
+ echo ':sleep 5000' >> script_check_state
259
+ echo ':delete-file plan.md' >> script_check_state
260
+ echo ':rs duckduckgo-search' >> script_check_state
261
+ ${agent} chat client \
262
+ --session-id ${team_session_id} \
263
+ --script script_check_state
201
264
 
202
- echo ":rs duckduckgo-search" > script_rm_duckduckgo
203
- ${agent} chat client --session-id ${team_session_id} --script script_rm_duckduckgo
204
265
  sleep 1
205
- (${client} agent-profile get ${team_session_id} | grep duckduckgo) && ( \
266
+
267
+ # Check duckduckgo-search got removed
268
+ (${client} agent-profile get ${team_agent_id} | grep duckduckgo) && ( \
206
269
  echo "ERROR: expected duckduckgo to be removed from agent profile" ; \
207
270
  exit 1 \
208
271
  )
209
272
 
210
- # Set an image to the workspace and asks a question
211
-
212
- echo ':sw file:../test_data/frog.png For context, this image represents our shared workspace' > script_set_workspace
213
- echo 'Tell me about the image in our workspace?' >> script_set_workspace
214
- echo ':sw' >> script_set_workspace
215
- ${agent} chat client --session-id ${session_id} --script script_set_workspace
273
+ # Check the file manager list
274
+ grep 'plan.md' filelist.json
216
275
 
217
276
  # Run both clients with a script that continues the conversation
218
277
 
@@ -222,12 +281,11 @@ pushd _test_chat
222
281
  ${agent} chat client --session-id ${team_session_id} --api-key ${apikey1} --script script1 >chatuser1.output
223
282
  wait ${pid0}
224
283
 
225
- # The name chatuser0 should appear in chatuser1's session output
284
+ # The names chatuser0, chatuser1 and AGENT should appear in chatuser1's session output
226
285
 
227
- if ! (grep chatuser0 chatuser1.output) ; then
228
- echo "error: expected chatuser0 in chatuser1's output"
229
- exit 1
230
- fi
286
+ grep chatuser0 chatuser1.output
287
+ grep chatuser1 chatuser1.output
288
+ grep AGENT chatuser1.output
231
289
 
232
290
  popd
233
291
 
@@ -1,15 +1,35 @@
1
- import * as dotenv from "dotenv";
2
1
  import { OpenAI } from "openai";
3
2
  import { McpServerManager } from "./mcpServerManager";
4
3
  import { strict as assert } from "assert";
5
- import { ILLM } from "./llm";
4
+ import {
5
+ ILLM,
6
+ ChatCompletionMessageParam,
7
+ ChatCompletionUserMessageParam,
8
+ ChatCompletionMessageToolCall,
9
+ ChatCompletion,
10
+ ChatCompletionAssistantMessageParam,
11
+ ChatCompletionMessage,
12
+ } from "./llm";
6
13
  import { AgentProfile, getLogger } from "@xalia/xmcp/sdk";
7
14
  import { IAgentEventHandler } from "./iAgentEventHandler";
8
15
  import { IContextManager } from "./context";
9
16
  export { AgentProfile } from "@xalia/xmcp/sdk";
10
17
 
18
+ export const DEFAULT_LLM_URL = "http://localhost:5001/v1";
19
+
11
20
  const MAX_TOOL_CALL_RESPONSE_LENGTH = 4000;
12
21
 
22
+ /**
23
+ * An agent's response, with optional extra image data.
24
+ * `ChatCompletionMessageParam` may one day be updated to allow image data (as
25
+ * it does for audio data), but for now image data is not included and so we
26
+ * keep it separate.
27
+ */
28
+ export type AssistantResponse = {
29
+ message: ChatCompletionMessageParam;
30
+ images?: OpenAI.Chat.Completions.ChatCompletionContentPartImage[];
31
+ };
32
+
13
33
  export interface IAgentToolProvider {
14
34
  /**
15
35
  * Any initial setup to be performed by the tool (loading data, etc). This
@@ -25,6 +45,11 @@ export type ToolCallResult = {
25
45
  */
26
46
  response: string;
27
47
 
48
+ /**
49
+ * Application-specific metadata about the tool call result.
50
+ */
51
+ metadata?: Record<string, string>;
52
+
28
53
  /**
29
54
  * If set, `response` is used in the next round of the Agent loop (if any),
30
55
  * but `overwriteResponse` is passed to the ContextManager to be stored, and
@@ -49,21 +74,6 @@ export type ToolHandler = (
49
74
 
50
75
  export type McpServerUrls = (name: string) => string;
51
76
 
52
- export type ChatCompletionMessageParam = OpenAI.ChatCompletionMessageParam;
53
-
54
- export type ChatCompletionMessageToolCall =
55
- OpenAI.ChatCompletionMessageToolCall;
56
-
57
- export type ChatCompletionAssistantMessageParam =
58
- OpenAI.ChatCompletionAssistantMessageParam;
59
-
60
- export type ChatCompletionUserMessageParam =
61
- OpenAI.ChatCompletionUserMessageParam;
62
-
63
- export type ChatCompletionToolMessageParam =
64
- OpenAI.ChatCompletionToolMessageParam;
65
-
66
- dotenv.config();
67
77
  const logger = getLogger();
68
78
 
69
79
  export interface IConversation {
@@ -153,7 +163,7 @@ export class Agent implements IConversation {
153
163
  msg?: string,
154
164
  imageB64?: string,
155
165
  name?: string
156
- ): Promise<ChatCompletionMessageParam | undefined> {
166
+ ): Promise<AssistantResponse | undefined> {
157
167
  const userMessage = createUserMessage(msg, imageB64, name);
158
168
  if (!userMessage) {
159
169
  return undefined;
@@ -163,13 +173,22 @@ export class Agent implements IConversation {
163
173
 
164
174
  public async userMessageRaw(
165
175
  userMessage: ChatCompletionUserMessageParam
166
- ): Promise<ChatCompletionMessageParam | undefined> {
176
+ ): Promise<AssistantResponse | undefined> {
167
177
  return this.userMessagesRaw([userMessage]);
168
178
  }
169
179
 
170
180
  public async userMessagesRaw(
171
181
  userMessages: ChatCompletionUserMessageParam[]
172
- ): Promise<ChatCompletionMessageParam | undefined> {
182
+ ): Promise<AssistantResponse | undefined> {
183
+ // Image and audio handling
184
+ //
185
+ // `ChatCompletions` (responses from the LLM) can contain `audio` and
186
+ // `images` tags. However, the `ChatCompletionMessageParam` type does not
187
+ // allow for "assistant" messages with images / audio.
188
+ //
189
+ // As such, our current approach is to extract all assistant-generated
190
+ // media and return it separately.
191
+
173
192
  // Note: `getLLMContext` returns a copy so we can mutate this array
174
193
  const context = this.contextManager.getLLMContext();
175
194
  const newMessagesIdx = context.length;
@@ -177,8 +196,13 @@ export class Agent implements IConversation {
177
196
  // Add the new user messages
178
197
  context.push(...userMessages);
179
198
 
199
+ const images: OpenAI.Chat.Completions.ChatCompletionContentPartImage[] = [];
200
+
201
+ // We convert the `ChatCompletionMessage` into a
202
+ // `ChatCompletionAssistantMessageParam` and extract image data.
203
+
180
204
  let completion = await this.chatCompletion(context);
181
- let message = completion.choices[0].message;
205
+ let message = this.processCompletion(completion, images);
182
206
  context.push(message);
183
207
 
184
208
  // While there are tool calls to make, invoke them and loop
@@ -199,6 +223,7 @@ export class Agent implements IConversation {
199
223
  role: "tool",
200
224
  tool_call_id: toolCall.id,
201
225
  content: result.response,
226
+ metadata: result.metadata,
202
227
  });
203
228
 
204
229
  // If the tool call requested that its args be redacted, this can be
@@ -226,14 +251,14 @@ export class Agent implements IConversation {
226
251
 
227
252
  try {
228
253
  completion = await this.chatCompletion(context); // CAN THROW
229
- message = completion.choices[0].message;
254
+ message = this.processCompletion(completion, images);
230
255
  context.push(message);
231
256
  } finally {
232
257
  // Now that the tool call results have been passed to the LLM, perform
233
258
  // any updates on them. Pass the (updated) tool-call-result LLM
234
- // messages to the event handler - note, we want to do this even if
235
- // the an error occured, so that the caller has an up-to-date picture
236
- // of the context state when the error occured.
259
+ // messages to the event handler - note, we want to do this even if an
260
+ // error occurred, so that the caller has an up-to-date picture of the
261
+ // context state when the error occurred.
237
262
 
238
263
  toolCallResults.forEach(([indexInContext, tcr]) => {
239
264
  const ctxMsg = context[indexInContext];
@@ -256,7 +281,7 @@ export class Agent implements IConversation {
256
281
  // Add all new new messages to the context
257
282
  this.contextManager.addMessages(context.slice(newMessagesIdx));
258
283
 
259
- return completion.choices[0].message;
284
+ return { message, images: images.length === 0 ? undefined : images };
260
285
  }
261
286
 
262
287
  public userMessage(msg?: string, imageB64?: string): void {
@@ -285,19 +310,22 @@ export class Agent implements IConversation {
285
310
 
286
311
  async chatCompletion(
287
312
  context: ChatCompletionMessageParam[]
288
- ): Promise<OpenAI.Chat.Completions.ChatCompletion> {
313
+ ): Promise<ChatCompletion> {
289
314
  // Compute the full list of available tools
290
315
 
291
316
  let tools: OpenAI.ChatCompletionTool[] | undefined;
292
317
  const mcpTools = this.mcpServerManager.getOpenAITools();
318
+ logger.debug(`[chatCompletion] mcpTools: ${JSON.stringify(mcpTools)}`);
293
319
  const enabledTools = this.tools.concat(mcpTools);
294
320
  if (enabledTools.length > 0) {
295
321
  tools = enabledTools;
296
322
  }
323
+ logger.debug(`[chatCompletion] tools: ${JSON.stringify(tools)}`);
297
324
  const completion = await this.llm.getConversationResponse(
298
325
  context,
299
326
  tools,
300
- this.eventHandler.onAgentMessage.bind(this.eventHandler)
327
+ this.eventHandler.onAgentMessage.bind(this.eventHandler),
328
+ this.eventHandler.onReasoning.bind(this.eventHandler)
301
329
  );
302
330
  logger.debug(`Received chat completion ${JSON.stringify(completion)}`);
303
331
  return completion;
@@ -359,19 +387,26 @@ export class Agent implements IConversation {
359
387
  const toolName = toolCall.function.name;
360
388
  const agentTool = this.agentTools.get(toolName);
361
389
  const isAgentTool = !!agentTool;
362
- const approve = await this.eventHandler.onToolCall(toolCall, isAgentTool);
363
- if (!approve) {
364
- result = { response: "User denied tool request." };
365
- } else if (isAgentTool) {
390
+
391
+ if (isAgentTool) {
366
392
  // Internal (agent) tool
367
- const args: unknown = JSON.parse(toolCall.function.arguments);
368
- result = await agentTool.handler(this, args);
393
+ if (!(await this.eventHandler.onToolCall(toolCall, true))) {
394
+ result = { response: "User denied tool request." };
395
+ } else {
396
+ const args: unknown = JSON.parse(toolCall.function.arguments);
397
+ result = await agentTool.handler(this, args);
398
+ }
369
399
  } else {
370
- // McpServer tool call (agentTool === undefined)
400
+ // McpServer tool call (agentTool === undefined). Sanity check the
401
+ // tool call data, get approval, and then invoke.
402
+
371
403
  const args: unknown = JSON.parse(toolCall.function.arguments);
372
- result = {
373
- response: await this.mcpServerManager.invoke(toolName, args),
374
- };
404
+ const tc = this.mcpServerManager.verifyToolCall(toolName, args);
405
+ if (!(await this.eventHandler.onToolCall(toolCall, false))) {
406
+ result = { response: "User denied tool request." };
407
+ } else {
408
+ result = { response: await this.mcpServerManager.invoke(tc) };
409
+ }
375
410
  logger.debug(`tool call result ${JSON.stringify(result)}`);
376
411
  }
377
412
  } catch (e) {
@@ -402,6 +437,23 @@ export class Agent implements IConversation {
402
437
 
403
438
  return result;
404
439
  }
440
+
441
+ private processCompletion(
442
+ completion: ChatCompletion,
443
+ images: OpenAI.Chat.Completions.ChatCompletionContentPartImage[]
444
+ ): ChatCompletionAssistantMessageParam {
445
+ // Add any images into the list, and call the event handler
446
+
447
+ const compMessage = completion.choices[0].message;
448
+ if (compMessage.images) {
449
+ for (const image of compMessage.images) {
450
+ this.eventHandler.onImage(image);
451
+ images.push(image);
452
+ }
453
+ }
454
+
455
+ return completionToAssistantMessageParam(compMessage);
456
+ }
405
457
  }
406
458
 
407
459
  /**
@@ -460,3 +512,58 @@ export function createUserMessageEnsure(
460
512
  assert(userMsg);
461
513
  return userMsg;
462
514
  }
515
+
516
+ export function completionToAssistantMessageParam(
517
+ compMessage: ChatCompletionMessage
518
+ ): ChatCompletionAssistantMessageParam {
519
+ // Strip down the `ChatCompletionMessage` to a
520
+ // `ChatCompletionAssistantMessageParam`, only including the non-null
521
+ // elements. For reference:
522
+ //
523
+ // Response from the LLM:
524
+ //
525
+ // export interface ChatCompletionMessage {
526
+ // role: 'assistant';
527
+ // audio?: ChatCompletionAudio | null;
528
+ // content: string | null;
529
+ // refusal: string | null;
530
+ // tool_calls?: Array<ChatCompletionMessageToolCall>;
531
+ //
532
+ // annotations?: Array<ChatCompletionMessage.Annotation>;
533
+ // // openrouter
534
+ // images?: Array<ChatCompletionContentPartImage>
535
+ // }
536
+ //
537
+ // Input to the LLM
538
+ //
539
+ // export interface ChatCompletionAssistantMessageParam {
540
+ // role: "assistant";
541
+ // audio?: ChatCompletionAssistantMessageParam.Audio | null;
542
+ // content?:
543
+ // | string
544
+ // | Array<ChatCompletionContentPartText |
545
+ // ChatCompletionContentPartRefusal>
546
+ // | null;
547
+ // refusal?: string | null;
548
+ // tool_calls?: Array<ChatCompletionMessageToolCall>;
549
+ //
550
+ // name?: string;
551
+ // }
552
+
553
+ const message: ChatCompletionAssistantMessageParam = {
554
+ role: "assistant",
555
+ };
556
+ if (compMessage.audio) {
557
+ message.audio = compMessage.audio;
558
+ }
559
+ if (compMessage.content) {
560
+ message.content = compMessage.content;
561
+ }
562
+ if (compMessage.refusal) {
563
+ message.refusal = compMessage.refusal;
564
+ }
565
+ if (compMessage.tool_calls) {
566
+ message.tool_calls = compMessage.tool_calls;
567
+ }
568
+ return message;
569
+ }