@cephalization/phoenix-insight 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -67,7 +67,7 @@ var PhoenixClientError = class extends Error {
67
67
  function createPhoenixClient(config = {}) {
68
68
  const headers = {};
69
69
  if (config.apiKey) {
70
- headers["api_key"] = config.apiKey;
70
+ headers["Authorization"] = `Bearer ${config.apiKey}`;
71
71
  }
72
72
  const clientOptions = {
73
73
  options: {
@@ -790,6 +790,308 @@ Use this tool when you want to present structured analysis results, metrics, tab
790
790
  });
791
791
  }
792
792
 
793
+ // src/agent/conversation.ts
794
+ import {
795
+ pruneMessages
796
+ } from "ai";
797
/**
 * Build a conversation entry with the "user" role.
 *
 * @param {*} content - Message content, stored exactly as given.
 * @returns {{role: "user", content: *}} The new user message.
 */
function createUserMessage(content) {
  const userMessage = { role: "user", content };
  return userMessage;
}
800
/**
 * Build a conversation entry with the "assistant" role.
 *
 * @param {*} content - Message content, stored exactly as given.
 * @returns {{role: "assistant", content: *}} The new assistant message.
 */
function createAssistantMessage(content) {
  const assistantMessage = { role: "assistant", content };
  return assistantMessage;
}
803
/**
 * Build an "assistant" conversation entry whose content is a list of parts.
 *
 * @param {Array} parts - Content parts, stored by reference (not copied).
 * @returns {{role: "assistant", content: Array}} The new assistant message.
 */
function createAssistantMessageWithParts(parts) {
  const assistantMessage = { role: "assistant", content: parts };
  return assistantMessage;
}
806
/**
 * Build a conversation entry with the "tool" role.
 *
 * @param {Array} results - Tool result parts, stored by reference (not copied).
 * @returns {{role: "tool", content: Array}} The new tool message.
 */
function createToolMessage(results) {
  const toolMessage = { role: "tool", content: results };
  return toolMessage;
}
809
/**
 * Convert an internal user message into a model message.
 * Only `content` is carried over; any other fields on the input are dropped.
 *
 * @param {{content: *}} message - Internal user message.
 * @returns {{role: "user", content: *}} Model-format user message.
 */
function convertUserMessage(message) {
  const { content } = message;
  return { role: "user", content };
}
815
/**
 * Convert an internal assistant message into a model message.
 *
 * String content is passed through. Array content is mapped part by part:
 * "text" parts keep their text, and every other part is emitted as a
 * "tool-call" whose `input` is taken from the internal `args` field.
 *
 * @param {{content: string|Array}} message - Internal assistant message.
 * @returns {{role: "assistant", content: string|Array}} Model-format message.
 */
function convertAssistantMessage(message) {
  const { content } = message;
  if (typeof content === "string") {
    return { role: "assistant", content };
  }
  const toSdkPart = (part) =>
    part.type === "text"
      ? { type: "text", text: part.text }
      : {
          type: "tool-call",
          toolCallId: part.toolCallId,
          toolName: part.toolName,
          input: part.args
        };
  return {
    role: "assistant",
    content: content.map((part) => toSdkPart(part))
  };
}
842
/**
 * Convert an internal tool message into a model message.
 *
 * Each part becomes a "tool-result" whose `output` wraps the stored result:
 * type "error-json" when the part is flagged `isError`, otherwise "json".
 *
 * @param {{content: Array}} message - Internal tool message.
 * @returns {{role: "tool", content: Array}} Model-format tool message.
 */
function convertToolMessage(message) {
  const toSdkResult = (part) => {
    const outputType = part.isError ? "error-json" : "json";
    return {
      type: "tool-result",
      toolCallId: part.toolCallId,
      toolName: part.toolName,
      output: { type: outputType, value: part.result }
    };
  };
  return {
    role: "tool",
    content: message.content.map((part) => toSdkResult(part))
  };
}
854
/**
 * Convert one internal conversation message into a model message by
 * dispatching on its role.
 *
 * @param {{role: string}} message - Internal conversation message.
 * @returns {object|undefined} The converted message, or `undefined` when the
 *   role is not "user", "assistant" or "tool".
 */
function toModelMessage(message) {
  const { role } = message;
  if (role === "user") {
    return convertUserMessage(message);
  }
  if (role === "assistant") {
    return convertAssistantMessage(message);
  }
  if (role === "tool") {
    return convertToolMessage(message);
  }
  return undefined;
}
864
/**
 * Convert a whole conversation history into model messages, preserving order.
 *
 * @param {Array} history - Internal conversation messages.
 * @returns {Array} One converted message per history entry.
 */
function toModelMessages(history) {
  return history.map((entry) => toModelMessage(entry));
}
867
// Replacement text for the `content` input of past generate_report calls.
var TRUNCATED_REPORT_PLACEHOLDER = "[Report content truncated to save tokens]";

/**
 * Return a copy of `messages` in which every `generate_report` tool call
 * inside an assistant message has its input reduced to the placeholder
 * content plus the original title (when the title is truthy).
 *
 * Non-assistant messages, assistant messages with string content, and all
 * other parts are returned by reference, unchanged. The input is not mutated.
 *
 * @param {Array} messages - Model-format messages.
 * @returns {Array} Messages with report tool-call inputs truncated.
 */
function truncateReportToolCalls(messages) {
  const shrinkPart = (part) => {
    const isReportCall =
      part.type === "tool-call" && part.toolName === "generate_report";
    if (!isReportCall) {
      return part;
    }
    const slimInput = { content: TRUNCATED_REPORT_PLACEHOLDER };
    const originalTitle = part.input?.title;
    if (originalTitle) {
      slimInput.title = originalTitle;
    }
    return { ...part, input: slimInput };
  };

  return messages.map((message) => {
    const leaveAsIs =
      message.role !== "assistant" || typeof message.content === "string";
    if (leaveAsIs) {
      return message;
    }
    return {
      ...message,
      content: message.content.map((part) => shrinkPart(part))
    };
  });
}
901
/**
 * Turn the steps of a generate/stream result into internal conversation
 * messages.
 *
 * For every step, in order:
 *  - with tool calls: one assistant message with parts (the step text first,
 *    when non-empty, then one "tool-call" part per call, `args` taken from
 *    the call's `input`);
 *  - with text only: one plain assistant message;
 *  - with tool results: one tool message (`result` taken from `output`).
 *
 * @param {{steps: *}} result - Result object; `steps` may be a value or a promise.
 * @returns {Promise<Array>} Extracted messages, empty when there are no steps.
 */
async function extractMessagesFromResponse(result) {
  const collected = [];
  const steps = await result.steps;
  if (!steps || steps.length === 0) {
    return collected;
  }
  for (const step of steps) {
    const hasText = Boolean(step.text && step.text.length > 0);
    const hasToolCalls = Boolean(step.toolCalls && step.toolCalls.length > 0);

    if (hasToolCalls) {
      const parts = hasText ? [{ type: "text", text: step.text }] : [];
      for (const call of step.toolCalls) {
        parts.push({
          type: "tool-call",
          toolCallId: call.toolCallId,
          toolName: call.toolName,
          args: call.input
        });
      }
      collected.push(createAssistantMessageWithParts(parts));
    } else if (hasText) {
      collected.push(createAssistantMessage(step.text));
    }

    if (step.toolResults && step.toolResults.length > 0) {
      const results = step.toolResults.map((toolResult) => ({
        type: "tool-result",
        toolCallId: toolResult.toolCallId,
        toolName: toolResult.toolName,
        result: toolResult.output
      }));
      collected.push(createToolMessage(results));
    }
  }
  return collected;
}
947
/**
 * Convert one UI message into the internal conversation format.
 *
 * Callers must pass a message already accepted by isConvertibleUIMessage:
 * assistant content is a string or an array of parts, tool content is an
 * array of parts.
 *
 * @param {{role: string, content: *}} message - UI message.
 * @returns {object|undefined} Internal message, or `undefined` for a role
 *   other than "user", "assistant" or "tool".
 */
function convertUIMessage(message) {
  switch (message.role) {
    case "user":
      return { role: "user", content: message.content };
    case "assistant": {
      if (typeof message.content === "string") {
        return { role: "assistant", content: message.content };
      }
      const parts = message.content.map((part) => {
        if (part.type === "text") {
          return { type: "text", text: part.text };
        }
        // Every non-text part is treated as a tool call.
        return {
          type: "tool-call",
          toolCallId: part.toolCallId,
          toolName: part.toolName,
          args: part.args
        };
      });
      return { role: "assistant", content: parts };
    }
    case "tool": {
      const results = message.content.map((part) => ({
        type: "tool-result",
        toolCallId: part.toolCallId,
        toolName: part.toolName,
        result: part.result,
        // Only carry the flag when it is set.
        ...part.isError && { isError: part.isError }
      }));
      return { role: "tool", content: results };
    }
  }
}

/**
 * Decide whether a raw client value can be handed to convertUIMessage
 * without throwing.
 *
 * @param {*} msg - Candidate UI message.
 * @returns {boolean} True for objects with a supported role whose `content`
 *   has the shape that role's conversion reads.
 */
function isConvertibleUIMessage(msg) {
  if (!msg || typeof msg !== "object") {
    return false;
  }
  const isPartList = (value) =>
    Array.isArray(value) &&
    value.every((part) => part !== null && typeof part === "object");
  switch (msg.role) {
    case "user":
      // User content is passed through untouched, so nothing can throw here.
      return true;
    case "assistant":
      return typeof msg.content === "string" || isPartList(msg.content);
    case "tool":
      return isPartList(msg.content);
    default:
      return false;
  }
}

/**
 * Convert a client-supplied history into internal conversation messages.
 *
 * Entries that are not objects, have an unsupported role, or carry content
 * of the wrong shape (for example an assistant message with `content: null`)
 * are dropped instead of raising a TypeError during conversion.
 *
 * @param {*} uiMessages - Value received from the client.
 * @returns {Array} Converted messages; empty when the input is not an array.
 */
function fromUIMessages(uiMessages) {
  if (!Array.isArray(uiMessages)) {
    return [];
  }
  return uiMessages
    .filter((msg) => isConvertibleUIMessage(msg))
    .map((msg) => convertUIMessage(msg));
}
995
/**
 * Convert one model message back into the internal conversation format.
 *
 *  - user: string content is kept; part arrays are reduced to the
 *    concatenation of their "text" parts.
 *  - assistant: string content is kept; part arrays keep only "text" and
 *    "tool-call" parts (`input` becomes `args`). No surviving parts yields
 *    empty-string content; a single text part collapses to a plain string.
 *  - tool: each "tool-result" part is unwrapped. Outputs shaped
 *    `{type, value}` yield `value`, flagged `isError` for "error-json" and
 *    "error-text"; any other output is used as-is.
 *  - system: rewritten as a user message prefixed with "[System]: ".
 *  - anything else: a user message reading "[Unknown message type]".
 *
 * @param {{role: string, content: *}} message - Model message.
 * @returns {{role: string, content: *}} Internal conversation message.
 */
function fromModelMessage(message) {
  const { role } = message;

  if (role === "user") {
    if (typeof message.content === "string") {
      return { role: "user", content: message.content };
    }
    const texts = [];
    for (const part of message.content) {
      if (part.type === "text") {
        texts.push(part.text);
      }
    }
    return { role: "user", content: texts.join("") };
  }

  if (role === "assistant") {
    if (typeof message.content === "string") {
      return { role: "assistant", content: message.content };
    }
    const kept = [];
    for (const part of message.content) {
      switch (part.type) {
        case "text":
          kept.push({ type: "text", text: part.text });
          break;
        case "tool-call":
          kept.push({
            type: "tool-call",
            toolCallId: part.toolCallId,
            toolName: part.toolName,
            args: part.input
          });
          break;
      }
    }
    if (kept.length === 0) {
      return { role: "assistant", content: "" };
    }
    const isLoneText = kept.length === 1 && kept[0].type === "text";
    return { role: "assistant", content: isLoneText ? kept[0].text : kept };
  }

  if (role === "tool") {
    const unwrapped = [];
    for (const part of message.content) {
      if (part.type !== "tool-result") {
        continue;
      }
      const output = part.output;
      const isTyped =
        Boolean(output) &&
        typeof output === "object" &&
        "type" in output &&
        "value" in output;
      const result = isTyped ? output.value : output;
      const isError =
        isTyped && (output.type === "error-json" || output.type === "error-text");
      unwrapped.push({
        type: "tool-result",
        toolCallId: part.toolCallId,
        toolName: part.toolName,
        result,
        ...isError && { isError }
      });
    }
    return { role: "tool", content: unwrapped };
  }

  if (role === "system") {
    return { role: "user", content: `[System]: ${message.content}` };
  }

  return { role: "user", content: "[Unknown message type]" };
}
1063
/**
 * Convert a list of model messages back into internal conversation
 * messages, preserving order.
 *
 * @param {Array} messages - Model messages.
 * @returns {Array} One internal message per input message.
 */
function fromModelMessages(messages) {
  return messages.map((entry) => fromModelMessage(entry));
}
1066
/**
 * Shrink a conversation by pruning its middle section while keeping the
 * first and last messages intact.
 *
 * The history is converted to model messages, the middle slice is passed to
 * `pruneMessages` (reasoning and tool calls removed, empty messages dropped),
 * and the result is converted back to the internal format.
 *
 * The slice boundaries are nudged so a tool call and its result are never
 * separated:
 *  - tool messages at the start of the middle stay with the head, next to
 *    the assistant message they follow;
 *  - tool messages at the start of the tail move into the middle, because
 *    the assistant message that issued their calls is about to be pruned.
 *
 * @param {Array} messages - Internal conversation history.
 * @param {{keepFirstN?: number, keepLastN?: number}} [options] - Number of
 *   leading and trailing messages to keep (defaults 2 and 6).
 * @returns {Array} The compacted history, or `messages` itself when there is
 *   nothing to prune.
 */
function compactConversation(messages, options) {
  const keepFirstN = options?.keepFirstN ?? 2;
  const keepLastN = options?.keepLastN ?? 6;
  if (messages.length <= keepFirstN + keepLastN) {
    return messages;
  }
  const modelMessages = toModelMessages(messages);
  let middleStartIndex = keepFirstN;
  let middleEndIndex = modelMessages.length - keepLastN;

  // Keep tool results attached to the head message that requested them.
  while (
    middleStartIndex > 0 &&
    middleStartIndex < middleEndIndex &&
    modelMessages[middleStartIndex]?.role === "tool"
  ) {
    middleStartIndex += 1;
  }
  if (middleEndIndex <= middleStartIndex) {
    return messages;
  }
  // A tail that starts with tool results would reference calls pruned from
  // the middle, so prune those results together with their calls.
  while (
    middleEndIndex < modelMessages.length &&
    modelMessages[middleEndIndex]?.role === "tool"
  ) {
    middleEndIndex += 1;
  }

  const firstMessages = modelMessages.slice(0, middleStartIndex);
  const middleMessages = modelMessages.slice(middleStartIndex, middleEndIndex);
  const lastMessages = modelMessages.slice(middleEndIndex);
  const prunedMiddle = pruneMessages({
    messages: middleMessages,
    reasoning: "all",
    toolCalls: "all",
    emptyMessages: "remove"
  });
  return fromModelMessages([
    ...firstMessages,
    ...prunedMiddle,
    ...lastMessages
  ]);
}
1094
+
793
1095
  // src/agent/index.ts
794
1096
  var PhoenixInsightAgent = class {
795
1097
  mode;
@@ -878,6 +1180,14 @@ var PhoenixInsightAgent = class {
878
1180
  }
879
1181
  /**
880
1182
  * Generate a response for a user query
1183
+ *
1184
+ * @param userQuery - The current user query
1185
+ * @param options - Optional configuration
1186
+ * @param options.onStepFinish - Callback called after each agent step
1187
+ * @param options.messages - Optional conversation history for multi-turn conversations.
1188
+ * When provided, the history is converted to AI SDK format and the userQuery is
1189
+ * appended as the final user message. Report tool calls in history are truncated
1190
+ * to save tokens.
881
1191
  */
882
1192
  async generate(userQuery, options) {
883
1193
  let tools;
@@ -889,17 +1199,32 @@ var PhoenixInsightAgent = class {
889
1199
  );
890
1200
  }
891
1201
  try {
892
- const result = await generateText({
1202
+ const baseConfig = {
893
1203
  model: this.model,
894
1204
  system: this.systemPrompt,
895
- prompt: userQuery,
896
1205
  tools,
897
1206
  stopWhen: stepCountIs(this.maxSteps),
898
1207
  onStepFinish: options?.onStepFinish,
899
1208
  experimental_telemetry: {
900
1209
  isEnabled: true
901
1210
  }
902
- });
1211
+ };
1212
+ let result;
1213
+ if (options?.messages && options.messages.length > 0) {
1214
+ const historyMessages = toModelMessages(options.messages);
1215
+ const truncatedHistory = truncateReportToolCalls(historyMessages);
1216
+ const currentUserMessage = toModelMessages([createUserMessage(userQuery)]);
1217
+ const allMessages = [...truncatedHistory, ...currentUserMessage];
1218
+ result = await generateText({
1219
+ ...baseConfig,
1220
+ messages: allMessages
1221
+ });
1222
+ } else {
1223
+ result = await generateText({
1224
+ ...baseConfig,
1225
+ prompt: userQuery
1226
+ });
1227
+ }
903
1228
  return result;
904
1229
  } catch (error) {
905
1230
  if (error instanceof Error) {
@@ -924,6 +1249,14 @@ var PhoenixInsightAgent = class {
924
1249
  }
925
1250
  /**
926
1251
  * Stream a response for a user query
1252
+ *
1253
+ * @param userQuery - The current user query
1254
+ * @param options - Optional configuration
1255
+ * @param options.onStepFinish - Callback called after each agent step
1256
+ * @param options.messages - Optional conversation history for multi-turn conversations.
1257
+ * When provided, the history is converted to AI SDK format and the userQuery is
1258
+ * appended as the final user message. Report tool calls in history are truncated
1259
+ * to save tokens.
927
1260
  */
928
1261
  async stream(userQuery, options) {
929
1262
  let tools;
@@ -935,17 +1268,32 @@ var PhoenixInsightAgent = class {
935
1268
  );
936
1269
  }
937
1270
  try {
938
- const result = streamText({
1271
+ const baseConfig = {
939
1272
  model: this.model,
940
1273
  system: this.systemPrompt,
941
- prompt: userQuery,
942
1274
  tools,
943
1275
  stopWhen: stepCountIs(this.maxSteps),
944
1276
  onStepFinish: options?.onStepFinish,
945
1277
  experimental_telemetry: {
946
1278
  isEnabled: true
947
1279
  }
948
- });
1280
+ };
1281
+ let result;
1282
+ if (options?.messages && options.messages.length > 0) {
1283
+ const historyMessages = toModelMessages(options.messages);
1284
+ const truncatedHistory = truncateReportToolCalls(historyMessages);
1285
+ const currentUserMessage = toModelMessages([createUserMessage(userQuery)]);
1286
+ const allMessages = [...truncatedHistory, ...currentUserMessage];
1287
+ result = streamText({
1288
+ ...baseConfig,
1289
+ messages: allMessages
1290
+ });
1291
+ } else {
1292
+ result = streamText({
1293
+ ...baseConfig,
1294
+ prompt: userQuery
1295
+ });
1296
+ }
949
1297
  return result;
950
1298
  } catch (error) {
951
1299
  if (error instanceof Error) {
@@ -979,11 +1327,11 @@ async function createInsightAgent(config) {
979
1327
  return new PhoenixInsightAgent(config);
980
1328
  }
981
1329
/**
 * Run a query against an agent, streaming or not.
 *
 * @param {{stream: Function, generate: Function}} agent - Agent to call.
 * @param {string} userQuery - The user's query.
 * @param {object} [options] - `stream` (default false) picks the agent
 *   method; every other property is forwarded to that method.
 * @returns {Promise<*>} Whatever the chosen agent method resolves to.
 */
async function runQuery(agent, userQuery, options) {
  const settings = options || {};
  const { stream = false, ...rest } = settings;
  return stream
    ? await agent.stream(userQuery, rest)
    : await agent.generate(userQuery, rest);
}
989
1337
  async function runOneShotQuery(config, userQuery, options) {
@@ -996,6 +1344,66 @@ async function runOneShotQuery(config, userQuery, options) {
996
1344
  }
997
1345
  }
998
1346
 
1347
+ // src/agent/token-errors.ts
1348
+ import { APICallError } from "ai";
1349
// Substrings searched for, case-insensitively, in an error message to
// recognise a token/context limit failure.
var TOKEN_LIMIT_ERROR_PATTERNS = [
  // Anthropic-specific patterns
  "prompt is too long",
  "context window",
  "context length",
  "max_tokens",
  "maximum context",
  "token limit",
  "tokens exceed",
  "exceeds the maximum",
  "too many tokens",
  // Generic patterns that might apply to other providers
  "context limit",
  "input too long",
  "request too large"
];

// HTTP status codes accepted alongside a matching message.
var TOKEN_LIMIT_STATUS_CODES = [400, 413, 422];

/**
 * Check whether a value is an APICallError, delegating to
 * `APICallError.isInstance`.
 *
 * @param {*} error - Value to test.
 * @returns {boolean} Result of `APICallError.isInstance(error)`.
 */
function isAPICallError(error) {
  const matches = APICallError.isInstance(error);
  return matches;
}

/**
 * Check whether a message contains any token-limit pattern, ignoring case.
 *
 * @param {string} message - Error message text.
 * @returns {boolean} True when at least one pattern occurs in the message.
 */
function messageContainsTokenLimitPattern(message) {
  const haystack = message.toLowerCase();
  for (const pattern of TOKEN_LIMIT_ERROR_PATTERNS) {
    if (haystack.includes(pattern.toLowerCase())) {
      return true;
    }
  }
  return false;
}
1375
/**
 * Decide whether an error looks like a token/context limit failure.
 *
 *  - APICallError with a status code: the code must be one of
 *    TOKEN_LIMIT_STATUS_CODES and the message must match a pattern.
 *  - APICallError without a status code: the message alone decides.
 *  - Any other Error: the message alone decides.
 *  - Non-Error values: never a token limit error.
 *
 * @param {*} error - Caught value.
 * @returns {boolean} True when the error is classified as a token limit error.
 */
function isTokenLimitError(error) {
  if (isAPICallError(error)) {
    const matchesPattern = messageContainsTokenLimitPattern(error.message || "");
    if (error.statusCode === void 0) {
      return matchesPattern;
    }
    const statusIsRelevant = TOKEN_LIMIT_STATUS_CODES.includes(error.statusCode);
    return statusIsRelevant && matchesPattern;
  }
  return error instanceof Error
    ? messageContainsTokenLimitPattern(error.message)
    : false;
}
1392
/**
 * Produce a user-facing description for a token limit error.
 *
 * @param {*} error - Caught value.
 * @returns {string|null} `null` when the error is not a token limit error.
 *   Otherwise a sentence that quotes the first "<digits> token(s)" figure
 *   found in the message, or a generic sentence when none is found or the
 *   value is not an Error.
 */
function getTokenLimitErrorDescription(error) {
  if (!isTokenLimitError(error)) {
    return null;
  }
  const genericDescription =
    "Request exceeded the model's context window. Context will be compacted.";
  if (!(error instanceof Error)) {
    return genericDescription;
  }
  const found = error.message.match(/(\d+)\s*tokens?/i);
  return found
    ? `Request exceeded token limit (${found[1]} tokens). Context will be compacted.`
    : genericDescription;
}
1406
+
999
1407
  // src/snapshot/projects.ts
1000
1408
  function toJSONL(items) {
1001
1409
  return items.map((item) => JSON.stringify(item)).join("\n");
@@ -3105,15 +3513,26 @@ var AgentSession = class {
3105
3513
  });
3106
3514
  }
3107
3515
  /**
3108
- * Add a message to the conversation history
3516
+ * Send a context compacted notification to the client
3109
3517
  */
3110
- addToHistory(role, content) {
3111
- this.conversationHistory.push({
3112
- role,
3113
- content,
3114
- timestamp: Date.now()
3518
+ sendContextCompacted(reason) {
3519
+ this.send({
3520
+ type: "context_compacted",
3521
+ payload: { sessionId: this.sessionId, reason }
3115
3522
  });
3116
3523
  }
3524
+ /**
3525
+ * Add a user message to the conversation history
3526
+ */
3527
+ addUserMessage(content) {
3528
+ this.conversationHistory.push(createUserMessage(content));
3529
+ }
3530
+ /**
3531
+ * Add assistant messages (including tool calls and results) to the conversation history
3532
+ */
3533
+ addAssistantMessages(messages) {
3534
+ this.conversationHistory.push(...messages);
3535
+ }
3117
3536
  /**
3118
3537
  * Get a callback function for the report tool
3119
3538
  * This can be passed to the report tool to send reports to the client
@@ -3124,20 +3543,91 @@ var AgentSession = class {
3124
3543
  };
3125
3544
  }
3126
3545
  /**
3127
- * Execute a query and stream the response to the client
3546
+ * Execute a query and stream the response to the client.
3547
+ *
3548
+ * The conversation history is passed to the agent for multi-turn context.
3549
+ * After the response completes, both the user message and the assistant's
3550
+ * response (including any tool calls and results) are appended to the history.
3551
+ *
3552
+ * If a token limit error occurs, the conversation is automatically compacted
3553
+ * and the query is retried once.
3554
+ *
3555
+ * @param query - The query to execute
3556
+ * @param options - Optional settings including client-provided history
3128
3557
  */
3129
- async executeQuery(query) {
3558
+ async executeQuery(query, options) {
3130
3559
  if (this.isExecuting) {
3131
3560
  this.sendError("A query is already being executed");
3132
3561
  return;
3133
3562
  }
3134
3563
  this.isExecuting = true;
3135
3564
  this.abortController = new AbortController();
3136
- this.addToHistory("user", query);
3565
+ let historyToUse;
3566
+ let usingClientHistory = false;
3567
+ if (options?.history && Array.isArray(options.history) && options.history.length > 0) {
3568
+ historyToUse = fromUIMessages(options.history);
3569
+ usingClientHistory = true;
3570
+ } else {
3571
+ historyToUse = [...this.conversationHistory];
3572
+ }
3573
+ try {
3574
+ const firstAttemptError = await this.executeQueryWithHistory(
3575
+ query,
3576
+ historyToUse,
3577
+ usingClientHistory
3578
+ );
3579
+ if (firstAttemptError && isTokenLimitError(firstAttemptError)) {
3580
+ const errorDescription = getTokenLimitErrorDescription(firstAttemptError);
3581
+ const originalLength = historyToUse.length;
3582
+ historyToUse = compactConversation(historyToUse);
3583
+ const compactedLength = historyToUse.length;
3584
+ if (!usingClientHistory) {
3585
+ this.conversationHistory = historyToUse;
3586
+ }
3587
+ const reason = errorDescription ?? `Conversation compacted from ${originalLength} to ${compactedLength} messages to fit model limits.`;
3588
+ this.sendContextCompacted(reason);
3589
+ const retryError = await this.executeQueryWithHistory(
3590
+ query,
3591
+ historyToUse,
3592
+ usingClientHistory
3593
+ );
3594
+ if (retryError) {
3595
+ if (!this.abortController?.signal.aborted) {
3596
+ const message = retryError instanceof Error ? retryError.message : String(retryError);
3597
+ this.sendError(`Query failed after compaction: ${message}`);
3598
+ }
3599
+ } else {
3600
+ this.sendDone();
3601
+ }
3602
+ } else if (firstAttemptError) {
3603
+ if (!this.abortController?.signal.aborted) {
3604
+ const message = firstAttemptError instanceof Error ? firstAttemptError.message : String(firstAttemptError);
3605
+ this.sendError(`Query failed: ${message}`);
3606
+ }
3607
+ } else {
3608
+ this.sendDone();
3609
+ }
3610
+ } finally {
3611
+ this.isExecuting = false;
3612
+ this.abortController = null;
3613
+ }
3614
+ }
3615
+ /**
3616
+ * Execute a query with the provided conversation history.
3617
+ * Returns the error if execution fails, or null if successful.
3618
+ * On success, updates the server-side conversation history with the query and response
3619
+ * (unless usingClientHistory is true, in which case the client manages its own history).
3620
+ *
3621
+ * @param query - The query to execute
3622
+ * @param history - The conversation history to use for this query
3623
+ * @param usingClientHistory - If true, the client provided the history and manages its own state
3624
+ */
3625
+ async executeQueryWithHistory(query, history, usingClientHistory) {
3137
3626
  try {
3138
3627
  const agent = await this.getAgent();
3139
- const result = await agent.stream(query, {});
3140
- let fullResponse = "";
3628
+ const result = await agent.stream(query, {
3629
+ messages: history
3630
+ });
3141
3631
  let lastStepHadText = false;
3142
3632
  for await (const part of result.fullStream) {
3143
3633
  if (this.abortController?.signal.aborted) {
@@ -3147,11 +3637,9 @@ var AgentSession = class {
3147
3637
  case "text-delta":
3148
3638
  if (lastStepHadText && part.text.trim().length > 0) {
3149
3639
  const separator = "\n\n";
3150
- fullResponse += separator;
3151
3640
  this.sendText(separator);
3152
3641
  lastStepHadText = false;
3153
3642
  }
3154
- fullResponse += part.text;
3155
3643
  this.sendText(part.text);
3156
3644
  break;
3157
3645
  case "tool-call":
@@ -3161,27 +3649,23 @@ var AgentSession = class {
3161
3649
  this.sendToolResult(part.toolName, part.output);
3162
3650
  break;
3163
3651
  case "text-end":
3164
- if (fullResponse.trim().length > 0) {
3165
- lastStepHadText = true;
3166
- }
3652
+ lastStepHadText = true;
3167
3653
  break;
3168
3654
  }
3169
3655
  }
3170
3656
  if (!this.abortController?.signal.aborted) {
3171
3657
  await result.response;
3658
+ if (!usingClientHistory) {
3659
+ this.addUserMessage(query);
3660
+ const assistantMessages = await extractMessagesFromResponse(result);
3661
+ if (assistantMessages.length > 0) {
3662
+ this.addAssistantMessages(assistantMessages);
3663
+ }
3664
+ }
3172
3665
  }
3173
- if (fullResponse) {
3174
- this.addToHistory("assistant", fullResponse);
3175
- }
3176
- this.sendDone();
3666
+ return null;
3177
3667
  } catch (error) {
3178
- if (!this.abortController?.signal.aborted) {
3179
- const message = error instanceof Error ? error.message : String(error);
3180
- this.sendError(`Query failed: ${message}`);
3181
- }
3182
- } finally {
3183
- this.isExecuting = false;
3184
- this.abortController = null;
3668
+ return error instanceof Error ? error : new Error(String(error));
3185
3669
  }
3186
3670
  }
3187
3671
  /**
@@ -3779,14 +4263,14 @@ async function runUIServer(options) {
3779
4263
  },
3780
4264
  onMessage: async (message, ws) => {
3781
4265
  if (message.type === "query") {
3782
- const { content, sessionId: clientSessionId } = message.payload;
4266
+ const { content, sessionId: clientSessionId, history } = message.payload;
3783
4267
  const sessionId = clientSessionId ?? `session-${Date.now()}`;
3784
4268
  const session = sessionManager.getOrCreateSession(
3785
4269
  ws,
3786
4270
  sessionId,
3787
4271
  (msg) => wsServer.sendToClient(ws, msg)
3788
4272
  );
3789
- session.executeQuery(content).catch((error) => {
4273
+ session.executeQuery(content, { history }).catch((error) => {
3790
4274
  console.error("Error executing query:", error);
3791
4275
  wsServer.sendToClient(ws, {
3792
4276
  type: "error",
@@ -3901,6 +4385,7 @@ async function runInteractiveMode() {
3901
4385
  maxSteps: 25
3902
4386
  };
3903
4387
  agent = await createInsightAgent(agentConfig);
4388
+ const conversationHistory = [];
3904
4389
  const rl = readline.createInterface({
3905
4390
  input: process.stdin,
3906
4391
  output: process.stdout,
@@ -3919,6 +4404,81 @@ async function runInteractiveMode() {
3919
4404
  userExited = true;
3920
4405
  rl.prompt();
3921
4406
  });
4407
/**
 * Run one query through the agent, print the answer, and return the
 * messages produced by the response.
 *
 * Uses `agent.stream` when `config.stream` is set (chunks are written to
 * stdout as they arrive), otherwise `agent.generate`.
 *
 * @param {string} query - The user's query.
 * @param {Array} messages - Conversation history; a copy is passed to the agent.
 * @param {object} agentProgress - Progress reporter updated after each step.
 * @returns {Promise<{assistantMessages: Array}>} Messages extracted from the result.
 */
const executeAgentQuery = async (query, messages, agentProgress) => {
  // Short preview of a bash command: first line, at most 50 characters.
  // Tool calls elsewhere in this file carry their arguments in `input`;
  // `args` is kept as a fallback for the older shape.
  const previewBashCommand = (toolCall) => {
    const payload = toolCall.input ?? toolCall.args;
    const command = typeof payload?.command === "string" ? payload.command : "";
    const shortCmd = command.split("\n")[0].substring(0, 50);
    return shortCmd + (command.length > 50 ? "..." : "");
  };

  // Shared step callback for both the streaming and non-streaming paths.
  const onStepFinish = (step) => {
    for (const toolCall of step.toolCalls ?? []) {
      const toolName = toolCall.toolName;
      if (toolName === "bash") {
        agentProgress.updateTool(toolName, previewBashCommand(toolCall));
      } else {
        agentProgress.updateTool(toolName);
      }
    }
    for (const toolResult of step.toolResults ?? []) {
      agentProgress.updateToolResult(toolResult.toolName, !toolResult.isError);
    }
  };

  const callOptions = { messages: [...messages], onStepFinish };

  if (config.stream) {
    const result = await agent.stream(query, callOptions);
    agentProgress.stop();
    console.log("\n\u2728 Answer:\n");
    for await (const chunk of result.textStream) {
      process.stdout.write(chunk);
    }
    console.log();
    await result.response;
    const assistantMessages = await extractMessagesFromResponse(result);
    return { assistantMessages };
  }

  const result = await agent.generate(query, callOptions);
  agentProgress.succeed();
  console.log("\n\u2728 Answer:\n");
  console.log(result.text);
  const assistantMessages = await extractMessagesFromResponse(result);
  return { assistantMessages };
};
3922
4482
  const processQuery = async (query) => {
3923
4483
  if (query === "exit" || query === "quit") {
3924
4484
  return true;
@@ -3956,76 +4516,55 @@ async function runInteractiveMode() {
3956
4516
  return false;
3957
4517
  }
3958
4518
  try {
4519
+ if (conversationHistory.length > 0) {
4520
+ console.log(
4521
+ `(continuing conversation with ${conversationHistory.length} previous messages)
4522
+ `
4523
+ );
4524
+ }
3959
4525
  const agentProgress = new AgentProgress(!config.stream);
3960
4526
  agentProgress.startThinking();
3961
- if (config.stream) {
3962
- const result = await agent.stream(query, {
3963
- onStepFinish: (step) => {
3964
- if (step.toolCalls?.length) {
3965
- step.toolCalls.forEach((toolCall) => {
3966
- const toolName = toolCall.toolName;
3967
- if (toolName === "bash") {
3968
- const command = toolCall.args?.command || "";
3969
- const shortCmd = command.split("\n")[0].substring(0, 50);
3970
- agentProgress.updateTool(
3971
- toolName,
3972
- shortCmd + (command.length > 50 ? "..." : "")
3973
- );
3974
- } else {
3975
- agentProgress.updateTool(toolName);
3976
- }
3977
- });
3978
- }
3979
- if (step.toolResults?.length) {
3980
- step.toolResults.forEach((toolResult) => {
3981
- agentProgress.updateToolResult(
3982
- toolResult.toolName,
3983
- !toolResult.isError
3984
- );
3985
- });
3986
- }
3987
- }
3988
- });
3989
- agentProgress.stop();
3990
- console.log("\n\u2728 Answer:\n");
3991
- for await (const chunk of result.textStream) {
3992
- process.stdout.write(chunk);
4527
+ let didCompact = false;
4528
+ let currentHistory = [...conversationHistory];
4529
+ try {
4530
+ const { assistantMessages } = await executeAgentQuery(
4531
+ query,
4532
+ currentHistory,
4533
+ agentProgress
4534
+ );
4535
+ conversationHistory.push(createUserMessage(query));
4536
+ conversationHistory.push(...assistantMessages);
4537
+ } catch (error) {
4538
+ if (isTokenLimitError(error) && conversationHistory.length > 0) {
4539
+ agentProgress.stop();
4540
+ console.log(
4541
+ "\n\u26A0\uFE0F Context was trimmed to fit model limits\n"
4542
+ );
4543
+ const compactedHistory = compactConversation(conversationHistory);
4544
+ currentHistory = compactedHistory;
4545
+ didCompact = true;
4546
+ const retryProgress = new AgentProgress(!config.stream);
4547
+ retryProgress.startThinking();
4548
+ const { assistantMessages } = await executeAgentQuery(
4549
+ query,
4550
+ currentHistory,
4551
+ retryProgress
4552
+ );
4553
+ conversationHistory.length = 0;
4554
+ conversationHistory.push(...compactedHistory);
4555
+ conversationHistory.push(createUserMessage(query));
4556
+ conversationHistory.push(...assistantMessages);
4557
+ } else {
4558
+ throw error;
3993
4559
  }
3994
- console.log();
3995
- await result.response;
3996
- } else {
3997
- const result = await agent.generate(query, {
3998
- onStepFinish: (step) => {
3999
- if (step.toolCalls?.length) {
4000
- step.toolCalls.forEach((toolCall) => {
4001
- const toolName = toolCall.toolName;
4002
- if (toolName === "bash") {
4003
- const command = toolCall.args?.command || "";
4004
- const shortCmd = command.split("\n")[0].substring(0, 50);
4005
- agentProgress.updateTool(
4006
- toolName,
4007
- shortCmd + (command.length > 50 ? "..." : "")
4008
- );
4009
- } else {
4010
- agentProgress.updateTool(toolName);
4011
- }
4012
- });
4013
- }
4014
- if (step.toolResults?.length) {
4015
- step.toolResults.forEach((toolResult) => {
4016
- agentProgress.updateToolResult(
4017
- toolResult.toolName,
4018
- !toolResult.isError
4019
- );
4020
- });
4021
- }
4022
- }
4023
- });
4024
- agentProgress.succeed();
4025
- console.log("\n\u2728 Answer:\n");
4026
- console.log(result.text);
4027
4560
  }
4028
4561
  console.log("\n" + "\u2500".repeat(50) + "\n");
4562
+ if (didCompact) {
4563
+ console.log(
4564
+ `(conversation compacted to ${conversationHistory.length} messages)
4565
+ `
4566
+ );
4567
+ }
4029
4568
  } catch (error) {
4030
4569
  console.error("\n\u274C Query Error:");
4031
4570
  if (error instanceof PhoenixClientError) {