npm - @cephalization/phoenix-insight - Versions diffs - 1.1.0 → 1.2.0 - Mend

@cephalization/phoenix-insight 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/dist/cli.js (changed)

@@ -67,7 +67,7 @@ var PhoenixClientError = class extends Error {
 function createPhoenixClient(config = {}) {
   const headers = {};
   if (config.apiKey) {
-    headers["api_key"] = config.apiKey;
+    headers["Authorization"] = `Bearer ${config.apiKey}`;
   }
   const clientOptions = {
     options: {
@@ -790,6 +790,308 @@ Use this tool when you want to present structured analysis results, metrics, tab
   });
 }
+// src/agent/conversation.ts
+import {
+  pruneMessages
+} from "ai";
+/**
+ * Build a conversation entry authored by the user.
+ *
+ * @param content - Value stored as the message content
+ * @returns An object of the form `{ role: "user", content }`
+ */
+function createUserMessage(content) {
+  const message = { role: "user" };
+  message.content = content;
+  return message;
+}
+/**
+ * Build a conversation entry authored by the assistant.
+ *
+ * @param content - Value stored as the message content
+ * @returns An object of the form `{ role: "assistant", content }`
+ */
+function createAssistantMessage(content) {
+  const message = { role: "assistant" };
+  message.content = content;
+  return message;
+}
+/**
+ * Build an assistant conversation entry whose content is a list of parts.
+ *
+ * @param parts - Parts stored, by reference, as the message content
+ * @returns An object of the form `{ role: "assistant", content: parts }`
+ */
+function createAssistantMessageWithParts(parts) {
+  const role = "assistant";
+  return { role, content: parts };
+}
+/**
+ * Build a conversation entry that carries tool results.
+ *
+ * @param results - Results stored, by reference, as the message content
+ * @returns An object of the form `{ role: "tool", content: results }`
+ */
+function createToolMessage(results) {
+  const role = "tool";
+  return { role, content: results };
+}
+/**
+ * Convert a stored user message into the shape handed to the model.
+ * Only `content` is carried over; any other field on the input is dropped.
+ *
+ * @param message - Stored message whose `content` is copied
+ * @returns `{ role: "user", content }`
+ */
+function convertUserMessage(message) {
+  const { content } = message;
+  return { role: "user", content };
+}
+/**
+ * Convert a stored assistant message into the shape handed to the model.
+ *
+ * String content is passed through. Array content is mapped part by part:
+ * `text` parts keep their text, and every other part is emitted as a
+ * `tool-call` whose stored `args` become `input`.
+ *
+ * @param message - Stored assistant message (string or array content)
+ * @returns `{ role: "assistant", content }` in model format
+ */
+function convertAssistantMessage(message) {
+  const { content } = message;
+  if (typeof content === "string") {
+    return { role: "assistant", content };
+  }
+  const toSdkPart = (part) =>
+    part.type === "text"
+      ? { type: "text", text: part.text }
+      : {
+          type: "tool-call",
+          toolCallId: part.toolCallId,
+          toolName: part.toolName,
+          input: part.args
+        };
+  const sdkParts = content.map((part) => toSdkPart(part));
+  return { role: "assistant", content: sdkParts };
+}
+/**
+ * Convert a stored tool message into the shape handed to the model.
+ *
+ * Each stored result is wrapped as `{ type, value }`, where `type` is
+ * "error-json" when the part is flagged `isError` and "json" otherwise.
+ *
+ * @param message - Stored tool message with an array of result parts
+ * @returns `{ role: "tool", content }` in model format
+ */
+function convertToolMessage(message) {
+  const wrapOutput = (part) => {
+    const kind = part.isError ? "error-json" : "json";
+    return { type: kind, value: part.result };
+  };
+  const sdkParts = message.content.map((part) => {
+    const { toolCallId, toolName } = part;
+    return { type: "tool-result", toolCallId, toolName, output: wrapOutput(part) };
+  });
+  return { role: "tool", content: sdkParts };
+}
+/**
+ * Convert one stored conversation message to model format by dispatching
+ * on its role.
+ *
+ * @param message - Stored message with role "user", "assistant" or "tool"
+ * @returns The converted message, or `undefined` for any other role
+ */
+function toModelMessage(message) {
+  const { role } = message;
+  if (role === "user") {
+    return convertUserMessage(message);
+  }
+  if (role === "assistant") {
+    return convertAssistantMessage(message);
+  }
+  if (role === "tool") {
+    return convertToolMessage(message);
+  }
+  return undefined;
+}
+/**
+ * Convert a whole stored conversation to model format, preserving order.
+ *
+ * @param history - Array of stored conversation messages
+ * @returns A new array holding the result of `toModelMessage` for each entry
+ */
+function toModelMessages(history) {
+  return history.map((message) => toModelMessage(message));
+}
+// Text that replaces the body of a historical `generate_report` call.
+var TRUNCATED_REPORT_PLACEHOLDER = "[Report content truncated to save tokens]";
+/**
+ * Replace the input of every `generate_report` tool call found in assistant
+ * messages with a short placeholder, keeping the report title when one is set.
+ *
+ * Inputs are not mutated: untouched messages and parts are returned by
+ * reference, and rewritten ones are shallow copies.
+ *
+ * @param messages - Model-format messages
+ * @returns A new array of messages with report inputs truncated
+ */
+function truncateReportToolCalls(messages) {
+  const shrinkPart = (part) => {
+    const isReportCall = part.type === "tool-call" && part.toolName === "generate_report";
+    if (!isReportCall) {
+      return part;
+    }
+    const original = part.input;
+    const replacement = { content: TRUNCATED_REPORT_PLACEHOLDER };
+    if (original?.title) {
+      replacement.title = original.title;
+    }
+    return { ...part, input: replacement };
+  };
+  return messages.map((message) => {
+    const leaveAsIs = message.role !== "assistant" || typeof message.content === "string";
+    if (leaveAsIs) {
+      return message;
+    }
+    const rewritten = message.content.map((part) => shrinkPart(part));
+    return { ...message, content: rewritten };
+  });
+}
+/**
+ * Turn the steps of a generate/stream result into stored conversation messages.
+ *
+ * For each step, in order:
+ *  - a step with tool calls yields one assistant message made of parts
+ *    (optional leading text part, then one `tool-call` part per call, with the
+ *    call's `input` stored as `args`);
+ *  - a step with only text yields a plain-string assistant message;
+ *  - a step with tool results additionally yields one tool message, with each
+ *    result's `output` stored as `result`.
+ *
+ * @param result - Object whose `steps` is an array or a promise of one
+ * @returns Messages in step order; empty when there are no steps
+ */
+async function extractMessagesFromResponse(result) {
+  const collected = [];
+  // `steps` may be a plain array or a promise; awaiting handles both.
+  const steps = await Promise.resolve(result.steps);
+  if (!steps || steps.length === 0) {
+    return collected;
+  }
+  for (const step of steps) {
+    const hasText = Boolean(step.text) && step.text.length > 0;
+    const hasToolCalls = Boolean(step.toolCalls) && step.toolCalls.length > 0;
+    if (hasToolCalls) {
+      const parts = hasText ? [{ type: "text", text: step.text }] : [];
+      for (const call of step.toolCalls) {
+        const { toolCallId, toolName } = call;
+        parts.push({ type: "tool-call", toolCallId, toolName, args: call.input });
+      }
+      collected.push(createAssistantMessageWithParts(parts));
+    } else if (hasText) {
+      collected.push(createAssistantMessage(step.text));
+    }
+    const hasToolResults = Boolean(step.toolResults) && step.toolResults.length > 0;
+    if (hasToolResults) {
+      const results = step.toolResults.map((toolResult) => ({
+        type: "tool-result",
+        toolCallId: toolResult.toolCallId,
+        toolName: toolResult.toolName,
+        result: toolResult.output
+      }));
+      collected.push(createToolMessage(results));
+    }
+  }
+  return collected;
+}
+/**
+ * Convert one UI-format message into a stored conversation message.
+ *
+ * The input originates from a client, so array content is treated
+ * defensively: entries that are not objects are dropped instead of being
+ * dereferenced. Assistant parts other than `text` become `tool-call` parts.
+ *
+ * @param message - Object with role "user", "assistant" or "tool"
+ * @returns The stored-format message, or `undefined` for any other role
+ */
+function convertUIMessage(message) {
+  const isPart = (part) => part !== null && typeof part === "object";
+  switch (message.role) {
+    case "user":
+      return { role: "user", content: message.content };
+    case "assistant": {
+      if (typeof message.content === "string") {
+        return { role: "assistant", content: message.content };
+      }
+      const parts = message.content.filter(isPart).map((part) => {
+        if (part.type === "text") {
+          return { type: "text", text: part.text };
+        }
+        return {
+          type: "tool-call",
+          toolCallId: part.toolCallId,
+          toolName: part.toolName,
+          args: part.args
+        };
+      });
+      return { role: "assistant", content: parts };
+    }
+    case "tool": {
+      const results = message.content.filter(isPart).map((part) => ({
+        type: "tool-result",
+        toolCallId: part.toolCallId,
+        toolName: part.toolName,
+        result: part.result,
+        ...part.isError && { isError: part.isError }
+      }));
+      return { role: "tool", content: results };
+    }
+  }
+}
+/**
+ * Tell whether a message has a known role and content that
+ * `convertUIMessage` can read without throwing.
+ */
+function hasConvertibleContent(message) {
+  switch (message.role) {
+    case "user":
+      return true;
+    case "assistant":
+      return typeof message.content === "string" || Array.isArray(message.content);
+    case "tool":
+      return Array.isArray(message.content);
+    default:
+      return false;
+  }
+}
+/**
+ * Convert a client-supplied history into stored conversation messages.
+ *
+ * Anything that is not an array yields `[]`. Entries are skipped when they
+ * are not objects, have an unknown role, or carry content of the wrong shape
+ * (assistant content must be a string or array, tool content an array).
+ * Skipping instead of throwing keeps one malformed entry from failing the
+ * whole query.
+ *
+ * @param uiMessages - Untrusted value expected to be an array of UI messages
+ * @returns Stored-format messages for every well-formed entry, in order
+ */
+function fromUIMessages(uiMessages) {
+  if (!Array.isArray(uiMessages)) {
+    return [];
+  }
+  return uiMessages
+    .filter((msg) => msg !== null && typeof msg === "object" && hasConvertibleContent(msg))
+    .map((msg) => convertUIMessage(msg));
+}
+/**
+ * Convert one model-format message back into a stored conversation message.
+ *
+ *  - user: string content is kept; array content is reduced to the
+ *    concatenation of its text parts.
+ *  - assistant: string content is kept; array content keeps only `text` and
+ *    `tool-call` parts (`input` becomes `args`). No kept parts yields "", and
+ *    exactly one text part collapses to a plain string.
+ *  - tool: only `tool-result` parts are kept. An output shaped like
+ *    `{ type, value }` is unwrapped to `value` and flagged `isError` when its
+ *    type is "error-json" or "error-text"; any other output is stored as-is.
+ *  - system: becomes a user message prefixed with "[System]: ".
+ *  - anything else: becomes a placeholder user message.
+ *
+ * @param message - Model-format message
+ * @returns The stored-format message
+ */
+function fromModelMessage(message) {
+  const { role } = message;
+  if (role === "user") {
+    const raw = message.content;
+    if (typeof raw === "string") {
+      return { role: "user", content: raw };
+    }
+    const joined = raw
+      .filter((part) => part.type === "text")
+      .map((part) => part.text)
+      .join("");
+    return { role: "user", content: joined };
+  }
+  if (role === "assistant") {
+    if (typeof message.content === "string") {
+      return { role: "assistant", content: message.content };
+    }
+    const kept = [];
+    for (const part of message.content) {
+      switch (part.type) {
+        case "text":
+          kept.push({ type: "text", text: part.text });
+          break;
+        case "tool-call":
+          kept.push({
+            type: "tool-call",
+            toolCallId: part.toolCallId,
+            toolName: part.toolName,
+            args: part.input
+          });
+          break;
+      }
+    }
+    if (kept.length === 0) {
+      return { role: "assistant", content: "" };
+    }
+    // A lone text part is stored as a plain string.
+    if (kept.length === 1 && kept[0].type === "text") {
+      return { role: "assistant", content: kept[0].text };
+    }
+    return { role: "assistant", content: kept };
+  }
+  if (role === "tool") {
+    const results = [];
+    for (const part of message.content) {
+      if (part.type !== "tool-result") {
+        continue;
+      }
+      const output = part.output;
+      const isWrapped = Boolean(output) && typeof output === "object" && "type" in output && "value" in output;
+      const result = isWrapped ? output.value : output;
+      const isError = isWrapped && (output.type === "error-json" || output.type === "error-text");
+      const entry = {
+        type: "tool-result",
+        toolCallId: part.toolCallId,
+        toolName: part.toolName,
+        result
+      };
+      if (isError) {
+        entry.isError = isError;
+      }
+      results.push(entry);
+    }
+    return { role: "tool", content: results };
+  }
+  if (role === "system") {
+    return { role: "user", content: `[System]: ${message.content}` };
+  }
+  return { role: "user", content: "[Unknown message type]" };
+}
+/**
+ * Convert a list of model-format messages back into stored conversation
+ * messages, preserving order.
+ *
+ * @param messages - Array of model-format messages
+ * @returns A new array holding the result of `fromModelMessage` for each entry
+ */
+function fromModelMessages(messages) {
+  return messages.map((message) => fromModelMessage(message));
+}
+/**
+ * Shrink a conversation by pruning its middle section while keeping the
+ * first and last messages intact.
+ *
+ * The history is split into head / middle / tail. Only the middle is passed
+ * to `pruneMessages`, which is asked to drop reasoning and all tool
+ * calls/results and to remove messages left empty.
+ *
+ * The split never separates a tool call from its result: pruning one half of
+ * such a pair would leave an orphaned call or result in the kept section.
+ *  - A `tool` message sitting right after the head is pulled into the head,
+ *    next to the assistant call it answers.
+ *  - A `tool` message at the start of the tail is pushed into the middle,
+ *    where it is pruned together with its call.
+ *
+ * @param messages - Stored conversation messages
+ * @param options - Optional settings
+ * @param options.keepFirstN - Messages kept verbatim at the start (default 2)
+ * @param options.keepLastN - Messages kept verbatim at the end (default 6)
+ * @returns The compacted conversation, or `messages` itself when there is
+ *   nothing to prune
+ */
+function compactConversation(messages, options) {
+  const keepFirstN = options?.keepFirstN ?? 2;
+  const keepLastN = options?.keepLastN ?? 6;
+  if (messages.length <= keepFirstN + keepLastN) {
+    return messages;
+  }
+  const modelMessages = toModelMessages(messages);
+  const isToolMessage = (index) => modelMessages[index]?.role === "tool";
+  let middleStartIndex = keepFirstN;
+  let middleEndIndex = modelMessages.length - keepLastN;
+  // Keep tool results attached to the call at the end of the head.
+  while (middleStartIndex > 0 && middleStartIndex < middleEndIndex && isToolMessage(middleStartIndex)) {
+    middleStartIndex += 1;
+  }
+  // Tool results leading the tail answer a call in the middle; prune both.
+  while (middleEndIndex > middleStartIndex && middleEndIndex < modelMessages.length && isToolMessage(middleEndIndex)) {
+    middleEndIndex += 1;
+  }
+  if (middleEndIndex <= middleStartIndex) {
+    return messages;
+  }
+  const firstMessages = modelMessages.slice(0, middleStartIndex);
+  const middleMessages = modelMessages.slice(middleStartIndex, middleEndIndex);
+  const lastMessages = modelMessages.slice(middleEndIndex);
+  const prunedMiddle = pruneMessages({
+    messages: middleMessages,
+    reasoning: "all",
+    toolCalls: "all",
+    emptyMessages: "remove"
+  });
+  return fromModelMessages([...firstMessages, ...prunedMiddle, ...lastMessages]);
+}
 // src/agent/index.ts
 var PhoenixInsightAgent = class {
   mode;
@@ -878,6 +1180,14 @@ var PhoenixInsightAgent = class {
   }
   /**
    * Generate a response for a user query
+   *
+   * @param userQuery - The current user query
+   * @param options - Optional configuration
+   * @param options.onStepFinish - Callback called after each agent step
+   * @param options.messages - Optional conversation history for multi-turn conversations.
+   *   When provided, the history is converted to AI SDK format and the userQuery is
+   *   appended as the final user message. Report tool calls in history are truncated
+   *   to save tokens.
    */
   async generate(userQuery, options) {
     let tools;
@@ -889,17 +1199,32 @@ var PhoenixInsightAgent = class {
       );
     }
     try {
-      const result = await generateText({
+      const baseConfig = {
         model: this.model,
         system: this.systemPrompt,
-        prompt: userQuery,
         tools,
         stopWhen: stepCountIs(this.maxSteps),
         onStepFinish: options?.onStepFinish,
         experimental_telemetry: {
           isEnabled: true
         }
-      });
+      };
+      let result;
+      if (options?.messages && options.messages.length > 0) {
+        const historyMessages = toModelMessages(options.messages);
+        const truncatedHistory = truncateReportToolCalls(historyMessages);
+        const currentUserMessage = toModelMessages([createUserMessage(userQuery)]);
+        const allMessages = [...truncatedHistory, ...currentUserMessage];
+        result = await generateText({
+          ...baseConfig,
+          messages: allMessages
+        });
+      } else {
+        result = await generateText({
+          ...baseConfig,
+          prompt: userQuery
+        });
+      }
       return result;
     } catch (error) {
       if (error instanceof Error) {
@@ -924,6 +1249,14 @@ var PhoenixInsightAgent = class {
   }
   /**
    * Stream a response for a user query
+   *
+   * @param userQuery - The current user query
+   * @param options - Optional configuration
+   * @param options.onStepFinish - Callback called after each agent step
+   * @param options.messages - Optional conversation history for multi-turn conversations.
+   *   When provided, the history is converted to AI SDK format and the userQuery is
+   *   appended as the final user message. Report tool calls in history are truncated
+   *   to save tokens.
    */
   async stream(userQuery, options) {
     let tools;
@@ -935,17 +1268,32 @@ var PhoenixInsightAgent = class {
       );
     }
     try {
-      const result = streamText({
+      const baseConfig = {
         model: this.model,
         system: this.systemPrompt,
-        prompt: userQuery,
         tools,
         stopWhen: stepCountIs(this.maxSteps),
         onStepFinish: options?.onStepFinish,
         experimental_telemetry: {
           isEnabled: true
         }
-      });
+      };
+      let result;
+      if (options?.messages && options.messages.length > 0) {
+        const historyMessages = toModelMessages(options.messages);
+        const truncatedHistory = truncateReportToolCalls(historyMessages);
+        const currentUserMessage = toModelMessages([createUserMessage(userQuery)]);
+        const allMessages = [...truncatedHistory, ...currentUserMessage];
+        result = streamText({
+          ...baseConfig,
+          messages: allMessages
+        });
+      } else {
+        result = streamText({
+          ...baseConfig,
+          prompt: userQuery
+        });
+      }
       return result;
     } catch (error) {
       if (error instanceof Error) {
@@ -979,11 +1327,11 @@ async function createInsightAgent(config) {
   return new PhoenixInsightAgent(config);
 }
+/**
+ * Run a query against an agent, streaming or not.
+ *
+ * `options.stream` (default false) selects `agent.stream` over
+ * `agent.generate`; every other option is forwarded to that call unchanged.
+ *
+ * @param agent - Object exposing `stream` and `generate`
+ * @param userQuery - The query passed to the agent
+ * @param options - Optional settings; may be omitted
+ * @returns Whatever the selected agent method resolves to
+ */
 async function runQuery(agent, userQuery, options) {
-  const { stream = false, ...callbacks } = options || {};
+  const { stream = false, ...rest } = options || {};
   if (stream) {
-    return await agent.stream(userQuery, callbacks);
+    return await agent.stream(userQuery, rest);
   } else {
-    return await agent.generate(userQuery, callbacks);
+    return await agent.generate(userQuery, rest);
   }
 }
 async function runOneShotQuery(config, userQuery, options) {
@@ -996,6 +1344,66 @@ async function runOneShotQuery(config, userQuery, options) {
   }
 }
+// src/agent/token-errors.ts
+import { APICallError } from "ai";
+// Substrings that mark an error message as a token/context-limit failure.
+// messageContainsTokenLimitPattern compares them case-insensitively.
+var TOKEN_LIMIT_ERROR_PATTERNS = [
+  // Anthropic-specific patterns
+  "prompt is too long",
+  "context window",
+  "context length",
+  "max_tokens",
+  "maximum context",
+  "token limit",
+  "tokens exceed",
+  "exceeds the maximum",
+  "too many tokens",
+  // Generic patterns that might apply to other providers
+  "context limit",
+  "input too long",
+  "request too large"
+];
+// Status codes that isTokenLimitError accepts on an API call error; the
+// message must match one of the patterns above as well.
+var TOKEN_LIMIT_STATUS_CODES = [400, 413, 422];
+/**
+ * Tell whether a value is an `APICallError`, as judged by
+ * `APICallError.isInstance`.
+ *
+ * @param error - Any thrown value
+ * @returns The result of `APICallError.isInstance(error)`
+ */
+function isAPICallError(error) {
+  const isInstance = APICallError.isInstance(error);
+  return isInstance;
+}
+/**
+ * Check an error message, case-insensitively, for any of the known
+ * token-limit substrings.
+ *
+ * @param message - Error message text
+ * @returns true when at least one pattern occurs in the message
+ */
+function messageContainsTokenLimitPattern(message) {
+  const haystack = message.toLowerCase();
+  for (const pattern of TOKEN_LIMIT_ERROR_PATTERNS) {
+    if (haystack.includes(pattern.toLowerCase())) {
+      return true;
+    }
+  }
+  return false;
+}
+/**
+ * Decide whether a thrown value represents a token/context-limit failure.
+ *
+ *  - API call errors: the message must match a token-limit pattern and, when
+ *    a status code is present, it must be one of TOKEN_LIMIT_STATUS_CODES.
+ *  - Other `Error` instances: decided by the message alone.
+ *  - Anything else: false.
+ *
+ * @param error - Any thrown value
+ * @returns true when the error looks like a token-limit failure
+ */
+function isTokenLimitError(error) {
+  if (isAPICallError(error)) {
+    const matchesPattern = messageContainsTokenLimitPattern(error.message || "");
+    if (error.statusCode === undefined) {
+      return matchesPattern;
+    }
+    const statusIsRelevant = TOKEN_LIMIT_STATUS_CODES.includes(error.statusCode);
+    return statusIsRelevant && matchesPattern;
+  }
+  return error instanceof Error ? messageContainsTokenLimitPattern(error.message) : false;
+}
+/**
+ * Produce a user-facing description for a token-limit error.
+ *
+ * When the error message contains "<number> token(s)", the first such number
+ * is quoted in the description; otherwise a generic sentence is returned.
+ *
+ * @param error - Any thrown value
+ * @returns The description, or null when the value is not a token-limit error
+ */
+function getTokenLimitErrorDescription(error) {
+  if (!isTokenLimitError(error)) {
+    return null;
+  }
+  const generic = "Request exceeded the model's context window. Context will be compacted.";
+  if (!(error instanceof Error)) {
+    return generic;
+  }
+  const tokenMatch = error.message.match(/(\d+)\s*tokens?/i);
+  return tokenMatch
+    ? `Request exceeded token limit (${tokenMatch[1]} tokens). Context will be compacted.`
+    : generic;
+}
 // src/snapshot/projects.ts
 function toJSONL(items) {
   return items.map((item) => JSON.stringify(item)).join("\n");
@@ -3105,15 +3513,26 @@ var AgentSession = class {
     });
   }
   /**
-   * Add a message to the conversation history
+   * Send a context compacted notification to the client
+   *
+   * @param reason - Text sent in the payload's `reason` field, alongside the session id
    */
-  addToHistory(role, content) {
-    this.conversationHistory.push({
-      role,
-      content,
-      timestamp: Date.now()
+  sendContextCompacted(reason) {
+    this.send({
+      type: "context_compacted",
+      payload: { sessionId: this.sessionId, reason }
     });
   }
+  /**
+   * Append a user message to this session's conversation history.
+   *
+   * @param content - Content of the user message
+   */
+  addUserMessage(content) {
+    const entry = createUserMessage(content);
+    this.conversationHistory.push(entry);
+  }
+  /**
+   * Append already-built messages (assistant text, tool calls, tool results)
+   * to this session's conversation history, in the order given.
+   *
+   * @param messages - Messages to append
+   */
+  addAssistantMessages(messages) {
+    const history = this.conversationHistory;
+    history.push(...messages);
+  }
   /**
    * Get a callback function for the report tool
    * This can be passed to the report tool to send reports to the client
@@ -3124,20 +3543,91 @@ var AgentSession = class {
     };
   }
   /**
-   * Execute a query and stream the response to the client
+   * Execute a query and stream the response to the client.
+   *
+   * The conversation history is passed to the agent for multi-turn context.
+   * After the response completes, both the user message and the assistant's
+   * response (including any tool calls and results) are appended to the history.
+   *
+   * If a token limit error occurs, the conversation is automatically compacted
+   * and the query is retried once.
+   *
+   * The history is resolved inside the try block so that an exception raised
+   * while converting client-provided history still runs the cleanup in
+   * `finally`; otherwise the session would stay marked as executing and reject
+   * every later query.
+   *
+   * @param query - The query to execute
+   * @param options - Optional settings including client-provided history
    */
-  async executeQuery(query) {
+  async executeQuery(query, options) {
     if (this.isExecuting) {
       this.sendError("A query is already being executed");
       return;
     }
     this.isExecuting = true;
     this.abortController = new AbortController();
-    this.addToHistory("user", query);
+    let historyToUse;
+    let usingClientHistory = false;
+    try {
+      if (options?.history && Array.isArray(options.history) && options.history.length > 0) {
+        historyToUse = fromUIMessages(options.history);
+        usingClientHistory = true;
+      } else {
+        historyToUse = [...this.conversationHistory];
+      }
+      const firstAttemptError = await this.executeQueryWithHistory(
+        query,
+        historyToUse,
+        usingClientHistory
+      );
+      if (firstAttemptError && isTokenLimitError(firstAttemptError)) {
+        const errorDescription = getTokenLimitErrorDescription(firstAttemptError);
+        const originalLength = historyToUse.length;
+        historyToUse = compactConversation(historyToUse);
+        const compactedLength = historyToUse.length;
+        // Client-managed history is never written back to the server copy.
+        if (!usingClientHistory) {
+          this.conversationHistory = historyToUse;
+        }
+        const reason = errorDescription ?? `Conversation compacted from ${originalLength} to ${compactedLength} messages to fit model limits.`;
+        this.sendContextCompacted(reason);
+        const retryError = await this.executeQueryWithHistory(
+          query,
+          historyToUse,
+          usingClientHistory
+        );
+        if (retryError) {
+          if (!this.abortController?.signal.aborted) {
+            const message = retryError instanceof Error ? retryError.message : String(retryError);
+            this.sendError(`Query failed after compaction: ${message}`);
+          }
+        } else {
+          this.sendDone();
+        }
+      } else if (firstAttemptError) {
+        if (!this.abortController?.signal.aborted) {
+          const message = firstAttemptError instanceof Error ? firstAttemptError.message : String(firstAttemptError);
+          this.sendError(`Query failed: ${message}`);
+        }
+      } else {
+        this.sendDone();
+      }
+    } finally {
+      this.isExecuting = false;
+      this.abortController = null;
+    }
+  }
+  /**
+   * Execute a query with the provided conversation history.
+   * Returns the error if execution fails, or null if successful.
+   * On success, updates the server-side conversation history with the query and response
+   * (unless usingClientHistory is true, in which case the client manages its own history).
+   *
+   * @param query - The query to execute
+   * @param history - The conversation history to use for this query
+   * @param usingClientHistory - If true, the client provided the history and manages its own state
+   */
+  async executeQueryWithHistory(query, history, usingClientHistory) {
     try {
       const agent = await this.getAgent();
-      const result = await agent.stream(query, {});
-      let fullResponse = "";
+      const result = await agent.stream(query, {
+        messages: history
+      });
       let lastStepHadText = false;
       for await (const part of result.fullStream) {
         if (this.abortController?.signal.aborted) {
@@ -3147,11 +3637,9 @@ var AgentSession = class {
           case "text-delta":
             if (lastStepHadText && part.text.trim().length > 0) {
               const separator = "\n\n";
-              fullResponse += separator;
               this.sendText(separator);
               lastStepHadText = false;
             }
-            fullResponse += part.text;
             this.sendText(part.text);
             break;
           case "tool-call":
@@ -3161,27 +3649,23 @@ var AgentSession = class {
             this.sendToolResult(part.toolName, part.output);
             break;
           case "text-end":
-            if (fullResponse.trim().length > 0) {
-              lastStepHadText = true;
-            }
+            lastStepHadText = true;
             break;
         }
       }
       if (!this.abortController?.signal.aborted) {
         await result.response;
+        if (!usingClientHistory) {
+          this.addUserMessage(query);
+          const assistantMessages = await extractMessagesFromResponse(result);
+          if (assistantMessages.length > 0) {
+            this.addAssistantMessages(assistantMessages);
+          }
+        }
       }
-      if (fullResponse) {
-        this.addToHistory("assistant", fullResponse);
-      }
-      this.sendDone();
+      return null;
     } catch (error) {
-      if (!this.abortController?.signal.aborted) {
-        const message = error instanceof Error ? error.message : String(error);
-        this.sendError(`Query failed: ${message}`);
-      }
-    } finally {
-      this.isExecuting = false;
-      this.abortController = null;
+      return error instanceof Error ? error : new Error(String(error));
     }
   }
   /**
@@ -3779,14 +4263,14 @@ async function runUIServer(options) {
       },
       onMessage: async (message, ws) => {
         if (message.type === "query") {
-          const { content, sessionId: clientSessionId } = message.payload;
+          const { content, sessionId: clientSessionId, history } = message.payload;
           const sessionId = clientSessionId ?? `session-${Date.now()}`;
           const session = sessionManager.getOrCreateSession(
             ws,
             sessionId,
             (msg) => wsServer.sendToClient(ws, msg)
           );
-          session.executeQuery(content).catch((error) => {
+          session.executeQuery(content, { history }).catch((error) => {
             console.error("Error executing query:", error);
             wsServer.sendToClient(ws, {
               type: "error",
@@ -3901,6 +4385,7 @@ async function runInteractiveMode() {
       maxSteps: 25
     };
     agent = await createInsightAgent(agentConfig);
+    const conversationHistory = [];
     const rl = readline.createInterface({
       input: process.stdin,
       output: process.stdout,
@@ -3919,6 +4404,81 @@ async function runInteractiveMode() {
       userExited = true;
       rl.prompt();
     });
+    /**
+     * Run one query through the agent (streaming or not, per `config.stream`),
+     * print the answer, and return the messages the turn produced.
+     *
+     * @param query - The user query
+     * @param messages - Conversation history; a copy is passed to the agent
+     * @param agentProgress - Progress reporter updated as tools run
+     * @returns `{ assistantMessages }` extracted from the agent result
+     */
+    const executeAgentQuery = async (query, messages, agentProgress) => {
+      // One step callback shared by the streaming and non-streaming paths.
+      const onStepFinish = (step) => {
+        for (const toolCall of step.toolCalls ?? []) {
+          const toolName = toolCall.toolName;
+          if (toolName !== "bash") {
+            agentProgress.updateTool(toolName);
+            continue;
+          }
+          // Elsewhere in this file tool-call arguments are read from `input`
+          // (see extractMessagesFromResponse). `args` is kept as a fallback so
+          // the command preview is not silently empty.
+          const rawCommand = toolCall.input?.command ?? toolCall.args?.command;
+          const command = typeof rawCommand === "string" ? rawCommand : "";
+          const shortCmd = command.split("\n")[0].substring(0, 50);
+          agentProgress.updateTool(
+            toolName,
+            shortCmd + (command.length > 50 ? "..." : "")
+          );
+        }
+        for (const toolResult of step.toolResults ?? []) {
+          agentProgress.updateToolResult(
+            toolResult.toolName,
+            !toolResult.isError
+          );
+        }
+      };
+      const callOptions = { messages: [...messages], onStepFinish };
+      if (config.stream) {
+        const result = await agent.stream(query, callOptions);
+        agentProgress.stop();
+        console.log("\n\u2728 Answer:\n");
+        for await (const chunk of result.textStream) {
+          process.stdout.write(chunk);
+        }
+        console.log();
+        await result.response;
+        const assistantMessages = await extractMessagesFromResponse(result);
+        return { assistantMessages };
+      }
+      const result = await agent.generate(query, callOptions);
+      agentProgress.succeed();
+      console.log("\n\u2728 Answer:\n");
+      console.log(result.text);
+      const assistantMessages = await extractMessagesFromResponse(result);
+      return { assistantMessages };
+    };
     const processQuery = async (query) => {
       if (query === "exit" || query === "quit") {
         return true;
@@ -3956,76 +4516,55 @@ async function runInteractiveMode() {
         return false;
       }
       try {
+        if (conversationHistory.length > 0) {
+          console.log(
+            `(continuing conversation with ${conversationHistory.length} previous messages)
+`
+          );
+        }
         const agentProgress = new AgentProgress(!config.stream);
         agentProgress.startThinking();
-        if (config.stream) {
-          const result = await agent.stream(query, {
-            onStepFinish: (step) => {
-              if (step.toolCalls?.length) {
-                step.toolCalls.forEach((toolCall) => {
-                  const toolName = toolCall.toolName;
-                  if (toolName === "bash") {
-                    const command = toolCall.args?.command || "";
-                    const shortCmd = command.split("\n")[0].substring(0, 50);
-                    agentProgress.updateTool(
-                      toolName,
-                      shortCmd + (command.length > 50 ? "..." : "")
-                    );
-                  } else {
-                    agentProgress.updateTool(toolName);
-                  }
-                });
-              }
-              if (step.toolResults?.length) {
-                step.toolResults.forEach((toolResult) => {
-                  agentProgress.updateToolResult(
-                    toolResult.toolName,
-                    !toolResult.isError
-                  );
-                });
-              }
-            }
-          });
-          agentProgress.stop();
-          console.log("\n\u2728 Answer:\n");
-          for await (const chunk of result.textStream) {
-            process.stdout.write(chunk);
+        let didCompact = false;
+        let currentHistory = [...conversationHistory];
+        try {
+          const { assistantMessages } = await executeAgentQuery(
+            query,
+            currentHistory,
+            agentProgress
+          );
+          conversationHistory.push(createUserMessage(query));
+          conversationHistory.push(...assistantMessages);
+        } catch (error) {
+          if (isTokenLimitError(error) && conversationHistory.length > 0) {
+            agentProgress.stop();
+            console.log(
+              "\n\u26A0\uFE0F  Context was trimmed to fit model limits\n"
+            );
+            const compactedHistory = compactConversation(conversationHistory);
+            currentHistory = compactedHistory;
+            didCompact = true;
+            const retryProgress = new AgentProgress(!config.stream);
+            retryProgress.startThinking();
+            const { assistantMessages } = await executeAgentQuery(
+              query,
+              currentHistory,
+              retryProgress
+            );
+            conversationHistory.length = 0;
+            conversationHistory.push(...compactedHistory);
+            conversationHistory.push(createUserMessage(query));
+            conversationHistory.push(...assistantMessages);
+          } else {
+            throw error;
           }
-          console.log();
-          await result.response;
-        } else {
-          const result = await agent.generate(query, {
-            onStepFinish: (step) => {
-              if (step.toolCalls?.length) {
-                step.toolCalls.forEach((toolCall) => {
-                  const toolName = toolCall.toolName;
-                  if (toolName === "bash") {
-                    const command = toolCall.args?.command || "";
-                    const shortCmd = command.split("\n")[0].substring(0, 50);
-                    agentProgress.updateTool(
-                      toolName,
-                      shortCmd + (command.length > 50 ? "..." : "")
-                    );
-                  } else {
-                    agentProgress.updateTool(toolName);
-                  }
-                });
-              }
-              if (step.toolResults?.length) {
-                step.toolResults.forEach((toolResult) => {
-                  agentProgress.updateToolResult(
-                    toolResult.toolName,
-                    !toolResult.isError
-                  );
-                });
-              }
-            }
-          });
-          agentProgress.succeed();
-          console.log("\n\u2728 Answer:\n");
-          console.log(result.text);
         }
         console.log("\n" + "\u2500".repeat(50) + "\n");
+        if (didCompact) {
+          console.log(
+            `(conversation compacted to ${conversationHistory.length} messages)
+`
+          );
+        }
       } catch (error) {
         console.error("\n\u274C Query Error:");
         if (error instanceof PhoenixClientError) {