npm - @probelabs/probe - Versions diffs - 0.6.0-rc271 → 0.6.0-rc273 - Mend

@probelabs/probe 0.6.0-rc271 → 0.6.0-rc273

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

package/bin/binaries/{probe-v0.6.0-rc271-aarch64-apple-darwin.tar.gz → probe-v0.6.0-rc273-aarch64-apple-darwin.tar.gz} +0 -0
package/bin/binaries/probe-v0.6.0-rc273-aarch64-unknown-linux-musl.tar.gz +0 -0
package/bin/binaries/probe-v0.6.0-rc273-x86_64-apple-darwin.tar.gz +0 -0
package/bin/binaries/probe-v0.6.0-rc273-x86_64-pc-windows-msvc.zip +0 -0
package/bin/binaries/probe-v0.6.0-rc273-x86_64-unknown-linux-musl.tar.gz +0 -0
package/build/agent/contextCompactor.js +42 -25
package/build/agent/index.js +78 -177
package/build/agent/shared/prompts.js +4 -3
package/build/agent/tasks/taskTool.js +46 -235
package/build/tools/analyzeAll.js +3 -4
package/build/tools/edit.js +3 -3
package/build/tools/vercel.js +1 -1
package/cjs/agent/ProbeAgent.cjs +95 -202
package/cjs/index.cjs +95 -202
package/package.json +1 -1
package/src/agent/contextCompactor.js +42 -25
package/src/agent/shared/prompts.js +4 -3
package/src/agent/tasks/taskTool.js +46 -235
package/src/tools/analyzeAll.js +3 -4
package/src/tools/edit.js +3 -3
package/src/tools/vercel.js +1 -1
package/bin/binaries/probe-v0.6.0-rc271-aarch64-unknown-linux-musl.tar.gz +0 -0
package/bin/binaries/probe-v0.6.0-rc271-x86_64-apple-darwin.tar.gz +0 -0
package/bin/binaries/probe-v0.6.0-rc271-x86_64-pc-windows-msvc.zip +0 -0
package/bin/binaries/probe-v0.6.0-rc271-x86_64-unknown-linux-musl.tar.gz +0 -0

package/cjs/index.cjs CHANGED Viewed

@@ -13365,22 +13365,16 @@ var init_JsonShapeDeserializer = __esm({
           if (Array.isArray(value) && ns.isListSchema()) {
             const listMember = ns.getValueSchema();
             const out = [];
-            const sparse = !!ns.getMergedTraits().sparse;
             for (const item of value) {
-              if (sparse || item != null) {
-                out.push(this._read(listMember, item));
-              }
+              out.push(this._read(listMember, item));
             }
             return out;
           }
           if (ns.isMapSchema()) {
             const mapMember = ns.getValueSchema();
             const out = {};
-            const sparse = !!ns.getMergedTraits().sparse;
             for (const [_k, _v] of Object.entries(value)) {
-              if (sparse || _v != null) {
-                out[_k] = this._read(mapMember, _v);
-              }
+              out[_k] = this._read(mapMember, _v);
             }
             return out;
           }
@@ -15029,18 +15023,18 @@ var init_XmlShapeDeserializer = __esm({
           return value;
         }
         if (typeof value === "object") {
-          const sparse = !!traits.sparse;
           const flat = !!traits.xmlFlattened;
           if (ns.isListSchema()) {
             const listValue = ns.getValueSchema();
             const buffer2 = [];
             const sourceKey = listValue.getMergedTraits().xmlName ?? "member";
             const source = flat ? value : (value[0] ?? value)[sourceKey];
+            if (source == null) {
+              return buffer2;
+            }
             const sourceArray = Array.isArray(source) ? source : [source];
             for (const v5 of sourceArray) {
-              if (v5 != null || sparse) {
-                buffer2.push(this.readSchema(listValue, v5));
-              }
+              buffer2.push(this.readSchema(listValue, v5));
             }
             return buffer2;
           }
@@ -15059,9 +15053,7 @@ var init_XmlShapeDeserializer = __esm({
             for (const entry of entries) {
               const key = entry[keyProperty];
               const value2 = entry[valueProperty];
-              if (value2 != null || sparse) {
-                buffer[key] = this.readSchema(memberNs, value2);
-              }
+              buffer[key] = this.readSchema(memberNs, value2);
             }
             return buffer;
           }
@@ -19403,7 +19395,7 @@ var require_package2 = __commonJS({
     module2.exports = {
       name: "@aws-sdk/client-bedrock-runtime",
       description: "AWS SDK for JavaScript Bedrock Runtime Client for Node.js, Browser and React Native",
-      version: "3.1001.0",
+      version: "3.1002.0",
       scripts: {
         build: "concurrently 'yarn:build:types' 'yarn:build:es' && yarn build:cjs",
         "build:cjs": "node ../../scripts/compilation/inline client-bedrock-runtime",
@@ -19427,21 +19419,21 @@ var require_package2 = __commonJS({
       dependencies: {
         "@aws-crypto/sha256-browser": "5.2.0",
         "@aws-crypto/sha256-js": "5.2.0",
-        "@aws-sdk/core": "^3.973.16",
-        "@aws-sdk/credential-provider-node": "^3.972.15",
+        "@aws-sdk/core": "^3.973.17",
+        "@aws-sdk/credential-provider-node": "^3.972.16",
         "@aws-sdk/eventstream-handler-node": "^3.972.9",
         "@aws-sdk/middleware-eventstream": "^3.972.6",
         "@aws-sdk/middleware-host-header": "^3.972.6",
         "@aws-sdk/middleware-logger": "^3.972.6",
         "@aws-sdk/middleware-recursion-detection": "^3.972.6",
-        "@aws-sdk/middleware-user-agent": "^3.972.16",
+        "@aws-sdk/middleware-user-agent": "^3.972.17",
         "@aws-sdk/middleware-websocket": "^3.972.11",
         "@aws-sdk/region-config-resolver": "^3.972.6",
-        "@aws-sdk/token-providers": "3.1001.0",
+        "@aws-sdk/token-providers": "3.1002.0",
         "@aws-sdk/types": "^3.973.4",
         "@aws-sdk/util-endpoints": "^3.996.3",
         "@aws-sdk/util-user-agent-browser": "^3.972.6",
-        "@aws-sdk/util-user-agent-node": "^3.973.1",
+        "@aws-sdk/util-user-agent-node": "^3.973.2",
         "@smithy/config-resolver": "^4.4.9",
         "@smithy/core": "^3.23.7",
         "@smithy/eventstream-serde-browser": "^4.2.10",
@@ -20190,7 +20182,7 @@ var init_package = __esm({
   "node_modules/@aws-sdk/nested-clients/package.json"() {
     package_default = {
       name: "@aws-sdk/nested-clients",
-      version: "3.996.4",
+      version: "3.996.5",
       description: "Nested clients for AWS SDK packages.",
       main: "./dist-cjs/index.js",
       module: "./dist-es/index.js",
@@ -20219,16 +20211,16 @@ var init_package = __esm({
       dependencies: {
         "@aws-crypto/sha256-browser": "5.2.0",
         "@aws-crypto/sha256-js": "5.2.0",
-        "@aws-sdk/core": "^3.973.16",
+        "@aws-sdk/core": "^3.973.17",
         "@aws-sdk/middleware-host-header": "^3.972.6",
         "@aws-sdk/middleware-logger": "^3.972.6",
         "@aws-sdk/middleware-recursion-detection": "^3.972.6",
-        "@aws-sdk/middleware-user-agent": "^3.972.16",
+        "@aws-sdk/middleware-user-agent": "^3.972.17",
         "@aws-sdk/region-config-resolver": "^3.972.6",
         "@aws-sdk/types": "^3.973.4",
         "@aws-sdk/util-endpoints": "^3.996.3",
         "@aws-sdk/util-user-agent-browser": "^3.972.6",
-        "@aws-sdk/util-user-agent-node": "^3.973.1",
+        "@aws-sdk/util-user-agent-node": "^3.973.2",
         "@smithy/config-resolver": "^4.4.9",
         "@smithy/core": "^3.23.7",
         "@smithy/fetch-http-handler": "^5.3.12",
@@ -35877,20 +35869,15 @@ var init_zod = __esm({
 // src/agent/tasks/taskTool.js
 function createTaskCompletionBlockedMessage(taskSummary) {
-  return `<task_completion_blocked>
-You cannot complete yet. The following tasks are still unresolved:
+  return `You cannot complete yet. The following tasks are still unresolved:
 ${taskSummary}
-Required action:
-1. For each "pending" or "in_progress" task, either:
-   - Complete the work and mark it: <task><action>complete</action><id>task-X</id></task>
-   - Or cancel if no longer needed: <task><action>update</action><id>task-X</id><status>cancelled</status></task>
+For each pending/in_progress task, either:
+- Complete it: call task tool with action="complete", id="task-X"
+- Cancel it: call task tool with action="update", id="task-X", status="cancelled"
-2. After ALL tasks are resolved (completed or cancelled), call attempt_completion again.
-Use <task><action>list</action></task> to review current status.
-</task_completion_blocked>`;
+After all tasks are resolved, call attempt_completion again.`;
 }
 function createTaskTool(options = {}) {
   const { taskManager, tracer, debug = false } = options;
@@ -36113,145 +36100,46 @@ var init_taskTool = __esm({
       dependencies: external_exports.array(external_exports.string()).optional(),
       after: external_exports.string().optional()
     });
-    taskSystemPrompt = `[Task Management System]
-You have access to a task tracking tool to organize your work on complex requests.
-## When to Create Tasks
-CREATE TASKS when the request has **multiple distinct deliverables or goals**:
-- "Fix bug A AND add feature B" \u2192 Two separate tasks
-- "Investigate auth, payments, AND notifications" \u2192 Three independent areas
-- "Implement X, then add tests, then update docs" \u2192 Sequential phases with different outputs
-- User explicitly asks for a plan or task breakdown
-SKIP TASKS for single-goal requests, even if they require multiple searches:
-- "How does ranking work?" \u2192 Just investigate and answer (one goal)
-- "What does function X do?" \u2192 Just look it up (one goal)
-- "Explain the authentication flow" \u2192 Just trace and explain (one goal)
-- "Find where errors are logged" \u2192 Just search and report (one goal)
-**Key insight**: Multiple *internal steps* (search, read, analyze) are NOT the same as multiple *goals*.
-A single investigation with many steps is still ONE task, not many.
-## Task Granularity
-Tasks represent LOGICAL UNITS OF WORK, not individual files or steps:
-- "Fix 8 similar test files" \u2192 ONE task (same type of fix across files)
-- "Update API + tests + docs" \u2192 THREE tasks (different types of work)
-- "Implement feature in 5 files" \u2192 ONE task (single feature)
-**Rule of thumb**: If you're creating more than 3-4 tasks, you're probably too granular.
-**Anti-patterns to avoid**:
-- One task per file \u274C
-- One task per function \u274C
-- One task per repository (when same type of work) \u274C
-**Good patterns**:
-- One task per distinct deliverable \u2713
-- One task per phase (implement, test, document) \u2713
-- One task per different type of work \u2713
-MODIFY TASKS when (during execution):
-- You discover the problem is more complex than expected \u2192 Add new tasks
-- A single task covers too much scope \u2192 Split into smaller tasks
-- You find related work that needs attention \u2192 Add dependent tasks
-- A task becomes irrelevant based on findings \u2192 Cancel it
-- Task priorities change based on discoveries \u2192 Update priority
-- You learn new context \u2192 Update task description
-## Task Workflow
-**STEP 1 - Plan (at start):**
-Analyze the request and create tasks for each logical step:
-<task>
-<action>create</action>
-<tasks>[
-  {"title": "Search for authentication module", "priority": "high"},
-  {"title": "Analyze login flow implementation", "dependencies": ["task-1"]},
-  {"title": "Find session management code", "dependencies": ["task-1"]},
-  {"title": "Summarize authentication architecture", "dependencies": ["task-2", "task-3"]}
-]</tasks>
-</task>
-**STEP 2 - Execute (during work):**
-Update task status as you work:
-<task>
-<action>update</action>
-<id>task-1</id>
-<status>in_progress</status>
-</task>
-... do the work (search, extract, etc.) ...
-<task>
-<action>complete</action>
-<id>task-1</id>
-</task>
-**STEP 2b - Adapt (when you discover new work):**
-As you work, you may discover that:
-- A task is more complex than expected \u2192 Split it into subtasks
-- New areas need investigation \u2192 Add new tasks
-- Some tasks are no longer needed \u2192 Cancel them
-- Task order should change \u2192 Update dependencies
-*Adding a new task when you discover more work:*
-<task>
-<action>create</action>
-<title>Investigate caching layer</title>
-<description>Found references to Redis caching in auth module</description>
-</task>
-*Inserting a task after a specific task (to maintain logical order):*
-<task>
-<action>create</action>
-<title>Check rate limiting</title>
-<after>task-2</after>
-</task>
-*Cancelling and splitting a complex task:*
-<task>
-<action>update</action>
-<id>task-3</id>
-<status>cancelled</status>
-</task>
-<task>
-<action>create</action>
-<tasks>[
-  {"title": "Review JWT token generation", "priority": "high"},
-  {"title": "Review token refresh logic"}
-]</tasks>
-</task>
-**STEP 3 - Finish (before completion):**
-Before calling attempt_completion, ensure ALL tasks are either:
-- \`completed\` - you finished the work
-- \`cancelled\` - no longer needed
-If you created tasks, you MUST resolve them all before completing.
-## Key Rules
-1. **Dependencies are enforced**: A task cannot start until its dependencies are completed
-2. **Circular dependencies are rejected**: task-1 \u2192 task-2 \u2192 task-1 is invalid
-3. **Completion is blocked**: attempt_completion will fail if tasks remain unresolved
-4. **List to review**: Use <task><action>list</action></task> to see current task status
-5. **Tasks are living documents**: Add, split, or cancel tasks as you learn more about the problem
+    taskSystemPrompt = `[Task Management]
+Use the task tool to track progress on complex requests with multiple distinct goals.
+## When to Use Tasks
+CREATE tasks when the request has **multiple separate deliverables**:
+- "Fix bug A AND add feature B" \u2192 two tasks
+- "Investigate auth, payments, AND notifications" \u2192 three tasks
+- "Implement X, then add tests, then update docs" \u2192 three sequential tasks
+SKIP tasks for single-goal requests, even complex ones:
+- "How does ranking work?" \u2014 just investigate and answer
+- "Explain the authentication flow" \u2014 just trace and explain
+Multiple internal steps (search, read, analyze) for one goal \u2260 multiple tasks.
+## Granularity
+Tasks = logical units of work, not files or steps.
+- "Fix 8 similar test files" \u2192 ONE task (same fix repeated)
+- "Update API + tests + docs" \u2192 THREE tasks (different work types)
+- Max 3\u20134 tasks. More means you're too granular.
+## Workflow
+1. **Plan**: Call task tool with action="create" and a tasks array up front
+2. **Execute**: Update status to "in_progress" / "completed" as you work. Add, split, or cancel tasks as you learn more.
+3. **Finish**: All tasks must be "completed" or "cancelled" before calling attempt_completion.
+## Rules
+- Dependencies are enforced: a task cannot start until its dependencies are completed
+- Circular dependencies are rejected
+- attempt_completion is blocked while tasks remain unresolved
 `;
-    taskGuidancePrompt = `<task_guidance>
-Does this request have MULTIPLE DISTINCT GOALS?
+    taskGuidancePrompt = `Does this request have MULTIPLE DISTINCT GOALS?
 - "Do A AND B AND C" (multiple goals) \u2192 Create tasks for each goal
 - "Investigate/explain/find X" (single goal) \u2192 Skip tasks, just answer directly
-Multiple internal steps (search, read, analyze) for ONE goal = NO tasks needed.
-Only create tasks when there are separate deliverables the user is asking for.
-If creating tasks, use the task tool with action="create" first.
-</task_guidance>`;
+Multiple internal steps for ONE goal = NO tasks needed.
+If creating tasks, use the task tool with action="create" first.`;
   }
 });
@@ -82750,9 +82638,10 @@ If the solution is clear, you can jump to implementation right away. If not, ask
 - After every significant change, verify the project still builds and passes linting. Do not wait until the end to discover breakage.
 # After Implementation
-- Always run the project's tests before considering the task complete. If tests fail, fix them.
-- Run lint and typecheck commands if known for the project.
-- If a build, lint, or test fails, fix the issue before finishing.
+- Verify the project builds successfully. If it doesn't, fix the build before moving on.
+- Run lint and typecheck commands if known for the project. Fix any new warnings or errors you introduced.
+- Add tests for any new or changed functionality. Tests must cover the main path and important edge cases.
+- Run the project's full test suite. If any tests fail (including pre-existing ones you may have broken), fix them before finishing.
 - When the task is done, respond to the user with a concise summary of what was implemented, what files were changed, and any relevant details. Include links (e.g. pull request URL) so the user has everything they need.
 # GitHub Integration
@@ -91965,6 +91854,19 @@ function isContextLimitError(error2) {
   }
   return false;
 }
+function messageContainsCompletion(msg) {
+  if (Array.isArray(msg.toolInvocations)) {
+    if (msg.toolInvocations.some((t5) => t5.toolName === "attempt_completion")) return true;
+  }
+  if (Array.isArray(msg.tool_calls)) {
+    if (msg.tool_calls.some((t5) => t5.function?.name === "attempt_completion")) return true;
+  }
+  if (Array.isArray(msg.content)) {
+    if (msg.content.some((p5) => p5.type === "tool-call" && p5.toolName === "attempt_completion")) return true;
+  }
+  const text = typeof msg.content === "string" ? msg.content : "";
+  return text.includes("attempt_completion");
+}
 function identifyMessageSegments(messages) {
   const segments = [];
   let currentSegment = null;
@@ -91973,27 +91875,23 @@ function identifyMessageSegments(messages) {
     if (msg.role === "system") {
       continue;
     }
+    if (msg.role === "tool" && currentSegment) {
+      currentSegment.monologueIndices.push(i5);
+      continue;
+    }
     if (msg.role === "user") {
-      const content = typeof msg.content === "string" ? msg.content : "";
-      const isToolResult = content.includes("<tool_result>");
-      if (isToolResult && currentSegment) {
-        currentSegment.finalIndex = i5;
+      if (currentSegment) {
         segments.push(currentSegment);
-        currentSegment = null;
-      } else {
-        if (currentSegment) {
-          segments.push(currentSegment);
-        }
-        currentSegment = {
-          userIndex: i5,
-          monologueIndices: [],
-          finalIndex: null
-        };
       }
+      currentSegment = {
+        userIndex: i5,
+        monologueIndices: [],
+        finalIndex: null
+      };
     }
     if (msg.role === "assistant" && currentSegment) {
-      const content = typeof msg.content === "string" ? msg.content : "";
-      if (content.includes("<attempt_completion>") || content.includes("attempt_completion")) {
+      const hasCompletion = messageContainsCompletion(msg);
+      if (hasCompletion) {
         currentSegment.monologueIndices.push(i5);
         currentSegment.finalIndex = i5;
         segments.push(currentSegment);
@@ -110100,8 +109998,7 @@ Instructions:
 - Format as a structured list if multiple items found
 - If nothing relevant is found in this chunk, respond with "No relevant items found in this chunk."
 - Do NOT summarize the code - extract the specific information requested
-- IMPORTANT: When completing, always use the FULL format: <attempt_completion><result>YOUR ANSWER HERE</result></attempt_completion>
-- Do NOT use the shorthand <attempt_complete></attempt_complete> format`;
+- When done, use the attempt_completion tool with your answer as the result.`;
   try {
     const result = await delegate({
       task,
@@ -110166,7 +110063,7 @@ async function aggregateResults(chunkResults2, aggregation, extractionPrompt, op
 ${stripResultTags(r5.result)}`).join("\n\n");
   const completionNote = `
-IMPORTANT: When completing, always use the FULL format: <attempt_completion><result>YOUR ANSWER HERE</result></attempt_completion>`;
+When done, use the attempt_completion tool with your answer as the result.`;
   const aggregationPrompts = {
     summarize: `Synthesize these analyses into a comprehensive summary. Combine related findings, remove redundancy, and present a coherent overview.
@@ -110324,7 +110221,7 @@ Your answer should:
 Format your response as a well-structured document that fully answers: "${question}"
-IMPORTANT: When completing, use the FULL format: <attempt_completion><result>YOUR ANSWER HERE</result></attempt_completion>`;
+When done, use the attempt_completion tool with your answer as the result.`;
   try {
     const result = await delegate({
       task: synthesisTask,
@@ -110655,7 +110552,7 @@ function buildSearchDelegateTask({ searchQuery, searchPath, exact, language, all
     "",
     "Strategy for complex queries:",
     "1. Analyze the query - identify key concepts, entities, and relationships",
-    '2. Run focused searches for each concept (e.g., "error handling" + "authentication" separately)',
+    '2. Run focused searches for each independent concept (e.g., for "how do payments work and how are emails sent", search "payments" and "emails" separately since they are unrelated)',
     "3. Use extract to verify relevance of promising results",
     "4. Combine all relevant targets in your final response",
     "",
@@ -111394,9 +111291,7 @@ Example: <edit><file_path>${file_path}</file_path><symbol>${allMatches[0].qualif
   if (fileTracker) {
     const check = fileTracker.checkSymbolContent(resolvedPath2, symbol15, symbolInfo.code);
     if (!check.ok && check.reason === "stale") {
-      return `Error editing ${file_path}: Symbol "${symbol15}" has changed since you last read it. Use extract to re-read the current content, then retry.
-Example: <extract><targets>${file_path}#${symbol15}</targets></extract>`;
+      return `Error editing ${file_path}: Symbol "${symbol15}" has changed since you last read it. Use the extract tool with targets="${file_path}#${symbol15}" to re-read the current content, then retry.`;
     }
   }
   const content = await import_fs12.promises.readFile(resolvedPath2, "utf-8");
@@ -111634,9 +111529,7 @@ Parameters:
             }
             if (options.fileTracker && !options.fileTracker.isFileSeen(resolvedPath2)) {
               const displayPath = toRelativePath(resolvedPath2, workspaceRoot);
-              return `Error editing ${displayPath}: This file has not been read yet in this session. Use 'extract' to read the file first, then retry your edit. This ensures you are working with the current file content.
-Example: <extract><targets>${displayPath}</targets></extract>`;
+              return `Error editing ${displayPath}: This file has not been read yet in this session. Use the extract tool with targets="${displayPath}" to read the file first, then retry your edit.`;
             }
             if (symbol15 !== void 0 && symbol15 !== null) {
               return await handleSymbolEdit({ resolvedPath: resolvedPath2, file_path, symbol: symbol15, new_string, position, debug, cwd, fileTracker: options.fileTracker });
@@ -111656,7 +111549,7 @@ Example: <extract><targets>${displayPath}</targets></extract>`;
                 const displayPath = toRelativePath(resolvedPath2, workspaceRoot);
                 return `Error editing ${displayPath}: ${staleCheck.message}
-Example: <extract><targets>${displayPath}</targets></extract>`;
+Use the extract tool with targets="${displayPath}" to re-read the file, then retry.`;
               }
             }
             const content = await import_fs12.promises.readFile(resolvedPath2, "utf-8");

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@probelabs/probe",
-  "version": "0.6.0-rc271",
+  "version": "0.6.0-rc273",
   "description": "Node.js wrapper for the probe code search tool",
   "main": "src/index.js",
   "module": "src/index.js",

package/src/agent/contextCompactor.js CHANGED Viewed

@@ -58,6 +58,28 @@ export function isContextLimitError(error) {
   return false;
 }
+/**
+ * Check if an assistant message contains an attempt_completion tool call.
+ * Supports both native tool calling (toolInvocations/tool_calls) and text content.
+ *
+ * Checks are tried in order and the first match wins:
+ *  1. `msg.toolInvocations` (array) - any entry whose `toolName` is 'attempt_completion'
+ *  2. `msg.tool_calls` (array) - any entry whose `function.name` is 'attempt_completion'
+ *  3. `msg.content` (array) - any part with `type === 'tool-call'` and that `toolName`
+ *  4. `msg.content` (string) - substring search for 'attempt_completion'
+ *
+ * NOTE(review): step 4 is a plain substring match, so a string message that merely
+ * mentions "attempt_completion" is also reported as a completion.
+ * NOTE(review): array entries are dereferenced directly (`t.toolName`, `t.function`, `p.type`),
+ * so a null/undefined entry would throw - presumably callers never pass one; confirm.
+ *
+ * @param {Object} msg - Message object; only toolInvocations, tool_calls and content are read
+ * @returns {boolean} - true if any of the checks above matches, false otherwise
+ */
+function messageContainsCompletion(msg) {
+  // Native tool calling: Vercel AI SDK uses toolInvocations (entries matched by toolName)
+  if (Array.isArray(msg.toolInvocations)) {
+    if (msg.toolInvocations.some(t => t.toolName === 'attempt_completion')) return true;
+  }
+  // Native tool calling: OpenAI format uses tool_calls (entries matched by function.name)
+  if (Array.isArray(msg.tool_calls)) {
+    if (msg.tool_calls.some(t => t.function?.name === 'attempt_completion')) return true;
+  }
+  // Multipart content (Vercel AI SDK v4+): only parts typed 'tool-call' are considered
+  if (Array.isArray(msg.content)) {
+    if (msg.content.some(p => p.type === 'tool-call' && p.toolName === 'attempt_completion')) return true;
+  }
+  // Text content fallback: non-string content is treated as an empty string and never matches
+  const text = typeof msg.content === 'string' ? msg.content : '';
+  return text.includes('attempt_completion');
+}
 /**
  * Identify message boundaries in conversation history
  * Structure: <user> -> <internal agentic monologue> -> <final-agent-answer>
@@ -65,7 +87,7 @@ export function isContextLimitError(error) {
  * A "segment" is:
  * - user message (role: 'user')
 * - followed by 0+ assistant messages and 'tool'-role result messages (internal monologue)
- * - ending with tool_result or attempt_completion (final answer)
+ * - ending with attempt_completion tool call (final answer)
  *
  * @param {Array} messages - Array of message objects with {role, content}
  * @returns {Array} - Array of segments, each containing {userIndex, monologueIndices, finalIndex}
@@ -82,38 +104,33 @@ export function identifyMessageSegments(messages) {
       continue;
     }
+    // Tool result message (native tool calling format)
+    if (msg.role === 'tool' && currentSegment) {
+      currentSegment.monologueIndices.push(i);
+      continue;
+    }
     // User message starts a new segment
     if (msg.role === 'user') {
-      // Check if this is a tool_result (final answer from previous segment)
-      const content = typeof msg.content === 'string' ? msg.content : '';
-      const isToolResult = content.includes('<tool_result>');
-      if (isToolResult && currentSegment) {
-        // This is the final answer for the current segment
-        currentSegment.finalIndex = i;
+      // Save previous segment if it exists
+      if (currentSegment) {
         segments.push(currentSegment);
-        currentSegment = null;
-      } else {
-        // Save previous segment if it exists
-        if (currentSegment) {
-          segments.push(currentSegment);
-        }
-        // Start new segment
-        currentSegment = {
-          userIndex: i,
-          monologueIndices: [],
-          finalIndex: null
-        };
       }
+      // Start new segment
+      currentSegment = {
+        userIndex: i,
+        monologueIndices: [],
+        finalIndex: null
+      };
     }
     // Assistant message is part of monologue
     if (msg.role === 'assistant' && currentSegment) {
-      const content = typeof msg.content === 'string' ? msg.content : '';
+      // Check if this contains an attempt_completion tool call (native or XML format)
+      const hasCompletion = messageContainsCompletion(msg);
-      // Check if this contains attempt_completion (marks end of segment)
-      if (content.includes('<attempt_completion>') || content.includes('attempt_completion')) {
+      if (hasCompletion) {
         currentSegment.monologueIndices.push(i);
         currentSegment.finalIndex = i;
         segments.push(currentSegment);
@@ -138,7 +155,7 @@ export function identifyMessageSegments(messages) {
  *
  * Strategy:
  * 1. Keep all user messages
- * 2. Keep all final answers (tool_results, attempt_completion)
+ * 2. Keep all final answers (attempt_completion)
  * 3. Remove intermediate monologue messages from completed segments
  * 4. Keep the most recent (active) segment intact
  *

package/src/agent/shared/prompts.js CHANGED Viewed

@@ -94,9 +94,10 @@ If the solution is clear, you can jump to implementation right away. If not, ask
 - After every significant change, verify the project still builds and passes linting. Do not wait until the end to discover breakage.
 # After Implementation
-- Always run the project's tests before considering the task complete. If tests fail, fix them.
-- Run lint and typecheck commands if known for the project.
-- If a build, lint, or test fails, fix the issue before finishing.
+- Verify the project builds successfully. If it doesn't, fix the build before moving on.
+- Run lint and typecheck commands if known for the project. Fix any new warnings or errors you introduced.
+- Add tests for any new or changed functionality. Tests must cover the main path and important edge cases.
+- Run the project's full test suite. If any tests fail (including pre-existing ones you may have broken), fix them before finishing.
 - When the task is done, respond to the user with a concise summary of what was implemented, what files were changed, and any relevant details. Include links (e.g. pull request URL) so the user has everything they need.
 # GitHub Integration