@probelabs/probe 0.6.0-rc272 → 0.6.0-rc274

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31728,8 +31728,7 @@ Instructions:
31728
31728
  - Format as a structured list if multiple items found
31729
31729
  - If nothing relevant is found in this chunk, respond with "No relevant items found in this chunk."
31730
31730
  - Do NOT summarize the code - extract the specific information requested
31731
- - IMPORTANT: When completing, always use the FULL format: <attempt_completion><result>YOUR ANSWER HERE</result></attempt_completion>
31732
- - Do NOT use the shorthand <attempt_complete></attempt_complete> format`;
31731
+ - When done, use the attempt_completion tool with your answer as the result.`;
31733
31732
  try {
31734
31733
  const result = await delegate({
31735
31734
  task,
@@ -31794,7 +31793,7 @@ async function aggregateResults(chunkResults2, aggregation, extractionPrompt, op
31794
31793
  ${stripResultTags(r5.result)}`).join("\n\n");
31795
31794
  const completionNote = `
31796
31795
 
31797
- IMPORTANT: When completing, always use the FULL format: <attempt_completion><result>YOUR ANSWER HERE</result></attempt_completion>`;
31796
+ When done, use the attempt_completion tool with your answer as the result.`;
31798
31797
  const aggregationPrompts = {
31799
31798
  summarize: `Synthesize these analyses into a comprehensive summary. Combine related findings, remove redundancy, and present a coherent overview.
31800
31799
 
@@ -31952,7 +31951,7 @@ Your answer should:
31952
31951
 
31953
31952
  Format your response as a well-structured document that fully answers: "${question}"
31954
31953
 
31955
- IMPORTANT: When completing, use the FULL format: <attempt_completion><result>YOUR ANSWER HERE</result></attempt_completion>`;
31954
+ When done, use the attempt_completion tool with your answer as the result.`;
31956
31955
  try {
31957
31956
  const result = await delegate({
31958
31957
  task: synthesisTask,
@@ -39183,9 +39182,7 @@ Example: <edit><file_path>${file_path}</file_path><symbol>${allMatches[0].qualif
39183
39182
  if (fileTracker) {
39184
39183
  const check = fileTracker.checkSymbolContent(resolvedPath2, symbol15, symbolInfo.code);
39185
39184
  if (!check.ok && check.reason === "stale") {
39186
- return `Error editing ${file_path}: Symbol "${symbol15}" has changed since you last read it. Use extract to re-read the current content, then retry.
39187
-
39188
- Example: <extract><targets>${file_path}#${symbol15}</targets></extract>`;
39185
+ return `Error editing ${file_path}: Symbol "${symbol15}" has changed since you last read it. Use the extract tool with targets="${file_path}#${symbol15}" to re-read the current content, then retry.`;
39189
39186
  }
39190
39187
  }
39191
39188
  const content = await import_fs6.promises.readFile(resolvedPath2, "utf-8");
@@ -39423,9 +39420,7 @@ Parameters:
39423
39420
  }
39424
39421
  if (options.fileTracker && !options.fileTracker.isFileSeen(resolvedPath2)) {
39425
39422
  const displayPath = toRelativePath(resolvedPath2, workspaceRoot);
39426
- return `Error editing ${displayPath}: This file has not been read yet in this session. Use 'extract' to read the file first, then retry your edit. This ensures you are working with the current file content.
39427
-
39428
- Example: <extract><targets>${displayPath}</targets></extract>`;
39423
+ return `Error editing ${displayPath}: This file has not been read yet in this session. Use the extract tool with targets="${displayPath}" to read the file first, then retry your edit.`;
39429
39424
  }
39430
39425
  if (symbol15 !== void 0 && symbol15 !== null) {
39431
39426
  return await handleSymbolEdit({ resolvedPath: resolvedPath2, file_path, symbol: symbol15, new_string, position, debug, cwd, fileTracker: options.fileTracker });
@@ -39445,7 +39440,7 @@ Example: <extract><targets>${displayPath}</targets></extract>`;
39445
39440
  const displayPath = toRelativePath(resolvedPath2, workspaceRoot);
39446
39441
  return `Error editing ${displayPath}: ${staleCheck.message}
39447
39442
 
39448
- Example: <extract><targets>${displayPath}</targets></extract>`;
39443
+ Use the extract tool with targets="${displayPath}" to re-read the file, then retry.`;
39449
39444
  }
39450
39445
  }
39451
39446
  const content = await import_fs6.promises.readFile(resolvedPath2, "utf-8");
@@ -58632,20 +58627,15 @@ ${taskLines.join("\n")}
58632
58627
 
58633
58628
  // src/agent/tasks/taskTool.js
58634
58629
  function createTaskCompletionBlockedMessage(taskSummary) {
58635
- return `<task_completion_blocked>
58636
- You cannot complete yet. The following tasks are still unresolved:
58630
+ return `You cannot complete yet. The following tasks are still unresolved:
58637
58631
 
58638
58632
  ${taskSummary}
58639
58633
 
58640
- Required action:
58641
- 1. For each "pending" or "in_progress" task, either:
58642
- - Complete the work and mark it: <task><action>complete</action><id>task-X</id></task>
58643
- - Or cancel if no longer needed: <task><action>update</action><id>task-X</id><status>cancelled</status></task>
58634
+ For each pending/in_progress task, either:
58635
+ - Complete it: call task tool with action="complete", id="task-X"
58636
+ - Cancel it: call task tool with action="update", id="task-X", status="cancelled"
58644
58637
 
58645
- 2. After ALL tasks are resolved (completed or cancelled), call attempt_completion again.
58646
-
58647
- Use <task><action>list</action></task> to review current status.
58648
- </task_completion_blocked>`;
58638
+ After all tasks are resolved, call attempt_completion again.`;
58649
58639
  }
58650
58640
  function createTaskTool(options = {}) {
58651
58641
  const { taskManager, tracer, debug = false } = options;
@@ -58868,145 +58858,46 @@ var init_taskTool = __esm({
58868
58858
  dependencies: external_exports.array(external_exports.string()).optional(),
58869
58859
  after: external_exports.string().optional()
58870
58860
  });
58871
- taskSystemPrompt = `[Task Management System]
58872
-
58873
- You have access to a task tracking tool to organize your work on complex requests.
58874
-
58875
- ## When to Create Tasks
58876
-
58877
- CREATE TASKS when the request has **multiple distinct deliverables or goals**:
58878
- - "Fix bug A AND add feature B" \u2192 Two separate tasks
58879
- - "Investigate auth, payments, AND notifications" \u2192 Three independent areas
58880
- - "Implement X, then add tests, then update docs" \u2192 Sequential phases with different outputs
58881
- - User explicitly asks for a plan or task breakdown
58882
-
58883
- SKIP TASKS for single-goal requests, even if they require multiple searches:
58884
- - "How does ranking work?" \u2192 Just investigate and answer (one goal)
58885
- - "What does function X do?" \u2192 Just look it up (one goal)
58886
- - "Explain the authentication flow" \u2192 Just trace and explain (one goal)
58887
- - "Find where errors are logged" \u2192 Just search and report (one goal)
58888
-
58889
- **Key insight**: Multiple *internal steps* (search, read, analyze) are NOT the same as multiple *goals*.
58890
- A single investigation with many steps is still ONE task, not many.
58891
-
58892
- ## Task Granularity
58893
-
58894
- Tasks represent LOGICAL UNITS OF WORK, not individual files or steps:
58895
- - "Fix 8 similar test files" \u2192 ONE task (same type of fix across files)
58896
- - "Update API + tests + docs" \u2192 THREE tasks (different types of work)
58897
- - "Implement feature in 5 files" \u2192 ONE task (single feature)
58898
-
58899
- **Rule of thumb**: If you're creating more than 3-4 tasks, you're probably too granular.
58900
-
58901
- **Anti-patterns to avoid**:
58902
- - One task per file \u274C
58903
- - One task per function \u274C
58904
- - One task per repository (when same type of work) \u274C
58905
-
58906
- **Good patterns**:
58907
- - One task per distinct deliverable \u2713
58908
- - One task per phase (implement, test, document) \u2713
58909
- - One task per different type of work \u2713
58910
-
58911
- MODIFY TASKS when (during execution):
58912
- - You discover the problem is more complex than expected \u2192 Add new tasks
58913
- - A single task covers too much scope \u2192 Split into smaller tasks
58914
- - You find related work that needs attention \u2192 Add dependent tasks
58915
- - A task becomes irrelevant based on findings \u2192 Cancel it
58916
- - Task priorities change based on discoveries \u2192 Update priority
58917
- - You learn new context \u2192 Update task description
58918
-
58919
- ## Task Workflow
58920
-
58921
- **STEP 1 - Plan (at start):**
58922
- Analyze the request and create tasks for each logical step:
58923
-
58924
- <task>
58925
- <action>create</action>
58926
- <tasks>[
58927
- {"title": "Search for authentication module", "priority": "high"},
58928
- {"title": "Analyze login flow implementation", "dependencies": ["task-1"]},
58929
- {"title": "Find session management code", "dependencies": ["task-1"]},
58930
- {"title": "Summarize authentication architecture", "dependencies": ["task-2", "task-3"]}
58931
- ]</tasks>
58932
- </task>
58933
-
58934
- **STEP 2 - Execute (during work):**
58935
- Update task status as you work:
58936
-
58937
- <task>
58938
- <action>update</action>
58939
- <id>task-1</id>
58940
- <status>in_progress</status>
58941
- </task>
58942
-
58943
- ... do the work (search, extract, etc.) ...
58944
-
58945
- <task>
58946
- <action>complete</action>
58947
- <id>task-1</id>
58948
- </task>
58949
-
58950
- **STEP 2b - Adapt (when you discover new work):**
58951
- As you work, you may discover that:
58952
- - A task is more complex than expected \u2192 Split it into subtasks
58953
- - New areas need investigation \u2192 Add new tasks
58954
- - Some tasks are no longer needed \u2192 Cancel them
58955
- - Task order should change \u2192 Update dependencies
58956
-
58957
- *Adding a new task when you discover more work:*
58958
- <task>
58959
- <action>create</action>
58960
- <title>Investigate caching layer</title>
58961
- <description>Found references to Redis caching in auth module</description>
58962
- </task>
58963
-
58964
- *Inserting a task after a specific task (to maintain logical order):*
58965
- <task>
58966
- <action>create</action>
58967
- <title>Check rate limiting</title>
58968
- <after>task-2</after>
58969
- </task>
58970
-
58971
- *Cancelling and splitting a complex task:*
58972
- <task>
58973
- <action>update</action>
58974
- <id>task-3</id>
58975
- <status>cancelled</status>
58976
- </task>
58977
- <task>
58978
- <action>create</action>
58979
- <tasks>[
58980
- {"title": "Review JWT token generation", "priority": "high"},
58981
- {"title": "Review token refresh logic"}
58982
- ]</tasks>
58983
- </task>
58984
-
58985
- **STEP 3 - Finish (before completion):**
58986
- Before calling attempt_completion, ensure ALL tasks are either:
58987
- - \`completed\` - you finished the work
58988
- - \`cancelled\` - no longer needed
58989
-
58990
- If you created tasks, you MUST resolve them all before completing.
58991
-
58992
- ## Key Rules
58993
-
58994
- 1. **Dependencies are enforced**: A task cannot start until its dependencies are completed
58995
- 2. **Circular dependencies are rejected**: task-1 \u2192 task-2 \u2192 task-1 is invalid
58996
- 3. **Completion is blocked**: attempt_completion will fail if tasks remain unresolved
58997
- 4. **List to review**: Use <task><action>list</action></task> to see current task status
58998
- 5. **Tasks are living documents**: Add, split, or cancel tasks as you learn more about the problem
58861
+ taskSystemPrompt = `[Task Management]
58862
+
58863
+ Use the task tool to track progress on complex requests with multiple distinct goals.
58864
+
58865
+ ## When to Use Tasks
58866
+
58867
+ CREATE tasks when the request has **multiple separate deliverables**:
58868
+ - "Fix bug A AND add feature B" \u2192 two tasks
58869
+ - "Investigate auth, payments, AND notifications" \u2192 three tasks
58870
+ - "Implement X, then add tests, then update docs" \u2192 three sequential tasks
58871
+
58872
+ SKIP tasks for single-goal requests, even complex ones:
58873
+ - "How does ranking work?" \u2014 just investigate and answer
58874
+ - "Explain the authentication flow" \u2014 just trace and explain
58875
+ Multiple internal steps (search, read, analyze) for one goal \u2260 multiple tasks.
58876
+
58877
+ ## Granularity
58878
+
58879
+ Tasks = logical units of work, not files or steps.
58880
+ - "Fix 8 similar test files" \u2192 ONE task (same fix repeated)
58881
+ - "Update API + tests + docs" \u2192 THREE tasks (different work types)
58882
+ - Max 3\u20134 tasks. More means you're too granular.
58883
+
58884
+ ## Workflow
58885
+
58886
+ 1. **Plan**: Call task tool with action="create" and a tasks array up front
58887
+ 2. **Execute**: Update status to "in_progress" / "completed" as you work. Add, split, or cancel tasks as you learn more.
58888
+ 3. **Finish**: All tasks must be "completed" or "cancelled" before calling attempt_completion.
58889
+
58890
+ ## Rules
58891
+
58892
+ - Dependencies are enforced: a task cannot start until its dependencies are completed
58893
+ - Circular dependencies are rejected
58894
+ - attempt_completion is blocked while tasks remain unresolved
58999
58895
  `;
59000
- taskGuidancePrompt = `<task_guidance>
59001
- Does this request have MULTIPLE DISTINCT GOALS?
58896
+ taskGuidancePrompt = `Does this request have MULTIPLE DISTINCT GOALS?
59002
58897
  - "Do A AND B AND C" (multiple goals) \u2192 Create tasks for each goal
59003
58898
  - "Investigate/explain/find X" (single goal) \u2192 Skip tasks, just answer directly
59004
-
59005
- Multiple internal steps (search, read, analyze) for ONE goal = NO tasks needed.
59006
- Only create tasks when there are separate deliverables the user is asking for.
59007
-
59008
- If creating tasks, use the task tool with action="create" first.
59009
- </task_guidance>`;
58899
+ Multiple internal steps for ONE goal = NO tasks needed.
58900
+ If creating tasks, use the task tool with action="create" first.`;
59010
58901
  }
59011
58902
  });
59012
58903
 
@@ -97776,9 +97667,10 @@ If the solution is clear, you can jump to implementation right away. If not, ask
97776
97667
  - After every significant change, verify the project still builds and passes linting. Do not wait until the end to discover breakage.
97777
97668
 
97778
97669
  # After Implementation
97779
- - Always run the project's tests before considering the task complete. If tests fail, fix them.
97780
- - Run lint and typecheck commands if known for the project.
97781
- - If a build, lint, or test fails, fix the issue before finishing.
97670
+ - Verify the project builds successfully. If it doesn't, fix the build before moving on.
97671
+ - Run lint and typecheck commands if known for the project. Fix any new warnings or errors you introduced.
97672
+ - Add tests for any new or changed functionality. Tests must cover the main path and important edge cases.
97673
+ - Run the project's full test suite. If any tests fail (including pre-existing ones you may have broken), fix them before finishing.
97782
97674
  - When the task is done, respond to the user with a concise summary of what was implemented, what files were changed, and any relevant details. Include links (e.g. pull request URL) so the user has everything they need.
97783
97675
 
97784
97676
  # GitHub Integration
@@ -106991,6 +106883,19 @@ function isContextLimitError(error2) {
106991
106883
  }
106992
106884
  return false;
106993
106885
  }
106886
+ function messageContainsCompletion(msg) {
106887
+ if (Array.isArray(msg.toolInvocations)) {
106888
+ if (msg.toolInvocations.some((t5) => t5.toolName === "attempt_completion")) return true;
106889
+ }
106890
+ if (Array.isArray(msg.tool_calls)) {
106891
+ if (msg.tool_calls.some((t5) => t5.function?.name === "attempt_completion")) return true;
106892
+ }
106893
+ if (Array.isArray(msg.content)) {
106894
+ if (msg.content.some((p5) => p5.type === "tool-call" && p5.toolName === "attempt_completion")) return true;
106895
+ }
106896
+ const text = typeof msg.content === "string" ? msg.content : "";
106897
+ return text.includes("attempt_completion");
106898
+ }
106994
106899
  function identifyMessageSegments(messages) {
106995
106900
  const segments = [];
106996
106901
  let currentSegment = null;
@@ -106999,27 +106904,23 @@ function identifyMessageSegments(messages) {
106999
106904
  if (msg.role === "system") {
107000
106905
  continue;
107001
106906
  }
106907
+ if (msg.role === "tool" && currentSegment) {
106908
+ currentSegment.monologueIndices.push(i5);
106909
+ continue;
106910
+ }
107002
106911
  if (msg.role === "user") {
107003
- const content = typeof msg.content === "string" ? msg.content : "";
107004
- const isToolResult = content.includes("<tool_result>");
107005
- if (isToolResult && currentSegment) {
107006
- currentSegment.finalIndex = i5;
106912
+ if (currentSegment) {
107007
106913
  segments.push(currentSegment);
107008
- currentSegment = null;
107009
- } else {
107010
- if (currentSegment) {
107011
- segments.push(currentSegment);
107012
- }
107013
- currentSegment = {
107014
- userIndex: i5,
107015
- monologueIndices: [],
107016
- finalIndex: null
107017
- };
107018
106914
  }
106915
+ currentSegment = {
106916
+ userIndex: i5,
106917
+ monologueIndices: [],
106918
+ finalIndex: null
106919
+ };
107019
106920
  }
107020
106921
  if (msg.role === "assistant" && currentSegment) {
107021
- const content = typeof msg.content === "string" ? msg.content : "";
107022
- if (content.includes("<attempt_completion>") || content.includes("attempt_completion")) {
106922
+ const hasCompletion = messageContainsCompletion(msg);
106923
+ if (hasCompletion) {
107023
106924
  currentSegment.monologueIndices.push(i5);
107024
106925
  currentSegment.finalIndex = i5;
107025
106926
  segments.push(currentSegment);