@probelabs/probe 0.6.0-rc247 → 0.6.0-rc249

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@probelabs/probe",
3
- "version": "0.6.0-rc247",
3
+ "version": "0.6.0-rc249",
4
4
  "description": "Node.js wrapper for the probe code search tool",
5
5
  "main": "src/index.js",
6
6
  "module": "src/index.js",
@@ -85,7 +85,7 @@ import {
85
85
  validateAndFixMermaidResponse,
86
86
  tryAutoWrapForSimpleSchema
87
87
  } from './schemaUtils.js';
88
- import { removeThinkingTags } from './xmlParsingUtils.js';
88
+ import { removeThinkingTags, extractThinkingContent } from './xmlParsingUtils.js';
89
89
  import { predefinedPrompts } from './shared/prompts.js';
90
90
  import {
91
91
  MCPXmlBridge,
@@ -2904,10 +2904,11 @@ Follow these instructions carefully:
2904
2904
  // Track initial history length for storage
2905
2905
  const oldHistoryLength = this.history.length;
2906
2906
 
2907
- // Reset output buffer for this answer() call — but NOT during schema correction recursion
2908
- // When _schemaFormatted is true, this is a recursive call to fix JSON formatting,
2909
- // and we must preserve the output buffer so the parent call can append it
2910
- if (this._outputBuffer && !options?._schemaFormatted) {
2907
+ // Reset output buffer for this answer() call — but NOT during recursive calls.
2908
+ // _schemaFormatted: recursive call to fix JSON formatting
2909
+ // _completionPromptProcessed: recursive call for completionPrompt follow-up
2910
+ // Both must preserve the output buffer so the parent call can append it.
2911
+ if (this._outputBuffer && !options?._schemaFormatted && !options?._completionPromptProcessed) {
2911
2912
  this._outputBuffer.items = [];
2912
2913
  }
2913
2914
 
@@ -3554,8 +3555,25 @@ Follow these instructions carefully:
3554
3555
  continue; // Don't use broken response, continue the loop
3555
3556
  }
3556
3557
 
3557
- finalResult = prevContent;
3558
- if (this.debug) console.log(`[DEBUG] Using previous response as completion: ${finalResult.substring(0, 100)}...`);
3558
+ // Pre-strip thinking tags to avoid losing content at final cleanup stage
3559
+ const strippedContent = removeThinkingTags(prevContent);
3560
+ if (strippedContent.length > 50) {
3561
+ // Enough content outside thinking tags — use stripped version directly
3562
+ finalResult = strippedContent;
3563
+ if (this.debug) console.log(`[DEBUG] Using previous response (thinking-stripped) as completion: ${finalResult.substring(0, 100)}...`);
3564
+ } else {
3565
+ // Content was mostly/entirely inside thinking tags.
3566
+ // Extract thinking content and use it as the actual answer.
3567
+ const thinkingContent = extractThinkingContent(prevContent);
3568
+ if (thinkingContent && thinkingContent.length > 50) {
3569
+ finalResult = thinkingContent;
3570
+ if (this.debug) console.log(`[DEBUG] Previous response was mostly in thinking tags — using thinking content as completion: ${finalResult.substring(0, 100)}...`);
3571
+ } else {
3572
+ // Neither stripped nor thinking content is substantive — use raw as fallback
3573
+ finalResult = prevContent;
3574
+ if (this.debug) console.log(`[DEBUG] Using previous response as completion (raw): ${finalResult.substring(0, 100)}...`);
3575
+ }
3576
+ }
3559
3577
  } else {
3560
3578
  finalResult = 'Error: No previous response found to use as completion.';
3561
3579
  if (this.debug) console.log(`[DEBUG] No suitable previous response found for attempt_complete shorthand`);
@@ -4296,10 +4314,16 @@ After reviewing, provide your final answer using attempt_completion.`;
4296
4314
 
4297
4315
  // Make a follow-up call with the completion prompt
4298
4316
  // Pass _completionPromptProcessed to prevent infinite loops
4317
+ // Save output buffer — the recursive answer() must not destroy DSL output() content
4318
+ const savedOutputItems = this._outputBuffer ? [...this._outputBuffer.items] : [];
4299
4319
  const completionResult = await this.answer(completionPromptMessage, [], {
4300
4320
  ...options,
4301
4321
  _completionPromptProcessed: true
4302
4322
  });
4323
+ // Restore output buffer so the parent call can append it to the final result
4324
+ if (this._outputBuffer) {
4325
+ this._outputBuffer.items = savedOutputItems;
4326
+ }
4303
4327
 
4304
4328
  // Update finalResult with the result from the completion prompt
4305
4329
  finalResult = completionResult;
@@ -8,6 +8,7 @@
8
8
 
9
9
  import {
10
10
  searchSchema,
11
+ searchAllSchema,
11
12
  querySchema,
12
13
  extractSchema,
13
14
  bashSchema,
@@ -16,6 +17,7 @@ import {
16
17
  // Map of native tool names to their Zod schemas
17
18
  const NATIVE_TOOL_SCHEMAS = {
18
19
  search: searchSchema,
20
+ searchAll: searchAllSchema,
19
21
  query: querySchema,
20
22
  extract: extractSchema,
21
23
  bash: bashSchema,
@@ -23,7 +25,7 @@ const NATIVE_TOOL_SCHEMAS = {
23
25
 
24
26
  // Tools that are inherently async (make network/LLM calls)
25
27
  const ALWAYS_ASYNC = new Set([
26
- 'search', 'query', 'extract', 'listFiles', 'searchFiles', 'bash',
28
+ 'search', 'searchAll', 'query', 'extract', 'listFiles', 'searchFiles', 'bash',
27
29
  'LLM', 'map',
28
30
  ]);
29
31
 
@@ -13,10 +13,19 @@ export const searchSchema = z.object({
13
13
  query: z.string().describe('Search query with Elasticsearch syntax. Use quotes for exact matches, AND/OR for boolean logic, - for negation.'),
14
14
  path: z.string().optional().default('.').describe('Path to search in. For dependencies use "go:github.com/owner/repo", "js:package_name", or "rust:cargo_name" etc.'),
15
15
  exact: z.boolean().optional().default(false).describe('Default (false) enables stemming and keyword splitting for exploratory search - "getUserData" matches "get", "user", "data", etc. Set true for precise symbol lookup where "getUserData" matches only "getUserData". Use true when you know the exact symbol name.'),
16
+ maxTokens: z.number().nullable().optional().describe('Maximum tokens to return. Default is 20000. Set to null for unlimited results.'),
16
17
  session: z.string().optional().describe('Session ID for result caching and pagination. Pass the session ID from a previous search to get additional results (next page). Results already shown in a session are automatically excluded. Omit for a fresh search.'),
17
18
  nextPage: z.boolean().optional().default(false).describe('Set to true when requesting the next page of results. Requires passing the same session ID from the previous search output.')
18
19
  });
19
20
 
21
/**
 * Zod schema describing the parameters accepted by `searchAll`.
 *
 * Fields:
 * - query: required search string.
 * - path: optional location to search, defaults to '.'.
 * - exact: optional flag for exact matching, defaults to false.
 * - maxTokensPerPage: optional per-page token budget, defaults to 20000.
 * - maxPages: optional cap on the number of pages, defaults to 50.
 */
export const searchAllSchema = z.object({
  query: z
    .string()
    .describe('Search query with Elasticsearch syntax. Use quotes for exact matches, AND/OR for boolean logic, - for negation.'),
  path: z
    .string()
    .optional()
    .default('.')
    .describe('Path to search in.'),
  exact: z
    .boolean()
    .optional()
    .default(false)
    .describe('Use exact matching instead of stemming.'),
  maxTokensPerPage: z
    .number()
    .optional()
    .default(20000)
    .describe('Tokens per page when paginating. Default 20000.'),
  maxPages: z
    .number()
    .optional()
    .default(50)
    .describe('Maximum pages to retrieve. Default 50 (safety limit).')
});
28
+
20
29
  export const querySchema = z.object({
21
30
  pattern: z.string().describe('AST pattern to search for. Use $NAME for variable names, $$$PARAMS for parameter lists, etc.'),
22
31
  path: z.string().optional().default('.').describe('Path to search in'),
@@ -85,6 +85,8 @@ function buildToolImplementations(configOptions) {
85
85
  if (!searchPaths || searchPaths.length === 0) {
86
86
  searchPaths = [cwd || '.'];
87
87
  }
88
+ // Allow maxTokens to be passed through (null = unlimited, undefined = default 20000)
89
+ const maxTokens = params.maxTokens !== undefined ? params.maxTokens : 20000;
88
90
  return await search({
89
91
  query: params.query,
90
92
  path: searchPaths.join(' '),
@@ -92,7 +94,7 @@ function buildToolImplementations(configOptions) {
92
94
  allowTests: true,
93
95
  exact: params.exact || false,
94
96
  json: false,
95
- maxTokens: 20000,
97
+ maxTokens,
96
98
  session: sessionId,
97
99
  timeout: 60,
98
100
  });
@@ -102,6 +104,70 @@ function buildToolImplementations(configOptions) {
102
104
  },
103
105
  };
104
106
 
107
// searchAll: auto-paginating search that retrieves ALL results.
// Calls search() repeatedly with the same sessionId until a page is empty,
// a page carries an end-of-results message, or maxPages pages were fetched.
tools.searchAll = {
  /**
   * Fetch search results page by page and return them as one string.
   *
   * @param {object} params
   * @param {string} params.query - Search query, forwarded to search() unchanged.
   * @param {string} [params.path] - Path(s) to search, resolved through
   *   parseAndResolvePaths(); when missing or when nothing resolves, `cwd`
   *   (or '.') is searched instead.
   * @param {boolean} [params.exact] - Forwarded to search(); false when falsy.
   * @param {number} [params.maxTokensPerPage] - Token budget per page; 20000
   *   when missing or not a positive number.
   * @param {number} [params.maxPages] - Maximum number of pages to fetch; 50
   *   when missing or not a positive number.
   * @returns {Promise<string>} All collected pages joined by blank lines, with
   *   a trailing warning when the page limit stopped the search early;
   *   'No results found.' when nothing was collected; or
   *   'SearchAll error: <message>' when anything in the process throws.
   */
  execute: async (params) => {
    // Messages in a page that mean there is nothing further to fetch.
    const endOfResultsMarkers = [
      'All results retrieved',
      'No results found',
      'No matching code blocks found',
    ];
    // Limits must be positive numbers; anything else (undefined, null, 0,
    // NaN, negative) falls back to the default.
    const positiveOr = (value, fallback) => {
      const numeric = Number(value);
      return numeric > 0 ? numeric : fallback;
    };

    try {
      let searchPaths;
      if (params.path) {
        searchPaths = parseAndResolvePaths(params.path, cwd);
      }
      if (!searchPaths || searchPaths.length === 0) {
        searchPaths = [cwd || '.'];
      }
      const pathStr = searchPaths.join(' ');
      const maxTokensPerPage = positiveOr(params.maxTokensPerPage, 20000);
      const maxPages = positiveOr(params.maxPages, 50); // Safety limit

      const pages = [];
      // True once a page told us there is nothing more to fetch. The warning
      // below must depend on this, not on the page count: the final page may
      // legitimately arrive on the last permitted iteration.
      let exhausted = false;

      for (let pageCount = 0; pageCount < maxPages; pageCount++) {
        // NOTE(review): this reuses the sessionId from the enclosing scope.
        // If sessions exclude results that were already shown, results
        // returned by earlier calls in the same session would be missing
        // here — confirm that is intended for an "all results" tool.
        const pageResult = await search({
          query: params.query,
          path: pathStr,
          cwd,
          allowTests: true,
          exact: params.exact || false,
          json: false,
          maxTokens: maxTokensPerPage,
          session: sessionId,
          timeout: 60,
        });

        // An empty page means the previous page was the last one.
        if (!pageResult || pageResult.trim().length === 0) {
          exhausted = true;
          break;
        }

        if (endOfResultsMarkers.some((marker) => pageResult.includes(marker))) {
          // Keep this final page only if it has content beyond the message.
          if (pageResult.trim().length > 50) {
            pages.push(pageResult);
          }
          exhausted = true;
          break;
        }

        pages.push(pageResult);
      }

      let allResults = pages.join('\n\n');
      if (!exhausted) {
        allResults += `\n\n[Warning: Reached maximum page limit (${maxPages}). Some results may be omitted.]`;
      }

      return allResults || 'No results found.';
    } catch (e) {
      return `SearchAll error: ${e.message}`;
    }
  },
};
170
+
105
171
  tools.query = {
106
172
  execute: async (params) => {
107
173
  try {
@@ -345,7 +411,8 @@ ${lastError}
345
411
 
346
412
  RULES REMINDER:
347
413
  - search(query) is KEYWORD SEARCH — pass a search query, NOT a filename. Use extract(filepath) to read file contents.
348
- - search(), query(), extract(), listFiles(), bash() all return STRINGS, not arrays.
414
+ - search() returns up to 20K tokens by default. Use search(query, path, {maxTokens: null}) for unlimited, or searchAll(query) to auto-paginate ALL results.
415
+ - search(), searchAll(), query(), extract(), listFiles(), bash() all return STRINGS, not arrays.
349
416
  - Use chunk(stringData) to split a string into an array of chunks.
350
417
  - Use map(array, fn) only with arrays. Do NOT pass strings to map().
351
418
  - Do NOT use .map(), .forEach(), .filter(), .join() — use for..of loops instead.
@@ -684,7 +751,8 @@ return table;
684
751
  ${funcList}
685
752
 
686
753
  **Return types — IMPORTANT:**
687
- - \`search(query)\` → **keyword search** — pass a search query (e.g. "error handling"), NOT a filename. Returns a **string** (matching code snippets). To process parts, use \`chunk()\` to split it.
754
+ - \`search(query)\` → **keyword search** — pass a search query (e.g. "error handling"), NOT a filename. Returns a **string** (matching code snippets, up to 20K tokens by default). Use \`{maxTokens: null}\` for unlimited.
755
+ - \`searchAll(query)\` → **exhaustive keyword search** — auto-paginates to retrieve ALL matching results. Returns a **string** (all matching code snippets concatenated). Use for bulk analysis.
688
756
  - \`query(pattern)\` → **AST search** — pass a tree-sitter pattern. Returns a **string** (matching code elements).
689
757
  - \`extract(targets)\` → **read file contents** — pass a file path like "src/main.js" or "src/main.js:42". Use this to read specific files found by listFiles(). Returns a **string**.
690
758
  - \`listFiles(pattern)\` → **list files** — pass a glob pattern like "**/*.md". Returns an **array** of file path strings. Use directly with \`for (const f of listFiles("**/*.md"))\`.