@blockrun/franklin 3.3.3 → 3.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. package/README.md +65 -25
  2. package/dist/agent/commands.d.ts +1 -1
  3. package/dist/agent/commands.js +128 -17
  4. package/dist/agent/compact.d.ts +2 -2
  5. package/dist/agent/compact.js +148 -22
  6. package/dist/agent/context.d.ts +8 -3
  7. package/dist/agent/context.js +301 -108
  8. package/dist/agent/error-classifier.d.ts +11 -2
  9. package/dist/agent/error-classifier.js +64 -10
  10. package/dist/agent/llm.d.ts +8 -1
  11. package/dist/agent/llm.js +114 -19
  12. package/dist/agent/loop.d.ts +1 -2
  13. package/dist/agent/loop.js +509 -61
  14. package/dist/agent/optimize.d.ts +2 -2
  15. package/dist/agent/optimize.js +9 -7
  16. package/dist/agent/permissions.d.ts +1 -1
  17. package/dist/agent/permissions.js +1 -1
  18. package/dist/agent/planner.d.ts +42 -0
  19. package/dist/agent/planner.js +110 -0
  20. package/dist/agent/reduce.d.ts +7 -1
  21. package/dist/agent/reduce.js +85 -3
  22. package/dist/agent/streaming-executor.d.ts +6 -1
  23. package/dist/agent/streaming-executor.js +83 -5
  24. package/dist/agent/tokens.d.ts +11 -2
  25. package/dist/agent/tokens.js +38 -5
  26. package/dist/agent/tool-guard.d.ts +27 -0
  27. package/dist/agent/tool-guard.js +324 -0
  28. package/dist/agent/types.d.ts +7 -1
  29. package/dist/agent/types.js +1 -1
  30. package/dist/brain/extract.d.ts +11 -0
  31. package/dist/brain/extract.js +154 -0
  32. package/dist/brain/index.d.ts +3 -0
  33. package/dist/brain/index.js +2 -0
  34. package/dist/brain/store.d.ts +42 -0
  35. package/dist/brain/store.js +225 -0
  36. package/dist/brain/types.d.ts +45 -0
  37. package/dist/brain/types.js +5 -0
  38. package/dist/commands/daemon.js +2 -1
  39. package/dist/commands/start.js +19 -7
  40. package/dist/config.js +1 -1
  41. package/dist/index.js +27 -2
  42. package/dist/learnings/extractor.d.ts +13 -0
  43. package/dist/learnings/extractor.js +69 -8
  44. package/dist/learnings/index.d.ts +1 -1
  45. package/dist/learnings/index.js +1 -1
  46. package/dist/learnings/store.js +42 -13
  47. package/dist/learnings/types.d.ts +1 -1
  48. package/dist/mcp/client.d.ts +1 -1
  49. package/dist/mcp/client.js +5 -5
  50. package/dist/mcp/config.d.ts +1 -1
  51. package/dist/mcp/config.js +1 -1
  52. package/dist/panel/html.d.ts +2 -0
  53. package/dist/panel/html.js +409 -146
  54. package/dist/panel/server.js +19 -0
  55. package/dist/pricing.js +3 -2
  56. package/dist/proxy/fallback.d.ts +3 -1
  57. package/dist/proxy/fallback.js +4 -4
  58. package/dist/proxy/server.js +29 -11
  59. package/dist/proxy/sse-translator.js +1 -1
  60. package/dist/router/categories.d.ts +21 -0
  61. package/dist/router/categories.js +96 -0
  62. package/dist/router/index.d.ts +9 -2
  63. package/dist/router/index.js +106 -27
  64. package/dist/router/local-elo.d.ts +32 -0
  65. package/dist/router/local-elo.js +107 -0
  66. package/dist/router/selector.d.ts +46 -0
  67. package/dist/router/selector.js +106 -0
  68. package/dist/session/storage.d.ts +5 -1
  69. package/dist/session/storage.js +24 -2
  70. package/dist/social/a11y.d.ts +1 -1
  71. package/dist/social/a11y.js +5 -1
  72. package/dist/social/browser.d.ts +5 -0
  73. package/dist/social/browser.js +22 -0
  74. package/dist/social/preflight.d.ts +4 -0
  75. package/dist/social/preflight.js +42 -3
  76. package/dist/stats/failures.d.ts +20 -0
  77. package/dist/stats/failures.js +63 -0
  78. package/dist/stats/format.d.ts +6 -0
  79. package/dist/stats/format.js +23 -0
  80. package/dist/stats/insights.js +1 -21
  81. package/dist/stats/session-tracker.d.ts +21 -0
  82. package/dist/stats/session-tracker.js +28 -0
  83. package/dist/stats/tracker.d.ts +1 -1
  84. package/dist/stats/tracker.js +1 -1
  85. package/dist/tools/bash.d.ts +14 -1
  86. package/dist/tools/bash.js +132 -7
  87. package/dist/tools/edit.js +77 -14
  88. package/dist/tools/glob.js +13 -3
  89. package/dist/tools/grep.js +30 -12
  90. package/dist/tools/imagegen.js +5 -5
  91. package/dist/tools/index.d.ts +1 -1
  92. package/dist/tools/index.js +5 -1
  93. package/dist/tools/read.d.ts +16 -2
  94. package/dist/tools/read.js +36 -8
  95. package/dist/tools/searchx.d.ts +6 -2
  96. package/dist/tools/searchx.js +221 -44
  97. package/dist/tools/subagent.js +37 -3
  98. package/dist/tools/task.js +43 -7
  99. package/dist/tools/validate.d.ts +11 -0
  100. package/dist/tools/validate.js +42 -0
  101. package/dist/tools/webfetch.js +18 -7
  102. package/dist/tools/websearch.js +41 -7
  103. package/dist/tools/write.js +26 -6
  104. package/dist/ui/app.js +31 -6
  105. package/dist/ui/model-picker.d.ts +1 -1
  106. package/dist/ui/model-picker.js +1 -1
  107. package/dist/ui/terminal.d.ts +1 -1
  108. package/dist/ui/terminal.js +1 -1
  109. package/package.json +2 -2
@@ -1,66 +1,206 @@
1
1
  /**
2
2
  * SearchX capability — search X (Twitter) for posts matching a query.
3
- * Returns candidate posts with snippets and product relevance scores.
4
- * Requires social config and X login.
3
+ * Returns candidate posts with snippets, tweet URLs, and product relevance scores.
4
+ *
5
+ * Works in two modes:
6
+ * - **Basic** (no config): browser-only search, returns snippets + URLs
7
+ * - **Enhanced** (with social config): adds product routing, dedup, login detection
5
8
  */
6
9
  import { checkSocialReady } from '../social/preflight.js';
7
10
  import { extractArticleBlocks, findRefs, findStaticText, X_TIME_LINK_PATTERN, } from '../social/a11y.js';
8
11
  import { computePreKey, hasPreKey } from '../social/db.js';
9
12
  import { detectProduct } from '../social/ai.js';
10
- import { loadConfig } from '../social/config.js';
13
+ import { loadConfig, isConfigReady } from '../social/config.js';
11
14
  import { browserPool } from '../social/browser-pool.js';
15
+ // ─── Intent detection (code-level, not LLM-level) ──────────────────────────
16
+ // When the user asks "check my @handle mentions/notifications/互动",
17
+ // the tool itself routes to x.com/notifications. No LLM judgment needed.
18
+ const NOTIFICATION_KEYWORDS = [
19
+ 'notification', 'notifications',
20
+ 'mention', 'mentions', 'mentioned',
21
+ 'reply', 'replies',
22
+ 'interact', 'interaction', 'interactions',
23
+ '互动', '通知', '提及', '回复', '看看',
24
+ 'check my', 'my account', 'my x',
25
+ 'to:', 'from:', '@',
26
+ ];
27
+ export function detectNotificationsIntent(query, handle, knownHandles) {
28
+ if (!query)
29
+ return false;
30
+ const q = query.toLowerCase();
31
+ // Collect all handles the user might reference (personal + org accounts)
32
+ const handles = new Set();
33
+ const addHandle = (h) => {
34
+ const clean = h.replace(/^@/, '').toLowerCase().trim();
35
+ if (clean.length >= 3)
36
+ handles.add(clean);
37
+ };
38
+ addHandle(handle);
39
+ if (knownHandles)
40
+ knownHandles.forEach(addHandle);
41
+ // Check if query mentions any known handle
42
+ let mentionsOwnHandle = false;
43
+ let matchedHandle = '';
44
+ for (const h of handles) {
45
+ if (q.includes(h)) {
46
+ mentionsOwnHandle = true;
47
+ matchedHandle = h;
48
+ break;
49
+ }
50
+ }
51
+ const hasInteractionKeyword = NOTIFICATION_KEYWORDS.some(kw => q.includes(kw));
52
+ // Route to notifications if: mentions own handle + interaction keyword
53
+ // OR query is literally just the handle (e.g. "blockrunai", "@BlockRunAI")
54
+ if (mentionsOwnHandle && hasInteractionKeyword)
55
+ return true;
56
+ if (mentionsOwnHandle && q.replace(/[@:]/g, '').trim() === matchedHandle)
57
+ return true;
58
+ return false;
59
+ }
12
60
  async function execute(input, _ctx) {
13
- const { query, max_results } = input;
14
- if (!query) {
15
- return { output: 'Error: query is required', isError: true };
61
+ const { query, max_results, mode } = input;
62
+ if (!query && mode !== 'notifications') {
63
+ return { output: 'Error: query is required (or set mode to "notifications")', isError: true };
16
64
  }
17
65
  const maxResults = Math.min(Math.max(max_results ?? 10, 1), 50);
18
- // ── Preflight: config + login ──────────────────────────────────────────
19
- const preflight = await checkSocialReady();
20
- if (!preflight.ready) {
21
- return {
22
- output: `SearchX not ready: ${preflight.reason}`,
23
- isError: true,
24
- };
25
- }
66
+ // ── Config: load if available, degrade gracefully if not ────────────
26
67
  const config = loadConfig();
68
+ const configStatus = isConfigReady(config);
69
+ const enhanced = configStatus.ready;
27
70
  const handle = config.handle || 'unknown';
71
+ // ── Auto-detect notifications intent from query ─────────────────────
72
+ // Skill-level routing: the code decides, not the LLM.
73
+ // If the query mentions any known handle + interaction keywords,
74
+ // or explicitly asks for notifications, route to notifications page.
75
+ // Extract known handles from config: search queries may contain org handles
76
+ // like "BlockRunAI" even if the personal handle is "@bc1beat".
77
+ const knownHandles = [];
78
+ if (config.x?.search_queries) {
79
+ for (const sq of config.x.search_queries) {
80
+ // Extract @-handles and capitalized brand names from search queries
81
+ const atHandles = sq.match(/@\w+/g);
82
+ if (atHandles)
83
+ knownHandles.push(...atHandles);
84
+ // Also add single-word brand tokens (like "BlockRunAI")
85
+ const words = sq.split(/\s+/).filter(w => /^[A-Z]/.test(w) && w.length >= 5);
86
+ knownHandles.push(...words);
87
+ }
88
+ }
89
+ const isNotifications = mode === 'notifications' || detectNotificationsIntent(query, handle, knownHandles);
90
+ // In enhanced mode, verify login via preflight
91
+ if (enhanced) {
92
+ const preflight = await checkSocialReady();
93
+ if (!preflight.ready) {
94
+ if (isNotifications) {
95
+ return {
96
+ output: 'Not logged in to X. Run `franklin social login x` first — notifications require authentication.',
97
+ isError: true,
98
+ };
99
+ }
100
+ // Search can sometimes work without login — fall through
101
+ }
102
+ }
28
103
  let browser;
29
104
  try {
30
105
  browser = await browserPool.getBrowser();
31
- // ── Navigate to X search ───────────────────────────────────────────
32
- const searchUrl = `https://x.com/search?q=${encodeURIComponent(query)}&src=typed_query&f=live`;
33
- await browser.open(searchUrl);
34
- await browser.waitForTimeout(3500);
106
+ // ── Choose page: notifications vs search ──────────────────────────
107
+ const targetUrl = isNotifications
108
+ ? 'https://x.com/notifications'
109
+ : `https://x.com/search?q=${encodeURIComponent(query)}&src=typed_query&f=live`;
110
+ try {
111
+ await browser.open(targetUrl);
112
+ }
113
+ catch (err) {
114
+ const msg = err instanceof Error ? err.message : String(err);
115
+ browserPool.releaseBrowser();
116
+ if (msg.includes('Timeout') || msg.includes('timeout')) {
117
+ return {
118
+ output: `SearchX: X.com timed out (network issue or blocked). Try again later or check your connection.`,
119
+ isError: true,
120
+ };
121
+ }
122
+ return { output: `SearchX: Failed to open X.com: ${msg.slice(0, 200)}`, isError: true };
123
+ }
124
+ await browser.waitForTimeout(4000);
35
125
  const tree = await browser.snapshot();
126
+ // ── Diagnose page state ───────────────────────────────────────────
127
+ const isLoginWall = tree.includes('Sign in') && tree.includes('Create account');
128
+ const isRateLimit = tree.includes('Rate limit') || tree.includes('Something went wrong');
129
+ const treeLen = tree.length;
130
+ if (isLoginWall) {
131
+ return {
132
+ output: `SearchX: X is showing a login wall. Run \`franklin social login x\` to authenticate.\n\nTree preview (${treeLen} chars):\n${tree.slice(0, 500)}`,
133
+ isError: true,
134
+ };
135
+ }
136
+ if (isRateLimit) {
137
+ return {
138
+ output: `SearchX: X returned an error page (rate limit or server issue). Try again in a minute.\n\nTree preview (${treeLen} chars):\n${tree.slice(0, 500)}`,
139
+ isError: true,
140
+ };
141
+ }
36
142
  // ── Extract articles ───────────────────────────────────────────────
37
143
  const articles = extractArticleBlocks(tree);
38
144
  const candidates = [];
39
145
  for (const article of articles) {
40
146
  if (candidates.length >= maxResults)
41
147
  break;
42
- // Find time-link ref (permalink to the tweet)
43
- const timeRefs = findRefs(article.text, 'link', X_TIME_LINK_PATTERN);
44
- if (timeRefs.length === 0)
45
- continue;
46
- const timeRef = timeRefs[0];
47
148
  // Extract snippet from static text (first 3 lines)
48
149
  const texts = findStaticText(article.text);
49
150
  const snippet = texts.slice(0, 3).join(' ').trim();
50
151
  if (!snippet || snippet.length < 10)
51
152
  continue;
52
- // Extract time text from the ref line
53
- const timeLinkMatch = new RegExp(`\\[${timeRef}\\]\\s+link:\\s*(.+)`).exec(article.text);
54
- const timeText = timeLinkMatch ? timeLinkMatch[1].trim() : '';
55
- // Compute pre-key for dedup
56
- const preKey = computePreKey({ snippet, time: timeText });
57
- const alreadySeen = hasPreKey('x', handle, preKey);
58
- // Product routing (zero-cost keyword score)
59
- const product = detectProduct(snippet, config.products);
153
+ // Find time-link ref (permalink to the tweet) — optional
154
+ const timeRefs = findRefs(article.text, 'link', X_TIME_LINK_PATTERN);
155
+ const timeRef = timeRefs[0] ?? null;
156
+ // Fallback: if no time-link, try to find ANY link in the article
157
+ // that looks like a tweet permalink (/username/status/...)
158
+ let tweetUrl = null;
159
+ let timeText = '';
160
+ if (timeRef) {
161
+ const timeLinkMatch = new RegExp(`\\[${timeRef}\\]\\s+link:\\s*(.+)`).exec(article.text);
162
+ timeText = timeLinkMatch ? timeLinkMatch[1].trim() : '';
163
+ try {
164
+ const href = await browser.getHref(timeRef);
165
+ if (href) {
166
+ tweetUrl = href.startsWith('http')
167
+ ? href
168
+ : `https://x.com${href.startsWith('/') ? '' : '/'}${href}`;
169
+ }
170
+ }
171
+ catch {
172
+ // Non-fatal — we still have the snippet
173
+ }
174
+ }
175
+ else {
176
+ // No time-link matched — try all links in the article for a permalink
177
+ const allLinks = findRefs(article.text, 'link');
178
+ for (const linkRef of allLinks.slice(0, 5)) {
179
+ try {
180
+ const href = await browser.getHref(linkRef);
181
+ if (href && /\/status\/\d+/.test(href)) {
182
+ tweetUrl = href.startsWith('http')
183
+ ? href
184
+ : `https://x.com${href.startsWith('/') ? '' : '/'}${href}`;
185
+ // Extract time text from this link's label
186
+ const labelMatch = new RegExp(`\\[${linkRef}\\]\\s+link:\\s*(.+)`).exec(article.text);
187
+ timeText = labelMatch ? labelMatch[1].trim() : '';
188
+ break;
189
+ }
190
+ }
191
+ catch { /* try next */ }
192
+ }
193
+ }
194
+ // Dedup (enhanced mode only)
195
+ const preKey = enhanced ? computePreKey({ snippet, time: timeText }) : '';
196
+ const alreadySeen = enhanced ? hasPreKey('x', handle, preKey) : false;
197
+ // Product routing (enhanced mode only)
198
+ const product = enhanced ? detectProduct(snippet, config.products) : null;
60
199
  candidates.push({
61
200
  index: candidates.length + 1,
62
201
  snippet,
63
202
  timeText,
203
+ tweetUrl,
64
204
  preKey,
65
205
  productMatch: product?.name ?? null,
66
206
  alreadySeen,
@@ -68,18 +208,49 @@ async function execute(input, _ctx) {
68
208
  }
69
209
  // ── Format output ──────────────────────────────────────────────────
70
210
  if (candidates.length === 0) {
71
- return { output: `No candidate posts found for query: "${query}"` };
211
+ // Include diagnostic info show first article block so we can debug the parser
212
+ let diag;
213
+ if (articles.length === 0) {
214
+ diag = `No article blocks found in AX tree (${treeLen} chars). Tree preview:\n${tree.slice(0, 800)}`;
215
+ }
216
+ else {
217
+ const sample = articles[0].text.slice(0, 600);
218
+ diag = `Found ${articles.length} article blocks but extracted 0 candidates.\nFirst article AX dump:\n${sample}`;
219
+ }
220
+ return {
221
+ output: `No candidate posts found for query: "${query}"\n\n` +
222
+ 'Tell the user: "No X posts found for this query. Try a different keyword or check back later."\n' +
223
+ 'Do NOT use WebSearch or WebFetch as a fallback — they cannot access X.com content.\n' +
224
+ 'Do NOT fabricate or invent X post links.\n\n' +
225
+ `[debug] ${diag}`,
226
+ };
72
227
  }
73
228
  const lines = candidates.map((c) => {
74
- const seen = c.alreadySeen ? ' [SEEN]' : '';
75
- const product = c.productMatch ? ` | product: ${c.productMatch}` : ' | product: none';
76
- return (`${c.index}. ${c.snippet.slice(0, 200)}\n` +
77
- ` time: ${c.timeText} | pre_key: ${c.preKey}${product}${seen}`);
229
+ const url = c.tweetUrl ? `\n url: ${c.tweetUrl}` : '';
230
+ if (enhanced) {
231
+ const seen = c.alreadySeen ? ' [SEEN]' : '';
232
+ const product = c.productMatch ? ` | product: ${c.productMatch}` : ' | product: none';
233
+ return (`${c.index}. ${c.snippet.slice(0, 200)}${url}\n` +
234
+ ` time: ${c.timeText} | pre_key: ${c.preKey}${product}${seen}`);
235
+ }
236
+ // Basic mode: simpler output
237
+ return (`${c.index}. ${c.snippet.slice(0, 200)}${url}\n` +
238
+ ` time: ${c.timeText}`);
78
239
  });
79
- return {
80
- output: `SearchX results for "${query}" (${candidates.length} candidates):\n\n` +
81
- lines.join('\n\n'),
82
- };
240
+ const header = isNotifications
241
+ ? `X Notifications (${candidates.length} items):`
242
+ : `SearchX results for "${query}" (${candidates.length} candidates):`;
243
+ let output = `${header}\n\n${lines.join('\n\n')}`;
244
+ // Explicit instructions to prevent model from hallucinating additional posts
245
+ output += '\n\n---\n';
246
+ output += 'IMPORTANT: The posts above are the ONLY real X posts found. ';
247
+ output += 'Present ONLY these posts to the user. Do NOT fabricate additional posts. ';
248
+ output += 'Do NOT use WebSearch or WebFetch to find X posts — they cannot access X.com content. ';
249
+ output += 'If the user wants more, suggest refining the search query.';
250
+ if (!enhanced) {
251
+ output += '\nTip: Run `franklin social setup` to enable product routing, dedup, and auto-replies.';
252
+ }
253
+ return { output };
83
254
  }
84
255
  catch (err) {
85
256
  const msg = err instanceof Error ? err.message : String(err);
@@ -92,18 +263,24 @@ async function execute(input, _ctx) {
92
263
  export const searchXCapability = {
93
264
  spec: {
94
265
  name: 'SearchX',
95
- description: 'Search X (Twitter) for posts matching a query. Returns candidate posts ' +
96
- 'with snippets and product relevance scores. Requires social config and X login.',
266
+ description: 'The ONLY tool that can access X (Twitter). Returns real posts with URLs. ' +
267
+ 'Use mode "search" to find posts by keyword. Use mode "notifications" to check mentions/replies. ' +
268
+ 'Call ONCE per topic — do not retry. WebSearch/WebFetch CANNOT access X.com.',
97
269
  input_schema: {
98
270
  type: 'object',
99
271
  properties: {
100
- query: { type: 'string', description: 'Search query' },
272
+ query: { type: 'string', description: 'Search query (required for search mode, optional for notifications mode)' },
101
273
  max_results: {
102
274
  type: 'number',
103
275
  description: 'Max posts to return (default 10)',
104
276
  },
277
+ mode: {
278
+ type: 'string',
279
+ enum: ['search', 'notifications'],
280
+ description: 'Mode: "search" to find posts by keyword, "notifications" to check your mentions/replies/interactions that need response. Default: search',
281
+ },
105
282
  },
106
- required: ['query'],
283
+ required: [],
107
284
  },
108
285
  },
109
286
  execute,
@@ -99,12 +99,46 @@ export function createSubAgentCapability(apiUrl, chain, capabilities) {
99
99
  return {
100
100
  spec: {
101
101
  name: 'Agent',
102
- description: 'Launch a sub-agent for independent tasks. The sub-agent has its own context and tools.',
102
+ description: `Launch a new agent to handle complex, multi-step tasks. Each agent gets its own context window, tools, and reasoning loop.
103
+
104
+ ## When to use
105
+ - Tasks requiring 3+ independent tool calls (research, exploration, implementation)
106
+ - Work that benefits from a separate context (won't pollute your main conversation)
107
+ - Parallel execution: launch multiple agents in a single response for independent tasks
108
+ - Open-ended codebase exploration that may require multiple rounds of globbing and grepping
109
+
110
+ ## When NOT to use
111
+ - If you want to read a specific file path, use Read directly — faster and cheaper
112
+ - If you are searching for a specific symbol like "class Foo", use Grep directly
113
+ - If you are searching within 2-3 specific files, use Read directly
114
+ - Simple, single-tool operations (just call the tool directly)
115
+ - Tasks that depend on results from other pending tool calls
116
+
117
+ ## Writing the prompt
118
+ Brief the agent like a smart colleague who just walked into the room — it hasn't seen this conversation, doesn't know what you've tried, doesn't understand why this task matters.
119
+ - Explain what you're trying to accomplish and why
120
+ - Describe what you've already learned or ruled out
121
+ - Give enough context about the surrounding problem that the agent can make judgment calls rather than just following a narrow instruction
122
+ - If you need a short response, say so ("report in under 200 words")
123
+ - For lookups: hand over the exact command. For investigations: hand over the question — prescribed steps become dead weight when the premise is wrong
124
+ - Clearly tell the agent whether you expect it to write code or just to do research (search, file reads, web fetches), since it is not aware of the user's intent
125
+
126
+ Terse command-style prompts produce shallow, generic work.
127
+
128
+ **Never delegate understanding.** Don't write "based on your findings, fix the bug" or "based on the research, implement it." Those phrases push synthesis onto the agent instead of doing it yourself. Write prompts that prove you understood: include file paths, line numbers, what specifically to change.
129
+
130
+ ## Usage notes
131
+ - Always include a short description (3-5 words) summarizing what the agent will do
132
+ - The agent's result is returned to you, NOT shown to the user. To show the user the result, you must send a text message summarizing it
133
+ - Trust but verify: the agent's summary describes what it intended, not necessarily what it did. When an agent writes or edits code, check the actual changes before reporting success
134
+ - If launching multiple agents for independent work, send them ALL in a single response with multiple Agent tool calls — this runs them in parallel
135
+ - Use foreground (default) when you need results before you can proceed. The agent completes before your response continues
136
+ - Do not re-read files or re-search for things the agent already found — trust its output`,
103
137
  input_schema: {
104
138
  type: 'object',
105
139
  properties: {
106
- prompt: { type: 'string', description: 'The task for the sub-agent to perform' },
107
- description: { type: 'string', description: 'Short description of what the sub-agent will do' },
140
+ prompt: { type: 'string', description: 'The task for the sub-agent to perform. Must be self-contained — the agent has no memory of your conversation.' },
141
+ description: { type: 'string', description: 'Short (3-5 word) description of the task (e.g. "Research auth patterns", "Fix import errors")' },
108
142
  model: { type: 'string', description: 'Model for the sub-agent. Default: claude-sonnet-4.6' },
109
143
  },
110
144
  required: ['prompt'],
@@ -5,7 +5,7 @@
5
5
  const tasks = [];
6
6
  let nextId = 1;
7
7
  async function execute(input, _ctx) {
8
- const { action, subject, description, task_id, status } = input;
8
+ const { action, subject, description, activeForm, task_id, status, addBlocks, addBlockedBy } = input;
9
9
  switch (action) {
10
10
  case 'create': {
11
11
  if (!subject) {
@@ -16,6 +16,9 @@ async function execute(input, _ctx) {
16
16
  subject,
17
17
  status: 'pending',
18
18
  description,
19
+ activeForm,
20
+ blocks: [],
21
+ blockedBy: [],
19
22
  };
20
23
  tasks.push(task);
21
24
  return { output: `Task #${task.id} created: ${task.subject}` };
@@ -34,7 +37,28 @@ async function execute(input, _ctx) {
34
37
  task.subject = subject;
35
38
  if (description)
36
39
  task.description = description;
37
- return { output: `Task #${task.id} updated: ${task.status} — ${task.subject}` };
40
+ if (activeForm)
41
+ task.activeForm = activeForm;
42
+ // Dependency management
43
+ if (addBlocks) {
44
+ for (const blockedId of addBlocks) {
45
+ if (!task.blocks.includes(blockedId))
46
+ task.blocks.push(blockedId);
47
+ const blocked = tasks.find(t => t.id === blockedId);
48
+ if (blocked && !blocked.blockedBy.includes(task.id))
49
+ blocked.blockedBy.push(task.id);
50
+ }
51
+ }
52
+ if (addBlockedBy) {
53
+ for (const blockerId of addBlockedBy) {
54
+ if (!task.blockedBy.includes(blockerId))
55
+ task.blockedBy.push(blockerId);
56
+ const blocker = tasks.find(t => t.id === blockerId);
57
+ if (blocker && !blocker.blocks.includes(task.id))
58
+ blocker.blocks.push(task.id);
59
+ }
60
+ }
61
+ return { output: `Updated task #${task.id} status` };
38
62
  }
39
63
  case 'list': {
40
64
  if (tasks.length === 0) {
@@ -44,7 +68,10 @@ async function execute(input, _ctx) {
44
68
  const done = tasks.filter(t => t.status === 'completed').length;
45
69
  const lines = tasks.map(t => {
46
70
  const icon = t.status === 'completed' ? '✓' : t.status === 'in_progress' ? '→' : '○';
47
- return `${icon} #${t.id} [${t.status}] ${t.subject}`;
71
+ const deps = t.blockedBy.length > 0
72
+ ? ` (blocked by: ${t.blockedBy.map(id => `#${id}`).join(', ')})`
73
+ : '';
74
+ return `${icon} #${t.id} [${t.status}] ${t.subject}${deps}`;
48
75
  });
49
76
  lines.push(`\n${done} done, ${pending} remaining`);
50
77
  return { output: lines.join('\n') };
@@ -67,7 +94,7 @@ async function execute(input, _ctx) {
67
94
  export const taskCapability = {
68
95
  spec: {
69
96
  name: 'Task',
70
- description: 'Manage in-session tasks. Actions: create, update (status/subject), list (with summary), delete.',
97
+ description: 'Track multi-step work within a session. Use for complex tasks with 3+ steps to maintain progress. Do NOT use for simple single-step requests. Actions: create, update (status/subject), list, delete. Tasks are ephemeral — they reset when the session ends.',
71
98
  input_schema: {
72
99
  type: 'object',
73
100
  properties: {
@@ -75,13 +102,22 @@ export const taskCapability = {
75
102
  type: 'string',
76
103
  description: 'Action: "create", "update", "list", or "delete"',
77
104
  },
78
- subject: { type: 'string', description: 'Task title (for create/update)' },
79
- description: { type: 'string', description: 'Task description (for create/update)' },
80
- task_id: { type: 'number', description: 'Task ID (for update)' },
105
+ subject: { type: 'string', description: 'A brief title for the task (for create/update)' },
106
+ description: { type: 'string', description: 'What needs to be done (for create/update)' },
107
+ activeForm: { type: 'string', description: 'Present continuous form shown in spinner when in_progress (e.g., "Running tests", "Fixing bug"). If omitted, the subject is shown instead.' },
108
+ task_id: { type: 'number', description: 'Task ID (for update/delete)' },
81
109
  status: {
82
110
  type: 'string',
83
111
  description: 'New status: "pending", "in_progress", or "completed" (for update)',
84
112
  },
113
+ addBlocks: {
114
+ type: 'array',
115
+ description: 'Task IDs that cannot start until this task completes (for update)',
116
+ },
117
+ addBlockedBy: {
118
+ type: 'array',
119
+ description: 'Task IDs that must complete before this task can start (for update)',
120
+ },
85
121
  },
86
122
  required: ['action'],
87
123
  },
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Tool description validation — catches descriptions that discourage the LLM
3
+ * from using tools that actually work (like SearchX's old "Requires social config").
4
+ */
5
+ import type { CapabilityHandler } from '../agent/types.js';
6
+ export interface ToolValidationIssue {
7
+ toolName: string;
8
+ issue: string;
9
+ severity: 'warning' | 'error';
10
+ }
11
+ export declare function validateToolDescriptions(tools: CapabilityHandler[]): ToolValidationIssue[];
@@ -0,0 +1,42 @@
1
+ /**
2
+ * Tool description validation — catches descriptions that discourage the LLM
3
+ * from using tools that actually work (like SearchX's old "Requires social config").
4
+ */
5
+ // Patterns in tool descriptions that make LLMs avoid using the tool
6
+ const BLOCKER_PATTERNS = [
7
+ /\brequires?\b.*\b(?:config|setup|login|install|key|token|credential)\b/i,
8
+ /\bmust\s+(?:configure|set\s*up|install|login)\b/i,
9
+ /\bneeds?\s+(?:configuration|setup|api\s*key)\b/i,
10
+ ];
11
+ export function validateToolDescriptions(tools) {
12
+ const issues = [];
13
+ const names = new Set();
14
+ for (const tool of tools) {
15
+ const name = tool.spec.name;
16
+ const desc = tool.spec.description;
17
+ // Duplicate names
18
+ if (names.has(name)) {
19
+ issues.push({ toolName: name, issue: 'Duplicate tool name — LLM will confuse them', severity: 'error' });
20
+ }
21
+ names.add(name);
22
+ // Description length
23
+ if (desc.length < 20) {
24
+ issues.push({ toolName: name, issue: `Description too short (${desc.length} chars) — LLM may not understand when to use this tool`, severity: 'warning' });
25
+ }
26
+ if (desc.length > 3000) {
27
+ issues.push({ toolName: name, issue: `Description too long (${desc.length} chars) — wastes context window`, severity: 'warning' });
28
+ }
29
+ // Blocker patterns — phrases that make the LLM think the tool won't work
30
+ for (const pattern of BLOCKER_PATTERNS) {
31
+ if (pattern.test(desc)) {
32
+ issues.push({
33
+ toolName: name,
34
+ issue: `Description contains blocking language: "${desc.match(pattern)?.[0]}" — LLM may avoid using this tool even when it would work`,
35
+ severity: 'warning',
36
+ });
37
+ break; // One warning per tool is enough
38
+ }
39
+ }
40
+ }
41
+ return issues;
42
+ }
@@ -3,6 +3,8 @@
3
3
  */
4
4
  import { USER_AGENT } from '../config.js';
5
5
  const MAX_BODY_BYTES = 256 * 1024; // 256KB
6
+ const DEFAULT_MAX_LENGTH = 12_288;
7
+ const HTML_READ_AHEAD_BYTES = 8_192;
6
8
  // ─── Session cache ──────────────────────────────────────────────────────────
7
9
  // Avoids re-fetching the same URL within a session (common in research tasks).
8
10
  // 15-min TTL, max 50 entries.
@@ -48,7 +50,7 @@ async function execute(input, ctx) {
48
50
  if (!['http:', 'https:'].includes(parsed.protocol)) {
49
51
  return { output: `Error: only http/https URLs are supported`, isError: true };
50
52
  }
51
- const maxLen = Math.min(max_length ?? MAX_BODY_BYTES, MAX_BODY_BYTES);
53
+ const maxLen = Math.min(max_length ?? DEFAULT_MAX_LENGTH, MAX_BODY_BYTES);
52
54
  const key = cacheKey(url, maxLen);
53
55
  // Check cache first
54
56
  const cached = getCached(key);
@@ -82,8 +84,11 @@ async function execute(input, ctx) {
82
84
  }
83
85
  const chunks = [];
84
86
  let totalBytes = 0;
87
+ const readBudget = contentType.includes('html')
88
+ ? Math.min(maxLen + HTML_READ_AHEAD_BYTES, MAX_BODY_BYTES)
89
+ : maxLen;
85
90
  try {
86
- while (totalBytes < maxLen) {
91
+ while (totalBytes < readBudget) {
87
92
  const { done, value } = await reader.read();
88
93
  if (done)
89
94
  break;
@@ -95,20 +100,24 @@ async function execute(input, ctx) {
95
100
  reader.releaseLock();
96
101
  }
97
102
  const decoder = new TextDecoder();
98
- let body = decoder.decode(Buffer.concat(chunks)).slice(0, maxLen);
103
+ const rawBody = decoder.decode(Buffer.concat(chunks));
104
+ let body = rawBody;
99
105
  // Format response based on content type
100
106
  if (contentType.includes('json')) {
101
107
  try {
102
- const parsedJson = JSON.parse(body);
108
+ const parsedJson = JSON.parse(rawBody.slice(0, maxLen));
103
109
  body = JSON.stringify(parsedJson, null, 2).slice(0, maxLen);
104
110
  }
105
111
  catch { /* leave as-is if not valid JSON */ }
106
112
  }
107
113
  else if (contentType.includes('html')) {
108
- body = stripHtml(body);
114
+ body = stripHtml(rawBody).slice(0, maxLen);
115
+ }
116
+ else {
117
+ body = rawBody.slice(0, maxLen);
109
118
  }
110
119
  let output = `URL: ${url}\nStatus: ${response.status}\nContent-Type: ${contentType}\n\n${body}`;
111
- if (totalBytes >= maxLen) {
120
+ if (totalBytes >= readBudget || rawBody.length > maxLen) {
112
121
  output += '\n\n... (content truncated)';
113
122
  }
114
123
  // Cache successful responses
@@ -141,11 +150,13 @@ function stripHtml(html) {
141
150
  .replace(/<aside[^>]*>[\s\S]*?<\/aside>/gi, '')
142
151
  .replace(/<noscript[^>]*>[\s\S]*?<\/noscript>/gi, '')
143
152
  .replace(/<svg[^>]*>[\s\S]*?<\/svg>/gi, '')
153
+ .replace(/<(path|g|defs|clipPath|symbol|use|mask|rect|circle|ellipse|polygon|polyline|line)\b[^>]*>/gi, ' ')
144
154
  .replace(/<form[^>]*>[\s\S]*?<\/form>/gi, '')
145
155
  // Convert block elements to newlines for readability
146
156
  .replace(/<\/?(p|div|h[1-6]|li|br|tr)[^>]*>/gi, '\n')
147
157
  // Strip remaining tags
148
158
  .replace(/<[^>]+>/g, ' ')
159
+ .replace(/<[^>\n]*$/g, '')
149
160
  // Decode entities
150
161
  .replace(/&nbsp;/g, ' ')
151
162
  .replace(/&amp;/g, '&')
@@ -161,7 +172,7 @@ function stripHtml(html) {
161
172
  export const webFetchCapability = {
162
173
  spec: {
163
174
  name: 'WebFetch',
164
- description: 'Fetch a web page and return its content. HTML tags are stripped for readability. Results are cached for 15 minutes.',
175
+ description: 'Fetch a web page and return its content as text. For searching the web, use WebSearch instead. Cannot access X.com (use SearchX). Large pages are truncated. Prefer WebSearch for discovery, WebFetch for reading a specific known URL.',
165
176
  input_schema: {
166
177
  type: 'object',
167
178
  properties: {