clodds 1.6.13 → 1.6.15

This diff shows the changes between two publicly released versions of this package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
@@ -16410,12 +16410,21 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16410
16410
  }
16411
16411
  return createMessageStreamed(params);
16412
16412
  };
16413
+ // Detect tool/skill hints early — reused for skill budget + tool preloading
16414
+ const hints = processedMessage.text ? (0, tool_registry_js_1.detectToolHints)(processedMessage.text) : { platforms: [], categories: [], hasIntent: false };
16413
16415
  // Build final system prompt (Clawdbot-style)
16414
16416
  // Priority: routed agent prompt > default system prompt
16415
- const skillContext = skills.getSkillContextForMessage(processedMessage.text || '');
16416
- const baseSystemPrompt = SYSTEM_PROMPT.replace('{{SKILLS}}', skillContext ? `\n## Skills Reference\n${skillContext}` : '');
16417
- let finalSystemPrompt = session.context.routedAgentPrompt || baseSystemPrompt;
16418
- // Add memory context if available
16417
+ const skillContext = skills.getSkillContextForMessage(processedMessage.text || '', hints, messages.length);
16418
+ // Split system prompt into cacheable blocks for prompt caching.
16419
+ // Block 1 (cached): Base system prompt without skills — stable across messages.
16420
+ // Block 2 (uncached): Skills + memory changes per query.
16421
+ const coreSystemPrompt = session.context.routedAgentPrompt
16422
+ || SYSTEM_PROMPT.replace('{{SKILLS}}', '');
16423
+ // Build dynamic context (skills + memory) — changes every query, not cached
16424
+ let dynamicContext = '';
16425
+ if (skillContext) {
16426
+ dynamicContext += `\n## Skills Reference\n${skillContext}`;
16427
+ }
16419
16428
  if (memory) {
16420
16429
  const memoryAuto = config.memory?.auto || {};
16421
16430
  const channelKey = processedMessage.chatId || processedMessage.platform;
@@ -16423,7 +16432,7 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16423
16432
  if (memoryAuto.includeMemoryContext !== false) {
16424
16433
  const memoryContext = memory.buildContextString(session.userId, scope);
16425
16434
  if (memoryContext) {
16426
- finalSystemPrompt += `\n\n## User Memory\n${memoryContext}`;
16435
+ dynamicContext += `\n\n## User Memory\n${memoryContext}`;
16427
16436
  }
16428
16437
  }
16429
16438
  const semanticTopK = memoryAuto.semanticSearchTopK ?? (process.env.CLODDS_MEMORY_SEARCH === '1'
@@ -16434,7 +16443,7 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16434
16443
  const results = await memory.semanticSearch(session.userId, scope, processedMessage.text, semanticTopK);
16435
16444
  if (results.length > 0) {
16436
16445
  const lines = results.map((r) => `- ${r.entry.key}: ${r.entry.value} (score ${r.score.toFixed(2)})`);
16437
- finalSystemPrompt += `\n\n## Relevant Memory (semantic search)\n${lines.join('\n')}`;
16446
+ dynamicContext += `\n\n## Relevant Memory (semantic search)\n${lines.join('\n')}`;
16438
16447
  }
16439
16448
  }
16440
16449
  catch (error) {
@@ -16442,6 +16451,8 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16442
16451
  }
16443
16452
  }
16444
16453
  }
16454
+ // Backward-compatible string for hooks
16455
+ let finalSystemPrompt = coreSystemPrompt + dynamicContext;
16445
16456
  // =========================================================================
16446
16457
  // HOOKS: agent:before_start - Can modify system prompt
16447
16458
  // =========================================================================
@@ -16461,6 +16472,22 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16461
16472
  if (agentStartResult?.prependContext) {
16462
16473
  finalSystemPrompt = `${agentStartResult.prependContext}\n\n${finalSystemPrompt}`;
16463
16474
  }
16475
+ const hookModified = !!(agentStartResult?.systemPrompt || agentStartResult?.prependContext);
16476
+ let systemBlocks;
16477
+ if (hookModified) {
16478
+ // Hooks changed the prompt — use as single block, still cache it
16479
+ systemBlocks = [{ type: 'text', text: finalSystemPrompt, cache_control: { type: 'ephemeral' } }];
16480
+ }
16481
+ else {
16482
+ systemBlocks = [
16483
+ // Block 1: Core system prompt (stable — cached)
16484
+ { type: 'text', text: coreSystemPrompt, cache_control: { type: 'ephemeral' } },
16485
+ ];
16486
+ // Block 2: Dynamic context (changes per query — not cached)
16487
+ if (dynamicContext) {
16488
+ systemBlocks.push({ type: 'text', text: dynamicContext });
16489
+ }
16490
+ }
16464
16491
  // =========================================================================
16465
16492
  // CONTEXT MANAGEMENT - Check token usage and compact if needed
16466
16493
  // =========================================================================
@@ -16501,12 +16528,12 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16501
16528
  let lastKnownInputTokens = 0;
16502
16529
  // Dynamic tool loading: tools discovered via tool_search during this request
16503
16530
  const discoveredTools = [];
16531
+ const MAX_DISCOVERED_TOOLS = 50; // Hard cap on all discovered tools (preload + tool_search)
16504
16532
  // Preload platform/category tools based on user message keywords.
16505
16533
  // Uses intersection mode when both platform AND intent are detected
16506
16534
  // to avoid loading all tools from multiple platforms (~150+).
16507
16535
  // Also checks conversation context for platform hints in multi-turn chats.
16508
16536
  if (TOOL_SEARCH_ENABLED && processedMessage.text) {
16509
- const hints = (0, tool_registry_js_1.detectToolHints)(processedMessage.text);
16510
16537
  // CONVERSATION CONTEXT: If no platform in current message, borrow from recent history.
16511
16538
  // "buy YES at 40 cents" after discussing polymarket → still loads polymarket tools.
16512
16539
  if (hints.platforms.length === 0 && messages.length > 1) {
@@ -16666,10 +16693,14 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16666
16693
  let response;
16667
16694
  try {
16668
16695
  const activeTools = getActiveTools();
16696
+ // Add cache_control to last tool for tool definition caching
16697
+ if (activeTools.length > 0) {
16698
+ activeTools[activeTools.length - 1].cache_control = { type: 'ephemeral' };
16699
+ }
16669
16700
  response = await createMessage({
16670
16701
  model: modelId,
16671
16702
  max_tokens: 1024,
16672
- system: finalSystemPrompt,
16703
+ system: systemBlocks,
16673
16704
  tools: activeTools,
16674
16705
  messages,
16675
16706
  });
@@ -16686,10 +16717,26 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16686
16717
  // Use actual API token count for accurate context tracking
16687
16718
  if (response.usage) {
16688
16719
  lastKnownInputTokens = response.usage.input_tokens;
16689
- logger_1.logger.info({ inputTokens: lastKnownInputTokens, max: modelContextWindow }, 'Actual API token usage');
16690
- }
16691
- // Tool use loop
16692
- while (response.stop_reason === 'tool_use') {
16720
+ // Track prompt cache performance
16721
+ const usage = response.usage;
16722
+ const cacheCreation = usage.cache_creation_input_tokens ?? 0;
16723
+ const cacheRead = usage.cache_read_input_tokens ?? 0;
16724
+ const cacheHitRate = cacheRead > 0
16725
+ ? (cacheRead / (cacheRead + lastKnownInputTokens)) * 100
16726
+ : 0;
16727
+ logger_1.logger.info({
16728
+ inputTokens: lastKnownInputTokens,
16729
+ max: modelContextWindow,
16730
+ cacheCreation,
16731
+ cacheRead,
16732
+ cacheHitRate: `${cacheHitRate.toFixed(1)}%`,
16733
+ }, 'API token usage (with cache stats)');
16734
+ }
16735
+ // Tool use loop — capped to prevent runaway token costs
16736
+ const MAX_TOOL_TURNS = 10;
16737
+ let toolTurnCount = 0;
16738
+ while (response.stop_reason === 'tool_use' && toolTurnCount < MAX_TOOL_TURNS) {
16739
+ toolTurnCount++;
16693
16740
  const assistantContent = response.content;
16694
16741
  messages.push({ role: 'assistant', content: assistantContent });
16695
16742
  const toolResults = [];
@@ -16766,11 +16813,14 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16766
16813
  }
16767
16814
  // Take top 25 results
16768
16815
  const topResults = searchResults.slice(0, 25);
16769
- // Store discovered tools for next API call (dedupe)
16816
+ // Store discovered tools for next API call (dedupe, respect global cap)
16770
16817
  const alreadyDiscovered = new Set(discoveredTools.map(t => t.name));
16771
16818
  for (const t of topResults) {
16819
+ if (discoveredTools.length >= MAX_DISCOVERED_TOOLS)
16820
+ break;
16772
16821
  if (!alreadyDiscovered.has(t.name)) {
16773
16822
  discoveredTools.push(t);
16823
+ alreadyDiscovered.add(t.name);
16774
16824
  }
16775
16825
  }
16776
16826
  result = JSON.stringify({
@@ -16811,10 +16861,18 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16811
16861
  toolResult: result,
16812
16862
  },
16813
16863
  });
16864
+ // Truncate oversized tool results to prevent token bloat
16865
+ // 16K chars ≈ 4K tokens — enough for useful data, prevents runaway costs
16866
+ const MAX_TOOL_RESULT_CHARS = 16384;
16867
+ let truncatedResult = result;
16868
+ if (typeof truncatedResult === 'string' && truncatedResult.length > MAX_TOOL_RESULT_CHARS) {
16869
+ truncatedResult = truncatedResult.slice(0, MAX_TOOL_RESULT_CHARS) + '\n...[truncated, result too large]';
16870
+ logger_1.logger.info({ tool: block.name, originalLen: result.length, truncatedTo: MAX_TOOL_RESULT_CHARS }, 'Truncated large tool result');
16871
+ }
16814
16872
  toolResults.push({
16815
16873
  type: 'tool_result',
16816
16874
  tool_use_id: block.id,
16817
- content: result,
16875
+ content: truncatedResult,
16818
16876
  });
16819
16877
  }
16820
16878
  }
@@ -16848,10 +16906,14 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16848
16906
  }
16849
16907
  try {
16850
16908
  const activeTools = getActiveTools();
16909
+ // Add cache_control to last tool for tool definition caching
16910
+ if (activeTools.length > 0) {
16911
+ activeTools[activeTools.length - 1].cache_control = { type: 'ephemeral' };
16912
+ }
16851
16913
  response = await createMessage({
16852
16914
  model: modelId,
16853
16915
  max_tokens: 1024,
16854
- system: finalSystemPrompt,
16916
+ system: systemBlocks,
16855
16917
  tools: activeTools,
16856
16918
  messages,
16857
16919
  });
@@ -16867,6 +16929,12 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16867
16929
  // Update actual token usage after each API call
16868
16930
  if (response.usage) {
16869
16931
  lastKnownInputTokens = response.usage.input_tokens;
16932
+ const loopUsage = response.usage;
16933
+ logger_1.logger.debug({
16934
+ inputTokens: lastKnownInputTokens,
16935
+ cacheRead: loopUsage.cache_read_input_tokens ?? 0,
16936
+ turn: toolTurnCount,
16937
+ }, 'Tool loop token usage');
16870
16938
  // If actual usage is approaching limit, force compaction next iteration
16871
16939
  if (lastKnownInputTokens > modelContextWindow * 0.85) {
16872
16940
  logger_1.logger.info({ inputTokens: lastKnownInputTokens }, 'API reports high token usage, will compact');
@@ -16884,6 +16952,9 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16884
16952
  }
16885
16953
  }
16886
16954
  }
16955
+ if (toolTurnCount >= MAX_TOOL_TURNS) {
16956
+ logger_1.logger.warn({ toolTurnCount }, 'Tool loop hit max turns cap');
16957
+ }
16887
16958
  // Extract text response
16888
16959
  const responseText = extractResponseText(response);
16889
16960
  // Save assistant response to history