clodds 1.6.14 → 1.6.16

This diff shows the changes between publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
@@ -16410,12 +16410,21 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16410
16410
  }
16411
16411
  return createMessageStreamed(params);
16412
16412
  };
16413
+ // Detect tool/skill hints early — reused for skill budget + tool preloading
16414
+ const hints = processedMessage.text ? (0, tool_registry_js_1.detectToolHints)(processedMessage.text) : { platforms: [], categories: [], hasIntent: false };
16413
16415
  // Build final system prompt (Clawdbot-style)
16414
16416
  // Priority: routed agent prompt > default system prompt
16415
- const skillContext = skills.getSkillContextForMessage(processedMessage.text || '');
16416
- const baseSystemPrompt = SYSTEM_PROMPT.replace('{{SKILLS}}', skillContext ? `\n## Skills Reference\n${skillContext}` : '');
16417
- let finalSystemPrompt = session.context.routedAgentPrompt || baseSystemPrompt;
16418
- // Add memory context if available
16417
+ const skillContext = skills.getSkillContextForMessage(processedMessage.text || '', hints, messages.length);
16418
+ // Split system prompt into cacheable blocks for prompt caching.
16419
+ // Block 1 (cached): Base system prompt without skills — stable across messages.
16420
+ // Block 2 (uncached): Skills + memory changes per query.
16421
+ const coreSystemPrompt = session.context.routedAgentPrompt
16422
+ || SYSTEM_PROMPT.replace('{{SKILLS}}', '');
16423
+ // Build dynamic context (skills + memory) — changes every query, not cached
16424
+ let dynamicContext = '';
16425
+ if (skillContext) {
16426
+ dynamicContext += `\n## Skills Reference\n${skillContext}`;
16427
+ }
16419
16428
  if (memory) {
16420
16429
  const memoryAuto = config.memory?.auto || {};
16421
16430
  const channelKey = processedMessage.chatId || processedMessage.platform;
@@ -16423,7 +16432,7 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16423
16432
  if (memoryAuto.includeMemoryContext !== false) {
16424
16433
  const memoryContext = memory.buildContextString(session.userId, scope);
16425
16434
  if (memoryContext) {
16426
- finalSystemPrompt += `\n\n## User Memory\n${memoryContext}`;
16435
+ dynamicContext += `\n\n## User Memory\n${memoryContext}`;
16427
16436
  }
16428
16437
  }
16429
16438
  const semanticTopK = memoryAuto.semanticSearchTopK ?? (process.env.CLODDS_MEMORY_SEARCH === '1'
@@ -16434,7 +16443,7 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16434
16443
  const results = await memory.semanticSearch(session.userId, scope, processedMessage.text, semanticTopK);
16435
16444
  if (results.length > 0) {
16436
16445
  const lines = results.map((r) => `- ${r.entry.key}: ${r.entry.value} (score ${r.score.toFixed(2)})`);
16437
- finalSystemPrompt += `\n\n## Relevant Memory (semantic search)\n${lines.join('\n')}`;
16446
+ dynamicContext += `\n\n## Relevant Memory (semantic search)\n${lines.join('\n')}`;
16438
16447
  }
16439
16448
  }
16440
16449
  catch (error) {
@@ -16442,6 +16451,8 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16442
16451
  }
16443
16452
  }
16444
16453
  }
16454
+ // Backward-compatible string for hooks
16455
+ let finalSystemPrompt = coreSystemPrompt + dynamicContext;
16445
16456
  // =========================================================================
16446
16457
  // HOOKS: agent:before_start - Can modify system prompt
16447
16458
  // =========================================================================
@@ -16461,6 +16472,22 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16461
16472
  if (agentStartResult?.prependContext) {
16462
16473
  finalSystemPrompt = `${agentStartResult.prependContext}\n\n${finalSystemPrompt}`;
16463
16474
  }
16475
+ const hookModified = !!(agentStartResult?.systemPrompt || agentStartResult?.prependContext);
16476
+ let systemBlocks;
16477
+ if (hookModified) {
16478
+ // Hooks changed the prompt — use as single block, still cache it
16479
+ systemBlocks = [{ type: 'text', text: finalSystemPrompt, cache_control: { type: 'ephemeral' } }];
16480
+ }
16481
+ else {
16482
+ systemBlocks = [
16483
+ // Block 1: Core system prompt (stable — cached)
16484
+ { type: 'text', text: coreSystemPrompt, cache_control: { type: 'ephemeral' } },
16485
+ ];
16486
+ // Block 2: Dynamic context (changes per query — not cached)
16487
+ if (dynamicContext) {
16488
+ systemBlocks.push({ type: 'text', text: dynamicContext });
16489
+ }
16490
+ }
16464
16491
  // =========================================================================
16465
16492
  // CONTEXT MANAGEMENT - Check token usage and compact if needed
16466
16493
  // =========================================================================
@@ -16507,7 +16534,6 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16507
16534
  // to avoid loading all tools from multiple platforms (~150+).
16508
16535
  // Also checks conversation context for platform hints in multi-turn chats.
16509
16536
  if (TOOL_SEARCH_ENABLED && processedMessage.text) {
16510
- const hints = (0, tool_registry_js_1.detectToolHints)(processedMessage.text);
16511
16537
  // CONVERSATION CONTEXT: If no platform in current message, borrow from recent history.
16512
16538
  // "buy YES at 40 cents" after discussing polymarket → still loads polymarket tools.
16513
16539
  if (hints.platforms.length === 0 && messages.length > 1) {
@@ -16664,14 +16690,20 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16664
16690
  const extra = discoveredTools.filter(t => !seen.has(t.name));
16665
16691
  return [...tools, ...extra];
16666
16692
  };
16693
+ // Strip internal metadata before sending to API — Anthropic rejects extra fields
16694
+ const toApiTools = (defs) => defs.map(({ metadata: _, ...rest }) => rest);
16667
16695
  let response;
16668
16696
  try {
16669
- const activeTools = getActiveTools();
16697
+ const apiTools = toApiTools(getActiveTools());
16698
+ // Add cache_control to last tool for tool definition caching
16699
+ if (apiTools.length > 0) {
16700
+ apiTools[apiTools.length - 1].cache_control = { type: 'ephemeral' };
16701
+ }
16670
16702
  response = await createMessage({
16671
16703
  model: modelId,
16672
16704
  max_tokens: 1024,
16673
- system: finalSystemPrompt,
16674
- tools: activeTools,
16705
+ system: systemBlocks,
16706
+ tools: apiTools,
16675
16707
  messages,
16676
16708
  });
16677
16709
  }
@@ -16687,7 +16719,20 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16687
16719
  // Use actual API token count for accurate context tracking
16688
16720
  if (response.usage) {
16689
16721
  lastKnownInputTokens = response.usage.input_tokens;
16690
- logger_1.logger.info({ inputTokens: lastKnownInputTokens, max: modelContextWindow }, 'Actual API token usage');
16722
+ // Track prompt cache performance
16723
+ const usage = response.usage;
16724
+ const cacheCreation = usage.cache_creation_input_tokens ?? 0;
16725
+ const cacheRead = usage.cache_read_input_tokens ?? 0;
16726
+ const cacheHitRate = cacheRead > 0
16727
+ ? (cacheRead / (cacheRead + lastKnownInputTokens)) * 100
16728
+ : 0;
16729
+ logger_1.logger.info({
16730
+ inputTokens: lastKnownInputTokens,
16731
+ max: modelContextWindow,
16732
+ cacheCreation,
16733
+ cacheRead,
16734
+ cacheHitRate: `${cacheHitRate.toFixed(1)}%`,
16735
+ }, 'API token usage (with cache stats)');
16691
16736
  }
16692
16737
  // Tool use loop — capped to prevent runaway token costs
16693
16738
  const MAX_TOOL_TURNS = 10;
@@ -16862,12 +16907,16 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16862
16907
  }
16863
16908
  }
16864
16909
  try {
16865
- const activeTools = getActiveTools();
16910
+ const apiTools = toApiTools(getActiveTools());
16911
+ // Add cache_control to last tool for tool definition caching
16912
+ if (apiTools.length > 0) {
16913
+ apiTools[apiTools.length - 1].cache_control = { type: 'ephemeral' };
16914
+ }
16866
16915
  response = await createMessage({
16867
16916
  model: modelId,
16868
16917
  max_tokens: 1024,
16869
- system: finalSystemPrompt,
16870
- tools: activeTools,
16918
+ system: systemBlocks,
16919
+ tools: apiTools,
16871
16920
  messages,
16872
16921
  });
16873
16922
  }
@@ -16882,6 +16931,12 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16882
16931
  // Update actual token usage after each API call
16883
16932
  if (response.usage) {
16884
16933
  lastKnownInputTokens = response.usage.input_tokens;
16934
+ const loopUsage = response.usage;
16935
+ logger_1.logger.debug({
16936
+ inputTokens: lastKnownInputTokens,
16937
+ cacheRead: loopUsage.cache_read_input_tokens ?? 0,
16938
+ turn: toolTurnCount,
16939
+ }, 'Tool loop token usage');
16885
16940
  // If actual usage is approaching limit, force compaction next iteration
16886
16941
  if (lastKnownInputTokens > modelContextWindow * 0.85) {
16887
16942
  logger_1.logger.info({ inputTokens: lastKnownInputTokens }, 'API reports high token usage, will compact');