clodds 1.6.14 → 1.6.15

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -16410,12 +16410,21 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16410
16410
  }
16411
16411
  return createMessageStreamed(params);
16412
16412
  };
16413
+ // Detect tool/skill hints early — reused for skill budget + tool preloading
16414
+ const hints = processedMessage.text ? (0, tool_registry_js_1.detectToolHints)(processedMessage.text) : { platforms: [], categories: [], hasIntent: false };
16413
16415
  // Build final system prompt (Clawdbot-style)
16414
16416
  // Priority: routed agent prompt > default system prompt
16415
- const skillContext = skills.getSkillContextForMessage(processedMessage.text || '');
16416
- const baseSystemPrompt = SYSTEM_PROMPT.replace('{{SKILLS}}', skillContext ? `\n## Skills Reference\n${skillContext}` : '');
16417
- let finalSystemPrompt = session.context.routedAgentPrompt || baseSystemPrompt;
16418
- // Add memory context if available
16417
+ const skillContext = skills.getSkillContextForMessage(processedMessage.text || '', hints, messages.length);
16418
+ // Split system prompt into cacheable blocks for prompt caching.
16419
+ // Block 1 (cached): Base system prompt without skills — stable across messages.
16420
+ // Block 2 (uncached): Skills + memory changes per query.
16421
+ const coreSystemPrompt = session.context.routedAgentPrompt
16422
+ || SYSTEM_PROMPT.replace('{{SKILLS}}', '');
16423
+ // Build dynamic context (skills + memory) — changes every query, not cached
16424
+ let dynamicContext = '';
16425
+ if (skillContext) {
16426
+ dynamicContext += `\n## Skills Reference\n${skillContext}`;
16427
+ }
16419
16428
  if (memory) {
16420
16429
  const memoryAuto = config.memory?.auto || {};
16421
16430
  const channelKey = processedMessage.chatId || processedMessage.platform;
@@ -16423,7 +16432,7 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16423
16432
  if (memoryAuto.includeMemoryContext !== false) {
16424
16433
  const memoryContext = memory.buildContextString(session.userId, scope);
16425
16434
  if (memoryContext) {
16426
- finalSystemPrompt += `\n\n## User Memory\n${memoryContext}`;
16435
+ dynamicContext += `\n\n## User Memory\n${memoryContext}`;
16427
16436
  }
16428
16437
  }
16429
16438
  const semanticTopK = memoryAuto.semanticSearchTopK ?? (process.env.CLODDS_MEMORY_SEARCH === '1'
@@ -16434,7 +16443,7 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16434
16443
  const results = await memory.semanticSearch(session.userId, scope, processedMessage.text, semanticTopK);
16435
16444
  if (results.length > 0) {
16436
16445
  const lines = results.map((r) => `- ${r.entry.key}: ${r.entry.value} (score ${r.score.toFixed(2)})`);
16437
- finalSystemPrompt += `\n\n## Relevant Memory (semantic search)\n${lines.join('\n')}`;
16446
+ dynamicContext += `\n\n## Relevant Memory (semantic search)\n${lines.join('\n')}`;
16438
16447
  }
16439
16448
  }
16440
16449
  catch (error) {
@@ -16442,6 +16451,8 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16442
16451
  }
16443
16452
  }
16444
16453
  }
16454
+ // Backward-compatible string for hooks
16455
+ let finalSystemPrompt = coreSystemPrompt + dynamicContext;
16445
16456
  // =========================================================================
16446
16457
  // HOOKS: agent:before_start - Can modify system prompt
16447
16458
  // =========================================================================
@@ -16461,6 +16472,22 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16461
16472
  if (agentStartResult?.prependContext) {
16462
16473
  finalSystemPrompt = `${agentStartResult.prependContext}\n\n${finalSystemPrompt}`;
16463
16474
  }
16475
+ const hookModified = !!(agentStartResult?.systemPrompt || agentStartResult?.prependContext);
16476
+ let systemBlocks;
16477
+ if (hookModified) {
16478
+ // Hooks changed the prompt — use as single block, still cache it
16479
+ systemBlocks = [{ type: 'text', text: finalSystemPrompt, cache_control: { type: 'ephemeral' } }];
16480
+ }
16481
+ else {
16482
+ systemBlocks = [
16483
+ // Block 1: Core system prompt (stable — cached)
16484
+ { type: 'text', text: coreSystemPrompt, cache_control: { type: 'ephemeral' } },
16485
+ ];
16486
+ // Block 2: Dynamic context (changes per query — not cached)
16487
+ if (dynamicContext) {
16488
+ systemBlocks.push({ type: 'text', text: dynamicContext });
16489
+ }
16490
+ }
16464
16491
  // =========================================================================
16465
16492
  // CONTEXT MANAGEMENT - Check token usage and compact if needed
16466
16493
  // =========================================================================
@@ -16507,7 +16534,6 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16507
16534
  // to avoid loading all tools from multiple platforms (~150+).
16508
16535
  // Also checks conversation context for platform hints in multi-turn chats.
16509
16536
  if (TOOL_SEARCH_ENABLED && processedMessage.text) {
16510
- const hints = (0, tool_registry_js_1.detectToolHints)(processedMessage.text);
16511
16537
  // CONVERSATION CONTEXT: If no platform in current message, borrow from recent history.
16512
16538
  // "buy YES at 40 cents" after discussing polymarket → still loads polymarket tools.
16513
16539
  if (hints.platforms.length === 0 && messages.length > 1) {
@@ -16667,10 +16693,14 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16667
16693
  let response;
16668
16694
  try {
16669
16695
  const activeTools = getActiveTools();
16696
+ // Add cache_control to last tool for tool definition caching
16697
+ if (activeTools.length > 0) {
16698
+ activeTools[activeTools.length - 1].cache_control = { type: 'ephemeral' };
16699
+ }
16670
16700
  response = await createMessage({
16671
16701
  model: modelId,
16672
16702
  max_tokens: 1024,
16673
- system: finalSystemPrompt,
16703
+ system: systemBlocks,
16674
16704
  tools: activeTools,
16675
16705
  messages,
16676
16706
  });
@@ -16687,7 +16717,20 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16687
16717
  // Use actual API token count for accurate context tracking
16688
16718
  if (response.usage) {
16689
16719
  lastKnownInputTokens = response.usage.input_tokens;
16690
- logger_1.logger.info({ inputTokens: lastKnownInputTokens, max: modelContextWindow }, 'Actual API token usage');
16720
+ // Track prompt cache performance
16721
+ const usage = response.usage;
16722
+ const cacheCreation = usage.cache_creation_input_tokens ?? 0;
16723
+ const cacheRead = usage.cache_read_input_tokens ?? 0;
16724
+ const cacheHitRate = cacheRead > 0
16725
+ ? (cacheRead / (cacheRead + lastKnownInputTokens)) * 100
16726
+ : 0;
16727
+ logger_1.logger.info({
16728
+ inputTokens: lastKnownInputTokens,
16729
+ max: modelContextWindow,
16730
+ cacheCreation,
16731
+ cacheRead,
16732
+ cacheHitRate: `${cacheHitRate.toFixed(1)}%`,
16733
+ }, 'API token usage (with cache stats)');
16691
16734
  }
16692
16735
  // Tool use loop — capped to prevent runaway token costs
16693
16736
  const MAX_TOOL_TURNS = 10;
@@ -16863,10 +16906,14 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16863
16906
  }
16864
16907
  try {
16865
16908
  const activeTools = getActiveTools();
16909
+ // Add cache_control to last tool for tool definition caching
16910
+ if (activeTools.length > 0) {
16911
+ activeTools[activeTools.length - 1].cache_control = { type: 'ephemeral' };
16912
+ }
16866
16913
  response = await createMessage({
16867
16914
  model: modelId,
16868
16915
  max_tokens: 1024,
16869
- system: finalSystemPrompt,
16916
+ system: systemBlocks,
16870
16917
  tools: activeTools,
16871
16918
  messages,
16872
16919
  });
@@ -16882,6 +16929,12 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16882
16929
  // Update actual token usage after each API call
16883
16930
  if (response.usage) {
16884
16931
  lastKnownInputTokens = response.usage.input_tokens;
16932
+ const loopUsage = response.usage;
16933
+ logger_1.logger.debug({
16934
+ inputTokens: lastKnownInputTokens,
16935
+ cacheRead: loopUsage.cache_read_input_tokens ?? 0,
16936
+ turn: toolTurnCount,
16937
+ }, 'Tool loop token usage');
16885
16938
  // If actual usage is approaching limit, force compaction next iteration
16886
16939
  if (lastKnownInputTokens > modelContextWindow * 0.85) {
16887
16940
  logger_1.logger.info({ inputTokens: lastKnownInputTokens }, 'API reports high token usage, will compact');