clodds 1.6.3 → 1.6.4

This diff shows the differences between the contents of two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -16336,14 +16336,16 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16336
16336
  'claude-3-opus-20240229': 200000,
16337
16337
  };
16338
16338
  const modelContextWindow = MODEL_CONTEXT_WINDOWS[modelId] || 200000;
16339
- // Estimate fixed overhead: tool definitions + system prompt (these don't change during conversation)
16340
- const toolsTokenEstimate = (0, context_1.estimateTokens)(JSON.stringify(tools), modelId);
16339
+ // Reserve space for system prompt + response buffer.
16340
+ // Tool tokens are NOT included here because client-side estimation of tool
16341
+ // tokens is wildly inaccurate (JSON.stringify tokenization != API's internal
16342
+ // tool token counting). Instead, we use actual API usage feedback after the
16343
+ // first call to calibrate. The API will reject if truly over limit.
16341
16344
  const systemTokenEstimate = (0, context_1.estimateTokens)(finalSystemPrompt, modelId);
16342
- // Reserve enough for tools + system prompt + response buffer
16343
- const reserveForFixed = toolsTokenEstimate + systemTokenEstimate + 4096;
16345
+ const reserveTokens = systemTokenEstimate + 8192;
16344
16346
  const contextConfig = {
16345
16347
  maxTokens: modelContextWindow,
16346
- reserveTokens: reserveForFixed,
16348
+ reserveTokens,
16347
16349
  compactThreshold: 0.85,
16348
16350
  minMessagesAfterCompact: 6,
16349
16351
  summarizer,
@@ -16354,15 +16356,9 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16354
16356
  similarity: memory?.cosineSimilarity,
16355
16357
  };
16356
16358
  const contextManager = (0, context_1.createContextManager)(contextConfig, memory);
16357
- const effectiveMaxTokens = (contextConfig.maxTokens ?? 128000) - reserveForFixed;
16358
- const estimateSubmitTokens = () => {
16359
- const system = (0, context_1.estimateTokens)(finalSystemPrompt, modelId);
16360
- const msgs = messages.reduce((sum, m) => {
16361
- const content = typeof m.content === 'string' ? m.content : JSON.stringify(m.content);
16362
- return sum + (0, context_1.estimateTokens)(content, modelId) + 4;
16363
- }, 0);
16364
- return system + msgs + toolsTokenEstimate;
16365
- };
16359
+ const effectiveMaxTokens = modelContextWindow - reserveTokens;
16360
+ // Track actual API token usage for accurate compaction decisions
16361
+ let lastKnownInputTokens = 0;
16366
16362
  // Add all messages to context manager for tracking
16367
16363
  for (const msg of messages) {
16368
16364
  const content = typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
@@ -16413,20 +16409,30 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16413
16409
  }, 'Context compacted successfully');
16414
16410
  }
16415
16411
  }
16416
- const initialEstimate = estimateSubmitTokens();
16417
- logger_1.logger.info({ tokens: initialEstimate, max: effectiveMaxTokens }, 'Token estimate before submit');
16418
- // Safety: if still over limit after compaction, return a friendly error
16419
- if (initialEstimate > effectiveMaxTokens * 1.1) {
16420
- logger_1.logger.warn({ tokens: initialEstimate, max: effectiveMaxTokens }, 'Context exceeds limit even after compaction');
16421
- return 'This conversation has gotten too long for me to process. Please start a new conversation and I\'ll be happy to help!';
16412
+ let response;
16413
+ try {
16414
+ response = await createMessage({
16415
+ model: modelId,
16416
+ max_tokens: 1024,
16417
+ system: finalSystemPrompt,
16418
+ tools: tools,
16419
+ messages,
16420
+ });
16421
+ }
16422
+ catch (err) {
16423
+ // Handle prompt-too-long gracefully instead of crashing
16424
+ const errMsg = err instanceof Error ? err.message : String(err);
16425
+ if (errMsg.includes('prompt is too long') || errMsg.includes('too many tokens')) {
16426
+ logger_1.logger.warn({ error: errMsg }, 'Prompt exceeded context window');
16427
+ return 'This conversation has gotten too long for me to process. Please start a new conversation and I\'ll be happy to help!';
16428
+ }
16429
+ throw err;
16430
+ }
16431
+ // Use actual API token count for accurate context tracking
16432
+ if (response.usage) {
16433
+ lastKnownInputTokens = response.usage.input_tokens;
16434
+ logger_1.logger.info({ inputTokens: lastKnownInputTokens, max: modelContextWindow }, 'Actual API token usage');
16422
16435
  }
16423
- let response = await createMessage({
16424
- model: modelId,
16425
- max_tokens: 1024,
16426
- system: finalSystemPrompt,
16427
- tools: tools,
16428
- messages,
16429
- });
16430
16436
  // Tool use loop
16431
16437
  while (response.stop_reason === 'tool_use') {
16432
16438
  const assistantContent = response.content;
@@ -16540,20 +16546,42 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16540
16546
  sessionManager.saveCheckpoint(session, loopCompactResult.summary);
16541
16547
  }
16542
16548
  }
16543
- const loopEstimate = estimateSubmitTokens();
16544
- logger_1.logger.info({ tokens: loopEstimate, max: effectiveMaxTokens }, 'Token estimate before submit (tool loop)');
16545
- // Safety: bail if over limit during tool loop
16546
- if (loopEstimate > effectiveMaxTokens * 1.1) {
16547
- logger_1.logger.warn({ tokens: loopEstimate, max: effectiveMaxTokens }, 'Context exceeds limit during tool loop');
16548
- break;
16549
+ try {
16550
+ response = await createMessage({
16551
+ model: modelId,
16552
+ max_tokens: 1024,
16553
+ system: finalSystemPrompt,
16554
+ tools: tools,
16555
+ messages,
16556
+ });
16557
+ }
16558
+ catch (err) {
16559
+ const errMsg = err instanceof Error ? err.message : String(err);
16560
+ if (errMsg.includes('prompt is too long') || errMsg.includes('too many tokens')) {
16561
+ logger_1.logger.warn({ error: errMsg }, 'Prompt exceeded context window during tool loop');
16562
+ break;
16563
+ }
16564
+ throw err;
16565
+ }
16566
+ // Update actual token usage after each API call
16567
+ if (response.usage) {
16568
+ lastKnownInputTokens = response.usage.input_tokens;
16569
+ // If actual usage is approaching limit, force compaction next iteration
16570
+ if (lastKnownInputTokens > modelContextWindow * 0.85) {
16571
+ logger_1.logger.info({ inputTokens: lastKnownInputTokens }, 'API reports high token usage, will compact');
16572
+ const urgentCompact = await contextManager.compact();
16573
+ if (urgentCompact.success) {
16574
+ const compactedMessages = contextManager.getMessagesForApi();
16575
+ messages.length = 0;
16576
+ for (const msg of compactedMessages) {
16577
+ messages.push({
16578
+ role: msg.role === 'system' ? 'user' : msg.role,
16579
+ content: msg.content,
16580
+ });
16581
+ }
16582
+ }
16583
+ }
16549
16584
  }
16550
- response = await createMessage({
16551
- model: modelId,
16552
- max_tokens: 1024,
16553
- system: finalSystemPrompt,
16554
- tools: tools,
16555
- messages,
16556
- });
16557
16585
  }
16558
16586
  // Extract text response
16559
16587
  const responseText = extractResponseText(response);