clodds 1.6.2 → 1.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16336,9 +16336,16 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16336
16336
  'claude-3-opus-20240229': 200000,
16337
16337
  };
16338
16338
  const modelContextWindow = MODEL_CONTEXT_WINDOWS[modelId] || 200000;
16339
+ // Reserve space for system prompt + response buffer.
16340
+ // Tool tokens are NOT included here because client-side estimation of tool
16341
+ // tokens is wildly inaccurate (JSON.stringify tokenization != API's internal
16342
+ // tool token counting). Instead, we use actual API usage feedback after the
16343
+ // first call to calibrate. The API will reject if truly over limit.
16344
+ const systemTokenEstimate = (0, context_1.estimateTokens)(finalSystemPrompt, modelId);
16345
+ const reserveTokens = systemTokenEstimate + 8192;
16339
16346
  const contextConfig = {
16340
16347
  maxTokens: modelContextWindow,
16341
- reserveTokens: 4096,
16348
+ reserveTokens,
16342
16349
  compactThreshold: 0.85,
16343
16350
  minMessagesAfterCompact: 6,
16344
16351
  summarizer,
@@ -16349,17 +16356,9 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16349
16356
  similarity: memory?.cosineSimilarity,
16350
16357
  };
16351
16358
  const contextManager = (0, context_1.createContextManager)(contextConfig, memory);
16352
- const effectiveMaxTokens = (contextConfig.maxTokens ?? 128000) - (contextConfig.reserveTokens ?? 4096);
16353
- // Estimate tool definitions once (they don't change during the conversation)
16354
- const toolsTokenEstimate = (0, context_1.estimateTokens)(JSON.stringify(tools), modelId);
16355
- const estimateSubmitTokens = () => {
16356
- const system = (0, context_1.estimateTokens)(finalSystemPrompt, modelId);
16357
- const msgs = messages.reduce((sum, m) => {
16358
- const content = typeof m.content === 'string' ? m.content : JSON.stringify(m.content);
16359
- return sum + (0, context_1.estimateTokens)(content, modelId) + 4;
16360
- }, 0);
16361
- return system + msgs + toolsTokenEstimate;
16362
- };
16359
+ const effectiveMaxTokens = modelContextWindow - reserveTokens;
16360
+ // Track actual API token usage for accurate compaction decisions
16361
+ let lastKnownInputTokens = 0;
16363
16362
  // Add all messages to context manager for tracking
16364
16363
  for (const msg of messages) {
16365
16364
  const content = typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
@@ -16368,10 +16367,9 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16368
16367
  content,
16369
16368
  });
16370
16369
  }
16371
- // Add system prompt tokens
16372
- const systemTokens = (0, context_1.estimateTokens)(finalSystemPrompt, modelId);
16373
16370
  // Check if we need to compact before first API call
16374
- const guard = contextManager.checkGuard(systemTokens);
16371
+ // (system prompt is already accounted for in reserveTokens; tool tokens are deliberately not estimated client-side)
16372
+ const guard = contextManager.checkGuard();
16375
16373
  if (guard.shouldCompact) {
16376
16374
  logger_1.logger.info({ percentUsed: guard.percentUsed }, 'Context approaching limit, compacting');
16377
16375
  // Trigger compaction:before hook
@@ -16411,15 +16409,30 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16411
16409
  }, 'Context compacted successfully');
16412
16410
  }
16413
16411
  }
16414
- const initialEstimate = estimateSubmitTokens();
16415
- logger_1.logger.info({ tokens: initialEstimate, max: effectiveMaxTokens }, 'Token estimate before submit');
16416
- let response = await createMessage({
16417
- model: modelId,
16418
- max_tokens: 1024,
16419
- system: finalSystemPrompt,
16420
- tools: tools,
16421
- messages,
16422
- });
16412
+ let response;
16413
+ try {
16414
+ response = await createMessage({
16415
+ model: modelId,
16416
+ max_tokens: 1024,
16417
+ system: finalSystemPrompt,
16418
+ tools: tools,
16419
+ messages,
16420
+ });
16421
+ }
16422
+ catch (err) {
16423
+ // Handle prompt-too-long gracefully instead of crashing
16424
+ const errMsg = err instanceof Error ? err.message : String(err);
16425
+ if (errMsg.includes('prompt is too long') || errMsg.includes('too many tokens')) {
16426
+ logger_1.logger.warn({ error: errMsg }, 'Prompt exceeded context window');
16427
+ return 'This conversation has gotten too long for me to process. Please start a new conversation and I\'ll be happy to help!';
16428
+ }
16429
+ throw err;
16430
+ }
16431
+ // Use actual API token count for accurate context tracking
16432
+ if (response.usage) {
16433
+ lastKnownInputTokens = response.usage.input_tokens;
16434
+ logger_1.logger.info({ inputTokens: lastKnownInputTokens, max: modelContextWindow }, 'Actual API token usage');
16435
+ }
16423
16436
  // Tool use loop
16424
16437
  while (response.stop_reason === 'tool_use') {
16425
16438
  const assistantContent = response.content;
@@ -16517,7 +16530,7 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16517
16530
  content,
16518
16531
  });
16519
16532
  }
16520
- const loopGuard = contextManager.checkGuard(0);
16533
+ const loopGuard = contextManager.checkGuard();
16521
16534
  if (loopGuard.shouldCompact) {
16522
16535
  logger_1.logger.info({ percentUsed: loopGuard.percentUsed }, 'Compacting context during tool loop');
16523
16536
  const loopCompactResult = await contextManager.compact();
@@ -16533,15 +16546,42 @@ async function createAgentManager(config, feeds, db, sessionManager, sendMessage
16533
16546
  sessionManager.saveCheckpoint(session, loopCompactResult.summary);
16534
16547
  }
16535
16548
  }
16536
- const loopEstimate = estimateSubmitTokens();
16537
- logger_1.logger.info({ tokens: loopEstimate, max: effectiveMaxTokens }, 'Token estimate before submit (tool loop)');
16538
- response = await createMessage({
16539
- model: modelId,
16540
- max_tokens: 1024,
16541
- system: finalSystemPrompt,
16542
- tools: tools,
16543
- messages,
16544
- });
16549
+ try {
16550
+ response = await createMessage({
16551
+ model: modelId,
16552
+ max_tokens: 1024,
16553
+ system: finalSystemPrompt,
16554
+ tools: tools,
16555
+ messages,
16556
+ });
16557
+ }
16558
+ catch (err) {
16559
+ const errMsg = err instanceof Error ? err.message : String(err);
16560
+ if (errMsg.includes('prompt is too long') || errMsg.includes('too many tokens')) {
16561
+ logger_1.logger.warn({ error: errMsg }, 'Prompt exceeded context window during tool loop');
16562
+ break;
16563
+ }
16564
+ throw err;
16565
+ }
16566
+ // Update actual token usage after each API call
16567
+ if (response.usage) {
16568
+ lastKnownInputTokens = response.usage.input_tokens;
16569
+ // If actual usage is approaching the limit, compact right away so the next request sends fewer tokens
16570
+ if (lastKnownInputTokens > modelContextWindow * 0.85) {
16571
+ logger_1.logger.info({ inputTokens: lastKnownInputTokens }, 'API reports high token usage, will compact');
16572
+ const urgentCompact = await contextManager.compact();
16573
+ if (urgentCompact.success) {
16574
+ const compactedMessages = contextManager.getMessagesForApi();
16575
+ messages.length = 0;
16576
+ for (const msg of compactedMessages) {
16577
+ messages.push({
16578
+ role: msg.role === 'system' ? 'user' : msg.role,
16579
+ content: msg.content,
16580
+ });
16581
+ }
16582
+ }
16583
+ }
16584
+ }
16545
16585
  }
16546
16586
  // Extract text response
16547
16587
  const responseText = extractResponseText(response);