npm - byterover-cli - Versions diffs - 2.0.0 → 2.1.1 - Mend

byterover-cli 2.0.0 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (183) hide show

package/dist/agent/infra/llm/context/compression/compression-helpers.js ADDED Viewed

@@ -0,0 +1,124 @@
+/**
+ * Shared compression helper functions.
+ *
+ * Extracted from ReactiveOverflowStrategy to enable reuse
+ * by EscalatedCompressionStrategy and other compression implementations.
+ */
+import { isTextPart } from '../../../../core/interfaces/message-type-guards.js';
+/**
+ * Count tokens in message history.
+ */
+export function countHistoryTokens(history, tokenizer) {
+    let total = 0;
+    for (const message of history) {
+        total += countMessageTokens(message, tokenizer);
+    }
+    return total;
+}
+/**
+ * Count tokens in a single message.
+ */
+export function countMessageTokens(message, tokenizer) {
+    // Role overhead (approximately 4 tokens)
+    let tokens = 4;
+    if (typeof message.content === 'string') {
+        tokens += tokenizer.countTokens(message.content);
+    }
+    else if (Array.isArray(message.content)) {
+        for (const part of message.content) {
+            tokens += isTextPart(part) ? tokenizer.countTokens(part.text) : 100;
+        }
+    }
+    // Tool calls overhead
+    if (message.toolCalls) {
+        for (const call of message.toolCalls) {
+            tokens += tokenizer.countTokens(call.function.name);
+            tokens += tokenizer.countTokens(call.function.arguments);
+        }
+    }
+    return tokens;
+}
+/**
+ * Extract text content from a message.
+ */
+export function extractTextContent(message) {
+    if (typeof message.content === 'string') {
+        return message.content;
+    }
+    if (Array.isArray(message.content)) {
+        return message.content
+            .filter((p) => isTextPart(p))
+            .map((p) => p.text)
+            .join('\n');
+    }
+    return '';
+}
+/**
+ * Find turn boundaries in message history.
+ *
+ * A turn boundary is the index where a user message starts.
+ * Returns indices of all user messages.
+ */
+export function findTurnBoundaries(messages) {
+    const boundaries = [];
+    for (const [index, message] of messages.entries()) {
+        if (message.role === 'user') {
+            boundaries.push(index);
+        }
+    }
+    return boundaries;
+}
+/**
+ * Format messages for the summary prompt.
+ */
+export function formatMessagesForSummary(messages) {
+    const MAX_TOTAL_CHARS = 50_000;
+    const MAX_PER_MESSAGE_CHARS = 1000;
+    const lines = [];
+    let totalChars = 0;
+    for (const message of messages) {
+        if (totalChars >= MAX_TOTAL_CHARS) {
+            lines.push(`[... ${messages.length - lines.length} more messages truncated for summarization]`);
+            break;
+        }
+        const role = formatRole(message.role);
+        const content = extractTextContent(message);
+        // Truncate very long messages (capped at 1K chars to prevent overflow)
+        const truncatedContent = content.length > MAX_PER_MESSAGE_CHARS
+            ? `${content.slice(0, MAX_PER_MESSAGE_CHARS)}... [truncated]`
+            : content;
+        if (truncatedContent) {
+            lines.push(`${role}: ${truncatedContent}`);
+            totalChars += truncatedContent.length;
+        }
+        // Include tool call information
+        if (message.toolCalls && message.toolCalls.length > 0) {
+            const toolNames = message.toolCalls.map((tc) => tc.function.name).join(', ');
+            lines.push(`[Used tools: ${toolNames}]`);
+            totalChars += toolNames.length + 15;
+        }
+    }
+    return lines.join('\n\n');
+}
+/**
+ * Format role for display.
+ */
+export function formatRole(role) {
+    switch (role) {
+        case 'assistant': {
+            return 'Assistant';
+        }
+        case 'system': {
+            return 'System';
+        }
+        case 'tool': {
+            return 'Tool Result';
+        }
+        case 'user': {
+            return 'User';
+        }
+        default: {
+            return role.charAt(0).toUpperCase() + role.slice(1);
+        }
+    }
+}

package/dist/agent/infra/llm/context/compression/escalated-compression.d.ts ADDED Viewed

@@ -0,0 +1,62 @@
+/**
+ * Escalated Compression Strategy.
+ *
+ * Implements the three-level escalation protocol from the LCM paper:
+ * 1. Normal LLM summarization
+ * 2. Aggressive LLM summarization (0.6× token budget)
+ * 3. Deterministic binary-search prefix truncation (guaranteed convergence)
+ *
+ * Convergence guarantee: output token count is always strictly less than input
+ * token count under the same counting function. All byterover-cli tokenizers use
+ * char-per-token heuristics, so the binary search in Level 3 always terminates.
+ *
+ * This strategy is designed to be prepended to the compression chain (before
+ * MiddleRemoval + OldestRemoval) so that LLM-quality summaries are attempted
+ * first, with hard-cut fallbacks after.
+ */
+import type { IContentGenerator } from '../../../../core/interfaces/i-content-generator.js';
+import type { ITokenizer } from '../../../../core/interfaces/i-tokenizer.js';
+import type { InternalMessage } from '../../../../core/interfaces/message-types.js';
+import type { ICompressionStrategy } from './types.js';
+/**
+ * Construction options for EscalatedCompressionStrategy.
+ *
+ * Only `generator` is required; the other fields fall back to the defaults noted
+ * on each field when omitted.
+ */
+export interface EscalatedCompressionOptions {
+    /** Content generator whose generateContent() is called for the LLM summarization passes */
+    generator: IContentGenerator;
+    /** Model name sent with each generateContent request (default: 'default') */
+    model?: string;
+    /**
+     * Number of most recent user turns kept verbatim instead of being summarized
+     * (default: 2). A turn starts at a message whose role is 'user'.
+     */
+    preserveTurns?: number;
+    /**
+     * Maximum output tokens requested for a summary (default: 2200). The aggressive
+     * second pass requests 0.6× this value, rounded down.
+     */
+    summaryMaxOutputTokens?: number;
+}
+/**
+ * Escalated Compression Strategy implementing ICompressionStrategy.
+ *
+ * Three-level escalation ensures practical convergence:
+ * - Level 1 & 2: LLM-based — may not reach maxHistoryTokens in one pass
+ * - Level 3: Deterministic — binary search always produces output < input
+ *
+ * ContextManager runs strategies sequentially and stops when totalTokens ≤ maxInputTokens,
+ * so MiddleRemoval + OldestRemoval after this strategy serve as hard-cut fallbacks.
+ *
+ * Public surface:
+ * - `compress(history, maxHistoryTokens, tokenizer)` resolves to the input history
+ *   unchanged when it already fits the budget or when no messages precede the
+ *   preserved turns; otherwise it resolves to the system messages, one summary
+ *   message, and the preserved messages, in that order.
+ * - `getName()` returns 'EscalatedCompression'.
+ */
+export declare class EscalatedCompressionStrategy implements ICompressionStrategy {
+    private readonly generator;
+    private readonly model;
+    private readonly preserveTurns;
+    private readonly summaryMaxOutputTokens;
+    constructor(options: EscalatedCompressionOptions);
+    compress(history: InternalMessage[], maxHistoryTokens: number, tokenizer: ITokenizer): Promise<InternalMessage[]>;
+    getName(): string;
+    /**
+     * Build the final compressed history with a summary message.
+     *
+     * The result is the system messages, then a system-role message holding the
+     * summary text prefixed with "[Conversation Summary]", then the messages to keep.
+     */
+    private buildResult;
+    /**
+     * Attempt LLM summarization at a given escalation level.
+     *
+     * A summary is accepted only when it is non-empty, counts fewer tokens than the
+     * input text, and passes isCompactionOutputValid. Errors thrown during the
+     * attempt are caught and treated as a rejected summary.
+     *
+     * @returns Summary text if accepted, undefined if escalation needed
+     */
+    private tryLlmSummarization;
+}

package/dist/agent/infra/llm/context/compression/escalated-compression.js ADDED Viewed

@@ -0,0 +1,144 @@
+/**
+ * Escalated Compression Strategy.
+ *
+ * Implements the three-level escalation protocol from the LCM paper:
+ * 1. Normal LLM summarization
+ * 2. Aggressive LLM summarization (0.6× token budget)
+ * 3. Deterministic binary-search prefix truncation (guaranteed convergence)
+ *
+ * Convergence guarantee: output token count is always strictly less than input
+ * token count under the same counting function. All byterover-cli tokenizers use
+ * char-per-token heuristics, so the binary search in Level 3 always terminates.
+ *
+ * This strategy is designed to be prepended to the compression chain (before
+ * MiddleRemoval + OldestRemoval) so that LLM-quality summaries are attempted
+ * first, with hard-cut fallbacks after.
+ */
+import { randomUUID } from 'node:crypto';
+import { buildDeterministicFallbackCompaction, isCompactionOutputValid, withAggressiveCompactionDirective, } from '../../../../../shared/utils/escalation-utils.js';
+import { countHistoryTokens, findTurnBoundaries, formatMessagesForSummary, } from './compression-helpers.js';
+/**
+ * Instruction header for LLM summarization requests.
+ *
+ * tryLlmSummarization appends the formatted conversation text directly after the
+ * trailing "Conversation:" line to form the user prompt.
+ */
+const SUMMARIZE_PROMPT = `Summarize the following conversation concisely, preserving:
+- Key decisions made and rationale
+- Important actions taken and their results
+- Critical context for continuing the conversation
+- Any unresolved questions or pending tasks
+- File paths, function names, and technical details that are still relevant
+Keep the summary focused and actionable. Do not include unnecessary narrative.
+Conversation:
+`;
+/**
+ * Escalated Compression Strategy implementing ICompressionStrategy.
+ *
+ * Three-level escalation ensures practical convergence:
+ * - Level 1 & 2: LLM-based — may not reach maxHistoryTokens in one pass
+ * - Level 3: Deterministic — binary search always produces output < input
+ *
+ * ContextManager runs strategies sequentially and stops when totalTokens ≤ maxInputTokens,
+ * so MiddleRemoval + OldestRemoval after this strategy serve as hard-cut fallbacks.
+ */
+export class EscalatedCompressionStrategy {
+    generator;
+    model;
+    preserveTurns;
+    summaryMaxOutputTokens;
+    constructor(options) {
+        this.generator = options.generator;
+        this.model = options.model ?? 'default';
+        this.preserveTurns = options.preserveTurns ?? 2;
+        this.summaryMaxOutputTokens = options.summaryMaxOutputTokens ?? 2200;
+    }
+    async compress(history, maxHistoryTokens, tokenizer) {
+        const currentTokens = countHistoryTokens(history, tokenizer);
+        if (currentTokens <= maxHistoryTokens) {
+            return history;
+        }
+        // Separate system messages from non-system messages
+        const systemMessages = history.filter((m) => m.role === 'system');
+        const nonSystemMessages = history.filter((m) => m.role !== 'system');
+        // Find turn boundaries and split into summarize/keep
+        const turnBoundaries = findTurnBoundaries(nonSystemMessages);
+        const turnsToPreserve = Math.min(this.preserveTurns, turnBoundaries.length);
+        const preserveFromIndex = turnsToPreserve > 0
+            ? turnBoundaries[turnBoundaries.length - turnsToPreserve]
+            : nonSystemMessages.length;
+        const messagesToSummarize = nonSystemMessages.slice(0, preserveFromIndex);
+        const messagesToKeep = nonSystemMessages.slice(preserveFromIndex);
+        // Need messages to summarize
+        if (messagesToSummarize.length === 0) {
+            return history;
+        }
+        const inputText = formatMessagesForSummary(messagesToSummarize);
+        const inputTokens = tokenizer.countTokens(inputText);
+        // Try Level 1: Normal summarization
+        const level1Result = await this.tryLlmSummarization(inputText, inputTokens, tokenizer, false);
+        if (level1Result) {
+            return this.buildResult(systemMessages, level1Result, messagesToSummarize.length, messagesToKeep);
+        }
+        // Try Level 2: Aggressive summarization
+        const level2Result = await this.tryLlmSummarization(inputText, inputTokens, tokenizer, true);
+        if (level2Result) {
+            return this.buildResult(systemMessages, level2Result, messagesToSummarize.length, messagesToKeep);
+        }
+        // Level 3: Deterministic fallback (guaranteed convergence)
+        const level3Result = buildDeterministicFallbackCompaction({
+            inputTokens,
+            sourceText: inputText,
+            suffixLabel: 'escalated-compression',
+            tokenizer,
+        });
+        return this.buildResult(systemMessages, level3Result, messagesToSummarize.length, messagesToKeep);
+    }
+    getName() {
+        return 'EscalatedCompression';
+    }
+    /**
+     * Build the final compressed history with a summary message.
+     */
+    buildResult(systemMessages, summaryContent, summarizedCount, messagesToKeep) {
+        const summaryMessage = {
+            content: `[Conversation Summary]\n${summaryContent}`,
+            metadata: {
+                compactedAt: Date.now(),
+                isSummary: true,
+                strategy: 'escalated-compression',
+                summarizedMessageCount: summarizedCount,
+            },
+            role: 'system',
+        };
+        return [...systemMessages, summaryMessage, ...messagesToKeep];
+    }
+    /**
+     * Attempt LLM summarization at a given escalation level.
+     *
+     * @returns Summary text if accepted, undefined if escalation needed
+     */
+    async tryLlmSummarization(inputText, inputTokens, tokenizer, aggressive) {
+        try {
+            const prompt = aggressive
+                ? withAggressiveCompactionDirective(SUMMARIZE_PROMPT + inputText)
+                : SUMMARIZE_PROMPT + inputText;
+            const maxTokens = aggressive
+                ? Math.floor(0.6 * this.summaryMaxOutputTokens)
+                : this.summaryMaxOutputTokens;
+            const response = await this.generator.generateContent({
+                config: { maxTokens, temperature: 0 },
+                contents: [{ content: prompt, role: 'user' }],
+                model: this.model,
+                systemPrompt: 'You are a conversation summarizer. Produce concise, information-dense summaries.',
+                taskId: randomUUID(),
+            });
+            const result = response.content;
+            if (result &&
+                tokenizer.countTokens(result) < inputTokens &&
+                isCompactionOutputValid(result)) {
+                return result;
+            }
+            return undefined;
+        }
+        catch {
+            return undefined;
+        }
+    }
+}

package/dist/agent/infra/llm/context/compression/index.d.ts CHANGED Viewed

@@ -1,8 +1,11 @@
 /**
  * Context compression strategies module
  */
+export { countHistoryTokens, countMessageTokens, extractTextContent, findTurnBoundaries, formatMessagesForSummary, formatRole, } from './compression-helpers.js';
 export { createEnhancedCompactionStrategy, EnhancedCompactionStrategy } from './enhanced-compaction.js';
 export type { CompactionResult, EnhancedCompactionOptions } from './enhanced-compaction.js';
+export { EscalatedCompressionStrategy } from './escalated-compression.js';
+export type { EscalatedCompressionOptions } from './escalated-compression.js';
 export { filterCompacted, findSummaryMessage, getCompressionStats, getFilteredMessageCount, hasSummaryMessage, isSummaryMessage, } from './filter-compacted.js';
 export { MiddleRemovalStrategy } from './middle-removal.js';
 export { OldestRemovalStrategy } from './oldest-removal.js';

package/dist/agent/infra/llm/context/compression/index.js CHANGED Viewed

@@ -1,8 +1,11 @@
 /**
  * Context compression strategies module
  */
+// Compression helpers (shared across strategies)
+export { countHistoryTokens, countMessageTokens, extractTextContent, findTurnBoundaries, formatMessagesForSummary, formatRole, } from './compression-helpers.js';
 // Compression strategies (alphabetical order)
 export { createEnhancedCompactionStrategy, EnhancedCompactionStrategy } from './enhanced-compaction.js';
+export { EscalatedCompressionStrategy } from './escalated-compression.js';
 // Filter utilities
 export { filterCompacted, findSummaryMessage, getCompressionStats, getFilteredMessageCount, hasSummaryMessage, isSummaryMessage, } from './filter-compacted.js';
 // More compression strategies

package/dist/agent/infra/llm/context/compression/reactive-overflow.d.ts CHANGED Viewed

@@ -65,33 +65,6 @@ export declare class ReactiveOverflowStrategy implements ICompressionStrategy {
     constructor(options: ReactiveOverflowOptions);
     compress(history: InternalMessage[], maxHistoryTokens: number, tokenizer: ITokenizer): Promise<InternalMessage[]>;
     getName(): string;
-    /**
-     * Count tokens in message history.
-     */
-    private countHistoryTokens;
-    /**
-     * Count tokens in a single message.
-     */
-    private countMessageTokens;
-    /**
-     * Extract text content from a message.
-     */
-    private extractTextContent;
-    /**
-     * Find turn boundaries in message history.
-     *
-     * A turn boundary is the index where a user message starts.
-     * Returns indices of all user messages.
-     */
-    private findTurnBoundaries;
-    /**
-     * Format messages for the summary prompt.
-     */
-    private formatMessagesForSummary;
-    /**
-     * Format role for display.
-     */
-    private formatRole;
     /**
      * Generate a fallback summary without LLM.
      */

package/dist/agent/infra/llm/context/compression/reactive-overflow.js CHANGED Viewed

@@ -19,7 +19,7 @@
  * - Full history preserved in storage for audit
  *
  */
-import { isTextPart } from '../../../../core/interfaces/message-type-guards.js';
+import { countHistoryTokens, extractTextContent, findTurnBoundaries, formatMessagesForSummary, } from './compression-helpers.js';
 /**
  * Default configuration values.
  */
@@ -48,7 +48,7 @@ export class ReactiveOverflowStrategy {
     }
     async compress(history, maxHistoryTokens, tokenizer) {
         // Calculate current token count
-        const currentTokens = this.countHistoryTokens(history, tokenizer);
+        const currentTokens = countHistoryTokens(history, tokenizer);
         // Check if compression is needed
         if (currentTokens <= maxHistoryTokens) {
             return history;
@@ -62,7 +62,7 @@ export class ReactiveOverflowStrategy {
             return history;
         }
         // Calculate how many messages to keep (preserve last N turns)
-        const turnBoundaries = this.findTurnBoundaries(nonSystemMessages);
+        const turnBoundaries = findTurnBoundaries(nonSystemMessages);
         const turnsToPreserve = Math.min(this.preserveLastNTurns, turnBoundaries.length);
         const preserveFromIndex = turnsToPreserve > 0
             ? turnBoundaries[turnBoundaries.length - turnsToPreserve]
@@ -92,123 +92,6 @@ export class ReactiveOverflowStrategy {
     getName() {
         return 'ReactiveOverflow';
     }
-    /**
-     * Count tokens in message history.
-     */
-    countHistoryTokens(history, tokenizer) {
-        let total = 0;
-        for (const message of history) {
-            total += this.countMessageTokens(message, tokenizer);
-        }
-        return total;
-    }
-    /**
-     * Count tokens in a single message.
-     */
-    countMessageTokens(message, tokenizer) {
-        // Role overhead (approximately 4 tokens)
-        let tokens = 4;
-        if (typeof message.content === 'string') {
-            tokens += tokenizer.countTokens(message.content);
-        }
-        else if (Array.isArray(message.content)) {
-            for (const part of message.content) {
-                tokens += isTextPart(part) ? tokenizer.countTokens(part.text) : 100;
-            }
-        }
-        // Tool calls overhead
-        if (message.toolCalls) {
-            for (const call of message.toolCalls) {
-                tokens += tokenizer.countTokens(call.function.name);
-                tokens += tokenizer.countTokens(call.function.arguments);
-            }
-        }
-        return tokens;
-    }
-    /**
-     * Extract text content from a message.
-     */
-    extractTextContent(message) {
-        if (typeof message.content === 'string') {
-            return message.content;
-        }
-        if (Array.isArray(message.content)) {
-            return message.content
-                .filter((p) => isTextPart(p))
-                .map((p) => p.text)
-                .join('\n');
-        }
-        return '';
-    }
-    /**
-     * Find turn boundaries in message history.
-     *
-     * A turn boundary is the index where a user message starts.
-     * Returns indices of all user messages.
-     */
-    findTurnBoundaries(messages) {
-        const boundaries = [];
-        for (const [index, message] of messages.entries()) {
-            if (message.role === 'user') {
-                boundaries.push(index);
-            }
-        }
-        return boundaries;
-    }
-    /**
-     * Format messages for the summary prompt.
-     */
-    formatMessagesForSummary(messages) {
-        const MAX_TOTAL_CHARS = 50_000;
-        const MAX_PER_MESSAGE_CHARS = 1000;
-        const lines = [];
-        let totalChars = 0;
-        for (const message of messages) {
-            if (totalChars >= MAX_TOTAL_CHARS) {
-                lines.push(`[... ${messages.length - lines.length} more messages truncated for summarization]`);
-                break;
-            }
-            const role = this.formatRole(message.role);
-            const content = this.extractTextContent(message);
-            // Truncate very long messages (capped at 1K chars to prevent overflow)
-            const truncatedContent = content.length > MAX_PER_MESSAGE_CHARS
-                ? `${content.slice(0, MAX_PER_MESSAGE_CHARS)}... [truncated]`
-                : content;
-            if (truncatedContent) {
-                lines.push(`${role}: ${truncatedContent}`);
-                totalChars += truncatedContent.length;
-            }
-            // Include tool call information
-            if (message.toolCalls && message.toolCalls.length > 0) {
-                const toolNames = message.toolCalls.map((tc) => tc.function.name).join(', ');
-                lines.push(`[Used tools: ${toolNames}]`);
-                totalChars += toolNames.length + 15;
-            }
-        }
-        return lines.join('\n\n');
-    }
-    /**
-     * Format role for display.
-     */
-    formatRole(role) {
-        switch (role) {
-            case 'assistant': {
-                return 'Assistant';
-            }
-            case 'system': {
-                return 'System';
-            }
-            case 'tool': {
-                return 'Tool Result';
-            }
-            case 'user': {
-                return 'User';
-            }
-            default: {
-                return role.charAt(0).toUpperCase() + role.slice(1);
-            }
-        }
-    }
     /**
      * Generate a fallback summary without LLM.
      */
@@ -225,7 +108,7 @@ export class ReactiveOverflowStrategy {
         // Extract key topics from user messages
         const topics = new Set();
         for (const msg of userMessages.slice(0, 5)) {
-            const content = this.extractTextContent(msg);
+            const content = extractTextContent(msg);
             const words = content.split(/\s+/).slice(0, 10).join(' ');
             if (words) {
                 topics.add(words);
@@ -244,7 +127,7 @@ export class ReactiveOverflowStrategy {
      * Generate a summary of messages using the LLM.
      */
     async generateSummary(messages) {
-        const conversationText = this.formatMessagesForSummary(messages);
+        const conversationText = formatMessagesForSummary(messages);
         const prompt = `You are a conversation summarizer. Summarize the following conversation concisely, preserving:
 - Key decisions made
 - Important actions taken

package/dist/agent/infra/llm/context/context-manager.d.ts CHANGED Viewed

@@ -188,6 +188,22 @@ export declare class ContextManager<T> {
      * Also clears persisted history if storage is enabled.
      */
     clearHistory(): Promise<void>;
+    /**
+     * Compress messages using the strategy chain and replace in-memory state.
+     * Called by AgentLLMService when context exceeds the threshold.
+     *
+     * Delegates to compressHistoryIfNeeded() which iterates compressionStrategies
+     * (EscalatedCompression → MiddleRemoval → OldestRemoval) until the history
+     * fits within the token budget.
+     *
+     * @param systemPromptTokens - Tokens reserved for the system prompt
+     * @param targetHistoryBudget - Target token budget for message history.
+     *   When provided, overrides maxInputTokens for threshold/budget calculations
+     *   so the strategy chain compresses to the caller's target (e.g. 70% utilization)
+     *   rather than the full context window.
+     * @returns The compressed message array (same reference as this.messages)
+     */
+    compressAndReplace(systemPromptTokens: number, targetHistoryBudget?: number): Promise<InternalMessage[]>;
     /**
      * Compress messages by removing oldest messages until total tokens fit within the budget.
      * This directly modifies the internal messages array by slicing from the beginning.
@@ -292,10 +308,13 @@ export declare class ContextManager<T> {
      * Compress conversation history if needed to fit within token limits.
      *
      * This method applies compression strategies sequentially until the history
-     * fits within the available token budget (maxInputTokens - systemPromptTokens).
+     * fits within the available token budget.
      *
      * @param systemPromptTokens - Tokens used by system prompt (reserved, not compressible)
      * @param messagesToCompress - Messages to compress (defaults to all messages)
+     * @param targetMaxTokens - Override for maxInputTokens. When provided, the method
+     *   uses this as the total token ceiling (system + history) instead of this.maxInputTokens.
+     *   This allows the caller to target a lower utilization (e.g. 70%) rather than 100%.
      * @returns Compressed message history
      */
     private compressHistoryIfNeeded;