npm - qlogicagent - Versions diffs - 0.2.1 → 0.3.0 - Mend

qlogicagent 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (229) hide show

package/dist/orchestration/context-compression.js DELETED Viewed

@@ -1,583 +0,0 @@
-// ============================================================
-// Context compression strategies — reduce message history
-// to fit within a token budget.
-//
-// Phase 1: ToolResultTrim + SlidingWindow (sync, no LLM)
-// Phase 2: Structured LLM summarization + head/tail protection
-// Phase 3: Prompt cache awareness
-// Phase 4: Adaptive threshold + metrics + engine plugin
-// ============================================================
-export function isAsyncCompressionStrategy(s) {
-    return typeof s.compressAsync === "function";
-}
-// ── Sliding Window ──────────────────────────────────────────
-// Keep the system prompt(s) + last N messages that fit the budget.
-export class SlidingWindowStrategy {
-    estimateTokens;
-    constructor(estimateTokens) {
-        this.estimateTokens = estimateTokens;
-    }
-    compress(messages, budget) {
-        const systemMessages = [];
-        const nonSystem = [];
-        for (const msg of messages) {
-            if (msg.role === "system") {
-                systemMessages.push(msg);
-            }
-            else {
-                nonSystem.push(msg);
-            }
-        }
-        let remaining = budget;
-        for (const sys of systemMessages) {
-            remaining -= this.estimateTokens(sys);
-        }
-        if (remaining <= 0) {
-            return { messages: systemMessages, droppedCount: nonSystem.length, strategy: "sliding-window" };
-        }
-        const kept = [];
-        for (let i = nonSystem.length - 1; i >= 0; i--) {
-            const cost = this.estimateTokens(nonSystem[i]);
-            if (remaining - cost < 0)
-                break;
-            remaining -= cost;
-            kept.unshift(nonSystem[i]);
-        }
-        return {
-            messages: [...systemMessages, ...kept],
-            droppedCount: nonSystem.length - kept.length,
-            strategy: "sliding-window",
-        };
-    }
-}
-// ── Summarize Old ───────────────────────────────────────────
-// Keep last `recentCount` messages as-is, replace older ones
-// with a single system-role summary placeholder.
-export class SummarizeOldStrategy {
-    recentCount;
-    summarize;
-    constructor(recentCount, summarize) {
-        this.recentCount = recentCount;
-        this.summarize = summarize;
-    }
-    compress(messages, _budget) {
-        const systemMessages = messages.filter((m) => m.role === "system");
-        const nonSystem = messages.filter((m) => m.role !== "system");
-        if (nonSystem.length <= this.recentCount) {
-            return { messages, droppedCount: 0, strategy: "summarize-old" };
-        }
-        const oldMessages = nonSystem.slice(0, nonSystem.length - this.recentCount);
-        const recentMessages = nonSystem.slice(nonSystem.length - this.recentCount);
-        const summary = this.summarize(oldMessages);
-        return {
-            messages: [
-                ...systemMessages,
-                { role: "system", content: `[Conversation summary]\n${summary}` },
-                ...recentMessages,
-            ],
-            droppedCount: oldMessages.length,
-            strategy: "summarize-old",
-        };
-    }
-}
-// ── Tool Result Trim ────────────────────────────────────────
-// Truncate oversized tool results to a max character length.
-export class ToolResultTrimStrategy {
-    maxToolResultChars;
-    constructor(maxToolResultChars = 8000) {
-        this.maxToolResultChars = maxToolResultChars;
-    }
-    compress(messages, _budget) {
-        let trimmedCount = 0;
-        const result = messages.map((msg) => {
-            if (msg.role !== "tool" || typeof msg.content !== "string")
-                return msg;
-            if (msg.content.length <= this.maxToolResultChars)
-                return msg;
-            trimmedCount++;
-            return {
-                ...msg,
-                content: msg.content.slice(0, this.maxToolResultChars) + "\n[...truncated]",
-            };
-        });
-        return {
-            messages: result,
-            droppedCount: trimmedCount,
-            strategy: "tool-result-trim",
-        };
-    }
-}
-// ── Composite ───────────────────────────────────────────────
-// Apply multiple strategies in sequence.
-export function composeStrategies(...strategies) {
-    return {
-        compress(messages, budget) {
-            let current = messages;
-            let totalDropped = 0;
-            const names = [];
-            for (const strategy of strategies) {
-                const result = strategy.compress(current, budget);
-                current = result.messages;
-                totalDropped += result.droppedCount;
-                if (result.droppedCount > 0) {
-                    names.push(result.strategy);
-                }
-            }
-            return {
-                messages: current,
-                droppedCount: totalDropped,
-                strategy: names.length > 0 ? names.join("+") : "none",
-            };
-        },
-    };
-}
-/**
- * Compose strategies with async support — if any strategy is async,
- * the pipeline becomes async.
- */
-export function composeAsyncStrategies(...strategies) {
-    return {
-        compress(messages, budget) {
-            // Sync fallback: skip async strategies
-            let current = messages;
-            let totalDropped = 0;
-            const names = [];
-            for (const strategy of strategies) {
-                const result = strategy.compress(current, budget);
-                current = result.messages;
-                totalDropped += result.droppedCount;
-                if (result.droppedCount > 0)
-                    names.push(result.strategy);
-            }
-            return {
-                messages: current,
-                droppedCount: totalDropped,
-                strategy: names.length > 0 ? names.join("+") : "none",
-            };
-        },
-        async compressAsync(messages, budget) {
-            let current = messages;
-            let totalDropped = 0;
-            const names = [];
-            let totalLatency = 0;
-            let anyLlm = false;
-            let anyCacheInvalidated = false;
-            for (const strategy of strategies) {
-                const result = isAsyncCompressionStrategy(strategy)
-                    ? await strategy.compressAsync(current, budget)
-                    : strategy.compress(current, budget);
-                current = result.messages;
-                totalDropped += result.droppedCount;
-                if (result.droppedCount > 0)
-                    names.push(result.strategy);
-                if (result.metrics) {
-                    totalLatency += result.metrics.latencyMs;
-                    anyLlm = anyLlm || result.metrics.usedLlm;
-                    anyCacheInvalidated = anyCacheInvalidated || !!result.metrics.cacheInvalidated;
-                }
-            }
-            return {
-                messages: current,
-                droppedCount: totalDropped,
-                strategy: names.length > 0 ? names.join("+") : "none",
-                metrics: totalLatency > 0 || anyLlm
-                    ? { tokensBefore: 0, tokensAfter: 0, compressionRatio: 0, latencyMs: totalLatency, usedLlm: anyLlm, cacheInvalidated: anyCacheInvalidated }
-                    : undefined,
-            };
-        },
-    };
-}
-// ════════════════════════════════════════════════════════════
-// Phase 2: LLM Summarization Strategies
-// ════════════════════════════════════════════════════════════
-// ── 2.1 Structured Summary Prompt (Claude Code style) ──────
-/**
- * Build the structured 9-section summary instruction for the LLM.
- * Based on Claude Code's Full Compact mode, adapted for Hub.
- */
-export function buildStructuredSummaryPrompt(messagesToSummarize, opts) {
-    const userMsgs = messagesToSummarize.filter((m) => m.role === "user");
-    const toolCalls = messagesToSummarize.filter((m) => m.tool_calls != null);
-    const toolResults = messagesToSummarize.filter((m) => m.role === "tool");
-    const sections = [
-        "You are a conversation summarizer. Produce a structured summary of the conversation history below.",
-        "",
-        "## Instructions",
-        "Analyze the conversation and produce a summary with these sections:",
-        "",
-        "### 1. Primary Objective",
-        "What is the user's main goal or task? State it in one sentence.",
-        "",
-        "### 2. Key Decisions Made",
-        "List the important decisions, choices, or conclusions reached during the conversation.",
-        "",
-        "### 3. Current Progress",
-        `Describe the current state. ${toolCalls.length > 0 ? `${toolCalls.length} tool calls and ${toolResults.length} tool results were exchanged.` : "No tools were used."}`,
-        "",
-        "### 4. Pending Tasks",
-        "List any tasks that are in-progress or planned but not yet completed.",
-        "",
-        "### 5. Important Context",
-        "Note any critical facts, constraints, or preferences the user mentioned that must be preserved.",
-        "",
-        "### 6. Error & Recovery History",
-        "Summarize any errors encountered and how they were resolved.",
-        "",
-        "### 7. User Preferences Expressed",
-        `The user sent ${userMsgs.length} messages. Note any stated preferences about style, approach, or constraints.`,
-        "",
-        "### 8. Technical State",
-        "Note file paths, variable names, API endpoints, or configuration values that were discussed.",
-        "",
-        "### 9. Conversation Flow",
-        "Briefly describe the overall flow: what happened first, what changed, where we are now.",
-    ];
-    if (opts?.taskContext) {
-        sections.push("", `## Additional Context`, opts.taskContext);
-    }
-    sections.push("", "## Conversation to Summarize", "", ...messagesToSummarize.map((m) => {
-        const content = typeof m.content === "string" ? m.content : JSON.stringify(m.content ?? "");
-        const truncated = content.length > 2000 ? content.slice(0, 2000) + "..." : content;
-        return `[${m.role}]: ${truncated}`;
-    }), "", "## Output Format", "Respond with a concise summary covering all 9 sections above. Use markdown headers.", "Keep the total summary under 800 words. Focus on actionable information.");
-    return sections.join("\n");
-}
-/**
- * Phase 2.2: Head/Tail protected summarization.
- *
- * Protects: system messages + first exchange + last N messages.
- * Compresses: middle section via LLM summarization.
- */
-export class HeadTailProtectedStrategy {
-    config;
-    constructor(config) {
-        this.config = {
-            protectedHeadExchanges: config.protectedHeadExchanges,
-            protectedTailMessages: config.protectedTailMessages,
-            summarize: config.summarize,
-            estimateTokens: config.estimateTokens ?? defaultEstimateTokens,
-            taskContext: config.taskContext,
-        };
-    }
-    compress(messages, _budget) {
-        // Sync fallback: just pass through (cannot call LLM synchronously)
-        return { messages, droppedCount: 0, strategy: "head-tail-protected" };
-    }
-    async compressAsync(messages, budget) {
-        const start = Date.now();
-        const { system, nonSystem } = splitSystemMessages(messages);
-        // Check if compression is needed
-        const totalTokens = messages.reduce((sum, m) => sum + this.config.estimateTokens(m), 0);
-        if (totalTokens <= budget) {
-            return { messages, droppedCount: 0, strategy: "head-tail-protected" };
-        }
-        // Identify protected head: first N user+assistant exchanges
-        let headEnd = 0;
-        let exchangeCount = 0;
-        for (let i = 0; i < nonSystem.length; i++) {
-            if (nonSystem[i].role === "user")
-                exchangeCount++;
-            if (exchangeCount > this.config.protectedHeadExchanges)
-                break;
-            headEnd = i + 1;
-        }
-        // Protected tail
-        const tailStart = Math.max(headEnd, nonSystem.length - this.config.protectedTailMessages);
-        // If there's nothing to compress in the middle, pass through
-        if (tailStart <= headEnd) {
-            return { messages, droppedCount: 0, strategy: "head-tail-protected" };
-        }
-        const headMessages = nonSystem.slice(0, headEnd);
-        const middleMessages = nonSystem.slice(headEnd, tailStart);
-        const tailMessages = nonSystem.slice(tailStart);
-        // Summarize middle section via LLM
-        const instruction = buildStructuredSummaryPrompt(middleMessages, {
-            taskContext: this.config.taskContext,
-        });
-        const summary = await this.config.summarize(middleMessages, instruction);
-        const summaryMessage = {
-            role: "system",
-            content: `[Conversation summary — ${middleMessages.length} messages compressed]\n\n${summary}`,
-        };
-        const resultMessages = [
-            ...system,
-            ...headMessages,
-            summaryMessage,
-            ...tailMessages,
-        ];
-        const latencyMs = Date.now() - start;
-        const tokensAfter = resultMessages.reduce((sum, m) => sum + this.config.estimateTokens(m), 0);
-        return {
-            messages: resultMessages,
-            droppedCount: middleMessages.length,
-            strategy: "head-tail-protected",
-            metrics: {
-                tokensBefore: totalTokens,
-                tokensAfter,
-                compressionRatio: totalTokens > 0 ? tokensAfter / totalTokens : 1,
-                latencyMs,
-                usedLlm: true,
-                cacheInvalidated: true,
-            },
-        };
-    }
-}
-/**
- * Phase 2.3: Incremental (partial) compaction.
- *
- * Only summarizes the oldest messages beyond the preserve window.
- * Avoids repeatedly re-summarizing already-compressed content.
- * If a previous summary marker exists, only new old messages are compressed.
- */
-export class IncrementalCompactStrategy {
-    config;
-    constructor(config) {
-        this.config = {
-            preserveRecentCount: config.preserveRecentCount,
-            summarize: config.summarize,
-            estimateTokens: config.estimateTokens ?? defaultEstimateTokens,
-        };
-    }
-    compress(messages, _budget) {
-        return { messages, droppedCount: 0, strategy: "incremental-compact" };
-    }
-    async compressAsync(messages, budget) {
-        const start = Date.now();
-        const { system, nonSystem } = splitSystemMessages(messages);
-        // Find existing summary marker
-        const existingSummaryIdx = system.findIndex((m) => typeof m.content === "string" && m.content.startsWith("[Conversation summary"));
-        const existingSummary = existingSummaryIdx >= 0 ? system[existingSummaryIdx] : undefined;
-        const systemWithoutOldSummary = existingSummaryIdx >= 0
-            ? [...system.slice(0, existingSummaryIdx), ...system.slice(existingSummaryIdx + 1)]
-            : system;
-        // How many messages to preserve
-        const preserveStart = Math.max(0, nonSystem.length - this.config.preserveRecentCount);
-        if (preserveStart <= 0) {
-            return { messages, droppedCount: 0, strategy: "incremental-compact" };
-        }
-        const totalTokens = messages.reduce((sum, m) => sum + this.config.estimateTokens(m), 0);
-        if (totalTokens <= budget) {
-            return { messages, droppedCount: 0, strategy: "incremental-compact" };
-        }
-        const oldMessages = nonSystem.slice(0, preserveStart);
-        const recentMessages = nonSystem.slice(preserveStart);
-        // Build summary instruction including existing summary context
-        const contextPrefix = existingSummary && typeof existingSummary.content === "string"
-            ? `Previous summary:\n${existingSummary.content}\n\nNew messages to integrate:`
-            : undefined;
-        const instruction = buildStructuredSummaryPrompt(oldMessages, { taskContext: contextPrefix });
-        const summary = await this.config.summarize(oldMessages, instruction);
-        const summaryMessage = {
-            role: "system",
-            content: `[Conversation summary — ${oldMessages.length} messages compressed]\n\n${summary}`,
-        };
-        const resultMessages = [...systemWithoutOldSummary, summaryMessage, ...recentMessages];
-        const latencyMs = Date.now() - start;
-        const tokensAfter = resultMessages.reduce((sum, m) => sum + this.config.estimateTokens(m), 0);
-        return {
-            messages: resultMessages,
-            droppedCount: oldMessages.length,
-            strategy: "incremental-compact",
-            metrics: {
-                tokensBefore: totalTokens,
-                tokensAfter,
-                compressionRatio: totalTokens > 0 ? tokensAfter / totalTokens : 1,
-                latencyMs,
-                usedLlm: true,
-                cacheInvalidated: true,
-            },
-        };
-    }
-}
-/**
- * Phase 3: Cache-aware wrapper.
- *
- * Wraps any strategy and tracks whether compression invalidated the
- * provider prompt cache. System prompt prefix stability is preserved
- * by never modifying system[0] (the original system prompt).
- */
-export class CacheAwareCompressionStrategy {
-    config;
-    constructor(config) {
-        this.config = config;
-    }
-    compress(messages, budget) {
-        const beforeHash = computeMessagePrefixHash(messages);
-        const result = this.config.inner.compress(messages, budget);
-        const afterHash = computeMessagePrefixHash(result.messages);
-        const cacheInvalidated = beforeHash !== afterHash && result.droppedCount > 0;
-        if (cacheInvalidated) {
-            this.config.onCacheInvalidated?.({ droppedCount: result.droppedCount, strategy: result.strategy });
-        }
-        return {
-            ...result,
-            metrics: {
-                ...(result.metrics ?? {
-                    tokensBefore: 0, tokensAfter: 0, compressionRatio: 0, latencyMs: 0, usedLlm: false,
-                }),
-                cacheInvalidated,
-            },
-        };
-    }
-    async compressAsync(messages, budget) {
-        const beforeHash = computeMessagePrefixHash(messages);
-        const result = isAsyncCompressionStrategy(this.config.inner)
-            ? await this.config.inner.compressAsync(messages, budget)
-            : this.config.inner.compress(messages, budget);
-        const afterHash = computeMessagePrefixHash(result.messages);
-        const cacheInvalidated = beforeHash !== afterHash && result.droppedCount > 0;
-        if (cacheInvalidated) {
-            this.config.onCacheInvalidated?.({ droppedCount: result.droppedCount, strategy: result.strategy });
-        }
-        return {
-            ...result,
-            metrics: {
-                ...(result.metrics ?? {
-                    tokensBefore: 0, tokensAfter: 0, compressionRatio: 0, latencyMs: 0, usedLlm: false,
-                }),
-                cacheInvalidated,
-            },
-        };
-    }
-}
-export const DEFAULT_ADAPTIVE_BUDGET_CONFIG = {
-    modelContextWindow: 128_000,
-    targetUsageRatio: 0.75,
-    minBudget: 16_000,
-    maxBudget: 120_000,
-};
-/**
- * Compute the adaptive token budget for a given model + message history.
- *
- * Adjusts based on model context window, and uses the target ratio
- * so compression triggers before hitting the hard limit.
- */
-export function computeAdaptiveBudget(config = {}) {
-    const c = { ...DEFAULT_ADAPTIVE_BUDGET_CONFIG, ...config };
-    const target = Math.floor(c.modelContextWindow * c.targetUsageRatio);
-    return Math.max(c.minBudget, Math.min(target, c.maxBudget));
-}
-export function selectCompressionTier(currentTokens, budget) {
-    const ratio = currentTokens / budget;
-    if (ratio <= 0.8)
-        return "none";
-    if (ratio <= 1.0)
-        return "trim-only";
-    if (ratio <= 1.5)
-        return "sliding-window";
-    return "llm-summarize";
-}
-export class CompressionMetricsCollector {
-    events = [];
-    maxEvents;
-    constructor(maxEvents = 100) {
-        this.maxEvents = maxEvents;
-    }
-    record(event) {
-        this.events.push(event);
-        if (this.events.length > this.maxEvents) {
-            this.events.shift();
-        }
-    }
-    snapshot() {
-        const total = this.events.length;
-        if (total === 0) {
-            return {
-                totalCompressions: 0,
-                totalLlmCalls: 0,
-                totalCacheInvalidations: 0,
-                averageCompressionRatio: 1,
-                averageLatencyMs: 0,
-                totalTokensSaved: 0,
-                recentEvents: [],
-            };
-        }
-        let sumRatio = 0;
-        let sumLatency = 0;
-        let tokensSaved = 0;
-        let llmCalls = 0;
-        let cacheInvalidations = 0;
-        for (const e of this.events) {
-            sumRatio += e.tokensBefore > 0 ? e.tokensAfter / e.tokensBefore : 1;
-            sumLatency += e.latencyMs;
-            tokensSaved += Math.max(0, e.tokensBefore - e.tokensAfter);
-            if (e.usedLlm)
-                llmCalls++;
-            if (e.cacheInvalidated)
-                cacheInvalidations++;
-        }
-        return {
-            totalCompressions: total,
-            totalLlmCalls: llmCalls,
-            totalCacheInvalidations: cacheInvalidations,
-            averageCompressionRatio: sumRatio / total,
-            averageLatencyMs: sumLatency / total,
-            totalTokensSaved: tokensSaved,
-            recentEvents: this.events.slice(-10),
-        };
-    }
-    reset() {
-        this.events.length = 0;
-    }
-}
-/**
- * Registry for context engines. Enforces single-active constraint.
- */
-export class ContextEngineRegistry {
-    engines = new Map();
-    activeId;
-    register(engine) {
-        this.engines.set(engine.id, engine);
-    }
-    activate(id) {
-        if (!this.engines.has(id))
-            return false;
-        this.activeId = id;
-        return true;
-    }
-    getActive() {
-        return this.activeId ? this.engines.get(this.activeId) : undefined;
-    }
-    listEngines() {
-        return Array.from(this.engines.values()).map((e) => ({
-            id: e.id,
-            label: e.label,
-            active: e.id === this.activeId,
-        }));
-    }
-}
-// ── Helpers ─────────────────────────────────────────────────
-function splitSystemMessages(messages) {
-    const system = [];
-    const nonSystem = [];
-    for (const msg of messages) {
-        if (msg.role === "system")
-            system.push(msg);
-        else
-            nonSystem.push(msg);
-    }
-    return { system, nonSystem };
-}
-function defaultEstimateTokens(msg) {
-    const text = typeof msg.content === "string"
-        ? msg.content
-        : msg.content != null
-            ? JSON.stringify(msg.content)
-            : "";
-    return Math.ceil(text.length / 4);
-}
-/**
- * Simple hash of the first few messages' role+content prefix.
- * Used to detect whether compression changed the prompt prefix
- * (which would invalidate provider prompt caching).
- */
-function computeMessagePrefixHash(messages) {
-    const prefixCount = Math.min(messages.length, 5);
-    const parts = [];
-    for (let i = 0; i < prefixCount; i++) {
-        const m = messages[i];
-        const content = typeof m.content === "string" ? m.content.slice(0, 200) : "";
-        parts.push(`${m.role}:${content}`);
-    }
-    return parts.join("|");
-}