npm - byterover-cli - Versions diffs - 3.10.1 → 3.10.2 - Mend

byterover-cli 3.10.1 → 3.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/dist/agent/infra/agent/service-initializer.js CHANGED Viewed

@@ -121,12 +121,18 @@ export async function createCipherAgentServices(config, agentEventBus) {
         basePath: promptsBasePath,
         validateConfig: true,
     });
-    // Register default contributors
+    // Register default contributors.
+    //
+    // Note: dateTime is intentionally NOT in the system prompt. Anthropic
+    // prompt caching does token-level prefix matching, so a per-iteration
+    // refreshed timestamp here would invalidate the cache for everything
+    // past it. dateTime is instead injected into the first user message
+    // by AgentLLMService, where it lives after the cache breakpoints and
+    // does not poison the cached prefix.
     systemPromptManager.registerContributors([
         { enabled: true, filepath: 'system-prompt.yml', id: 'base', priority: 0, type: 'file' },
         { enabled: true, id: 'env', priority: 10, type: 'environment' },
         { enabled: true, id: 'memories', priority: 20, type: 'memory' },
-        { enabled: true, id: 'datetime', priority: 30, type: 'dateTime' },
     ]);
     // Register context tree structure contributor for query/curate commands
     // This injects the .brv/context-tree structure into the system prompt,

package/dist/agent/infra/llm/agent-llm-service.d.ts CHANGED Viewed

@@ -14,6 +14,15 @@ import { SessionEventBus } from '../events/event-emitter.js';
 import { ContextManager, type FileData, type ImageData } from './context/context-manager.js';
 import { type ThinkingConfig } from './thought-parser.js';
 import { type TruncationConfig } from './tool-output-processor.js';
+/**
+ * Build a `<dateTime>...</dateTime>\n\n` prefix for a user-message body.
+ *
+ * Per-call timestamps must NOT enter the system prompt (they would poison
+ * the prefix cache). They are injected into the user message instead, at
+ * the boundaries where the model legitimately needs fresh time context:
+ * the iter-0 input, and after a rolling-checkpoint history clear.
+ */
+export declare function buildDateTimePrefix(now?: Date): string;
 /**
  * Configuration for ByteRover LLM service
  */
@@ -390,15 +399,6 @@ export declare class AgentLLMService implements ILLMService {
      * @param textInput - Original user input text (for continuation prompt)
      */
     private performRollingCheckpoint;
-    /**
-     * Replace the DateTime section in a cached system prompt with a fresh timestamp.
-     * DateTimeContributor wraps its output in <dateTime>...</dateTime> XML tags,
-     * enabling reliable regex replacement without rebuilding the entire prompt.
-     *
-     * @param cachedPrompt - Previously cached system prompt
-     * @returns Updated prompt with fresh DateTime
-     */
-    private refreshDateTime;
     /**
      * Check if a rolling checkpoint should trigger.
      * Triggers every N iterations for curate/query commands, or when token utilization is high.

package/dist/agent/infra/llm/agent-llm-service.js CHANGED Viewed

@@ -21,6 +21,17 @@ import { OpenRouterTokenizer } from './tokenizers/openrouter-tokenizer.js';
 import { ToolOutputProcessor } from './tool-output-processor.js';
 /** Target utilization ratio for message tokens (leaves headroom for response) */
 const TARGET_MESSAGE_TOKEN_UTILIZATION = 0.7;
+/**
+ * Build a `<dateTime>...</dateTime>\n\n` prefix for a user-message body.
+ *
+ * Per-call timestamps must NOT enter the system prompt (they would poison
+ * the prefix cache). They are injected into the user message instead, at
+ * the boundaries where the model legitimately needs fresh time context:
+ * the iter-0 input, and after a rolling-checkpoint history clear.
+ */
+export function buildDateTimePrefix(now = new Date()) {
+    return `<dateTime>Current date and time: ${now.toISOString()}</dateTime>\n\n`;
+}
 /**
  * ByteRover LLM Service.
  *
@@ -652,8 +663,11 @@ export class AgentLLMService {
             this.memoryDirtyFlag = false;
         }
         else {
-            // Cache hit: reuse base prompt, only refresh the DateTime section
-            basePrompt = this.refreshDateTime(this.cachedBasePrompt);
+            // Cache hit: reuse base prompt verbatim. The cached prompt has no
+            // dateTime section to refresh — dateTime is injected into the
+            // first user message instead so the system prefix stays byte-stable
+            // across iterations and prompt caching can engage cleanly.
+            basePrompt = this.cachedBasePrompt;
         }
         let systemPrompt = basePrompt;
         // Determine which reflection prompt to add (only highest priority is chosen)
@@ -687,9 +701,13 @@ export class AgentLLMService {
         const systemPromptTokens = this.generator.estimateTokensSync(systemPrompt);
         // Add user message and compress context within mutex lock
         return this.mutex.withLock(async () => {
-            // Add user message to context only on the first iteration
+            // Add user message to context only on the first iteration. The
+            // dateTime block is prefixed here (not in the system prompt) so
+            // the cached system prefix stays byte-stable across iterations
+            // and Anthropic/OpenAI/Google prefix caches can engage cleanly.
             if (iterationCount === 0) {
-                await this.contextManager.addUserMessage(textInput, imageData, fileData);
+                const inputWithDateTime = `${buildDateTimePrefix()}${textInput}`;
+                await this.contextManager.addUserMessage(inputWithDateTime, imageData, fileData);
             }
             // Rolling checkpoint: periodically save progress and clear history for RLM commands.
             // This prevents unbounded token accumulation during long curation/query tasks.
@@ -1179,8 +1197,12 @@ export class AgentLLMService {
         this.sandboxService.setSandboxVariable(sessionId, checkpointVar, progressSummary);
         // Clear conversation history
         await this.contextManager.clearHistory();
-        // Re-inject continuation prompt with variable reference
-        const continuationPrompt = [
+        // Re-inject continuation prompt with variable reference.
+        // Prepend the dateTime block: clearHistory wiped the iter-0 user
+        // message that originally carried it, and the iter-0 guard upstream
+        // prevents re-injection. Without this, every iteration after the
+        // first checkpoint loses time context for the rest of the run.
+        const continuationPrompt = buildDateTimePrefix() + [
             `Continue task. Iteration checkpoint at turn ${iterationCount}.`,
             `Previous progress stored in variable: ${checkpointVar}`,
             `Original task: ${textInput.slice(0, 200)}${textInput.length > 200 ? '...' : ''}`,
@@ -1191,18 +1213,6 @@ export class AgentLLMService {
             message: `Rolling checkpoint at iteration ${iterationCount}: history cleared, progress saved to ${checkpointVar}`,
         });
     }
-    /**
-     * Replace the DateTime section in a cached system prompt with a fresh timestamp.
-     * DateTimeContributor wraps its output in <dateTime>...</dateTime> XML tags,
-     * enabling reliable regex replacement without rebuilding the entire prompt.
-     *
-     * @param cachedPrompt - Previously cached system prompt
-     * @returns Updated prompt with fresh DateTime
-     */
-    refreshDateTime(cachedPrompt) {
-        const freshDateTime = `<dateTime>Current date and time: ${new Date().toISOString()}</dateTime>`;
-        return cachedPrompt.replace(/<dateTime>[\S\s]*?<\/dateTime>/, freshDateTime);
-    }
     /**
      * Check if a rolling checkpoint should trigger.
      * Triggers every N iterations for curate/query commands, or when token utilization is high.

package/dist/agent/infra/llm/generators/ai-sdk-content-generator.d.ts CHANGED Viewed

@@ -4,8 +4,17 @@
  * Universal IContentGenerator adapter wrapping any AI SDK LanguageModel.
  * Replaces per-provider content generators with one unified implementation.
  */
-import type { LanguageModel } from 'ai';
+import type { LanguageModel, ModelMessage } from 'ai';
 import type { GenerateContentChunk, GenerateContentRequest, GenerateContentResponse, IContentGenerator } from '../../../core/interfaces/i-content-generator.js';
+/**
+ * Prepend the system prompt as a system-role message carrying
+ * `providerOptions.anthropic.cacheControl: ephemeral`. AI SDK's top-level
+ * `system: string` parameter does not propagate providerOptions, so the
+ * only way to attach Anthropic cache_control to the system block is to
+ * pass it through the messages array. Non-Anthropic providers ignore the
+ * `anthropic` namespace.
+ */
+export declare function prependCachedSystemMessage(systemPrompt: string | undefined, messages: ModelMessage[]): ModelMessage[];
 /**
  * Configuration for AiSdkContentGenerator.
  */

package/dist/agent/infra/llm/generators/ai-sdk-content-generator.js CHANGED Viewed

@@ -8,6 +8,25 @@ import { generateText, streamText } from 'ai';
 import { StreamChunkType } from '../../../core/interfaces/i-content-generator.js';
 import { toAiSdkTools, toModelMessages } from './ai-sdk-message-converter.js';
 const DEFAULT_CHARS_PER_TOKEN = 4;
+/**
+ * Prepend the system prompt as a system-role message carrying
+ * `providerOptions.anthropic.cacheControl: ephemeral`. AI SDK's top-level
+ * `system: string` parameter does not propagate providerOptions, so the
+ * only way to attach Anthropic cache_control to the system block is to
+ * pass it through the messages array. Non-Anthropic providers ignore the
+ * `anthropic` namespace.
+ */
+export function prependCachedSystemMessage(systemPrompt, messages) {
+    if (!systemPrompt) {
+        return messages;
+    }
+    const systemMessage = {
+        content: systemPrompt,
+        providerOptions: { anthropic: { cacheControl: { type: 'ephemeral' } } },
+        role: 'system',
+    };
+    return [systemMessage, ...messages];
+}
 /**
  * Universal content generator that wraps any AI SDK LanguageModel.
  *
@@ -27,7 +46,7 @@ export class AiSdkContentGenerator {
         return Math.ceil(content.length / this.charsPerToken);
     }
     async generateContent(request) {
-        const messages = toModelMessages(request.contents);
+        const messages = prependCachedSystemMessage(request.systemPrompt, toModelMessages(request.contents));
         const tools = toAiSdkTools(request.tools);
         const result = await generateText({
             maxOutputTokens: request.config.maxTokens,
@@ -35,7 +54,6 @@ export class AiSdkContentGenerator {
             messages,
             model: this.model,
             temperature: request.config.temperature,
-            ...(request.systemPrompt && { system: request.systemPrompt }),
             ...(tools && { tools }),
             ...(request.config.topK !== undefined && { topK: request.config.topK }),
             ...(request.config.topP !== undefined && { topP: request.config.topP }),
@@ -68,7 +86,7 @@ export class AiSdkContentGenerator {
         };
     }
     async *generateContentStream(request) {
-        const messages = toModelMessages(request.contents);
+        const messages = prependCachedSystemMessage(request.systemPrompt, toModelMessages(request.contents));
         const tools = toAiSdkTools(request.tools);
         const result = streamText({
             maxOutputTokens: request.config.maxTokens,
@@ -76,7 +94,6 @@ export class AiSdkContentGenerator {
             messages,
             model: this.model,
             temperature: request.config.temperature,
-            ...(request.systemPrompt && { system: request.systemPrompt }),
             ...(tools && { tools }),
             ...(request.config.topK !== undefined && { topK: request.config.topK }),
             ...(request.config.topP !== undefined && { topP: request.config.topP }),

package/dist/agent/infra/llm/generators/ai-sdk-message-converter.d.ts CHANGED Viewed

@@ -16,5 +16,9 @@ export declare function toModelMessages(messages: InternalMessage[]): ModelMessa
 /**
  * Convert our ToolSet to AI SDK tool definitions.
  * Tools are declared without `execute` — our agentic loop handles execution.
+ *
+ * The last tool gets `providerOptions.anthropic.cacheControl: ephemeral`,
+ * which makes Anthropic cache the entire tool block (tools come before the
+ * system prompt in the cached prefix). Non-Anthropic providers ignore the `anthropic` namespace.
  */
 export declare function toAiSdkTools(tools?: InternalToolSet): Record<string, ReturnType<typeof aiSdkTool>> | undefined;

package/dist/agent/infra/llm/generators/ai-sdk-message-converter.js CHANGED Viewed

@@ -46,16 +46,23 @@ export function toModelMessages(messages) {
 /**
  * Convert our ToolSet to AI SDK tool definitions.
  * Tools are declared without `execute` — our agentic loop handles execution.
+ *
+ * The last tool gets `providerOptions.anthropic.cacheControl: ephemeral`,
+ * which makes Anthropic cache the entire tool block (tools come before the
+ * system prompt in the cached prefix). Non-Anthropic providers ignore the `anthropic` namespace.
  */
 export function toAiSdkTools(tools) {
     if (!tools || Object.keys(tools).length === 0) {
         return undefined;
     }
+    const entries = Object.entries(tools);
     const result = {};
-    for (const [name, def] of Object.entries(tools)) {
+    for (const [index, [name, def]] of entries.entries()) {
+        const isLast = index === entries.length - 1;
         result[name] = aiSdkTool({
             description: def.description ?? '',
             inputSchema: jsonSchema(def.parameters),
+            ...(isLast && { providerOptions: { anthropic: { cacheControl: { type: 'ephemeral' } } } }),
         });
     }
     return result;

package/dist/agent/infra/map/abstract-generator.d.ts CHANGED Viewed

@@ -8,6 +8,16 @@ export interface AbstractGenerateResult {
     /** L1: key points + structure (~1500 tokens) */
     overviewContent: string;
 }
+/**
+ * Result from a batched abstract generation. One entry per input item, in
+ * input order. Empty string fields signal the model failed to produce content
+ * for that path — the caller's existing fail-open semantics still apply.
+ */
+export interface BatchedAbstractItem {
+    abstractContent: string;
+    contextPath: string;
+    overviewContent: string;
+}
 /**
  * Generate L0 abstract and L1 overview for a knowledge file.
  *
@@ -20,3 +30,22 @@ export interface AbstractGenerateResult {
  * @returns Abstract and overview content strings
  */
 export declare function generateFileAbstracts(fullContent: string, generator: IContentGenerator): Promise<AbstractGenerateResult>;
+/**
+ * Generate L0 abstracts and L1 overviews for N knowledge files in two batched
+ * LLM calls (one batch for all L0s, one for all L1s) instead of 2N per-file
+ * calls.
+ *
+ * Two parallel calls; each call carries all input files in an XML envelope
+ * and the model is instructed to return one element per file. Output is
+ * parsed by path tag and matched back to the input order. Files the model
+ * fails to produce content for receive empty strings (caller's existing
+ * fail-open semantics still apply).
+ *
+ * Caller is responsible for capping batch size; this function does not split
+ * its input. Recommended cap is 5 files per call to keep the L1 batch's
+ * output budget under ~8K tokens.
+ */
+export declare function generateFileAbstractsBatch(items: ReadonlyArray<{
+    contextPath: string;
+    fullContent: string;
+}>, generator: IContentGenerator): Promise<BatchedAbstractItem[]>;

package/dist/agent/infra/map/abstract-generator.js CHANGED Viewed

@@ -31,6 +31,110 @@ ${content}
 }
 /** Truncate content before embedding in LLM prompts to avoid exceeding model context windows during bulk ingest. */
 const MAX_ABSTRACT_CONTENT_CHARS = 20_000;
+/**
+ * Per-file truncation when N files share a single batched call. Matches the
+ * non-batched cap (20 KB) so each file gets the same view of its content
+ * regardless of batched vs per-file mode — total batched user content scales
+ * linearly with N. Avoids quality regression on long-file curates that batched
+ * mode would otherwise see.
+ */
+const MAX_BATCHED_CONTENT_CHARS_PER_FILE = MAX_ABSTRACT_CONTENT_CHARS;
+/** L0 batch output budget: 5 files × ~80 tokens + framing tags ≈ 600 tokens. */
+const BATCH_L0_MAX_OUTPUT_TOKENS = 800;
+/** L1 batch output budget: 5 files × ~1500 tokens + framing tags ≈ 8000 tokens. */
+const BATCH_L1_MAX_OUTPUT_TOKENS = 8500;
+const BATCHED_ABSTRACT_SYSTEM_PROMPT = `You are a technical documentation assistant.
+You produce precise one-line summaries of knowledge documents in a strict XML format.
+Output ONLY the XML — no preamble, no commentary, no markdown fences.`;
+const BATCHED_OVERVIEW_SYSTEM_PROMPT = `You are a technical documentation assistant.
+You produce structured overviews of knowledge documents in a strict XML format.
+Output ONLY the XML — no preamble, no commentary, no markdown fences.`;
+function escapeXmlAttr(value) {
+    return value.replaceAll('&', '&amp;').replaceAll('"', '&quot;').replaceAll('<', '&lt;').replaceAll('>', '&gt;');
+}
+/**
+ * Wrap raw file content in a CDATA section so XML/HTML/JSX/markdown that
+ * mentions `</document>` or `</file>` (perfectly normal for docs that describe
+ * those formats) cannot terminate the envelope and conflate files. The inner
+ * `]]>` escape is the standard CDATA-in-CDATA trick: split the sequence so it
+ * never appears verbatim inside the active section.
+ */
+function wrapCdata(content) {
+    return `<![CDATA[${content.replaceAll(']]>', ']]]]><![CDATA[>')}]]>`;
+}
+function buildBatchedAbstractPrompt(items) {
+    const filesXml = items.map((it) => `<file path="${escapeXmlAttr(it.contextPath)}">
+<document>${wrapCdata(it.content)}</document>
+</file>`).join('\n');
+    return `For each of the following knowledge documents, produce a ONE-LINE summary (max 80 tokens) that is a complete sentence capturing the core topic and key insight.
+Output format — emit exactly one <file> element per input file, with the same path attribute:
+<file path="<path>"><abstract>One-line summary.</abstract></file>
+Output only these XML elements, in any order. No preamble, no markdown fences.
+<files>
+${filesXml}
+</files>`;
+}
+function buildBatchedOverviewPrompt(items) {
+    const filesXml = items.map((it) => `<file path="${escapeXmlAttr(it.contextPath)}">
+<document>${wrapCdata(it.content)}</document>
+</file>`).join('\n');
+    return `For each of the following knowledge documents, produce a structured overview (markdown, under 1500 tokens) that includes:
+- Key points (3-7 bullet points)
+- Structure / sections summary
+- Any notable entities, patterns, or decisions mentioned
+Output format — emit exactly one <file> element per input file, with the same path attribute:
+<file path="<path>"><overview>
+- bullet 1
+- bullet 2
+...
+</overview></file>
+Output only these XML elements, in any order. No preamble, no markdown fences.
+<files>
+${filesXml}
+</files>`;
+}
+/**
+ * Extract <innerTag>...</innerTag> (e.g. <abstract> or <overview>) per <file path="..."> from the model output.
+ * Tolerant: ignores extra whitespace, supports nested newlines inside the inner
+ * tag. Returns a Map keyed by path. Paths that don't appear are absent.
+ *
+ * Anchored on `<file path="...">` openers (not `</file>` closers) so a model
+ * overview that mentions `</file>` literally in prose — perfectly normal for
+ * docs about XML, JSX, or build systems — cannot prematurely terminate the
+ * outer match and orphan the inner tag. Each opener owns the response slice
+ * up to the next opener (or end-of-string), and the inner regex extracts
+ * the payload from that slice.
+ */
+function parseBatchedTags(response, innerTag) {
+    const result = new Map();
+    const fileOpenerRe = /<file\s+path="([^"]*)"[^>]*>/g;
+    const innerRe = new RegExp(`<${innerTag}>([\\s\\S]*?)<\\/${innerTag}>`);
+    const openers = [];
+    let m;
+    while ((m = fileOpenerRe.exec(response)) !== null) {
+        openers.push({ bodyStart: fileOpenerRe.lastIndex, rawPath: m[1] });
+    }
+    for (const [i, opener] of openers.entries()) {
+        // Each opener's slice runs from its end to the start of the next opener
+        // (or end-of-string). Within that slice, the inner regex picks up the
+        // payload. A literal `</file>` in prose has no special meaning here.
+        const sliceEnd = i + 1 < openers.length ? openers[i + 1].bodyStart : response.length;
+        const slice = response.slice(opener.bodyStart, sliceEnd);
+        const inner = innerRe.exec(slice);
+        if (inner) {
+            const path = opener.rawPath
+                .replaceAll('&amp;', '&').replaceAll('&quot;', '"').replaceAll('&lt;', '<').replaceAll('&gt;', '>');
+            result.set(path, inner[1].trim());
+        }
+    }
+    return result;
+}
 /**
  * Generate L0 abstract and L1 overview for a knowledge file.
  *
@@ -65,3 +169,60 @@ export async function generateFileAbstracts(fullContent, generator) {
         overviewContent: overviewText.trim(),
     };
 }
+/**
+ * Generate L0 abstracts and L1 overviews for N knowledge files in two batched
+ * LLM calls (one batch for all L0s, one for all L1s) instead of 2N per-file
+ * calls.
+ *
+ * Two parallel calls; each call carries all input files in an XML envelope
+ * and the model is instructed to return one element per file. Output is
+ * parsed by path tag and matched back to the input order. Files the model
+ * fails to produce content for receive empty strings (caller's existing
+ * fail-open semantics still apply).
+ *
+ * Caller is responsible for capping batch size; this function does not split
+ * its input. Recommended cap is 5 files per call to keep the L1 batch's
+ * output budget under ~8K tokens.
+ */
+export async function generateFileAbstractsBatch(items, generator) {
+    if (items.length === 0)
+        return [];
+    // Dedup by contextPath, keeping the LAST occurrence's content. The queue is
+    // FIFO so later items carry the most recent fullContent — and the disk file
+    // already reflects that write, so the abstract must summarize the latest
+    // state rather than an intermediate one. Without this dedup, duplicate paths
+    // emit two `<file path>` blocks the model may answer in either order; the
+    // tag parser keys on path and Map-collapses, leaving non-deterministic
+    // results for the duplicates.
+    const byPath = new Map();
+    for (const it of items) {
+        byPath.set(it.contextPath, {
+            content: it.fullContent.slice(0, MAX_BATCHED_CONTENT_CHARS_PER_FILE),
+            contextPath: it.contextPath,
+        });
+    }
+    const truncated = [...byPath.values()];
+    const [abstractText, overviewText] = await Promise.all([
+        streamToText(generator, {
+            config: { maxTokens: BATCH_L0_MAX_OUTPUT_TOKENS, temperature: 0 },
+            contents: [{ content: buildBatchedAbstractPrompt(truncated), role: 'user' }],
+            model: 'default',
+            systemPrompt: BATCHED_ABSTRACT_SYSTEM_PROMPT,
+            taskId: randomUUID(),
+        }),
+        streamToText(generator, {
+            config: { maxTokens: BATCH_L1_MAX_OUTPUT_TOKENS, temperature: 0 },
+            contents: [{ content: buildBatchedOverviewPrompt(truncated), role: 'user' }],
+            model: 'default',
+            systemPrompt: BATCHED_OVERVIEW_SYSTEM_PROMPT,
+            taskId: randomUUID(),
+        }),
+    ]);
+    const abstracts = parseBatchedTags(abstractText, 'abstract');
+    const overviews = parseBatchedTags(overviewText, 'overview');
+    return items.map((it) => ({
+        abstractContent: (abstracts.get(it.contextPath) ?? '').trim(),
+        contextPath: it.contextPath,
+        overviewContent: (overviews.get(it.contextPath) ?? '').trim(),
+    }));
+}

package/dist/agent/infra/map/abstract-queue.d.ts CHANGED Viewed

@@ -20,6 +20,13 @@ export interface AbstractQueueStatus {
 export declare class AbstractGenerationQueue {
     private readonly projectRoot;
     private readonly maxAttempts;
+    /**
+     * When true, scheduleNext fires the next batch even if pending is below
+     * BATCH_SIZE_CAP. Set by drain(); reset once the queue is fully idle.
+     * Without this, items below the cap would be buffered indefinitely with
+     * no flush trigger when a curate writes fewer files than the cap.
+     */
+    private drainRequested;
     private drainResolvers;
     private failed;
     private generator;