npm - byterover-cli - Versions diffs - 1.7.2 → 1.8.0 - Mend

byterover-cli 1.7.2 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

package/dist/server/infra/executor/query-executor.d.ts CHANGED Viewed

@@ -1,5 +1,19 @@
 import type { ICipherAgent } from '../../../agent/core/interfaces/i-cipher-agent.js';
+import type { IFileSystem } from '../../../agent/core/interfaces/i-file-system.js';
+import type { ISearchKnowledgeService } from '../../../agent/infra/sandbox/tools-sdk.js';
 import type { IQueryExecutor, QueryExecuteOptions } from '../../core/interfaces/executor/i-query-executor.js';
+/**
+ * Optional dependencies for QueryExecutor.
+ * All fields are optional — without them, the executor falls back to the original behavior.
+ */
+export interface QueryExecutorDeps {
+    /**
+     * Enable query result caching (default: false).
+     * The cache is only consulted when `fileSystem` is also provided, because cached
+     * entries are validated against a context tree fingerprint computed through it.
+     */
+    enableCache?: boolean;
+    /** File system for reading full document content and computing fingerprints */
+    fileSystem?: IFileSystem;
+    /**
+     * Search service for pre-fetching relevant context before calling the LLM.
+     * Direct (no-LLM) responses and prompt pre-fetching additionally require `fileSystem`.
+     */
+    searchService?: ISearchKnowledgeService;
+}
 /**
  * QueryExecutor - Executes query tasks with an injected CipherAgent.
  *
@@ -12,15 +26,47 @@ import type { IQueryExecutor, QueryExecuteOptions } from '../../core/interfaces/
  * - Transport handles task lifecycle (task:started, task:completed, task:error)
  * - Executor focuses solely on query execution
  *
- * Uses code_exec with tools.* SDK for programmatic search.
+ * Tiered response strategy (fastest to slowest):
+ * - Tier 0: Exact cache hit (0ms)
+ * - Tier 1: Fuzzy cache match via Jaccard similarity (~50ms)
+ * - Tier 2: Direct search response without LLM (~100-200ms)
+ * - Tier 3: Optimized single LLM call with pre-fetched context (<5s)
+ * - Tier 4: Full agentic loop fallback (8-15s)
  */
 export declare class QueryExecutor implements IQueryExecutor {
+    private static readonly FINGERPRINT_CACHE_TTL_MS;
+    private readonly cache?;
+    private cachedFingerprint?;
+    private readonly fileSystem?;
+    private readonly searchService?;
+    constructor(deps?: QueryExecutorDeps);
     executeWithAgent(agent: ICipherAgent, options: QueryExecuteOptions): Promise<string>;
+    /**
+     * Build pre-fetched context string from search results for LLM prompt injection.
+     * Synchronous — uses already-fetched search results and performs no file reads;
+     * each included result contributes its title, source path, and excerpt.
+     * Only results at or above the smart-routing score threshold are included.
+     */
+    private buildPrefetchedContext;
     /**
      * Build a streamlined query prompt optimized for fast, accurate responses.
      *
-     * Uses code_exec with tools.* SDK for programmatic search.
-     * Designed to minimize iterations while maintaining answer quality.
+     * When pre-fetched context is available, the prompt instructs the LLM to answer
+     * directly from the provided context (reducing LLM calls from 2+ to 1).
+     * When no context is available, falls back to tool-based search.
      */
     private buildQueryPrompt;
+    /**
+     * Compute a context tree fingerprint cheaply using file mtimes.
+     * Used for cache invalidation — if any file in the context tree changes,
+     * the fingerprint changes and cached results are invalidated.
+     * The value is memoized for FINGERPRINT_CACHE_TTL_MS, so a change may go unnoticed
+     * until the memoized value expires. Resolves to 'unknown' when listing files fails.
+     */
+    private computeContextTreeFingerprint;
+    /**
+     * Attempt to produce a direct response from search results without LLM.
+     * Returns formatted response if high-confidence dominant match found, undefined otherwise.
+     *
+     * Candidates are pre-filtered with the smart-routing score threshold; the final
+     * decision is delegated to canRespondDirectly.
+     * NOTE(review): the stricter thresholds (score >= 8, 2x dominance) are presumably
+     * enforced inside canRespondDirectly — confirm against direct-search-responder.
+     */
+    private tryDirectSearchResponse;
 }

package/dist/server/infra/executor/query-executor.js CHANGED Viewed

@@ -1,3 +1,11 @@
+import { join } from 'node:path';
+import { BRV_DIR, CONTEXT_FILE_EXTENSION, CONTEXT_TREE_DIR } from '../../constants.js';
+import { canRespondDirectly, formatDirectResponse, formatNotFoundResponse, } from './direct-search-responder.js';
+import { QueryResultCache } from './query-result-cache.js';
+/** Minimum MiniSearch score to consider a result high-confidence for pre-fetching */
+const SMART_ROUTING_SCORE_THRESHOLD = 5;
+/** Maximum number of documents to pre-fetch and inject into the prompt */
+const SMART_ROUTING_MAX_DOCS = 5;
 /**
  * QueryExecutor - Executes query tasks with an injected CipherAgent.
  *
@@ -10,28 +18,137 @@
  * - Transport handles task lifecycle (task:started, task:completed, task:error)
  * - Executor focuses solely on query execution
  *
- * Uses code_exec with tools.* SDK for programmatic search.
+ * Tiered response strategy (fastest to slowest):
+ * - Tier 0: Exact cache hit (0ms)
+ * - Tier 1: Fuzzy cache match via Jaccard similarity (~50ms)
+ * - Tier 2: Direct search response without LLM (~100-200ms)
+ * - Tier 3: Optimized single LLM call with pre-fetched context (<5s)
+ * - Tier 4: Full agentic loop fallback (8-15s)
  */
 export class QueryExecutor {
+    static FINGERPRINT_CACHE_TTL_MS = 30_000;
+    cache;
+    cachedFingerprint;
+    fileSystem;
+    searchService;
+    constructor(deps) {
+        this.fileSystem = deps?.fileSystem; // stays undefined when no deps object is passed
+        this.searchService = deps?.searchService;
+        if (deps?.enableCache) { // caching is opt-in; this.cache stays undefined otherwise
+            this.cache = new QueryResultCache();
+        }
+    }
     async executeWithAgent(agent, options) {
         const { query, taskId } = options;
-        // Execute with query commandType
-        // Agent uses its default session (created during start())
-        // Task lifecycle is managed by Transport (task:started, task:completed, task:error)
-        const prompt = this.buildQueryPrompt(query);
+        // Start search early — runs in parallel with fingerprint computation (searchPromise is undefined without a search service)
+        const searchPromise = this.searchService?.search(query, { limit: SMART_ROUTING_MAX_DOCS });
+        // Prevent unhandled rejection if we return early (cache hit) while search is still pending; the await below still handles failures
+        searchPromise?.catch(() => { });
+        // === Tier 0: Exact cache hit (0ms) ===
+        let fingerprint;
+        if (this.cache && this.fileSystem) { // the fingerprint is computed through the file system, so caching needs both
+            fingerprint = await this.computeContextTreeFingerprint();
+            const cached = this.cache.get(query, fingerprint);
+            if (cached) {
+                return cached;
+            }
+        }
+        // === Tier 1: Fuzzy cache match (~50ms) ===
+        if (this.cache && fingerprint) { // fingerprint is only set when the Tier 0 block ran
+            const fuzzyHit = this.cache.findSimilar(query, fingerprint);
+            if (fuzzyHit) {
+                return fuzzyHit;
+            }
+        }
+        // Await search result (already started in parallel with fingerprint computation)
+        let searchResult;
+        try {
+            searchResult = await searchPromise;
+        }
+        catch {
+            // Search failed, proceed without pre-fetched context (searchResult stays undefined)
+        }
+        // === OOD short-circuit: no results means topic not covered ===
+        if (searchResult && searchResult.results.length === 0) {
+            const response = formatNotFoundResponse(query);
+            if (this.cache && fingerprint) {
+                this.cache.set(query, response, fingerprint); // the not-found answer is cached as well
+            }
+            return response;
+        }
+        // === Tier 2: Direct search response (~100-200ms) ===
+        if (searchResult && this.fileSystem) { // the direct responder reads full documents through the file system
+            const directResult = await this.tryDirectSearchResponse(query, searchResult);
+            if (directResult) {
+                if (this.cache && fingerprint) {
+                    this.cache.set(query, directResult, fingerprint);
+                }
+                return directResult;
+            }
+        }
+        // === Tier 3: Optimized LLM call with pre-fetched context (<5s) ===
+        let prefetchedContext;
+        if (searchResult && this.fileSystem) {
+            prefetchedContext = this.buildPrefetchedContext(searchResult);
+        }
+        const prompt = this.buildQueryPrompt(query, prefetchedContext);
+        // Query-optimized LLM overrides: tighter limits (fewer iterations and tokens, lower temperature) when pre-fetched context exists
+        const queryOverrides = prefetchedContext
+            ? { maxIterations: 2, maxTokens: 1024, temperature: 0.3 }
+            : { maxIterations: 3, maxTokens: 2048, temperature: 0.5 };
         const response = await agent.execute(prompt, {
-            executionContext: { commandType: 'query' },
+            executionContext: { commandType: 'query', ...queryOverrides },
             taskId,
         });
+        // Store in cache for future Tier 0/1 hits
+        if (this.cache && fingerprint) {
+            this.cache.set(query, response, fingerprint);
+        }
         return response;
     }
+    /**
+     * Build pre-fetched context string from search results for LLM prompt injection.
+     * Synchronous — works only from the already-fetched search results and performs no
+     * file reads: each section embeds the result's title, source path, and excerpt.
+     * Only results scoring at least SMART_ROUTING_SCORE_THRESHOLD are included.
+     *
+     * @param searchResult - Search response; `totalFound` and `results` (score, title, path, excerpt) are read
+     * @returns Sections joined by a `---` separator, or undefined when nothing was found
+     *          or no result reaches the threshold
+     */
+    buildPrefetchedContext(searchResult) {
+        if (searchResult.totalFound === 0)
+            return undefined;
+        const highConfidenceResults = searchResult.results.filter((r) => r.score >= SMART_ROUTING_SCORE_THRESHOLD);
+        if (highConfidenceResults.length === 0)
+            return undefined;
+        const sections = highConfidenceResults.map((r) => `### ${r.title}\n**Source**: .brv/context-tree/${r.path}\n\n${r.excerpt}`);
+        return sections.join('\n\n---\n\n');
+    }
     /**
      * Build a streamlined query prompt optimized for fast, accurate responses.
      *
-     * Uses code_exec with tools.* SDK for programmatic search.
-     * Designed to minimize iterations while maintaining answer quality.
+     * When pre-fetched context is available, the prompt instructs the LLM to answer
+     * directly from the provided context (reducing LLM calls from 2+ to 1).
+     * When no context is available, falls back to tool-based search.
      */
-    buildQueryPrompt(query) {
+    buildQueryPrompt(query, prefetchedContext) {
+        if (prefetchedContext) {
+            return `## User Query
+${query}
+## Pre-fetched Context
+The following relevant knowledge was found in the context tree:
+${prefetchedContext}
+## Instructions
+Answer the user's question using the pre-fetched context above.
+If the pre-fetched context does not directly address the user's query topic, respond that the topic is not covered in the knowledge base. Do not attempt to answer from tangentially related content.
+If the context is insufficient but relevant, you may use \`code_exec\` with the \`tools.*\` SDK for additional searches.
+### Response Format
+- **Summary**: Direct answer (2-3 sentences)
+- **Details**: Key findings with explanations
+- **Sources**: File paths from .brv/context-tree/
+- **Gaps**: Note any aspects not covered`;
+        }
         return `## User Query
 ${query}
@@ -46,4 +163,72 @@ Use \`code_exec\` to run a programmatic search with the \`tools.*\` SDK.
 - **Sources**: File paths from .brv/context-tree/
 - **Gaps**: Note any aspects not covered`;
     }
+    /**
+     * Compute a context tree fingerprint cheaply using file mtimes.
+     * Used for cache invalidation — if any file in the context tree changes,
+     * the fingerprint changes and cached results are invalidated.
+     *
+     * The computed value is memoized for FINGERPRINT_CACHE_TTL_MS, so a change to the
+     * context tree may go unnoticed until the memoized value expires.
+     * Reads `this.fileSystem` without a guard; the visible caller only invokes this
+     * method when a file system was provided.
+     * NOTE(review): the 'unknown' fallback is a non-empty string, so the caller's
+     * truthiness checks treat it like a real fingerprint — confirm this is intended.
+     *
+     * @returns The fingerprint string, or 'unknown' when listing the files fails
+     */
+    async computeContextTreeFingerprint() {
+        // Fast path: return memoized fingerprint while it has not expired (avoids globFiles I/O)
+        if (this.cachedFingerprint && Date.now() < this.cachedFingerprint.expiresAt) {
+            return this.cachedFingerprint.value;
+        }
+        try {
+            const contextTreePath = join(BRV_DIR, CONTEXT_TREE_DIR);
+            const globResult = await this.fileSystem.globFiles(`**/*${CONTEXT_FILE_EXTENSION}`, {
+                cwd: contextTreePath,
+                includeMetadata: true,
+                maxResults: 10_000,
+                respectGitignore: false,
+            });
+            const files = globResult.files.map((f) => ({
+                mtime: f.modified?.getTime() ?? 0, // entries without a modification date contribute mtime 0
+                path: f.path,
+            }));
+            const fingerprint = QueryResultCache.computeFingerprint(files);
+            this.cachedFingerprint = {
+                expiresAt: Date.now() + QueryExecutor.FINGERPRINT_CACHE_TTL_MS,
+                value: fingerprint,
+            };
+            return fingerprint;
+        }
+        catch {
+            return 'unknown'; // the failure is not memoized, so the next call retries the glob
+        }
+    }
+    /**
+     * Attempt to produce a direct response from search results without LLM.
+     * Returns formatted response if high-confidence dominant match found, undefined otherwise.
+     *
+     * Candidates are the results scoring at least SMART_ROUTING_SCORE_THRESHOLD, capped at
+     * SMART_ROUTING_MAX_DOCS; the final decision is delegated to canRespondDirectly.
+     * NOTE(review): the stricter thresholds (score >= 8, 2x dominance) are not visible in
+     * this method — presumably they are enforced inside canRespondDirectly; confirm.
+     *
+     * @param query - User query string, passed through to formatDirectResponse
+     * @param searchResult - Search response; `totalFound` and `results` are read
+     * @returns Formatted response, or undefined when no direct answer is possible or any step throws
+     */
+    async tryDirectSearchResponse(query, searchResult) {
+        try {
+            if (searchResult.totalFound === 0)
+                return undefined;
+            // Load full document content for each candidate; the reads run concurrently
+            const fullResults = await Promise.all(searchResult.results
+                .filter((r) => r.score >= SMART_ROUTING_SCORE_THRESHOLD)
+                .slice(0, SMART_ROUTING_MAX_DOCS)
+                .map(async (result) => {
+                let content = result.excerpt;
+                try {
+                    const ctPath = join(BRV_DIR, CONTEXT_TREE_DIR, result.path);
+                    const { content: fullContent } = await this.fileSystem.readFile(ctPath);
+                    content = fullContent;
+                }
+                catch {
+                    // Use excerpt if full read fails (content keeps its initial excerpt value)
+                }
+                return { content, path: result.path, score: result.score, title: result.title };
+            }));
+            if (!canRespondDirectly(fullResults))
+                return undefined;
+            return formatDirectResponse(query, fullResults);
+        }
+        catch {
+            return undefined; // best effort: any failure falls through to the LLM tiers
+        }
+    }
 }

package/dist/server/infra/executor/query-result-cache.d.ts ADDED Viewed

@@ -0,0 +1,87 @@
+import { type QueryTokens } from './query-similarity.js';
+/**
+ * Cached query result entry.
+ */
+export interface QueryCacheEntry {
+    /** Cached response content */
+    content: string;
+    /** Context tree fingerprint at cache time */
+    fingerprint: string;
+    /** Timestamp when cached (Date.now() epoch milliseconds) */
+    storedAt: number;
+    /** Pre-computed tokens for fuzzy similarity matching (tokenizeQuery output for the stored query) */
+    tokens: QueryTokens;
+}
+/**
+ * Configuration for QueryResultCache.
+ */
+export interface QueryResultCacheOptions {
+    /** Maximum number of entries (default: 50) */
+    maxSize?: number;
+    /** TTL in milliseconds (default: 60000); lookups ignore entries stored longer ago than this */
+    ttlMs?: number;
+}
+/**
+ * In-memory, size-capped cache for query results with TTL and context tree fingerprint validation.
+ *
+ * Follows the same pattern as PromptCache (src/agent/infra/system-prompt/prompt-cache.ts):
+ * - Map-based storage with configurable max size
+ * - Eviction when at capacity removes the first key in the Map's iteration order
+ * - TTL-based expiration
+ * - Fingerprint-based invalidation when the context tree changes
+ * - Fuzzy matching for semantically similar queries via Jaccard similarity
+ */
+export declare class QueryResultCache {
+    private readonly cache;
+    private readonly maxSize;
+    private readonly ttlMs;
+    constructor(options?: QueryResultCacheOptions);
+    /**
+     * Compute a context tree fingerprint from file mtimes.
+     * Uses sorted paths + mtimes to create a deterministic hash.
+     * If any file changes (added, removed, modified), the fingerprint changes.
+     *
+     * @param files - Array of file paths with modification times
+     * @returns 16-character hex fingerprint, or the literal 'empty' when `files` is empty
+     */
+    static computeFingerprint(files: Array<{
+        mtime: number;
+        path: string;
+    }>): string;
+    /** Clear all entries. */
+    clear(): void;
+    /**
+     * Find a cached result by fuzzy similarity.
+     * Returns the highest-similarity match above threshold, or undefined.
+     * Called after exact-match `get()` fails.
+     * Queries with fewer than two meaningful tokens never match; entries that are
+     * expired or belong to another fingerprint are skipped.
+     *
+     * @param query - User query string
+     * @param currentFingerprint - Current context tree fingerprint
+     * @returns Cached response content or undefined
+     */
+    findSimilar(query: string, currentFingerprint: string): string | undefined;
+    /**
+     * Get a cached result if valid.
+     * Returns undefined if entry doesn't exist, TTL expired, or fingerprint mismatch;
+     * an expired or mismatching entry is also removed from the cache.
+     *
+     * @param query - User query string
+     * @param currentFingerprint - Current context tree fingerprint
+     * @returns Cached response content or undefined
+     */
+    get(query: string, currentFingerprint: string): string | undefined;
+    /** Get cache statistics (configured capacity and current entry count). */
+    getStats(): {
+        maxSize: number;
+        size: number;
+    };
+    /**
+     * Store a result in the cache under the normalized query, replacing any existing entry.
+     *
+     * @param query - User query string
+     * @param content - Response content to cache
+     * @param fingerprint - Context tree fingerprint at cache time
+     */
+    set(query: string, content: string, fingerprint: string): void;
+    /** Normalize query for cache key consistency (lowercase, trimmed, whitespace collapsed). */
+    private normalizeQuery;
+}

package/dist/server/infra/executor/query-result-cache.js ADDED Viewed

@@ -0,0 +1,127 @@
+import { createHash } from 'node:crypto';
+import { FUZZY_SIMILARITY_THRESHOLD, jaccardSimilarity, tokenizeQuery } from './query-similarity.js';
+/**
+ * In-memory LRU cache for query results with TTL and context tree fingerprint validation.
+ *
+ * Follows the same pattern as PromptCache (src/agent/infra/system-prompt/prompt-cache.ts):
+ * - Map-based storage with configurable max size
+ * - LRU eviction when at capacity
+ * - TTL-based expiration
+ * - Fingerprint-based invalidation when the context tree changes
+ * - Fuzzy matching for semantically similar queries via Jaccard similarity
+ */
+export class QueryResultCache {
+    cache = new Map();
+    maxSize;
+    ttlMs;
+    constructor(options = {}) {
+        this.maxSize = options.maxSize ?? 50;
+        this.ttlMs = options.ttlMs ?? 60_000;
+    }
+    /**
+     * Compute a context tree fingerprint from file mtimes.
+     * Uses sorted paths + mtimes to create a deterministic hash.
+     * If any file changes (added, removed, modified), the fingerprint changes.
+     *
+     * @param files - Array of file paths with modification times
+     * @returns 16-character hex fingerprint
+     */
+    static computeFingerprint(files) {
+        if (files.length === 0)
+            return 'empty';
+        const sorted = [...files].sort((a, b) => a.path.localeCompare(b.path));
+        const data = sorted.map((f) => `${f.path}:${f.mtime}`).join('|');
+        return createHash('md5').update(data).digest('hex').slice(0, 16);
+    }
+    /** Clear all entries. */
+    clear() {
+        this.cache.clear();
+    }
+    /**
+     * Find a cached result by fuzzy similarity.
+     * Returns the highest-similarity match above threshold, or undefined.
+     * Called after exact-match `get()` fails.
+     *
+     * @param query - User query string
+     * @param currentFingerprint - Current context tree fingerprint
+     * @returns Cached response content or undefined
+     */
+    findSimilar(query, currentFingerprint) {
+        const queryTokens = tokenizeQuery(query);
+        // Skip fuzzy matching if query has very few meaningful tokens
+        if (queryTokens.tokenSet.size < 2)
+            return undefined;
+        let bestMatch;
+        for (const [, entry] of this.cache) {
+            // Check fingerprint + TTL first (cheap filters)
+            if (entry.fingerprint !== currentFingerprint)
+                continue;
+            if (Date.now() - entry.storedAt > this.ttlMs)
+                continue;
+            const similarity = jaccardSimilarity(queryTokens.tokenSet, entry.tokens.tokenSet);
+            if (similarity >= FUZZY_SIMILARITY_THRESHOLD && (!bestMatch || similarity > bestMatch.similarity)) {
+                bestMatch = { content: entry.content, similarity };
+            }
+        }
+        return bestMatch?.content;
+    }
+    /**
+     * Get a cached result if valid.
+     * Returns undefined if entry doesn't exist, TTL expired, or fingerprint mismatch.
+     *
+     * @param query - User query string
+     * @param currentFingerprint - Current context tree fingerprint
+     * @returns Cached response content or undefined
+     */
+    get(query, currentFingerprint) {
+        const key = this.normalizeQuery(query);
+        const entry = this.cache.get(key);
+        if (!entry)
+            return undefined;
+        // Check TTL
+        if (Date.now() - entry.storedAt > this.ttlMs) {
+            this.cache.delete(key);
+            return undefined;
+        }
+        // Check fingerprint (context tree changed?)
+        if (entry.fingerprint !== currentFingerprint) {
+            this.cache.delete(key);
+            return undefined;
+        }
+        return entry.content;
+    }
+    /** Get cache statistics. */
+    getStats() {
+        return {
+            maxSize: this.maxSize,
+            size: this.cache.size,
+        };
+    }
+    /**
+     * Store a result in the cache.
+     *
+     * @param query - User query string
+     * @param content - Response content to cache
+     * @param fingerprint - Context tree fingerprint at cache time
+     */
+    set(query, content, fingerprint) {
+        const key = this.normalizeQuery(query);
+        const tokens = tokenizeQuery(query);
+        // Evict oldest entry if at capacity
+        if (this.cache.size >= this.maxSize && !this.cache.has(key)) {
+            const oldestKey = this.cache.keys().next().value;
+            if (oldestKey)
+                this.cache.delete(oldestKey);
+        }
+        this.cache.set(key, {
+            content,
+            fingerprint,
+            storedAt: Date.now(),
+            tokens,
+        });
+    }
+    /** Normalize query for cache key consistency. */
+    normalizeQuery(query) {
+        return query.toLowerCase().trim().replaceAll(/\s+/g, ' ');
+    }
+}

package/dist/server/infra/executor/query-similarity.d.ts ADDED Viewed

@@ -0,0 +1,28 @@
+/**
+ * Pre-computed query tokens for similarity comparison.
+ */
+export interface QueryTokens {
+    /** Original normalized form (for exact match): lowercased, trimmed, whitespace runs collapsed */
+    normalized: string;
+    /** Stopword-filtered bag of words; words shorter than 2 characters are dropped */
+    tokenSet: Set<string>;
+}
+/** Minimum Jaccard similarity to consider a fuzzy cache match */
+export declare const FUZZY_SIMILARITY_THRESHOLD = 0.6;
+/**
+ * Tokenize and prepare a query for similarity comparison.
+ * Uses the same stopword library already used by SearchKnowledgeService.
+ * The query is lowercased, trimmed and whitespace-collapsed before being split on spaces.
+ *
+ * @param query - Raw user query string
+ * @returns Pre-computed tokens for similarity comparison
+ */
+export declare function tokenizeQuery(query: string): QueryTokens;
+/**
+ * Compute Jaccard similarity between two token sets.
+ * Returns value between 0 (no overlap) and 1 (identical).
+ * Two empty sets yield 1; one empty set against a non-empty set yields 0.
+ *
+ * @param a - First token set
+ * @param b - Second token set
+ * @returns Similarity score between 0 and 1
+ */
+export declare function jaccardSimilarity(a: Set<string>, b: Set<string>): number;

package/dist/server/infra/executor/query-similarity.js ADDED Viewed

@@ -0,0 +1,41 @@
+import { removeStopwords } from 'stopword';
+/** Minimum Jaccard similarity to consider a fuzzy cache match */
+export const FUZZY_SIMILARITY_THRESHOLD = 0.6;
+/**
+ * Tokenize and prepare a query for similarity comparison.
+ * Uses the same stopword library already used by SearchKnowledgeService.
+ * Duplicate words collapse because the result is stored in a Set.
+ *
+ * @param query - Raw user query string
+ * @returns Pre-computed tokens for similarity comparison
+ */
+export function tokenizeQuery(query) {
+    const normalized = query.toLowerCase().trim().replaceAll(/\s+/g, ' '); // lowercase, trim, collapse whitespace runs to one space
+    const words = normalized.split(' '); // an empty query yields [''], which the length filter below removes
+    const filtered = removeStopwords(words).filter((w) => w.length >= 2); // drop stopwords, then tokens shorter than 2 characters
+    return {
+        normalized,
+        tokenSet: new Set(filtered),
+    };
+}
+/**
+ * Compute Jaccard similarity between two token sets.
+ * Returns value between 0 (no overlap) and 1 (identical).
+ * Two empty sets are treated as identical (1); one empty set against a non-empty one gives 0.
+ *
+ * @param a - First token set
+ * @param b - Second token set
+ * @returns Similarity score between 0 and 1
+ */
+export function jaccardSimilarity(a, b) {
+    if (a.size === 0 && b.size === 0)
+        return 1;
+    if (a.size === 0 || b.size === 0)
+        return 0;
+    const [smaller, larger] = a.size <= b.size ? [a, b] : [b, a]; // iterate the smaller set, probe the larger one
+    let intersection = 0;
+    for (const token of smaller) {
+        if (larger.has(token))
+            intersection++;
+    }
+    const union = a.size + b.size - intersection; // |A ∪ B| = |A| + |B| - |A ∩ B|
+    return union === 0 ? 0 : intersection / union; // both sets are non-empty here, so union >= 1; the zero check is purely defensive
+}

package/dist/server/infra/process/agent-worker.js CHANGED Viewed

@@ -22,6 +22,7 @@ import { randomUUID } from 'node:crypto';
 import { CipherAgent } from '../../../agent/infra/agent/index.js';
 import { FileSystemService } from '../../../agent/infra/file-system/file-system-service.js';
 import { FolderPackService } from '../../../agent/infra/folder-pack/folder-pack-service.js';
+import { createSearchKnowledgeService } from '../../../agent/infra/tools/implementations/search-knowledge-service.js';
 import { getCurrentConfig } from '../../config/environment.js';
 import { DEFAULT_LLM_MODEL, PROJECT } from '../../constants.js';
 import { AgentNotInitializedError, NotAuthenticatedError, ProcessorNotInitError, serializeTaskError, } from '../../core/domain/errors/task-error.js';
@@ -682,10 +683,16 @@ async function tryInitializeAgent(forceReinit = false) {
         }
         // Create Executors
         const curateExecutor = new CurateExecutor();
-        const queryExecutor = new QueryExecutor();
-        // Create FolderPackExecutor with required dependencies
+        // Create shared FileSystemService (used by FolderPackExecutor and QueryExecutor)
         const fileSystemService = new FileSystemService();
         await fileSystemService.initialize();
+        // Create QueryExecutor with smart routing and caching dependencies
+        const searchService = createSearchKnowledgeService(fileSystemService);
+        const queryExecutor = new QueryExecutor({
+            enableCache: true,
+            fileSystem: fileSystemService,
+            searchService,
+        });
         const folderPackService = new FolderPackService(fileSystemService);
         await folderPackService.initialize();
         const folderPackExecutor = new FolderPackExecutor(folderPackService);

package/dist/server/infra/process/inline-agent-executor.js CHANGED Viewed

@@ -14,6 +14,8 @@
  */
 import { randomUUID } from 'node:crypto';
 import { CipherAgent } from '../../../agent/infra/agent/index.js';
+import { FileSystemService } from '../../../agent/infra/file-system/file-system-service.js';
+import { createSearchKnowledgeService } from '../../../agent/infra/tools/implementations/search-knowledge-service.js';
 import { getCurrentConfig } from '../../config/environment.js';
 import { DEFAULT_LLM_MODEL, PROJECT } from '../../constants.js';
 import { NotAuthenticatedError, serializeTaskError } from '../../core/domain/errors/task-error.js';
@@ -28,8 +30,8 @@ import { createTokenStore } from '../storage/token-store.js';
  */
 export class InlineAgent {
     transportClient;
-    constructor(agent) {
-        this.transportClient = new InlineTransportClient(agent);
+    constructor(agent, queryExecutor) {
+        this.transportClient = new InlineTransportClient(agent, queryExecutor);
     }
     /**
      * Async factory — loads auth/config, creates and starts CipherAgent.
@@ -69,7 +71,16 @@ export class InlineAgent {
         await agent.start();
         const sessionId = `inline-session-${randomUUID()}`;
         await agent.createSession(sessionId);
-        return new InlineAgent(agent);
+        // Create FileSystemService for smart routing and caching
+        const fileSystemService = new FileSystemService();
+        await fileSystemService.initialize();
+        const searchService = createSearchKnowledgeService(fileSystemService);
+        const queryExecutor = new QueryExecutor({
+            enableCache: true,
+            fileSystem: fileSystemService,
+            searchService,
+        });
+        return new InlineAgent(agent, queryExecutor);
     }
 }
 /**
@@ -85,10 +96,10 @@ class InlineTransportClient {
     curateExecutor;
     handlers = new Map();
     queryExecutor;
-    constructor(agent) {
+    constructor(agent, queryExecutor) {
         this.agent = agent;
         this.curateExecutor = new CurateExecutor();
-        this.queryExecutor = new QueryExecutor();
+        this.queryExecutor = queryExecutor;
     }
     // ===========================================================================
     // ITransportClient implementation