npm - @neuralsea/workspace-indexer - Versions diffs - 0.1.0 → 0.3.2 - Mend

@neuralsea/workspace-indexer 0.1.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/dist/index.d.cts ADDED Viewed

@@ -0,0 +1,1326 @@
+type SymbolRange = { // Source span of a symbol: a start and an end position, each a line + character pair.
+    startLine: number;
+    startCharacter: number; // NOTE(review): base (0- or 1-indexed) is not stated in this file — confirm with producers.
+    endLine: number;
+    endCharacter: number; // NOTE(review): inclusive vs. exclusive end is likewise unspecified here.
+};
+type SymbolKind = "file" | "module" | "namespace" | "package" | "class" | "interface" | "enum" | "type" | "function" | "method" | "property" | "field" | "variable" | "constant" | "constructor" | "parameter" | "unknown"; // Closed set of symbol categories used by GraphSymbol.kind and LspDocumentSymbol.kind; includes an "unknown" member.
+type SymbolEdgeKind = "contains" | "definition" | "reference" | "implementation" | "typeDefinition"; // Relationship kinds for GraphEdge.kind; LspFacade.resolveEdges accepts every member except "contains".
+type GraphSymbol = { // A symbol node; SymbolGraphIndexOutput.symbols is an array of these.
+    /**
+     * Stable symbol identifier.
+     * If you don't have one, you may return an empty string and the consumer can derive one.
+     * NOTE(review): `stableSymbolId` (declared in this module) accepts repoRoot, path, language,
+     * kind, name and range, so it is presumably the derivation meant here — confirm.
+     */
+    id: string;
+    repoRoot: string;
+    path: string; // NOTE(review): presumably repo-relative like GraphEdge's path hints — confirm.
+    language: string;
+    name: string;
+    kind: SymbolKind;
+    range: SymbolRange;
+    containerName?: string; // Optional; may be absent.
+    detail?: string; // Optional; may be absent.
+};
+type GraphEdge = { // A typed relationship between two symbols, each referenced by id.
+    repoRoot: string;
+    fromId: string; // Id of the symbol the edge starts from.
+    toId: string; // Id of the symbol the edge points to.
+    kind: SymbolEdgeKind;
+    /** Optional repo-relative path hints for persistence/querying. */
+    fromPath?: string;
+    toPath?: string; // Second of the two optional path hints described above fromPath.
+};
+type SymbolGraphIndexInput = { // Argument passed to SymbolGraphProvider.indexDocument / expandDocumentEdges.
+    repoRoot: string;
+    path: string;
+    language: string;
+    text: string; // NOTE(review): presumably the full document contents — confirm with callers.
+    /** Optional caller-provided content hash for caching. */
+    contentHash?: string;
+};
+type SymbolGraphIndexOutput = { // Value that SymbolGraphProvider.indexDocument resolves to.
+    symbols: GraphSymbol[];
+    edges: GraphEdge[];
+};
+/**
+ * Optional provider for "full fidelity" symbol graphs.
+ *
+ * In a VS Code extension, an implementation can call VS Code providers (often LSP-backed)
+ * and return a normalized set of symbols + edges.
+ *
+ * Members:
+ * - `id`: identifier string for the provider.
+ * - `supports(language)`: whether the provider handles the given language string.
+ * - `indexDocument(input)`: resolves to the symbols and edges for one input document.
+ * - `expandDocumentEdges` (optional member): see its own comment below.
+ */
+interface SymbolGraphProvider {
+    id: string;
+    supports(language: string): boolean;
+    indexDocument(input: SymbolGraphIndexInput): Promise<SymbolGraphIndexOutput>;
+    /**
+     * Optional staged/on-demand expansion: compute cross-file edges for a document.
+     * Implementations should be idempotent and safe to call repeatedly.
+     * `opts.signal` is an optional AbortSignal; both `opts` and `signal` may be omitted.
+     */
+    expandDocumentEdges?(input: SymbolGraphIndexInput, opts?: {
+        signal?: AbortSignal;
+    }): Promise<GraphEdge[]>;
+}
+type AnnMetric = "cosine" | "ip" | "l2"; // Metric names accepted by AnnIndexInit.metric; same three members as VectorMetric.
+type AnnPoint = { // One id + vector pair, the element type of AnnIndex.upsert's argument.
+    id: string;
+    vector: Float32Array;
+};
+type AnnSearchHit = { // One result entry of AnnIndex.search.
+    id: string;
+    score: number; // NOTE(review): score direction (higher = better?) is not stated for ANN hits — confirm.
+};
+type AnnIndexInit = { // Argument of AnnIndex.init; carries the same fields as VectorIndexInit plus embedderId.
+    repoId: string;
+    repoRoot: string;
+    commit: string;
+    branch: string;
+    cacheDir: string;
+    dimension: number; // NOTE(review): presumably the vector length expected by upsert/search — confirm.
+    metric: AnnMetric;
+    /**
+     * Identifier for the embedding model/provider and settings that produced vectors.
+     * Changing this should force rebuild/retrain.
+     */
+    embedderId: string;
+};
+/**
+ * Interface for an optional quantized ANN layer (e.g. FAISS IVF+PQ/OPQ).
+ *
+ * This is intentionally separate from VectorIndex:
+ * - VectorIndex can remain exact/float32 (or remote DB),
+ * - ANN can be an additional candidate generator for scale.
+ *
+ * Every operation is asynchronous (returns a Promise). `search` takes a query vector and a
+ * count `k` and resolves to AnnSearchHit entries (id + score). Unlike VectorIndex, this
+ * interface declares no `rebuild` or `count` member.
+ */
+interface AnnIndex {
+    readonly id: string;
+    readonly kind: string;
+    init(init: AnnIndexInit): Promise<void>;
+    upsert(points: AnnPoint[]): Promise<void>;
+    remove(ids: string[]): Promise<void>;
+    search(query: Float32Array, k: number): Promise<AnnSearchHit[]>;
+    flush(): Promise<void>;
+    close(): Promise<void>;
+}
+type AnnConfig = { // Two variants: "noop" (the provider field may be omitted) or "faiss-pq".
+    provider?: "noop";
+} | {
+    provider: "faiss-pq";
+    faiss?: { // All tuning fields are optional; their defaults are not visible in this declaration. NOTE(review): names look like FAISS IVF/PQ/OPQ parameters — confirm meanings against the implementation.
+        nlist?: number;
+        m?: number;
+        nbits?: number;
+        opq?: boolean;
+    };
+};
+type RepoId = string; // Plain string alias; used as the type of ChunkRecord.repoId.
+type RetrievalProfileName = "search" | "refactor" | "review" | "architecture" | "rca" | "custom"; // Profile names; these are the keys of DEFAULT_PROFILES and of ProfilesConfig.profiles.
+interface RetrievalWeights { // Hybrid scoring weights used as RetrievalProfile.weights. NOTE(review): whether they must sum to 1 is not stated here.
+    /** Weight for semantic vector similarity (cosine). */
+    vector: number;
+    /** Weight for lexical/BM25-style match (SQLite FTS). */
+    lexical: number;
+    /** Weight for file recency (mtime). */
+    recency: number;
+}
+interface RetrievalProfile { // Complete settings for one retrieval profile; DEFAULT_PROFILES holds one per RetrievalProfileName.
+    name: RetrievalProfileName;
+    /** Number of primary hits to return (before expansion). */
+    k: number;
+    /** Hybrid scoring weights. */
+    weights: RetrievalWeights;
+    /** Context expansion knobs. */
+    expand: RelatedContextOptions & {
+        /**
+         * If true, include a lightweight file-level synopsis chunk (if available),
+         * useful for architecture-style queries.
+         */
+        includeFileSynopsis?: boolean;
+    };
+    /** Candidate selection knobs. The whole object and each field are optional; defaults are not visible in this declaration. */
+    candidates?: {
+        /** How many candidates to request from vector search before merging. */
+        vectorK?: number;
+        /** How many candidates to request from lexical search before merging. */
+        lexicalK?: number;
+        /** Optional hard cap on total candidates before reranking/merging. */
+        maxMergedCandidates?: number;
+    };
+}
+/**
+ * The default profiles can be overridden or extended by the consumer.
+ *
+ * `profiles` maps a profile name to a partial RetrievalProfile; both the map entries and
+ * the fields inside each entry are optional. IndexerConfig extends this interface.
+ */
+interface ProfilesConfig {
+    profiles?: Partial<Record<RetrievalProfileName, Partial<RetrievalProfile>>>;
+}
+/** Similarity metric for vector search. Has the same three members as AnnMetric. */
+type VectorMetric = "cosine" | "ip" | "l2";
+/** Built-in vector backends. Used as the type of VectorConfig.provider. */
+type VectorProviderKind = "auto" | "bruteforce" | "hnswlib" | "qdrant" | "faiss" | "custom";
+interface HnswlibVectorConfig { // Options for the "hnswlib" backend (VectorConfig.hnswlib); every field is optional.
+    /** Max elements for the HNSW index (will auto-resize if needed). */
+    maxElements?: number;
+    /** HNSW M parameter (graph connectivity). */
+    m?: number;
+    /** HNSW efConstruction parameter. */
+    efConstruction?: number;
+    /** HNSW efSearch parameter (quality/speed at query time). */
+    efSearch?: number;
+    /** Persist the ANN index to disk under cacheDir (default true). */
+    persist?: boolean;
+    /** Debounce in ms before writing a dirty index to disk (default 2000). */
+    persistDebounceMs?: number;
+}
+interface QdrantVectorConfig { // Options for the "qdrant" backend (VectorConfig.qdrant); every field is optional.
+    /** Prefer `url` if set, otherwise `host`+`port`. */
+    url?: string;
+    host?: string;
+    port?: number;
+    /** Optional API key (for secured Qdrant instances). */
+    apiKey?: string;
+    /** Collection naming prefix (default "petri"). */
+    collectionPrefix?: string;
+    /**
+     * "commit" (default): a separate collection per repo+commit.
+     * "repo": a single collection per repo; commit is stored in payload.
+     */
+    collectionMode?: "commit" | "repo";
+    /** If true, drop+recreate the collection on rebuild (default true). */
+    recreateOnRebuild?: boolean;
+}
+interface FaissVectorConfig { // Options for the "faiss" backend (VectorConfig.faiss); every field is optional.
+    /** Factory descriptor (default "HNSW,Flat"). */
+    descriptor?: string;
+    /** Persist the FAISS index to disk under cacheDir (default true). */
+    persist?: boolean;
+    /** Debounce in ms before writing a dirty index to disk (default 2000). */
+    persistDebounceMs?: number;
+    /** Rebuild strategy: "lazy" (default) rebuild on demand; "eager" rebuild on each write. */
+    rebuildStrategy?: "lazy" | "eager";
+}
+interface CustomVectorConfig { // Options for the "custom" backend (VectorConfig.custom); only `module` is required.
+    /** ESM module path to import (relative to process.cwd() or absolute). */
+    module: string;
+    /** Named export to use (default: "default"). */
+    export?: string;
+    /** Arbitrary JSON options passed to the provider factory (optional). */
+    options?: Record<string, unknown>;
+}
+interface VectorConfig { // Argument of createVectorIndex and type of IndexerConfig.vector; every field is optional.
+    /** Which vector backend to use. Default: "bruteforce". */
+    provider?: VectorProviderKind;
+    /** Vector metric. Default: "cosine". */
+    metric?: VectorMetric;
+    /** Provider-specific options. The same note covers hnswlib, qdrant, faiss and custom below. */
+    hnswlib?: HnswlibVectorConfig;
+    qdrant?: QdrantVectorConfig;
+    faiss?: FaissVectorConfig;
+    custom?: CustomVectorConfig;
+}
+type IndexerProgressEvent = { // Union of progress events, discriminated by the string-literal `type` field. Types prefixed "workspace/" carry workspaceRoot, types prefixed "repo/" carry repoRoot.
+    type: "workspace/open";
+    workspaceRoot: string;
+    repoRoots: string[];
+} | {
+    type: "workspace/index/start";
+    workspaceRoot: string;
+    repoCount: number;
+} | {
+    type: "workspace/index/done";
+    workspaceRoot: string;
+    repoCount: number;
+    ms: number; // NOTE(review): presumably elapsed milliseconds, here and in the other "done" events — confirm.
+} | {
+    type: "workspace/watch/start";
+    workspaceRoot: string;
+    repoCount: number;
+} | {
+    type: "workspace/watch/done";
+    workspaceRoot: string;
+    repoCount: number;
+    ms: number;
+} | {
+    type: "workspace/retrieve/start";
+    workspaceRoot: string;
+    profile: string; // Typed as plain string here, not RetrievalProfileName.
+    query: string;
+} | {
+    type: "workspace/retrieve/done";
+    workspaceRoot: string;
+    profile: string;
+    ms: number;
+    hits: number;
+    candidates: { // Numeric counts only; ContextBundle.stats.candidates has these three fields plus `returned`.
+        vector: number;
+        lexical: number;
+        merged: number;
+    };
+} | {
+    type: "repo/open";
+    repoRoot: string;
+    repoId: string;
+    commit: string;
+    branch: string;
+} | {
+    type: "repo/index/start";
+    repoRoot: string;
+    fileCount: number;
+} | {
+    type: "repo/index/file/start";
+    repoRoot: string;
+    path: string;
+} | {
+    type: "repo/index/file/skip";
+    repoRoot: string;
+    path: string;
+    reason: string; // Free-form string; the set of possible reasons is not declared here.
+} | {
+    type: "repo/index/file/done";
+    repoRoot: string;
+    path: string;
+    chunkCount: number;
+    ms: number;
+} | {
+    type: "repo/index/embed/batch";
+    repoRoot: string;
+    batchSize: number;
+    batchIndex: number; // NOTE(review): 0- or 1-based is not stated — confirm before displaying "i of n".
+    batchesTotal: number;
+} | {
+    type: "repo/index/vector/upsert";
+    repoRoot: string;
+    points: number; // A number, not the points themselves.
+} | {
+    type: "repo/index/vector/remove";
+    repoRoot: string;
+    ids: number; // A number, not the id strings themselves.
+} | {
+    type: "repo/vector/rebuild/start";
+    repoRoot: string;
+    kind: string;
+    points: number;
+} | {
+    type: "repo/vector/rebuild/done";
+    repoRoot: string;
+    kind: string;
+    ms: number;
+} | {
+    type: "repo/vector/flush";
+    repoRoot: string;
+    kind: string;
+} | {
+    type: "repo/watch/start";
+    repoRoot: string;
+} | {
+    type: "repo/watch/event";
+    repoRoot: string;
+    event: "add" | "change" | "unlink" | "head";
+    path: string;
+} | {
+    type: "repo/index/done";
+    repoRoot: string;
+    fileCount: number;
+    ms: number;
+} | {
+    type: "error";
+    scope: "workspace" | "repo";
+    message: string;
+    repoRoot?: string; // Optional on the error variant.
+    stack?: string; // Optional on the error variant.
+};
+type IndexerProgressHandler = (event: IndexerProgressEvent) => void; // Callback form of a progress consumer; one of the two members of IndexerProgress.
+interface IndexerProgressSink { // Object form of a progress consumer; implemented by IndexerProgressObservable and returned by asProgressSink.
+    emit(event: IndexerProgressEvent): void;
+}
+type IndexerProgress = IndexerProgressHandler | IndexerProgressSink; // Either a plain callback or an object with emit(); accepted by IndexerConfig.progress and asProgressSink.
+interface IndexerConfig extends ProfilesConfig { // Top-level indexer configuration; every field is optional, and `profiles` is inherited from ProfilesConfig.
+    cacheDir?: string;
+    /** Vector index backend (ANN) configuration. */
+    vector?: VectorConfig;
+    /** Optional quantized ANN candidate generator (interface-driven; noop by default). */
+    ann?: AnnConfig;
+    /** Workspace-only settings (used by WorkspaceIndexer). */
+    workspace?: {
+        /**
+         * Repo discovery knobs.
+         * - include/exclude are workspace-root-relative globs (POSIX-style)
+         * - maxDepth limits directory traversal depth
+         *
+         * The fields mirror the RepoDiscoveryOptions type declared in this module.
+         */
+        discovery?: {
+            include?: string[];
+            exclude?: string[];
+            maxDepth?: number;
+            /** If true (default), do not descend into discovered repo roots. */
+            stopAtRepoRoot?: boolean;
+            /** If true (default), add submodule repos discovered via `.gitmodules`. */
+            includeSubmodules?: boolean;
+        };
+        /**
+         * Per-repo config overrides, matched by repo-root-relative glob.
+         * Later matches win.
+         * Each `config` is itself typed as a full IndexerConfig.
+         */
+        repoOverrides?: Array<{
+            match: string;
+            config: IndexerConfig;
+        }>;
+        /** Optional persistent graph store for cross-file/cross-language relationships. */
+        graph?: {
+            provider: "neo4j"; // The only provider literal declared.
+            neo4j: { // Same fields as the standalone Neo4jConfig type declared in this module.
+                uri: string;
+                user: string;
+                password: string;
+                database?: string;
+                /**
+                 * Optional label prefix to isolate multiple workspaces in one Neo4j instance.
+                 * Default: "Petri".
+                 */
+                labelPrefix?: string;
+            };
+        };
+    };
+    /** Extensions to index (lowercase, include the dot). */
+    includeExtensions?: string[];
+    /** Safety limit to avoid indexing huge binaries by accident. */
+    maxFileBytes?: number;
+    /** Chunking controls. Same three fields as ChunkingConfig, but optional here. */
+    chunk?: {
+        maxChars?: number;
+        maxLines?: number;
+        overlapLines?: number;
+    };
+    /** Embedding batch controls. */
+    embed?: {
+        batchSize?: number;
+        concurrency?: number;
+    };
+    /** Watcher controls. */
+    watch?: {
+        debounceMs?: number;
+    };
+    /** Optional progress sink (callback or observable). */
+    progress?: IndexerProgress;
+    /** Optional symbol graph provider (e.g. VS Code/LSP-backed) for high-fidelity cross-file symbol data. */
+    symbolGraphProvider?: SymbolGraphProvider;
+    /** Extra ignore files (in addition to git’s excludes). */
+    ignoreFiles?: string[];
+    /**
+     * Secret hygiene:
+     * - skip obvious secret-y files by path substring
+     * - redact patterns prior to storage + embedding
+     */
+    redact?: {
+        enabled?: boolean;
+        /** If path includes any of these substrings, file is skipped. */
+        skipPathSubstrings?: string[];
+        /**
+         * Simple redactions applied before embedding/storage (keeps structure).
+         * `regex` is a RegExp object, so these entries cannot be expressed in plain JSON.
+         */
+        patterns?: Array<{
+            name: string;
+            regex: RegExp;
+            replaceWith: string;
+        }>;
+    };
+    /** Storage / index tuning. */
+    storage?: {
+        /** If true, keep full chunk text in SQLite (default true). */
+        storeText?: boolean;
+        /**
+         * Lexical index mode:
+         * - "full": store the (redacted) chunk text in FTS (best retrieval, more storage)
+         * - "tokens": store only extracted identifiers/tokens (less sensitive, still good for code search)
+         * - "off": disable FTS indexing entirely (vector-only retrieval)
+         */
+        ftsMode?: "full" | "tokens" | "off";
+    };
+}
+interface ChunkRecord { // One indexed chunk with its location, hash and text; SearchHit.chunk is this shape minus `text` plus a preview.
+    id: string;
+    repoId: RepoId;
+    repoRoot: string;
+    path: string;
+    language: string;
+    startLine: number;
+    endLine: number;
+    contentHash: string;
+    text: string;
+    tokens: number; // NOTE(review): presumably a token count for `text`; the counting method is not visible here.
+    fileMtimeMs: number;
+    kind?: "chunk" | "synopsis"; // Optional; which value is assumed when absent is not stated here.
+}
+interface SearchOptions { // Optional search parameters; the three filter fields match RetrieveOptions.filters.
+    k?: number;
+    repoRoots?: string[];
+    language?: string;
+    pathPrefix?: string;
+}
+interface RetrievalScope { // Optional scope restrictions, used as RetrieveOptions.scope.
+    /** Restrict results to files that are changed compared to baseRef. */
+    changedOnly?: boolean;
+    /** e.g. "origin/main", "main", "HEAD~1". Only used if changedOnly is true. */
+    baseRef?: string;
+    /** Restrict to these file paths (POSIX rel paths within repo). */
+    includePaths?: string[];
+}
+interface RetrieveOptions { // Per-call retrieval options; every field is optional.
+    /** Choose a profile; can be overridden with explicit profile settings. */
+    profile?: RetrievalProfileName;
+    /** Optional ad-hoc overrides, merged into the chosen profile. */
+    profileOverrides?: Partial<RetrievalProfile>;
+    /** Optional scope restrictions. */
+    scope?: RetrievalScope;
+    /** Filters (applied after retrieval). */
+    filters?: {
+        repoRoots?: string[];
+        language?: string;
+        pathPrefix?: string;
+    };
+}
+interface SearchHit { // One scored result; element type of ContextBundle.hits.
+    score: number;
+    scoreBreakdown?: { // Optional per-signal components; the keys match the RetrievalWeights fields.
+        vector?: number;
+        lexical?: number;
+        recency?: number;
+    };
+    chunk: Omit<ChunkRecord, "text"> & { // ChunkRecord without `text`, with `preview` in its place.
+        preview: string;
+    };
+}
+interface RelatedContextOptions { // Context-expansion knobs; intersected into RetrievalProfile.expand.
+    adjacentChunks?: number; // NOTE(review): presumably how many neighbouring chunks to pull in — confirm.
+    followImports?: number; // NOTE(review): numeric, so presumably a depth or count rather than a flag — confirm.
+}
+interface ContextBundle { // Retrieval result: ranked hits, supporting context blocks and run statistics.
+    /** Ordered from most to least relevant. */
+    hits: SearchHit[];
+    /** Expanded supporting context blocks (deduplicated). */
+    context: Array<{
+        repoRoot: string;
+        path: string;
+        startLine: number;
+        endLine: number;
+        text: string;
+        reason: string; // Free-form string; the possible values are not declared here.
+    }>;
+    /** Useful metadata for your agent’s planner/logs. */
+    stats: {
+        profile: RetrievalProfileName;
+        reposSearched: number;
+        candidates: { // Numeric counts per stage.
+            vector: number;
+            lexical: number;
+            merged: number;
+            returned: number;
+        };
+    };
+}
+interface EmbeddingsProvider { // Contract implemented by the OpenAI, Ollama and Hash providers declared in this module.
+    /** Unique identifier including model, used for caching. */
+    readonly id: string;
+    /** Dimension may be unknown until first call. */
+    readonly dimension: number | null;
+    embed(texts: string[]): Promise<Float32Array[]>; // NOTE(review): presumably one vector per input text, in input order — confirm.
+}
+declare class OpenAIEmbeddingsProvider implements EmbeddingsProvider { // EmbeddingsProvider constructed from an API key and a model name.
+    private readonly opts;
+    readonly id: string;
+    dimension: number | null; // Declared mutable here, while the interface member is readonly.
+    constructor(opts: {
+        apiKey: string;
+        model: string;
+        baseUrl?: string; // Optional; the default endpoint is not visible in this declaration.
+    });
+    embed(texts: string[]): Promise<Float32Array[]>;
+}
+declare class OllamaEmbeddingsProvider implements EmbeddingsProvider { // EmbeddingsProvider constructed from a model name; needs no API key.
+    readonly id: string;
+    dimension: number | null; // Declared mutable here, while the interface member is readonly.
+    private readonly baseUrl;
+    private readonly model;
+    private readonly concurrency;
+    constructor(opts: {
+        model: string;
+        baseUrl?: string; // Optional; default not visible in this declaration.
+        concurrency?: number; // Optional; default not visible in this declaration.
+    });
+    private tryBatchEndpoint; // NOTE(review): with embedOne, suggests a batch request with a per-text fallback — confirm in the implementation.
+    private embedOne;
+    embed(texts: string[]): Promise<Float32Array[]>;
+}
+/**
+ * Offline deterministic embedding. Not truly semantic, but:
+ * - fully offline
+ * - deterministic
+ * - useful for wiring everything up + tests
+ *
+ * Unlike the other providers declared here, `dimension` is a readonly, non-null number.
+ * The constructor's `dimension` argument is optional; its default is not visible in
+ * this declaration.
+ */
+declare class HashEmbeddingsProvider implements EmbeddingsProvider {
+    readonly id: string;
+    readonly dimension: number;
+    constructor(dimension?: number);
+    embed(texts: string[]): Promise<Float32Array[]>;
+    private embedOne;
+}
+declare const DEFAULT_PROFILES: Record<RetrievalProfileName, RetrievalProfile>; // One complete RetrievalProfile for every RetrievalProfileName.
+declare function deepMergeProfile(base: RetrievalProfile, patch?: Partial<RetrievalProfile>): RetrievalProfile; // Returns a complete profile built from `base` and the optional `patch`. NOTE(review): whether `base` is mutated is not visible here.
+interface VectorPoint { // One id + vector pair (with optional payload); element type for VectorIndex.rebuild and upsert.
+    id: string;
+    vector: Float32Array;
+    /** Optional payload for vector databases that support it (e.g. Qdrant). */
+    payload?: Record<string, unknown>;
+}
+interface VectorSearchHit { // One result entry of VectorIndex.search.
+    id: string;
+    /** Similarity score (higher is better). For cosine/IP this is usually in [-1, 1]. */
+    score: number;
+}
+interface VectorIndexInit { // Argument of VectorIndex.init; AnnIndexInit carries the same fields plus embedderId.
+    repoId: string;
+    repoRoot: string;
+    commit: string;
+    branch: string;
+    cacheDir: string;
+    dimension: number;
+    metric: VectorMetric;
+}
+/**
+ * A pluggable dense-vector index.
+ *
+ * All implementations MUST return a score where larger means more similar.
+ * This package primarily expects cosine similarity.
+ *
+ * NOTE(review): VectorMetric also allows "l2", where a raw distance is smaller for closer
+ * points; an implementation presumably has to convert it to satisfy the rule above — confirm.
+ */
+interface VectorIndex {
+    readonly kind: string;
+    readonly metric: VectorMetric;
+    readonly dimension: number;
+    init(init: VectorIndexInit): Promise<void>;
+    /** Full rebuild from a set of points. */
+    rebuild(points: VectorPoint[]): Promise<void>;
+    /** Insert or update points (incremental). */
+    upsert(points: VectorPoint[]): Promise<void>;
+    /** Remove points by id. */
+    remove(ids: string[]): Promise<void>;
+    /** Search for the k nearest neighbours. */
+    search(query: Float32Array, k: number): Promise<VectorSearchHit[]>;
+    /** Number of points currently indexed (best-effort for remote stores). */
+    count(): Promise<number>;
+    /**
+     * Flush pending writes to disk/network.
+     * The member itself is required by this interface (it is not declared with `?`).
+     * NOTE(review): the earlier "Optional" wording presumably meant it may be a no-op — confirm.
+     */
+    flush(): Promise<void>;
+    /** Close resources. */
+    close(): Promise<void>;
+}
+/**
+ * Create a vector index from config.
+ *
+ * Provider selection:
+ * - explicit provider -> that backend
+ * - auto -> qdrant (if configured), else hnswlib (if dependency present), else bruteforce
+ *
+ * The `vector` argument is optional and the result is delivered asynchronously.
+ * NOTE(review): whether the returned index still needs `init(...)` called is not visible here.
+ */
+declare function createVectorIndex(vector?: VectorConfig): Promise<VectorIndex>;
+interface ChunkingConfig { // Required chunking limits passed to chunkSource; IndexerConfig.chunk has the same fields as optionals.
+    maxChars: number;
+    maxLines: number;
+    overlapLines: number;
+}
+interface Chunk { // One chunk produced by chunkSource; a subset of the ChunkRecord fields.
+    startLine: number;
+    endLine: number;
+    text: string;
+    contentHash: string;
+    tokens: number;
+}
+declare function languageFromPath(posixPath: string): string; // Maps a POSIX-style path to a language string. NOTE(review): the result for unrecognised paths is not visible here.
+declare function chunkSource(posixPath: string, sourceText: string, cfg: ChunkingConfig): { // Synchronously splits source text into chunks under the given limits.
+    language: string; // Language string reported for the file.
+    chunks: Chunk[];
+};
+type Unsubscribe = () => void; // Zero-argument function returned by IndexerProgressObservable.subscribe and .on.
+declare class IndexerProgressObservable implements IndexerProgressSink { // Progress sink that handlers can subscribe to.
+    private handlers;
+    subscribe(handler: (e: IndexerProgressEvent) => void): Unsubscribe; // Handler receives events of any type.
+    on<TType extends IndexerProgressEvent["type"]>(type: TType, handler: (e: Extract<IndexerProgressEvent, { // Handler's parameter is narrowed to the union member whose `type` equals TType.
+        type: TType;
+    }>) => void): Unsubscribe;
+    emit(event: IndexerProgressEvent): void;
+}
+declare function asProgressSink(progress?: IndexerProgress): IndexerProgressSink | null; // Normalises a callback-or-sink value to a sink. NOTE(review): null is presumably returned when `progress` is omitted — confirm.
+type RepoDiscoveryOptions = { // Options for discoverGitRepos; same fields as IndexerConfig.workspace.discovery, all optional.
+    include?: string[];
+    exclude?: string[];
+    maxDepth?: number;
+    stopAtRepoRoot?: boolean;
+    includeSubmodules?: boolean;
+};
+declare function discoverGitRepos(workspaceRoot: string, opts?: RepoDiscoveryOptions): string[]; // Synchronous; returns a list of strings. NOTE(review): presumably repo root paths, absolute vs. relative not visible here.
+declare function pickRepoOverride(base: any, repoRoot: string, workspaceRoot: string, overrides?: Array<{ // NOTE(review): base, config and the return value are typed `any`; presumably IndexerConfig-shaped, since `overrides` has the same { match, config } shape as IndexerConfig.workspace.repoOverrides — confirm.
+    match: string;
+    config: any;
+}>): any;
+declare function mergeIndexerConfig(target: any, patch: any): void; // Returns nothing, so the result can only land in `target`. NOTE(review): presumably an in-place merge of `patch` into `target`; depth and array handling are not visible here.
+type WorkspaceChunkRow = { // Row shape returned by WorkspaceStore.getChunkById; uses snake_case field names, unlike ChunkRecord.
+    id: string;
+    repo_id: string;
+    repo_root: string;
+    path: string;
+    language: string;
+    kind: string; // Plain string here, although writers pass "chunk" | "synopsis".
+    start_line: number;
+    end_line: number;
+    content_hash: string;
+    tokens: number;
+    file_mtime: number;
+    text: string; // May not hold the full text; see WorkspaceStore.getChunkTextFallback.
+    embedding: Buffer; // A Buffer here, whereas replaceChunksForFile accepts a Float32Array.
+};
+type WorkspaceStoreOptions = { // Optional second argument of the WorkspaceStore constructor.
+    /** "auto" (default) tries FTS5; "off" disables it. */
+    fts?: "auto" | "off";
+};
+/**
+ * Workspace-level unified store (SQLite) for multi-repo indexing.
+ *
+ * Design goals:
+ * - Facade API for callers (stable)
+ * - Repository pattern for separation of concerns
+ * - Unit-of-work for transactional updates
+ * - Strategy for optional features (FTS)
+ * - Schema versioning + migrations
+ *
+ * All public methods declared here are synchronous (none returns a Promise).
+ */
+declare class WorkspaceStore {
+    private readonly dbPath;
+    private readonly db;
+    private readonly uow;
+    private readonly meta;
+    private readonly repoHeads;
+    private readonly files;
+    private readonly edges;
+    private readonly repoLinks;
+    private readonly symbols;
+    private readonly chunks;
+    private readonly opts;
+    constructor(dbPath: string, opts?: WorkspaceStoreOptions);
+    setMeta(k: string, v: string): void; // String key/value metadata.
+    getMeta(k: string): string | null; // NOTE(review): null presumably means the key is absent — confirm.
+    /**
+     * Set repo head. If commit changes, clears prior rows for that repo_id to keep the workspace index "current".
+     */
+    setRepoHead(repoId: string, repoRoot: string, commit: string, branch: string): void;
+    deleteFile(repoId: string, posixPath: string): void;
+    upsertFile(repoId: string, posixPath: string, hash: string, mtime: number, language: string, size: number): void;
+    replaceChunksForFile(repoId: string, repoRoot: string, posixPath: string, rows: Array<{ // Each row carries both `text` and a separate `ftsText`.
+        id: string;
+        language: string;
+        kind: "chunk" | "synopsis";
+        startLine: number;
+        endLine: number;
+        contentHash: string;
+        text: string;
+        ftsText: string;
+        tokens: number;
+        fileMtime: number;
+        embedding: Float32Array;
+    }>): void;
+    setEdges(repoId: string, fromPath: string, kind: string, values: string[]): void; // File-level edges keyed by (repoId, fromPath, kind); read back with listEdges.
+    replaceSymbolsForFile(repoId: string, repoRoot: string, posixPath: string, rows: Array<{ // Symbol rows use flattened range fields instead of a SymbolRange object.
+        id: string;
+        language: string;
+        name: string;
+        kind: string;
+        startLine: number;
+        startCharacter: number;
+        endLine: number;
+        endCharacter: number;
+        containerName?: string;
+        detail?: string;
+    }>, edges: Array<{ // fromPath/toPath are required here, whereas GraphEdge declares them optional.
+        fromId: string;
+        toId: string;
+        kind: string;
+        fromPath: string;
+        toPath: string;
+    }>): void;
+    replaceSymbolEdgesFromFile(repoId: string, fromPath: string, edges: Array<{ // The source path is given once as `fromPath`; each edge supplies only `toPath`.
+        fromId: string;
+        toId: string;
+        kind: string;
+        toPath: string;
+    }>): void;
+    replaceRepoLinks(fromRepoId: string, links: Array<{
+        toRepoId: string;
+        kind: string;
+        detail?: string; // Optional on write.
+    }>): void;
+    listRepoLinks(fromRepoId: string): Array<{
+        toRepoId: string;
+        kind: string;
+        detail: string; // Always a string on read, although it is optional on write.
+    }>;
+    listChunksForFile(repoId: string, posixPath: string, kind?: "chunk" | "synopsis"): Array<{ // Returns ids and line spans only, not chunk text; `kind` is an optional filter argument.
+        id: string;
+        start_line: number;
+        end_line: number;
+        kind: string;
+    }>;
+    listEdges(repoId: string, fromPath: string, kind: string): string[];
+    getChunkById(id: string): WorkspaceChunkRow | null; // NOTE(review): null presumably means no row with that id — confirm.
+    searchFts(ftq: string, limit: number, repoIds?: string[]): Array<{ // NOTE(review): behaviour when FTS is "off" is not visible here; bm25 ordering (lower vs. higher is better) should be confirmed.
+        id: string;
+        bm25: number;
+    }>;
+    /**
+     * If text is omitted from storage, fall back to slicing from disk.
+     * The chunk boundaries are approximate; the stored row includes start/end line.
+     */
+    getChunkTextFallback(row: WorkspaceChunkRow): string;
+    close(): void;
+}
+declare function stableSymbolId(input: { // Produces a string id from a symbol's identifying fields; takes every GraphSymbol field except id, containerName and detail.
+    repoRoot: string;
+    path: string;
+    language: string;
+    kind: SymbolKind;
+    name: string;
+    range: SymbolRange;
+}): string; // NOTE(review): the id format (hash vs. readable) is not visible in this declaration.
+type LspPosition = { // A line + character pair. NOTE(review): presumably follows the LSP Position convention — confirm.
+    line: number;
+    character: number;
+};
+type LspRange = { // A start/end pair of LspPosition values.
+    start: LspPosition;
+    end: LspPosition;
+};
+type LspDocument = { // Document handle returned by LspFacade.openTextDocument and passed to its other methods.
+    uri: string;
+    fsPath: string;
+    languageId: string;
+    version?: number; // Optional.
+};
+type LspDocumentSymbol = { // Recursive document-symbol node; `kind` reuses this module's SymbolKind.
+    name: string;
+    kind: SymbolKind;
+    range: LspRange;
+    selectionRange?: LspRange;
+    detail?: string;
+    containerName?: string;
+    children?: LspDocumentSymbol[]; // Optional nested symbols of the same shape.
+};
+type LspTargetLocation = { // One target location in the result of LspFacade.resolveEdges.
+    uri: string;
+    fsPath: string;
+    range: LspRange;
+};
+interface LspFacade { // Asynchronous language-service operations required by SymbolGraphIndexer's constructor.
+    openTextDocument(absPath: string): Promise<LspDocument>;
+    documentSymbols(doc: LspDocument): Promise<LspDocumentSymbol[]>;
+    resolveEdges(doc: LspDocument, pos: LspPosition, kind: Exclude<SymbolEdgeKind, "contains">): Promise<LspTargetLocation[]>; // Accepts every edge kind except "contains".
+}
+type SymbolGraphIndexingMode = "symbols-only" | "staged" | "full"; // Indexing modes; used by SymbolGraphStrategyOptions.mode and by createVSCodeSymbolGraphProvider's `mode` option.
+type SymbolGraphStrategyOptions = { // Optional second argument of the SymbolGraphIndexer constructor.
+    mode?: SymbolGraphIndexingMode;
+    maxEdgesPerDocument?: number; // Optional; the default and what happens beyond the cap are not visible here.
+};
+type Cancellation = { // Optional cancellation argument for SymbolGraphIndexer methods; wraps an optional AbortSignal.
+    signal?: AbortSignal;
+};
+declare class SymbolGraphIndexer { // Builds symbol-graph output for documents using an LspFacade.
+    private readonly lsp;
+    private readonly opts;
+    private cache; // NOTE(review): contents and invalidation policy are not visible in this declaration.
+    constructor(lsp: LspFacade, opts?: SymbolGraphStrategyOptions);
+    indexDocument(input: SymbolGraphIndexInput, cancel?: Cancellation): Promise<SymbolGraphIndexOutput>; // Same input/output types as SymbolGraphProvider.indexDocument, plus an optional cancellation argument.
+    /**
+     * Compute cross-file edges for a document (definition/reference/implementation/typeDefinition).
+     * Intended for staged indexing and on-demand expansion during retrieval.
+     */
+    expandDocumentEdges(input: SymbolGraphIndexInput, cancel?: Cancellation): Promise<GraphEdge[]>;
+    private bestEffortTargetSymbolId;
+}
+/**
+ * Create a SymbolGraphProvider that runs in a VS Code extension host.
+ *
+ * Strategy pattern:
+ * - `mode: "symbols-only"`: only document symbols + contains edges
+ * - `mode: "staged"` (default): symbols+contains during indexing; compute cross-file edges via `expandDocumentEdges` on demand
+ * - `mode: "full"`: compute edges during indexing (can be expensive)
+ *
+ * The result is delivered asynchronously and may be `null`.
+ * NOTE(review): null presumably means the VS Code API is unavailable — confirm in the implementation.
+ */
+declare function createVSCodeSymbolGraphProvider(opts?: {
+    languages?: string[];
+    mode?: SymbolGraphIndexingMode;
+    maxEdgesPerDocument?: number;
+}): Promise<SymbolGraphProvider | null>;
+type GraphFileUpdate = { // Argument of GraphStore.replaceFileGraph: one file's repo identity, imports/exports, symbols and symbol edges.
+    repoId: string;
+    repoRoot: string;
+    commit: string;
+    branch: string;
+    path: string;
+    language: string;
+    imports: string[];
+    exports: string[];
+    symbols: Array<Pick<GraphSymbol, "id" | "name" | "kind" | "range" | "containerName" | "detail" | "language">>; // GraphSymbol without repoRoot and path, which are given once above.
+    symbolEdges: Array<Pick<GraphEdge, "fromId" | "toId" | "kind">>; // GraphEdge reduced to its ids and kind.
+};
+interface GraphStore { // Asynchronous graph-store contract. Required members: id, setRepoHead, deleteFile, replaceFileGraph, close; the rest are optional.
+    id: string;
+    setRepoHead(args: {
+        repoId: string;
+        repoRoot: string;
+        commit: string;
+        branch: string;
+    }): Promise<void>;
+    deleteFile(args: {
+        repoId: string;
+        path: string;
+    }): Promise<void>;
+    replaceFileGraph(update: GraphFileUpdate): Promise<void>;
+    /**
+     * Optional staged/on-demand update: replace outgoing symbol edges originating from a file's symbols.
+     * Implementations should preserve incoming edges.
+     */
+    replaceOutgoingSymbolEdgesFromFile?(args: {
+        repoId: string;
+        fromPath: string;
+        edges: Array<{
+            fromId: string;
+            toId: string;
+            kind: string;
+            toPath?: string;
+        }>;
+    }): Promise<void>;
+    /**
+     * Optional workspace-level linking (cross-repo relationships).
+     */
+    replaceRepoLinks?(args: {
+        links: Array<{
+            fromRepoId: string;
+            toRepoId: string;
+            kind: string;
+            detail?: string;
+        }>;
+    }): Promise<void>;
+    /**
+     * Optional graph-assisted retrieval expansion.
+     * Takes seed files and resolves to files with a numeric `weight` each.
+     */
+    neighborFiles?(args: {
+        seeds: Array<{
+            repoId: string;
+            path: string;
+        }>;
+        maxPerSeed?: number;
+        limit?: number;
+        kinds?: string[];
+    }): Promise<Array<{
+        repoId: string;
+        path: string;
+        weight: number; // NOTE(review): scale and direction of the weight are not stated here.
+    }>>;
+    shortestFilePath?(args: { // Optional member; resolves to a list of files or null. NOTE(review): null presumably means no path was found within maxRels — confirm.
+        from: {
+            repoId: string;
+            path: string;
+        };
+        to: {
+            repoId: string;
+            path: string;
+        };
+        maxRels?: number;
+    }): Promise<Array<{
+        repoId: string;
+        path: string;
+    }> | null>;
+    extractFileSubgraph?(args: { // Optional member; resolves to file nodes plus edges between files for the given seeds.
+        seeds: Array<{
+            repoId: string;
+            path: string;
+        }>;
+        maxRels?: number;
+        limitEdges?: number;
+    }): Promise<{
+        nodes: Array<{
+            repoId: string;
+            path: string;
+        }>;
+        edges: Array<{
+            from: {
+                repoId: string;
+                path: string;
+            };
+            to: {
+                repoId: string;
+                path: string;
+            };
+            kind: string;
+        }>;
+    }>;
+    close(): Promise<void>;
+}
+type Neo4jLikeDriver = { // Minimal driver surface needed by Neo4jGraphStore: open a session, run queries, close.
+    session: (opts?: any) => { // Session options and query results are untyped (`any`) in this declaration.
+        run: (query: string, params?: any) => Promise<any>;
+        close: () => Promise<void>;
+    };
+    close: () => Promise<void>;
+};
+type Neo4jConfig = { // Connection settings; same fields as IndexerConfig.workspace.graph.neo4j.
+    uri: string;
+    user: string;
+    password: string;
+    database?: string;
+    labelPrefix?: string; // Optional; IndexerConfig documents the default as "Petri".
+};
+declare class Neo4jGraphStore implements GraphStore { // GraphStore implementation that also declares every optional GraphStore member.
+    private driver;
+    private cfg;
+    id: string;
+    private labelPrefix;
+    private schemaVersionLatest;
+    constructor(driver: Neo4jLikeDriver, cfg: Neo4jConfig);
+    private labels;
+    init(): Promise<void>; // Not part of the GraphStore interface. NOTE(review): presumably must run before use, and may already be called by createNeo4jGraphStore — confirm.
+    private getSchemaVersion;
+    private setSchemaVersion;
+    private runMigrations;
+    setRepoHead(args: {
+        repoId: string;
+        repoRoot: string;
+        commit: string;
+        branch: string;
+    }): Promise<void>;
+    deleteFile(args: {
+        repoId: string;
+        path: string;
+    }): Promise<void>;
+    replaceOutgoingSymbolEdgesFromFile(args: { // Same argument shape as the optional GraphStore member of this name.
+        repoId: string;
+        fromPath: string;
+        edges: Array<{
+            fromId: string;
+            toId: string;
+            kind: string;
+            toPath?: string;
+        }>;
+    }): Promise<void>;
+    replaceFileGraph(update: GraphFileUpdate): Promise<void>;
+    replaceRepoLinks(args: { // Same argument shape as the optional GraphStore member of this name.
+        links: Array<{
+            fromRepoId: string;
+            toRepoId: string;
+            kind: string;
+            detail?: string;
+        }>;
+    }): Promise<void>;
+    neighborFiles(args: { // Same argument and result shapes as the optional GraphStore member of this name.
+        seeds: Array<{
+            repoId: string;
+            path: string;
+        }>;
+        maxPerSeed?: number;
+        limit?: number;
+        kinds?: string[];
+    }): Promise<Array<{
+        repoId: string;
+        path: string;
+        weight: number;
+    }>>;
+    shortestFilePath(args: { // Same argument and result shapes as the optional GraphStore member of this name; the result may be null.
+        from: {
+            repoId: string;
+            path: string;
+        };
+        to: {
+            repoId: string;
+            path: string;
+        };
+        maxRels?: number;
+    }): Promise<Array<{
+        repoId: string;
+        path: string;
+    }> | null>;
+    extractFileSubgraph(args: { // Same argument and result shapes as the optional GraphStore member of this name.
+        seeds: Array<{
+            repoId: string;
+            path: string;
+        }>;
+        maxRels?: number;
+        limitEdges?: number;
+    }): Promise<{
+        nodes: Array<{
+            repoId: string;
+            path: string;
+        }>;
+        edges: Array<{
+            from: {
+                repoId: string;
+                path: string;
+            };
+            to: {
+                repoId: string;
+                path: string;
+            };
+            kind: string;
+        }>;
+    }>;
+    close(): Promise<void>;
+}
+/**
+ * Asynchronously creates a `Neo4jGraphStore` from connection settings only (no driver argument).
+ * NOTE(review): presumably constructs the driver internally and may call `init()`; confirm in the implementation.
+ */
+declare function createNeo4jGraphStore(cfg: Neo4jConfig): Promise<Neo4jGraphStore>;
+/**
+ * `AnnIndex` implementation whose `id` and `kind` are both the literal `"noop"`.
+ * NOTE(review): the underscore-prefixed parameter names suggest the arguments are ignored and
+ * `search` yields no hits; confirm in the implementation.
+ */
+declare class NoopAnnIndex implements AnnIndex {
+    readonly id = "noop";
+    readonly kind = "noop";
+    init(_init: AnnIndexInit): Promise<void>;
+    upsert(_points: AnnPoint[]): Promise<void>;
+    remove(_ids: string[]): Promise<void>;
+    search(_query: Float32Array, _k: number): Promise<AnnSearchHit[]>;
+    flush(): Promise<void>;
+    close(): Promise<void>;
+}
+/**
+ * Synchronously returns an `AnnIndex` for the optional `config`.
+ * NOTE(review): which implementation is chosen (and the fallback when `config` is omitted) is not visible here.
+ */
+declare function createAnnIndex(config?: AnnConfig): AnnIndex;
+/**
+ * Row shape returned by `RepoStore.getChunkById`.
+ * Field names are snake_case, and `embedding` is a raw `Buffer` rather than a `Float32Array`.
+ * NOTE(review): presumably mirrors the underlying database columns; confirm.
+ */
+interface StoredChunkRow {
+    id: string;
+    path: string;
+    language: string;
+    kind: string;
+    start_line: number;
+    end_line: number;
+    content_hash: string;
+    tokens: number;
+    file_mtime: number;
+    text: string;
+    embedding: Buffer;
+}
+/**
+ * Synchronous store opened from a database path. Exposes file records, chunk rows with embeddings,
+ * per-file edges, key/value metadata, version counters, and lexical search.
+ * NOTE(review): presumably SQLite-backed (the `searchFts` comment mentions SQLite FTS5); confirm.
+ */
+declare class RepoStore {
+    private db;
+    constructor(dbPath: string);
+    /** Monotonically increases whenever the chunk-store is mutated. */
+    getStoreVersion(): number;
+    /** Internal: bump store version (call inside the same transaction that mutates chunks). */
+    private bumpStoreVersion;
+    /** Vector index sync marker (per backend kind). */
+    getVectorIndexVersion(kind: string): number;
+    setVectorIndexVersion(kind: string, storeVersion: number): void;
+    /** Stores a string value under key `k`. */
+    setMeta(k: string, v: string): void;
+    /** Returns the string stored under key `k`, or `null`. */
+    getMeta(k: string): string | null;
+    /** Returns the stored hash for the file, or `null`. NOTE(review): presumably `null` means the file is unknown to the store. */
+    getFileHash(posixPath: string): string | null;
+    /** Returns the stored mtime for the file, or `null`. */
+    getFileMtime(posixPath: string): number | null;
+    upsertFile(posixPath: string, hash: string, mtime: number, language: string, size: number): void;
+    deleteFile(posixPath: string): void;
+    /**
+     * Replaces the chunk rows for `posixPath` with `rows`.
+     * Input rows use camelCase fields and a `Float32Array` embedding, and carry a separate `ftsText`
+     * alongside `text`. NOTE(review): presumably `ftsText` is what gets indexed for `searchFts`; confirm.
+     */
+    replaceChunksForFile(posixPath: string, rows: Array<{
+        id: string;
+        language: string;
+        kind: "chunk" | "synopsis";
+        startLine: number;
+        endLine: number;
+        contentHash: string;
+        text: string;
+        ftsText: string;
+        tokens: number;
+        fileMtime: number;
+        embedding: Float32Array;
+    }>): void;
+    /** Sets the string values stored for the (`fromPath`, `kind`) pair. */
+    setEdges(fromPath: string, kind: string, values: string[]): void;
+    /** Lists the string values stored for the (`fromPath`, `kind`) pair. */
+    listEdges(fromPath: string, kind: string): string[];
+    listAllFiles(): string[];
+    countChunks(): number;
+    /**
+     * Returns the embedding dimension if any chunks exist, otherwise null.
+     * Efficient (doesn't load all embeddings).
+     */
+    getAnyEmbeddingDimension(): number | null;
+    /** Returns `id` + `embedding` for chunks in the store, with embeddings as `Float32Array`. */
+    loadAllChunkEmbeddings(): Array<{
+        id: string;
+        embedding: Float32Array;
+    }>;
+    /** Returns the full stored row for `id`, or `null`. */
+    getChunkById(id: string): StoredChunkRow | null;
+    /** Lists chunk ids and line ranges for a file, with an optional `kind` argument ("chunk" or "synopsis"). */
+    listChunksForFile(posixPath: string, kind?: "chunk" | "synopsis"): Array<{
+        id: string;
+        start_line: number;
+        end_line: number;
+        kind: string;
+    }>;
+    /**
+     * Best-effort lexical search using SQLite FTS5.
+     * Returns ids with bm25 values (lower is better).
+     */
+    searchFts(ftq: string, limit: number, includePaths?: string[]): Array<{
+        id: string;
+        bm25: number;
+    }>;
+    close(): void;
+}
+/**
+ * Indexer for a single repository rooted at `repoRoot`, constructed with an `EmbeddingsProvider`
+ * and optional config, `WorkspaceStore`, and `GraphStore`.
+ * Exposes indexing (`indexAll`, `indexFile`, `deleteFile`), candidate retrieval
+ * (`vectorCandidates`, `lexicalCandidates`), chunk lookup, context expansion, and file watching.
+ */
+declare class RepoIndexer {
+    private readonly embedder;
+    readonly repoRoot: string;
+    readonly repoId: string;
+    private readonly rawConfig;
+    private readonly config;
+    private readonly progress;
+    private readonly embeddingCache;
+    private readonly symbolGraphProvider;
+    private store;
+    private workspaceStore;
+    private graphStore;
+    private vector;
+    private ann;
+    private retriever;
+    private fileIndexer;
+    private watcher;
+    private currentCommit;
+    private currentBranch;
+    private fileIgnore;
+    private serial;
+    constructor(repoRoot: string, embedder: EmbeddingsProvider, config?: IndexerConfig, workspaceStore?: WorkspaceStore, graphStore?: GraphStore);
+    private emitProgress;
+    /** Returns the current commit, or `null`. NOTE(review): presumably `null` until `openForCurrentHead()` has run; confirm. */
+    getCommit(): string | null;
+    /** Returns the current branch, or `null`. */
+    getBranch(): string | null;
+    /** Returns the underlying `RepoStore`, or `null`. */
+    getStore(): RepoStore | null;
+    private dbPathForCommit;
+    private vectorMetric;
+    private vectorFlushDebounceMs;
+    private ensureVectorIndex;
+    /** NOTE(review): presumably opens the store for the repository's current HEAD commit (see private `dbPathForCommit`); confirm. */
+    openForCurrentHead(): Promise<void>;
+    indexAll(): Promise<void>;
+    /** Indexes one file given its path. NOTE(review): the parameter name suggests a POSIX-style repo-relative path; confirm. */
+    indexFile(posixRelPath: string): Promise<void>;
+    deleteFile(posixRelPath: string): Promise<void>;
+    /**
+     * Resolves to `{ id, score }` candidates for a query embedding.
+     * NOTE(review): presumably `k` caps the result count and `includePaths` restricts results to those paths; confirm.
+     */
+    vectorCandidates(queryEmbedding: Float32Array, k: number, includePaths?: string[]): Promise<Array<{
+        id: string;
+        score: number;
+    }>>;
+    /** Resolves to `{ id, score }` candidates for a text query; same parameter shape as `vectorCandidates`. */
+    lexicalCandidates(queryText: string, k: number, includePaths?: string[]): Promise<Array<{
+        id: string;
+        score: number;
+    }>>;
+    /** Returns the full `ChunkRecord` for `id`, or `null`. */
+    getChunkRecord(id: string): ChunkRecord | null;
+    /** Returns the `ChunkRecord` for `id` without its `text` field, or `null`. */
+    getChunkMeta(id: string): Omit<ChunkRecord, "text"> | null;
+    /** Always returns a string. NOTE(review): the value returned for an unknown id is not visible here. */
+    getChunkText(id: string): string;
+    getChunkPreview(id: string): string;
+    /** Resolves to a chunk id for the file, or `null`. `preferSynopsis` is optional; its default is not visible here. */
+    getRepresentativeChunkIdForFile(posixPath: string, preferSynopsis?: boolean): Promise<string | null>;
+    /**
+     * Resolves to additional chunk ids related to `chunkId`, each tagged with a `reason` string.
+     * All three options are required: `adjacentChunks` and `followImports` are numbers,
+     * `includeFileSynopsis` is a boolean.
+     */
+    expandContext(chunkId: string, opts: {
+        adjacentChunks: number;
+        followImports: number;
+        includeFileSynopsis: boolean;
+    }): Promise<Array<{
+        id: string;
+        reason: string;
+    }>>;
+    /**
+     * Staged symbol-graph expansion: compute expensive cross-file edges only for the specified files.
+     * Safe to call repeatedly; provider caching should avoid redundant work.
+     */
+    warmSymbolGraphEdges(posixPaths: string[], opts?: {
+        signal?: AbortSignal;
+        maxFiles?: number;
+    }): Promise<void>;
+    watch(): Promise<void>;
+    /** Asynchronous close. NOTE(review): how this differs from the synchronous `close()` is not visible here. */
+    closeAsync(): Promise<void>;
+    close(): void;
+}
+/**
+ * Workspace-level entry point constructed from a workspace root, an `EmbeddingsProvider`, and optional config.
+ * Exposes its per-repo indexers via `getRepoIndexers()` and offers `retrieve` / `search` over a query string.
+ * NOTE(review): presumably `open()` must be awaited before indexing or querying; confirm.
+ */
+declare class WorkspaceIndexer {
+    private readonly workspaceRoot;
+    private readonly embedder;
+    private repos;
+    private config;
+    private progress;
+    private workspaceStore;
+    private graphStore;
+    constructor(workspaceRoot: string, embedder: EmbeddingsProvider, config?: IndexerConfig);
+    private emitProgress;
+    open(): Promise<void>;
+    indexAll(): Promise<void>;
+    watch(): Promise<void>;
+    /** Returns the `RepoIndexer` instances held by this workspace indexer. */
+    getRepoIndexers(): RepoIndexer[];
+    private resolveProfile;
+    /** Resolves to a `ContextBundle` for the query, with optional `RetrieveOptions`. */
+    retrieve(query: string, opts?: RetrieveOptions): Promise<ContextBundle>;
+    /** Resolves to `SearchHit`s for the query. NOTE(review): presumably `k` caps the hit count; its default is not visible here. */
+    search(query: string, k?: number): Promise<SearchHit[]>;
+    /** Asynchronous close. NOTE(review): how this differs from the synchronous `close()` is not visible here. */
+    closeAsync(): Promise<void>;
+    close(): void;
+}
+/**
+ * Identity and location of one repository, as listed in `WorkspaceLinkContext.repos`.
+ * NOTE(review): presumably `repoRoot` is the root as supplied by the caller and `absRoot` its absolute form; confirm.
+ */
+type RepoInfo = {
+    repoId: string;
+    repoRoot: string;
+    absRoot: string;
+};
+/** A directed link between two repositories, with a free-form `kind` string and optional `detail`. */
+type RepoLink = {
+    fromRepoId: string;
+    toRepoId: string;
+    kind: string;
+    detail?: string;
+};
+/** Input passed to `WorkspaceLinkStrategy.collect`: the workspace root, its repos, and two lookup maps. */
+type WorkspaceLinkContext = {
+    workspaceRoot: string;
+    repos: RepoInfo[];
+    /** String-keyed lookup to a repo id. NOTE(review): presumably keyed by npm package name; confirm. */
+    npmNameToRepoId: Map<string, string>;
+    /** String-keyed lookup to a repo id. NOTE(review): presumably keyed by Go module path; confirm. */
+    goModuleToRepoId: Map<string, string>;
+};
+/** Contract for a link strategy: an `id` plus a synchronous `collect` that returns links for a context. */
+interface WorkspaceLinkStrategy {
+    id: string;
+    collect(ctx: WorkspaceLinkContext): RepoLink[];
+}
+/**
+ * `WorkspaceLinkStrategy` implementation.
+ * NOTE(review): the name suggests it links repos located inside other repos; confirm in the implementation.
+ */
+declare class NestedRepoLinkStrategy implements WorkspaceLinkStrategy {
+    id: string;
+    collect(ctx: WorkspaceLinkContext): RepoLink[];
+}
+/**
+ * `WorkspaceLinkStrategy` implementation.
+ * NOTE(review): the name suggests it links repos via npm dependencies (see `WorkspaceLinkContext.npmNameToRepoId`); confirm.
+ */
+declare class NpmDependencyLinkStrategy implements WorkspaceLinkStrategy {
+    id: string;
+    collect(ctx: WorkspaceLinkContext): RepoLink[];
+}
+/**
+ * `WorkspaceLinkStrategy` implementation.
+ * NOTE(review): the name suggests it links repos via Go module references (see `WorkspaceLinkContext.goModuleToRepoId`); confirm.
+ */
+declare class GoModuleLinkStrategy implements WorkspaceLinkStrategy {
+    id: string;
+    collect(ctx: WorkspaceLinkContext): RepoLink[];
+}
+/**
+ * Link repos based on VS Code extension contributions.
+ *
+ * This is intentionally generic: it supports the Petri languages extension (CTL/LTL/ISL/Colour Algebra)
+ * and makes it straightforward to add additional language extensions later (just by contributing languages).
+ */
+declare class VsCodeContributesLanguageLinkStrategy implements WorkspaceLinkStrategy {
+    private readonly opts;
+    id: string;
+    /**
+     * @param opts Optional settings; `targetLanguageIds` is an optional list of language id strings.
+     * NOTE(review): the behaviour when `targetLanguageIds` is omitted is not visible here.
+     */
+    constructor(opts?: {
+        targetLanguageIds?: string[];
+    });
+    collect(ctx: WorkspaceLinkContext): RepoLink[];
+}
+/**
+ * Holds a list of `WorkspaceLinkStrategy` instances and exposes a two-step API:
+ * `buildContext` produces a `WorkspaceLinkContext`, and `buildLinks` turns a context into `RepoLink`s.
+ */
+declare class WorkspaceLinker {
+    private readonly strategies;
+    /** @param strategies Optional strategy list. NOTE(review): presumably falls back to `defaultStrategies()` when omitted; confirm. */
+    constructor(strategies?: WorkspaceLinkStrategy[]);
+    /** Returns a list of strategies. NOTE(review): which strategies are included is not visible here. */
+    static defaultStrategies(): WorkspaceLinkStrategy[];
+    /** Synchronously builds the link context from a workspace root and a list of repo roots. */
+    buildContext(workspaceRoot: string, repoRoots: string[]): WorkspaceLinkContext;
+    /** Synchronously returns the links for `ctx`. NOTE(review): ordering and de-duplication across strategies are not visible here. */
+    buildLinks(ctx: WorkspaceLinkContext): RepoLink[];
+}
+/**
+ * Asynchronously resolves to the repos and repo links for a workspace.
+ *
+ * `workspaceStore` and `graphStore` are optional and may be `null`; `strategies` is optional.
+ * NOTE(review): presumably the links are persisted to whichever stores are provided
+ * (cf. `GraphStore.replaceRepoLinks`); confirm in the implementation.
+ */
+declare function linkWorkspaceRepos(args: {
+    workspaceRoot: string;
+    repoRoots: string[];
+    workspaceStore?: WorkspaceStore | null;
+    graphStore?: GraphStore | null;
+    strategies?: WorkspaceLinkStrategy[];
+}): Promise<{
+    repos: RepoInfo[];
+    links: RepoLink[];
+}>;
+/**
+ * Synchronously returns an `IndexerConfig` for the given file path.
+ * NOTE(review): the accepted file format and the error behaviour for a missing or invalid file are not visible here.
+ */
+declare function loadConfigFile(filePath: string): IndexerConfig;
+export { type AnnConfig, type AnnIndex, type AnnIndexInit, type AnnMetric, type AnnPoint, type AnnSearchHit, type Cancellation, type Chunk, type ChunkRecord, type ChunkingConfig, type ContextBundle, type CustomVectorConfig, DEFAULT_PROFILES, type EmbeddingsProvider, type FaissVectorConfig, GoModuleLinkStrategy, type GraphEdge, type GraphFileUpdate, type GraphStore, type GraphSymbol, HashEmbeddingsProvider, type HnswlibVectorConfig, type IndexerConfig, type IndexerProgress, type IndexerProgressEvent, type IndexerProgressHandler, IndexerProgressObservable, type IndexerProgressSink, type LspDocument, type LspDocumentSymbol, type LspFacade, type LspPosition, type LspRange, type LspTargetLocation, Neo4jGraphStore, NestedRepoLinkStrategy, NoopAnnIndex, NpmDependencyLinkStrategy, OllamaEmbeddingsProvider, OpenAIEmbeddingsProvider, type ProfilesConfig, type QdrantVectorConfig, type RelatedContextOptions, type RepoDiscoveryOptions, type RepoId, RepoIndexer, type RepoInfo, type RepoLink, type RetrievalProfile, type RetrievalProfileName, type RetrievalScope, type RetrievalWeights, type RetrieveOptions, type SearchHit, type SearchOptions, type SymbolEdgeKind, type SymbolGraphIndexInput, type SymbolGraphIndexOutput, SymbolGraphIndexer, type SymbolGraphIndexingMode, type SymbolGraphProvider, type SymbolGraphStrategyOptions, type SymbolKind, type SymbolRange, type VectorConfig, type VectorIndex, type VectorIndexInit, type VectorMetric, type VectorPoint, type VectorProviderKind, type VectorSearchHit, VsCodeContributesLanguageLinkStrategy, type WorkspaceChunkRow, WorkspaceIndexer, type WorkspaceLinkContext, type WorkspaceLinkStrategy, WorkspaceLinker, WorkspaceStore, asProgressSink, chunkSource, createAnnIndex, createNeo4jGraphStore, createVSCodeSymbolGraphProvider, createVectorIndex, deepMergeProfile, discoverGitRepos, languageFromPath, linkWorkspaceRepos, loadConfigFile, mergeIndexerConfig, pickRepoOverride, stableSymbolId };