infogrep 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
infogrep/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """InfoGrep: local-first content search for coding agents."""
2
+
3
+ __version__ = "0.0.1"
infogrep/cli.py ADDED
@@ -0,0 +1,217 @@
1
+ """InfoGrep command-line interface.
2
+
3
+ Thin wrapper over the core engine; also the entry point used by the daily
4
+ scheduled re-index. Subcommands are stubbed until their milestones land.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import time
10
+ from pathlib import Path
11
+
12
+ import typer
13
+
14
+ from . import __version__
15
+ from .config import Config
16
+
17
+ app = typer.Typer(
18
+ add_completion=False,
19
+ help="Local-first content search (sparse + dense + knowledge base) for coding agents.",
20
+ )
21
+
22
+
23
+ def _version_callback(value: bool) -> None:
24
+ if value:
25
+ typer.echo(f"infogrep {__version__}")
26
+ raise typer.Exit()
27
+
28
+
29
@app.callback()
def _root(
    version: bool = typer.Option(
        False,
        "--version",
        help="Show version.",
        is_eager=True,
        callback=_version_callback,
    ),
) -> None:
    """InfoGrep: index and search the content of local files."""
    # Intentionally empty: the only root-level option, --version, is handled
    # entirely by its eager callback.
36
+
37
+
38
@app.command()
def index(
    directory: Path = typer.Argument(..., help="Directory to index."),
    full: bool = typer.Option(False, "--full", help="Force a full re-index."),
) -> None:
    """Build or incrementally update the side-car index for a directory."""
    # Imported lazily so the indexer is only loaded by commands that need it.
    from .indexer import Indexer

    cfg = Config.load(directory)
    target = cfg.target_dir
    if not target.is_dir():
        typer.echo(f"[infogrep] not a directory: {target}", err=True)
        raise typer.Exit(code=2)

    typer.echo(f"[infogrep] indexing {target}")
    typer.echo(f"[infogrep] index location: {cfg.index_dir}")

    def _report_progress(done: int, total: int) -> None:
        # Progress goes to stderr; the summary below goes to stdout.
        typer.echo(f"[infogrep] extracted {done}/{total} files…", err=True)

    indexer = Indexer(cfg)
    report = indexer.reindex(full=full, on_progress=_report_progress)

    counts = (
        f"added={report.added} modified={report.modified} deleted={report.deleted} "
        f"unchanged={report.unchanged} name_only={report.name_only}"
    )
    typer.echo("[infogrep] " + counts)
    typer.echo(f"[infogrep] index now holds {report.n_files} files, {report.n_passages} passages")

    # Errors are reported after the summary; they do not change the exit code.
    for problem in report.errors:
        typer.echo(f"[infogrep] error: {problem}", err=True)
66
+
67
+
68
@app.command()
def search(
    query: str = typer.Argument(..., help="Search query."),
    directory: Path = typer.Option(Path.cwd(), "--dir", "-d", help="Indexed directory."),
    k: int = typer.Option(10, "--k", help="Number of results."),
    mode: str = typer.Option("hybrid", "--mode", "-m", help="hybrid | sparse | dense | kb | graph."),
    prf: bool = typer.Option(False, "--prf", help="RM3 pseudo-relevance feedback (sparse)."),
) -> None:
    """Query indexed content."""
    # Exit codes: 2 for an unknown --mode or when a backend raises
    # FileNotFoundError; 0 otherwise (including "no results").
    from .engine import SearchEngine

    # Validate --mode up front so a typo fails fast, before any config file is
    # read or a search engine is built.
    if mode not in ("hybrid", "sparse", "dense", "kb", "graph"):
        typer.echo(f"[infogrep] unknown mode: {mode}", err=True)
        raise typer.Exit(code=2)

    engine = SearchEngine(Config.load(directory))

    try:
        if mode == "sparse":
            results = engine.search_sparse(query, k=k, prf=prf)
        elif mode == "dense":
            results = engine.search_dense(query, k=k)
        elif mode == "kb":
            results = engine.search_kb(query, k=k)
        elif mode == "graph":
            results = engine.search_graph(query, k=k)
        else:  # "hybrid" — the only value left after the validation above
            out = engine.search_hybrid(query, k=k, prf=prf)
            results = out.results
            # Tell the user which retrievers contributed and which were skipped.
            if out.used:
                typer.echo(f"[infogrep] fused: {', '.join(out.used)}")
            for name, reason in out.skipped.items():
                typer.echo(f"[infogrep] skipped {name}: {reason}")
    except FileNotFoundError as exc:
        typer.echo(f"[infogrep] {exc}", err=True)
        raise typer.Exit(code=2) from exc

    if not results:
        typer.echo("[infogrep] no results.")
        return
    for i, r in enumerate(results, start=1):
        file_ref = r.abs_path or r.path  # original file path when known
        # Format rather than concatenate so a non-str path (e.g. pathlib.Path)
        # cannot raise TypeError here.
        loc = f"{file_ref}" + (f" p.{r.page}" if r.page is not None else "")
        typer.echo(f"{i:2}. [{r.score:.3f}] {loc} ({r.retriever})")
        # A missing snippet is shown as an empty line instead of crashing.
        typer.echo(f" {(r.snippet or '').strip()[:160]}")
112
+
113
+
114
@app.command()
def status(
    directory: Path = typer.Argument(Path.cwd(), help="Indexed directory."),
) -> None:
    """Show index status and staleness for a directory."""
    from .indexer import Indexer

    cfg = Config.load(directory)
    info = Indexer(cfg).status()

    typer.echo(f"[infogrep] target: {cfg.target_dir}")
    typer.echo(f"[infogrep] index location: {cfg.index_dir}")

    # Nothing more to report when no index exists yet.
    if not info.get("indexed"):
        typer.echo("[infogrep] indexed: no")
        typer.echo("[infogrep] run `infogrep index <dir>` to build the index.")
        return

    typer.echo("[infogrep] indexed: yes")
    typer.echo(f"[infogrep] files: {info['n_files']} passages: {info['n_passages']}")
    typer.echo(f"[infogrep] index version: {info['index_version']}")

    stamp = info.get("last_indexed_at")
    if stamp:
        # The stored value is rendered as local time.
        local = time.localtime(float(stamp))
        when = time.strftime("%Y-%m-%d %H:%M:%S", local)
        typer.echo(f"[infogrep] last indexed: {when}")

    # Staleness is only reported when the status dict carries a "stale" key.
    if "stale" not in info:
        return
    if info["stale"]:
        typer.echo(
            f"[infogrep] STALE: {info['pending']} pending "
            f"(+{info['pending_added']} ~{info['pending_modified']} -{info['pending_deleted']}) "
            "— run `infogrep index`"
        )
    else:
        typer.echo("[infogrep] up to date")
144
+
145
+
146
@app.command()
def mcp(
    directory: Path = typer.Option(Path.cwd(), "--dir", "-d", help="Default indexed directory."),
) -> None:
    """Run the MCP server (stdio) so coding agents can call InfoGrep's search tools."""
    from .mcp_server import main as run_mcp_server

    # The server is handed an absolute path as a plain string.
    root = Path(directory).expanduser().resolve()
    run_mcp_server(directory=str(root))
154
+
155
+
156
@app.command()
def serve(
    directory: Path = typer.Option(Path.cwd(), "--dir", "-d", help="Indexed directory to search."),
    port: int = typer.Option(7421, "--port", "-p", help="Port (uncommon by default)."),
    host: str = typer.Option("127.0.0.1", "--host", help="Bind host (localhost by default)."),
) -> None:
    """Run a local web UI to test search in a browser."""
    from .web import serve as launch_web_ui

    # Options are forwarded unchanged; the web module owns the server itself.
    launch_web_ui(directory=directory, host=host, port=port)
166
+
167
+
168
+ schedule_app = typer.Typer(help="Manage daily auto-reindex (macOS launchd).")
169
+ app.add_typer(schedule_app, name="schedule")
170
+
171
+
172
def _parse_daily_time(at: str) -> tuple[int, int]:
    """Parse a 24-hour ``HH:MM`` string into ``(hour, minute)``.

    Raises:
        ValueError: if the separator is missing, either part is not an
            integer, or the time lies outside 00:00-23:59.
    """
    hour_text, sep, minute_text = at.partition(":")
    if not sep:
        raise ValueError(f"missing ':' in {at!r}")
    hour, minute = int(hour_text), int(minute_text)
    if not (0 <= hour <= 23 and 0 <= minute <= 59):
        raise ValueError(f"time out of range: {at!r}")
    return hour, minute


@schedule_app.command("install")
def schedule_install(
    directory: Path = typer.Argument(..., help="Directory to reindex daily."),
    at: str = typer.Option("03:00", "--at", help="Daily run time, HH:MM (24h)."),
) -> None:
    """Install a daily reindex agent for a directory."""
    from . import scheduler

    # Reject malformed *and* out-of-range times (e.g. "25:99") here, so an
    # impossible schedule is never handed to the scheduler.
    try:
        hour, minute = _parse_daily_time(at)
    except ValueError:
        typer.echo(f"[infogrep] invalid --at time: {at!r} (use HH:MM)", err=True)
        raise typer.Exit(code=2)
    path = scheduler.install(directory, hour=hour, minute=minute)
    typer.echo(f"[infogrep] scheduled daily reindex of {Path(directory).resolve()} at {at}")
    typer.echo(f"[infogrep] launchd agent: {path}")
188
+
189
+
190
@schedule_app.command("uninstall")
def schedule_uninstall(
    directory: Path = typer.Argument(..., help="Directory whose schedule to remove."),
) -> None:
    """Remove the daily reindex agent for a directory."""
    from . import scheduler

    # A falsy result from the scheduler is reported as "nothing to remove".
    removed = scheduler.uninstall(directory)
    if not removed:
        typer.echo("[infogrep] no schedule found for that directory.")
        return
    typer.echo(f"[infogrep] removed reindex schedule for {Path(directory).resolve()}")
201
+
202
+
203
@schedule_app.command("list")
def schedule_list() -> None:
    """List installed daily reindex agents."""
    from . import scheduler

    installed = scheduler.list_agents()
    if installed:
        # One line per agent: zero-padded run time, then the directory.
        for agent in installed:
            typer.echo(f"[infogrep] {agent['hour']:02d}:{agent['minute']:02d} daily {agent['directory']}")
    else:
        typer.echo("[infogrep] no reindex schedules installed.")
214
+
215
+
216
+ if __name__ == "__main__": # pragma: no cover
217
+ app()
infogrep/config.py ADDED
@@ -0,0 +1,217 @@
1
+ """Configuration model and per-directory config loading.
2
+
3
+ Indexing never writes into the indexed folder. Each directory's index lives in a
4
+ separate location under ``$INFOGREP_HOME`` (default ``~/.infogrep``):
5
+ ``$INFOGREP_HOME/indexes/<name>-<hash-of-abs-path>/``. Per-directory config is read from
6
+ that index dir's ``config.toml`` (with an optional global ``$INFOGREP_HOME/config.toml``
7
+ as a base).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import hashlib
13
+ import os
14
+ import re
15
+ import sys
16
+ from dataclasses import dataclass, field, asdict
17
+ from pathlib import Path
18
+
19
+ if sys.version_info >= (3, 11):
20
+ import tomllib
21
+ else: # pragma: no cover - exercised only on 3.10
22
+ import tomli as tomllib
23
+
24
+ # Legacy in-folder side-car name — still pruned during the walk so an old one (or a
25
+ # stray) inside a target never gets indexed. InfoGrep no longer creates it.
26
+ SIDECAR_DIRNAME = ".infogrep"
27
+
28
+
29
def index_home() -> Path:
    """Root for all InfoGrep indexes (override with the INFOGREP_HOME env var).

    Returns:
        The ``INFOGREP_HOME`` path with ``~`` expanded, or ``~/.infogrep``
        (expanded) when the variable is unset, empty, or only whitespace.
    """
    raw = os.environ.get("INFOGREP_HOME")
    # An empty value would become Path(""), i.e. the current working directory,
    # silently scattering indexes wherever the command is run. Treat it as unset.
    if raw is None or not raw.strip():
        raw = "~/.infogrep"
    return Path(raw).expanduser()
32
+
33
+
34
def index_dir_for(target_dir: Path) -> Path:
    """Return the index location for ``target_dir``, kept outside the target.

    The directory name is ``<sanitized folder name>-<12 hex chars>``, where the
    hex part is the start of the SHA-256 of the resolved absolute path.
    """
    resolved = Path(target_dir).expanduser().resolve()

    # Keep only filesystem-friendly characters; an empty name (the resolved
    # path has no final component) falls back to "root".
    safe_name = re.sub(r"[^A-Za-z0-9._-]", "_", resolved.name)
    if not safe_name:
        safe_name = "root"

    path_hash = hashlib.sha256(str(resolved).encode("utf-8")).hexdigest()
    return index_home() / "indexes" / f"{safe_name}-{path_hash[:12]}"
40
+
41
+
42
@dataclass
class ChunkConfig:
    """Passage-splitting parameters for long documents."""

    # Target chunk size, in tokens/words.
    size: int = 512
    # Overlap between adjacent chunks.
    overlap: int = 64
48
+
49
+
50
@dataclass
class IngestConfig:
    """Options that apply while files are being ingested."""

    # OCR PDF pages that have little/no extractable text (needs tesseract).
    ocr: bool = False
    # Below this many chars on a page, try OCR.
    ocr_min_chars: int = 16
    # Parallel extraction processes; 0 = auto (min(8, cpu count)).
    workers: int = 0
57
+
58
+
59
@dataclass
class DenseConfig:
    """Settings for dense (embedding-based) retrieval.

    Disabled by default, because embedding a large corpus needs a model
    download and significant RAM/GPU. Turn it on per directory with
    ``[dense] enabled = true`` once you want semantics.
    """

    enabled: bool = False
    # Registry key; see infogrep.retrieval.embedders.
    embedder: str = "qwen"
    model_name: str = "Qwen/Qwen3-Embedding-0.6B"
    # "auto" -> mps/cuda/cpu.
    device: str = "auto"
71
+
72
+
73
@dataclass
class SparseConfig:
    """Settings for sparse (Pyserini/BM25) retrieval."""

    enabled: bool = True
    # Pseudo-relevance feedback (query expansion); off by default.
    prf: bool = False
    # Feedback documents (top multi-field results) to expand from.
    prf_fb_docs: int = 10
    # Expansion terms to add to the query.
    prf_fb_terms: int = 10
    # Analyzer language. The default "en+zh" handles English (Porter stemming)
    # AND CJK (bigrams) together. Other values: "en" (English only),
    # "zh"/"ja"/"ko" (single CJK). Changing it triggers a full re-index.
    language: str = "en+zh"
    # Multi-field BM25 weights: passage text + file name + path.
    field_boosts: dict = field(
        default_factory=lambda: dict(contents=1.0, filename=2.0, pathtext=1.0)
    )
89
+
90
+
91
@dataclass
class KnowledgeBaseConfig:
    """Settings for the Obsidian knowledge base (backed by the Obsidian CLI)."""

    enabled: bool = False
    # Obsidian vault name; None -> the CLI's active vault.
    vault: str | None = None
    # Path to the Obsidian CLI binary.
    cli: str = "obsidian"
    # Graph link hops to expand (follows links + backlinks).
    hops: int = 1
    # How many search hits to seed graph expansion from.
    search_limit: int = 10
100
+
101
+
102
@dataclass
class GraphConfig:
    """Settings for the metadata knowledge graph over folder/file structure.

    The graph is built on every reindex from file *paths and names only*
    (never content). It is materialized as an Obsidian-compatible vault of
    folder notes under the index's ``graph_vault/`` side-car dir, which can be
    browsed in Obsidian. Hybrid search reads the graph directly (no Obsidian app
    required) to pull in sibling files from the folder(s) whose name/contents
    best match the query, not only files whose own content matched. It is cheap
    (pure path manipulation, no model), so it is on by default.
    """

    enabled: bool = True
    # Folder hops to expand from a matched folder (parent/children/siblings).
    hops: int = 1
    # Top-scoring folders to expand into file candidates per query.
    max_folders: int = 5
118
+
119
+
120
+ # Documents indexed by content (and, where supported, OCR). Code/config files are not
121
+ # included by default — set include = ["**/*"] to index everything.
122
DEFAULT_DOC_TYPES = [
    # office / rich-text documents
    "pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "rtf",
    "odt", "ods", "odp",
    # plain text, markup and tabular text
    "txt", "md", "markdown", "rst", "tex", "csv", "tsv",
    # JSON
    "json", "jsonl",
]
# Images: indexed by file name / path (content only if OCR is enabled).
DEFAULT_IMAGE_TYPES = [
    "png", "jpg", "jpeg", "gif", "bmp",
    "tif", "tiff", "webp", "svg", "heic", "heif",
]
# One recursive glob per extension, documents first, then images.
DEFAULT_INCLUDE = ["**/*." + ext for ext in (*DEFAULT_DOC_TYPES, *DEFAULT_IMAGE_TYPES)]

# Skip dependency / VCS / cache trees and editor/OS junk during the walk.
DEFAULT_EXCLUDE = [
    ".infogrep/**",
    "**/.git/**",
    "**/node_modules/**",
    "**/.venv/**",
    "**/venv/**",
    "**/site-packages/**",
    "**/__pycache__/**",
    "**/.cache/**",
    "**/.tox/**",
    "**/.mypy_cache/**",
    "**/.pytest_cache/**",
    "**/.Trash/**",
    "**/~$*",
    "**/.dropbox.cache/**",
]
140
+
141
+
142
@dataclass
class Config:
    """Top-level InfoGrep configuration for one indexed directory.

    Every on-disk location exposed below is derived from ``index_dir``, which
    is computed from ``target_dir`` by ``index_dir_for``.
    """

    target_dir: Path
    # Documents + images by default; set include = ["**/*"] to index every file.
    include: list[str] = field(default_factory=lambda: list(DEFAULT_INCLUDE))
    exclude: list[str] = field(default_factory=lambda: list(DEFAULT_EXCLUDE))
    chunk: ChunkConfig = field(default_factory=ChunkConfig)
    ingest: IngestConfig = field(default_factory=IngestConfig)
    sparse: SparseConfig = field(default_factory=SparseConfig)
    dense: DenseConfig = field(default_factory=DenseConfig)
    kb: KnowledgeBaseConfig = field(default_factory=KnowledgeBaseConfig)
    graph: GraphConfig = field(default_factory=GraphConfig)

    @property
    def index_dir(self) -> Path:
        """Where this directory's index lives — a separate location, not in the target."""
        return index_dir_for(self.target_dir)

    @property
    def manifest_path(self) -> Path:
        """Path of ``manifest.sqlite`` inside the index dir."""
        return self.index_dir / "manifest.sqlite"

    @property
    def sparse_dir(self) -> Path:
        """``sparse`` subdirectory of the index dir."""
        return self.index_dir / "sparse"

    @property
    def dense_dir(self) -> Path:
        """``dense`` subdirectory of the index dir."""
        return self.index_dir / "dense"

    @property
    def cache_dir(self) -> Path:
        """``cache`` subdirectory of the index dir."""
        return self.index_dir / "cache"

    @property
    def graph_vault_dir(self) -> Path:
        """Obsidian-compatible vault of folder notes (metadata graph), for browsing."""
        return self.index_dir / "graph_vault"

    @property
    def graph_json_path(self) -> Path:
        """Compact JSON form of the same graph, read directly by the graph retriever."""
        return self.index_dir / "graph.json"

    @classmethod
    def load(cls, target_dir: str | Path) -> "Config":
        """Load config for ``target_dir`` (global config.toml, then per-index override).

        Args:
            target_dir: Directory to configure; ``~`` is expanded and the path
                is resolved before use.

        Returns:
            A ``Config`` with defaults, overlaid by each ``config.toml`` that
            exists. The per-index file is applied last, so it wins.
        """
        target = Path(target_dir).expanduser().resolve()
        cfg = cls(target_dir=target)
        for config_file in (index_home() / "config.toml", cfg.index_dir / "config.toml"):
            if config_file.is_file():
                with config_file.open("rb") as fh:
                    cfg = cls._merge(cfg, tomllib.load(fh))
        return cfg

    @staticmethod
    def _merge(base: "Config", data: dict) -> "Config":
        """Shallow-merge a parsed TOML dict onto ``base`` (mutated and returned).

        ``include`` / ``exclude`` replace the existing lists wholesale. Each
        section table (``chunk``, ``ingest``, ``sparse``, ``dense``, ``kb``,
        ``graph``) overrides individual fields of the matching sub-config.
        Top-level keys outside those names are ignored.
        """
        for key in ("include", "exclude"):
            if key in data:
                value = data[key]
                # `include = "**/*.md"` (a bare string instead of an array) is
                # an easy TOML slip; list() would explode it into single
                # characters, so treat it as a one-pattern list.
                patterns = [value] if isinstance(value, str) else list(value)
                setattr(base, key, patterns)

        for section in ("chunk", "ingest", "sparse", "dense", "kb", "graph"):
            if section in data:
                current = getattr(base, section)
                # Rebuild with the same sub-config class, so an unknown field
                # name in the TOML table still raises TypeError as before.
                merged = {**asdict(current), **data[section]}
                setattr(base, section, type(current)(**merged))
        return base
infogrep/engine.py ADDED
@@ -0,0 +1,166 @@
1
+ """Search engine: the shared core behind both the CLI and the MCP server.
2
+
3
+ Owns the retrievers for one indexed directory, runs them individually or fused (RRF),
4
+ and degrades gracefully when a backend's index is missing or a backend errors.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass, field
10
+
11
+ from .config import Config
12
+ from .retrieval.base import Result, with_file_metadata
13
+ from .retrieval.fusion import reciprocal_rank_fusion
14
+
15
+ # Per-retriever candidate pool size for fusion (>= k so RRF has material to work with).
16
+ _POOL_MIN = 20
17
+
18
+ ALL_RETRIEVERS = ("sparse", "dense", "kb", "graph")
19
+
20
+
21
@dataclass
class HybridResults:
    """Outcome of a fused search: the hits, plus per-retriever bookkeeping."""

    # Fused result list.
    results: list[Result]
    # Names of the retrievers that actually contributed.
    used: list[str] = field(default_factory=list)
    # Retrievers that were skipped, mapped to the reason (retriever -> reason).
    skipped: dict[str, str] = field(default_factory=dict)
28
+
29
+
30
class SearchEngine:
    """Runs the retrievers for one indexed directory, alone or fused.

    Backends are built lazily on first use, so constructing the engine does not
    import or open any index.
    """

    def __init__(self, config: Config):
        self.config = config
        # Backend instances, created on first access by the properties below.
        self._sparse = None
        self._dense = None
        self._kb = None
        self._graph = None

    # -- lazy backends -----------------------------------------------------

    @property
    def sparse(self):
        """Sparse index backend, built from the sparse config on first access."""
        if self._sparse is None:
            from .retrieval.sparse import SparseIndex

            self._sparse = SparseIndex(
                self.config.sparse_dir,
                self.config.cache_dir,
                field_boosts=self.config.sparse.field_boosts,
                language=self.config.sparse.language,
                prf_fb_docs=self.config.sparse.prf_fb_docs,
                prf_fb_terms=self.config.sparse.prf_fb_terms,
            )
        return self._sparse

    @property
    def dense(self):
        """Dense index backend, built on first access."""
        if self._dense is None:
            from .retrieval.dense import DenseIndex

            self._dense = DenseIndex(self.config)
        return self._dense

    @property
    def kb(self):
        """Knowledge-base backend, built on first access."""
        if self._kb is None:
            from .retrieval.kb import KnowledgeBaseIndex

            self._kb = KnowledgeBaseIndex(self.config)
        return self._kb

    @property
    def graph(self):
        """Folder-graph backend, built from the graph config on first access."""
        if self._graph is None:
            from .retrieval.graph import FolderGraphIndex

            self._graph = FolderGraphIndex(
                self.config.index_dir,
                hops=self.config.graph.hops,
                max_folders=self.config.graph.max_folders,
            )
        return self._graph

    # -- individual retrievers --------------------------------------------

    def _enrich(self, results: list[Result], root) -> list[Result]:
        """Attach the original file path + metadata to each result."""
        return [with_file_metadata(r, root) for r in results]

    def search_sparse(self, query: str, k: int = 10, prf: bool = False) -> list[Result]:
        """Sparse search; ``prf`` is forwarded to the sparse backend."""
        # Content-file retrievers: paths are relative to the indexed directory.
        return self._enrich(self.sparse.search(query, k=k, prf=prf), self.config.target_dir)

    def search_dense(self, query: str, k: int = 10) -> list[Result]:
        """Dense search over the indexed directory."""
        return self._enrich(self.dense.search(query, k=k), self.config.target_dir)

    def search_kb(self, query: str, k: int = 10) -> list[Result]:
        """Knowledge-base search."""
        # KB paths are vault-relative; we have the vault name (CLI target), not its
        # filesystem root, so set filename/ext only (root=None leaves abs_path unset).
        return self._enrich(self.kb.search(query, k=k), None)

    def search_graph(self, query: str, k: int = 10) -> list[Result]:
        """Folder-graph search."""
        # Graph paths reference real files in the indexed directory, just like sparse/dense.
        return self._enrich(self.graph.search(query, k=k), self.config.target_dir)

    def _run(self, name: str, query: str, k: int, prf: bool) -> list[Result]:
        """Run one retriever by name; only ``sparse`` receives ``prf``.

        Raises:
            ValueError: if ``name`` is not a known retriever.
        """
        runners = {
            "sparse": lambda: self.search_sparse(query, k=k, prf=prf),
            "dense": lambda: self.search_dense(query, k=k),
            "kb": lambda: self.search_kb(query, k=k),
            "graph": lambda: self.search_graph(query, k=k),
        }
        if name not in runners:
            raise ValueError(f"unknown retriever: {name}")
        return runners[name]()

    def _enabled(self, name: str) -> bool:
        """Return the config's ``enabled`` flag for ``name`` (False if unknown)."""
        return {
            "sparse": self.config.sparse.enabled,
            "dense": self.config.dense.enabled,
            "kb": self.config.kb.enabled,
            "graph": self.config.graph.enabled,
        }.get(name, False)

    # -- fused -------------------------------------------------------------

    def search_hybrid(
        self,
        query: str,
        k: int = 10,
        retrievers: list[str] | None = None,
        prf: bool = False,
    ) -> HybridResults:
        """Run several retrievers and fuse their rankings.

        Args:
            query: Search query.
            k: Number of fused results to return.
            retrievers: Retriever names to run; ``None`` or empty means every
                retriever enabled in config. Repeated names run once.
            prf: Forwarded to the sparse retriever.

        Returns:
            ``HybridResults`` with the fused list, the retrievers that
            contributed (``used``), and a reason for each one that did not
            (``skipped``). A failing backend is recorded in ``skipped`` and
            never raises.
        """
        if retrievers:
            # Drop repeats (keeping order): running a retriever twice would
            # count its ranking twice in the fusion.
            names = list(dict.fromkeys(retrievers))
        else:
            names = [r for r in ALL_RETRIEVERS if self._enabled(r)]
        # Each retriever is asked for at least _POOL_MIN candidates.
        pool = max(k, _POOL_MIN)

        lists: list[list[Result]] = []
        out = HybridResults(results=[])
        for name in names:
            if name not in ALL_RETRIEVERS:
                # A typo is not a config problem; say so instead of "disabled".
                out.skipped[name] = (
                    f"unknown retriever (expected one of: {', '.join(ALL_RETRIEVERS)})"
                )
                continue
            if not self._enabled(name):
                out.skipped[name] = "disabled in config"
                continue
            try:
                hits = self._run(name, query, pool, prf)
            except FileNotFoundError as exc:
                out.skipped[name] = str(exc)
                continue
            except Exception as exc:  # one backend failing shouldn't sink the query
                out.skipped[name] = f"error: {exc}"
                continue
            lists.append(hits)
            out.used.append(name)

        out.results = reciprocal_rank_fusion(lists, top_n=k) if lists else []
        return out

    # -- maintenance -------------------------------------------------------

    def status(self) -> dict:
        """Return the indexer's status dict for this engine's config."""
        from .indexer import Indexer

        return Indexer(self.config).status()

    def reindex(self, full: bool = False) -> dict:
        """Reindex via the indexer and return its report as a dict."""
        from .indexer import Indexer

        return Indexer(self.config).reindex(full=full).as_dict()