haiku.rag 0.13.2__tar.gz → 0.13.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of haiku.rag might be problematic.

Files changed (92)
  1. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/PKG-INFO +9 -2
  2. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/README.md +4 -1
  3. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/pyproject.toml +3 -1
  4. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/evaluations/benchmark.py +38 -17
  5. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/app.py +6 -9
  6. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/cli.py +7 -1
  7. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/client.py +17 -13
  8. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/monitor.py +9 -2
  9. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/reranking/__init__.py +18 -0
  10. haiku_rag-0.13.3/src/haiku/rag/reranking/zeroentropy.py +59 -0
  11. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/store/repositories/chunk.py +48 -7
  12. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/uv.lock +27 -2
  13. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/.dockerignore +0 -0
  14. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/.gitignore +0 -0
  15. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/.pre-commit-config.yaml +0 -0
  16. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/.python-version +0 -0
  17. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/LICENSE +0 -0
  18. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/mkdocs.yml +0 -0
  19. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/server.json +0 -0
  20. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/evaluations/__init__.py +0 -0
  21. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/evaluations/config.py +0 -0
  22. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/evaluations/datasets/__init__.py +0 -0
  23. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/evaluations/datasets/repliqa.py +0 -0
  24. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/evaluations/datasets/wix.py +0 -0
  25. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/evaluations/llm_judge.py +0 -0
  26. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/evaluations/prompts.py +0 -0
  27. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/__init__.py +0 -0
  28. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/a2a/__init__.py +0 -0
  29. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/a2a/client.py +0 -0
  30. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/a2a/context.py +0 -0
  31. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/a2a/models.py +0 -0
  32. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/a2a/prompts.py +0 -0
  33. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/a2a/skills.py +0 -0
  34. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/a2a/storage.py +0 -0
  35. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/a2a/worker.py +0 -0
  36. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/chunker.py +0 -0
  37. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/config/__init__.py +0 -0
  38. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/config/loader.py +0 -0
  39. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/config/models.py +0 -0
  40. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/embeddings/__init__.py +0 -0
  41. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/embeddings/base.py +0 -0
  42. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/embeddings/ollama.py +0 -0
  43. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/embeddings/openai.py +0 -0
  44. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/embeddings/vllm.py +0 -0
  45. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/embeddings/voyageai.py +0 -0
  46. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/graph/__init__.py +0 -0
  47. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/graph/base.py +0 -0
  48. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/graph/common.py +0 -0
  49. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/graph/models.py +0 -0
  50. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/graph/nodes/__init__.py +0 -0
  51. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/graph/nodes/analysis.py +0 -0
  52. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/graph/nodes/plan.py +0 -0
  53. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/graph/nodes/search.py +0 -0
  54. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/graph/nodes/synthesize.py +0 -0
  55. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/graph/prompts.py +0 -0
  56. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/logging.py +0 -0
  57. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/mcp.py +0 -0
  58. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/qa/__init__.py +0 -0
  59. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/qa/agent.py +0 -0
  60. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/qa/deep/__init__.py +0 -0
  61. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/qa/deep/dependencies.py +0 -0
  62. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/qa/deep/graph.py +0 -0
  63. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/qa/deep/models.py +0 -0
  64. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/qa/deep/nodes.py +0 -0
  65. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/qa/deep/prompts.py +0 -0
  66. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/qa/deep/state.py +0 -0
  67. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/qa/prompts.py +0 -0
  68. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/reader.py +0 -0
  69. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/reranking/base.py +0 -0
  70. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/reranking/cohere.py +0 -0
  71. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/reranking/mxbai.py +0 -0
  72. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/reranking/vllm.py +0 -0
  73. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/research/__init__.py +0 -0
  74. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/research/common.py +0 -0
  75. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/research/dependencies.py +0 -0
  76. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/research/graph.py +0 -0
  77. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/research/models.py +0 -0
  78. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/research/prompts.py +0 -0
  79. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/research/state.py +0 -0
  80. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/research/stream.py +0 -0
  81. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/store/__init__.py +0 -0
  82. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/store/engine.py +0 -0
  83. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/store/models/__init__.py +0 -0
  84. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/store/models/chunk.py +0 -0
  85. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/store/models/document.py +0 -0
  86. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/store/repositories/__init__.py +0 -0
  87. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/store/repositories/document.py +0 -0
  88. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/store/repositories/settings.py +0 -0
  89. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/store/upgrades/__init__.py +0 -0
  90. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/store/upgrades/v0_10_1.py +0 -0
  91. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/store/upgrades/v0_9_3.py +0 -0
  92. {haiku_rag-0.13.2 → haiku_rag-0.13.3}/src/haiku/rag/utils.py +0 -0
--- haiku_rag-0.13.2/PKG-INFO
+++ haiku_rag-0.13.3/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.13.2
+Version: 0.13.3
 Summary: Agentic Retrieval Augmented Generation (RAG) with LanceDB
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT
@@ -33,10 +33,14 @@ Requires-Dist: typer<0.20.0,>=0.19.2
 Requires-Dist: watchfiles>=1.1.1
 Provides-Extra: a2a
 Requires-Dist: fasta2a>=0.1.0; extra == 'a2a'
+Provides-Extra: cohere
+Requires-Dist: cohere>=5.0.0; extra == 'cohere'
 Provides-Extra: mxbai
 Requires-Dist: mxbai-rerank>=0.1.6; extra == 'mxbai'
 Provides-Extra: voyageai
 Requires-Dist: voyageai>=0.3.5; extra == 'voyageai'
+Provides-Extra: zeroentropy
+Requires-Dist: zeroentropy>=0.1.0a6; extra == 'zeroentropy'
 Description-Content-Type: text/markdown
 
 # Haiku RAG
@@ -56,7 +60,7 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
 - **Multiple QA providers**: Any provider/model supported by Pydantic AI
 - **Research graph (multi‑agent)**: Plan → Search → Evaluate → Synthesize with agentic AI
 - **Native hybrid search**: Vector + full-text search with native LanceDB RRF reranking
-- **Reranking**: Default search result reranking with MixedBread AI, Cohere, or vLLM
+- **Reranking**: Default search result reranking with MixedBread AI, Cohere, Zero Entropy, or vLLM
 - **Question answering**: Built-in QA agents on your documents
 - **File monitoring**: Auto-index files when run as server
 - **40+ file formats**: PDF, DOCX, HTML, Markdown, code files, URLs
@@ -79,6 +83,9 @@ haiku-rag add-src document.pdf --meta source=manual
 # Search
 haiku-rag search "query"
 
+# Search with filters
+haiku-rag search "query" --filter "uri LIKE '%.pdf' AND title LIKE '%paper%'"
+
 # Ask questions
 haiku-rag ask "Who is the author of haiku.rag?"
 
--- haiku_rag-0.13.2/README.md
+++ haiku_rag-0.13.3/README.md
@@ -15,7 +15,7 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
 - **Multiple QA providers**: Any provider/model supported by Pydantic AI
 - **Research graph (multi‑agent)**: Plan → Search → Evaluate → Synthesize with agentic AI
 - **Native hybrid search**: Vector + full-text search with native LanceDB RRF reranking
-- **Reranking**: Default search result reranking with MixedBread AI, Cohere, or vLLM
+- **Reranking**: Default search result reranking with MixedBread AI, Cohere, Zero Entropy, or vLLM
 - **Question answering**: Built-in QA agents on your documents
 - **File monitoring**: Auto-index files when run as server
 - **40+ file formats**: PDF, DOCX, HTML, Markdown, code files, URLs
@@ -38,6 +38,9 @@ haiku-rag add-src document.pdf --meta source=manual
 # Search
 haiku-rag search "query"
 
+# Search with filters
+haiku-rag search "query" --filter "uri LIKE '%.pdf' AND title LIKE '%paper%'"
+
 # Ask questions
 haiku-rag ask "Who is the author of haiku.rag?"
 
--- haiku_rag-0.13.2/pyproject.toml
+++ haiku_rag-0.13.3/pyproject.toml
@@ -2,7 +2,7 @@
 
 name = "haiku.rag"
 description = "Agentic Retrieval Augmented Generation (RAG) with LanceDB"
-version = "0.13.2"
+version = "0.13.3"
 authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
 license = { text = "MIT" }
 readme = { file = "README.md", content-type = "text/markdown" }
@@ -42,6 +42,8 @@ dependencies = [
 voyageai = ["voyageai>=0.3.5"]
 mxbai = ["mxbai-rerank>=0.1.6"]
 a2a = ["fasta2a>=0.1.0"]
+cohere = ["cohere>=5.0.0"]
+zeroentropy = ["zeroentropy>=0.1.0a6"]
 
 [project.scripts]
 haiku-rag = "haiku.rag.cli:cli"
--- haiku_rag-0.13.2/src/evaluations/benchmark.py
+++ haiku_rag-0.13.3/src/evaluations/benchmark.py
@@ -1,5 +1,6 @@
 import asyncio
 from collections.abc import Mapping
+from pathlib import Path
 from typing import Any, cast
 
 import logfire
@@ -12,13 +13,12 @@ from pydantic_evals.reporting import ReportCaseFailure
 from rich.console import Console
 from rich.progress import Progress
 
-from evaluations.config import DatasetSpec, RetrievalSample
+from evaluations.config import DatasetSpec
 from evaluations.datasets import DATASETS
 from evaluations.llm_judge import ANSWER_EQUIVALENCE_RUBRIC
 from evaluations.prompts import WIX_SUPPORT_PROMPT
-from haiku.rag import logging  # noqa: F401
 from haiku.rag.client import HaikuRAG
-from haiku.rag.config import Config
+from haiku.rag.config import AppConfig, find_config_file, load_yaml_config
 from haiku.rag.logging import configure_cli_logging
 from haiku.rag.qa import get_qa_agent
 
@@ -30,7 +30,7 @@ configure_cli_logging()
 console = Console()
 
 
-async def populate_db(spec: DatasetSpec) -> None:
+async def populate_db(spec: DatasetSpec, config: AppConfig) -> None:
     spec.db_path.parent.mkdir(parents=True, exist_ok=True)
     corpus = spec.document_loader()
     if spec.document_limit is not None:
@@ -38,7 +38,7 @@ async def populate_db(spec: DatasetSpec) -> None:
 
     with Progress() as progress:
         task = progress.add_task("[green]Populating database...", total=len(corpus))
-        async with HaikuRAG(spec.db_path) as rag:
+        async with HaikuRAG(spec.db_path, config=config) as rag:
             for doc in corpus:
                 doc_mapping = cast(Mapping[str, Any], doc)
                 payload = spec.document_mapper(doc_mapping)
@@ -64,11 +64,9 @@ async def populate_db(spec: DatasetSpec) -> None:
                 progress.advance(task)
 
 
-def _is_relevant_match(retrieved_uri: str | None, sample: RetrievalSample) -> bool:
-    return retrieved_uri is not None and retrieved_uri in sample.expected_uris
-
-
-async def run_retrieval_benchmark(spec: DatasetSpec) -> dict[str, float] | None:
+async def run_retrieval_benchmark(
+    spec: DatasetSpec, config: AppConfig
+) -> dict[str, float] | None:
     if spec.retrieval_loader is None or spec.retrieval_mapper is None:
         console.print("Skipping retrieval benchmark; no retrieval config.")
         return None
@@ -91,7 +89,7 @@ async def run_retrieval_benchmark(spec: DatasetSpec) -> dict[str, float] | None:
         task = progress.add_task(
             "[blue]Running retrieval benchmark...", total=len(corpus)
         )
-        async with HaikuRAG(spec.db_path) as rag:
+        async with HaikuRAG(spec.db_path, config=config) as rag:
             for doc in corpus:
                 doc_mapping = cast(Mapping[str, Any], doc)
                 sample = spec.retrieval_mapper(doc_mapping)
@@ -161,7 +159,7 @@
 
 
 async def run_qa_benchmark(
-    spec: DatasetSpec, qa_limit: int | None = None
+    spec: DatasetSpec, config: AppConfig, qa_limit: int | None = None
 ) -> ReportCaseFailure[str, str, dict[str, str]] | None:
     corpus = spec.qa_loader()
     if qa_limit is not None:
@@ -174,7 +172,7 @@
 
     judge_model = OpenAIChatModel(
         model_name=QA_JUDGE_MODEL,
-        provider=OllamaProvider(base_url=f"{Config.providers.ollama.base_url}/v1"),
+        provider=OllamaProvider(base_url=f"{config.providers.ollama.base_url}/v1"),
     )
 
     evaluation_dataset = EvalDataset[str, str, dict[str, str]](
@@ -204,7 +202,7 @@
                 total=len(evaluation_dataset.cases),
             )
 
-    async with HaikuRAG(spec.db_path) as rag:
+    async with HaikuRAG(spec.db_path, config=config) as rag:
         system_prompt = WIX_SUPPORT_PROMPT if spec.key == "wix" else None
         qa = get_qa_agent(rag, system_prompt=system_prompt)
 
@@ -272,6 +270,7 @@
 
 async def evaluate_dataset(
     spec: DatasetSpec,
+    config: AppConfig,
     skip_db: bool,
     skip_retrieval: bool,
     skip_qa: bool,
@@ -279,15 +278,15 @@
 ) -> None:
     if not skip_db:
         console.print(f"Using dataset: {spec.key}", style="bold magenta")
-        await populate_db(spec)
+        await populate_db(spec, config)
 
     if not skip_retrieval:
         console.print("Running retrieval benchmarks...", style="bold blue")
-        await run_retrieval_benchmark(spec)
+        await run_retrieval_benchmark(spec, config)
 
     if not skip_qa:
         console.print("\nRunning QA benchmarks...", style="bold yellow")
-        await run_qa_benchmark(spec, qa_limit=qa_limit)
+        await run_qa_benchmark(spec, config, qa_limit=qa_limit)
 
 
 app = typer.Typer(help="Run retrieval and QA benchmarks for configured datasets.")
@@ -296,6 +295,9 @@ app = typer.Typer(help="Run retrieval and QA benchmarks for configured datasets.
 @app.command()
 def run(
     dataset: str = typer.Argument(..., help="Dataset key to evaluate."),
+    config: Path | None = typer.Option(
+        None, "--config", help="Path to haiku.rag YAML config file."
+    ),
     skip_db: bool = typer.Option(
         False, "--skip-db", help="Skip updateing the evaluation db."
     ),
@@ -314,9 +316,28 @@
             f"Unknown dataset '{dataset}'. Choose from: {valid_datasets}"
         )
 
+    # Load config from file or use defaults
+    if config:
+        if not config.exists():
+            raise typer.BadParameter(f"Config file not found: {config}")
+        console.print(f"Loading config from: {config}", style="dim")
+        yaml_data = load_yaml_config(config)
+        app_config = AppConfig.model_validate(yaml_data)
+    else:
+        # Try to find config file using standard search path
+        config_path = find_config_file(None)
+        if config_path:
+            console.print(f"Loading config from: {config_path}", style="dim")
+            yaml_data = load_yaml_config(config_path)
+            app_config = AppConfig.model_validate(yaml_data)
+        else:
+            console.print("No config file found, using defaults", style="dim")
+            app_config = AppConfig()
+
     asyncio.run(
         evaluate_dataset(
             spec=spec,
+            config=app_config,
             skip_db=skip_db,
             skip_retrieval=skip_retrieval,
             skip_qa=skip_qa,
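
The benchmark CLI now threads an explicit AppConfig through every step instead of relying on the global Config. A minimal sketch of the resolution order the run command implements, assuming a YAML file at ./haiku.rag.yaml (hypothetical path; absent any file, the standard search path and then defaults apply):

from pathlib import Path

from haiku.rag.config import AppConfig, find_config_file, load_yaml_config

# Prefer an explicit --config path, then the standard search path, then defaults.
explicit = Path("./haiku.rag.yaml")  # hypothetical location
config_path = explicit if explicit.exists() else find_config_file(None)
app_config = (
    AppConfig.model_validate(load_yaml_config(config_path))
    if config_path
    else AppConfig()
)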
--- haiku_rag-0.13.2/src/haiku/rag/app.py
+++ haiku_rag-0.13.3/src/haiku/rag/app.py
@@ -81,13 +81,10 @@ class HaikuRAGApp:
                 raw = rows[0].get("settings") or "{}"
                 data = json.loads(raw) if isinstance(raw, str) else (raw or {})
                 stored_version = str(data.get("version", stored_version))
-                embed_provider = data.get("EMBEDDINGS_PROVIDER")
-                embed_model = data.get("EMBEDDINGS_MODEL")
-                vector_dim = (
-                    int(data.get("EMBEDDINGS_VECTOR_DIM"))  # pyright: ignore[reportArgumentType]
-                    if data.get("EMBEDDINGS_VECTOR_DIM") is not None
-                    else None
-                )
+                embeddings = data.get("embeddings", {})
+                embed_provider = embeddings.get("provider")
+                embed_model = embeddings.get("model")
+                vector_dim = embeddings.get("vector_dim")
 
             num_docs = 0
             if "documents" in table_names:
@@ -195,9 +192,9 @@
                 f"[yellow]Document with id {doc_id} not found.[/yellow]"
             )
 
-    async def search(self, query: str, limit: int = 5):
+    async def search(self, query: str, limit: int = 5, filter: str | None = None):
         async with HaikuRAG(db_path=self.db_path) as self.client:
-            results = await self.client.search(query, limit=limit)
+            results = await self.client.search(query, limit=limit, filter=filter)
             if not results:
                 self.console.print("[yellow]No results found.[/yellow]")
                 return
--- haiku_rag-0.13.2/src/haiku/rag/cli.py
+++ haiku_rag-0.13.3/src/haiku/rag/cli.py
@@ -221,6 +221,12 @@
         "-l",
         help="Maximum number of results to return",
     ),
+    filter: str | None = typer.Option(
+        None,
+        "--filter",
+        "-f",
+        help="SQL WHERE clause to filter documents (e.g., \"uri LIKE '%arxiv%'\")",
+    ),
     db: Path = typer.Option(
         Config.storage.data_dir / "haiku.rag.lancedb",
         "--db",
@@ -230,7 +236,7 @@
     from haiku.rag.app import HaikuRAGApp
 
     app = HaikuRAGApp(db_path=db)
-    asyncio.run(app.search(query=query, limit=limit))
+    asyncio.run(app.search(query=query, limit=limit, filter=filter))
 
 
 @cli.command("ask", help="Ask a question using the QA agent")
--- haiku_rag-0.13.2/src/haiku/rag/client.py
+++ haiku_rag-0.13.3/src/haiku/rag/client.py
@@ -135,9 +135,6 @@ class HaikuRAG:
             ValueError: If the file/URL cannot be parsed or doesn't exist
             httpx.RequestError: If URL request fails
         """
-        # Lazy import to avoid loading docling
-        from haiku.rag.reader import FileReader
-
         # Normalize metadata
         metadata = metadata or {}
 
@@ -157,15 +154,17 @@
 
         # Handle directories
         if source_path.is_dir():
+            from haiku.rag.monitor import FileFilter
+
             documents = []
-            supported_extensions = set(FileReader.extensions)
-            for file_path in source_path.rglob("*"):
-                if (
-                    file_path.is_file()
-                    and file_path.suffix.lower() in supported_extensions
-                ):
+            filter = FileFilter(
+                ignore_patterns=self._config.monitor.ignore_patterns or None,
+                include_patterns=self._config.monitor.include_patterns or None,
+            )
+            for path in source_path.rglob("*"):
+                if path.is_file() and filter.include_file(str(path)):
                     doc = await self._create_document_from_file(
-                        file_path, title=None, metadata=metadata
+                        path, title=None, metadata=metadata
                     )
                     documents.append(doc)
             return documents
@@ -424,7 +423,11 @@
         return await self.document_repository.list_all(limit=limit, offset=offset)
 
     async def search(
-        self, query: str, limit: int = 5, search_type: str = "hybrid"
+        self,
+        query: str,
+        limit: int = 5,
+        search_type: str = "hybrid",
+        filter: str | None = None,
     ) -> list[tuple[Chunk, float]]:
         """Search for relevant chunks using the specified search method with optional reranking.
 
@@ -432,6 +435,7 @@
             query: The search query string.
             limit: Maximum number of results to return.
             search_type: Type of search - "vector", "fts", or "hybrid" (default).
+            filter: Optional SQL WHERE clause to filter documents before searching chunks.
 
         Returns:
             List of (chunk, score) tuples ordered by relevance.
@@ -441,12 +445,12 @@
 
         if reranker is None:
             # No reranking - return direct search results
-            return await self.chunk_repository.search(query, limit, search_type)
+            return await self.chunk_repository.search(query, limit, search_type, filter)
 
         # Get more initial results (3X) for reranking
         search_limit = limit * 3
         search_results = await self.chunk_repository.search(
-            query, search_limit, search_type
+            query, search_limit, search_type, filter
         )
 
         # Apply reranking
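
A minimal sketch of the new filter parameter through the high-level client, assuming a populated database at ./haiku.rag.lancedb (hypothetical path); the string is forwarded unchanged to the chunk repository as a SQL WHERE clause over the documents table:

import asyncio

from haiku.rag.client import HaikuRAG


async def main() -> None:
    async with HaikuRAG("./haiku.rag.lancedb") as rag:
        # Only chunks whose parent document matches the filter are considered.
        results = await rag.search(
            "retrieval benchmarks", limit=5, filter="uri LIKE '%.pdf'"
        )
        for chunk, score in results:
            print(f"{score:.3f}  {chunk.content[:80]}")


asyncio.run(main())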
--- haiku_rag-0.13.2/src/haiku/rag/monitor.py
+++ haiku_rag-0.13.3/src/haiku/rag/monitor.py
@@ -40,6 +40,14 @@ class FileFilter(DefaultFilter):
         super().__init__()
 
     def __call__(self, change: Change, path: str) -> bool:
+        if not self.include_file(path):
+            return False
+
+        # Apply default watchfiles filter
+        return super().__call__(change, path)
+
+    def include_file(self, path: str) -> bool:
+        """Check if a file should be included based on filters."""
         # Check extension filter
         if not path.endswith(self.extensions):
             return False
@@ -54,8 +62,7 @@
         if self.ignore_spec.match_file(path):
             return False
 
-        # Apply default watchfiles filter
-        return super().__call__(change, path)
+        return True
 
 
 class FileWatcher:
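
Extracting include_file from __call__ lets the client reuse haiku-rag's extension, include, and ignore rules when walking a directory, without the watchfiles-specific change handling. A small sketch (whether a path passes depends on the supported-extension list, so the first result is an assumption):

from haiku.rag.monitor import FileFilter

f = FileFilter(ignore_patterns=["*.tmp"], include_patterns=None)
print(f.include_file("/docs/notes.md"))   # True, assuming .md is a supported extension
print(f.include_file("/docs/cache.tmp"))  # False: matches an ignore pattern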
--- haiku_rag-0.13.2/src/haiku/rag/reranking/__init__.py
+++ haiku_rag-0.13.3/src/haiku/rag/reranking/__init__.py
@@ -41,5 +41,23 @@ def get_reranker(config: AppConfig = Config) -> RerankerBase | None:
        except ImportError:
            reranker = None
 
+    elif config.reranking.provider == "vllm":
+        try:
+            from haiku.rag.reranking.vllm import VLLMReranker
+
+            reranker = VLLMReranker(config.reranking.model)
+        except ImportError:
+            reranker = None
+
+    elif config.reranking.provider == "zeroentropy":
+        try:
+            from haiku.rag.reranking.zeroentropy import ZeroEntropyReranker
+
+            # Use configured model or default to zerank-1
+            model = config.reranking.model or "zerank-1"
+            reranker = ZeroEntropyReranker(model)
+        except ImportError:
+            reranker = None
+
     _reranker_cache[config_id] = reranker
     return reranker
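
With the new extra installed (pip install 'haiku.rag[zeroentropy]') and ZEROENTROPY_API_KEY exported, selecting the provider in config is enough. A hedged sketch, assuming the reranking section of AppConfig accepts the provider and model fields read above:

from haiku.rag.config import AppConfig
from haiku.rag.reranking import get_reranker

config = AppConfig.model_validate(
    {"reranking": {"provider": "zeroentropy", "model": "zerank-1"}}
)
reranker = get_reranker(config)  # ZeroEntropyReranker, or None if the SDK is missing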
--- /dev/null
+++ haiku_rag-0.13.3/src/haiku/rag/reranking/zeroentropy.py
@@ -0,0 +1,59 @@
+from zeroentropy import ZeroEntropy
+
+from haiku.rag.reranking.base import RerankerBase
+from haiku.rag.store.models.chunk import Chunk
+
+
+class ZeroEntropyReranker(RerankerBase):
+    """Zero Entropy reranker implementation using the zerank-1 model."""
+
+    def __init__(self, model: str = "zerank-1"):
+        """Initialize the Zero Entropy reranker.
+
+        Args:
+            model: The Zero Entropy model to use (default: "zerank-1")
+        """
+        self._model = model
+        # Zero Entropy SDK reads ZEROENTROPY_API_KEY from environment by default
+        self._client = ZeroEntropy()
+
+    async def rerank(
+        self, query: str, chunks: list[Chunk], top_n: int = 10
+    ) -> list[tuple[Chunk, float]]:
+        """Rerank the given chunks based on relevance to the query.
+
+        Args:
+            query: The query to rank against
+            chunks: The chunks to rerank
+            top_n: The number of top results to return
+
+        Returns:
+            A list of (chunk, score) tuples, sorted by relevance
+        """
+        if not chunks:
+            return []
+
+        # Prepare documents for Zero Entropy API
+        documents = [chunk.content for chunk in chunks]
+
+        # Call Zero Entropy reranking API
+        response = self._client.models.rerank(
+            model=self._model,
+            query=query,
+            documents=documents,
+        )
+
+        # Extract results and map back to chunks
+        # Zero Entropy returns results sorted by relevance with scores
+        reranked_results = []
+
+        # Get top_n results
+        for i, result in enumerate(response.results[:top_n]):
+            # Zero Entropy returns index and score for each document
+            chunk_index = result.index
+            score = result.relevance_score
+
+            if chunk_index < len(chunks):
+                reranked_results.append((chunks[chunk_index], score))
+
+        return reranked_results
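
A direct-use sketch of the new reranker; it assumes ZEROENTROPY_API_KEY is set and that Chunk can be constructed from content alone (the Chunk model may require more fields):

import asyncio

from haiku.rag.reranking.zeroentropy import ZeroEntropyReranker
from haiku.rag.store.models.chunk import Chunk


async def main() -> None:
    chunks = [
        Chunk(content="LanceDB stores vectors and full-text indexes together."),  # hypothetical
        Chunk(content="A haiku is a three-line poem."),  # hypothetical
    ]
    reranker = ZeroEntropyReranker()  # defaults to the "zerank-1" model
    ranked = await reranker.rerank("vector database", chunks, top_n=1)
    for chunk, score in ranked:
        print(f"{score:.3f}  {chunk.content}")


asyncio.run(main())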
--- haiku_rag-0.13.2/src/haiku/rag/store/repositories/chunk.py
+++ haiku_rag-0.13.3/src/haiku/rag/store/repositories/chunk.py
@@ -230,7 +230,11 @@ class ChunkRepository:
         return True
 
     async def search(
-        self, query: str, limit: int = 5, search_type: str = "hybrid"
+        self,
+        query: str,
+        limit: int = 5,
+        search_type: str = "hybrid",
+        filter: str | None = None,
     ) -> list[tuple[Chunk, float]]:
         """Search for relevant chunks using the specified search method.
 
@@ -238,6 +242,7 @@
             query: The search query string.
             limit: Maximum number of results to return.
             search_type: Type of search - "vector", "fts", or "hybrid" (default).
+            filter: Optional SQL WHERE clause to filter documents before searching chunks.
 
         Returns:
             List of (chunk, score) tuples ordered by relevance.
@@ -245,19 +250,42 @@
         if not query.strip():
             return []
 
+        chunk_where_clause = None
+        if filter:
+            # We perform filtering as a two-step process, first filtering documents, then
+            # filtering chunks based on those document IDs.
+            # This is because LanceDB does not support joins directly in search queries.
+            matching_doc_ids = self._get_filtered_document_ids(filter)
+
+            if not matching_doc_ids:
+                return []
+
+            # Build WHERE clause for chunks table
+            # Use IN clause with document IDs
+            id_list = "', '".join(matching_doc_ids)
+            chunk_where_clause = f"document_id IN ('{id_list}')"
+
         if search_type == "vector":
             query_embedding = await self.embedder.embed(query)
 
             results = self.store.chunks_table.search(
                 query_embedding, query_type="vector", vector_column_name="vector"
-            ).limit(limit)
+            )
+
+            if chunk_where_clause:
+                results = results.where(chunk_where_clause)
+
+            results = results.limit(limit)
 
             return await self._process_search_results(results)
 
         elif search_type == "fts":
-            results = self.store.chunks_table.search(query, query_type="fts").limit(
-                limit
-            )
+            results = self.store.chunks_table.search(query, query_type="fts")
+
+            if chunk_where_clause:
+                results = results.where(chunk_where_clause)
+
+            results = results.limit(limit)
             return await self._process_search_results(results)
 
         else:  # hybrid (default)
@@ -267,9 +295,13 @@
             reranker = RRFReranker()
 
             # Perform native hybrid search with RRF reranking
+            results = self.store.chunks_table.search(query_type="hybrid")
+
+            if chunk_where_clause:
+                results = results.where(chunk_where_clause)
+
             results = (
-                self.store.chunks_table.search(query_type="hybrid")
-                .vector(query_embedding)
+                results.vector(query_embedding)
                 .text(query)
                 .rerank(reranker)
                 .limit(limit)
@@ -332,6 +364,15 @@
 
         return adjacent_chunks
 
+    def _get_filtered_document_ids(self, filter: str) -> list[str]:
+        """Query documents table with filter and return matching document IDs."""
+        filtered_docs = (
+            self.store.documents_table.search()
+            .where(filter)
+            .to_pydantic(DocumentRecord)
+        )
+        return [doc.id for doc in filtered_docs]
+
     async def _process_search_results(self, query_result) -> list[tuple[Chunk, float]]:
         """Process search results into chunks with document info and scores."""
         chunks_with_scores = []
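
The two-step scheme resolves matching document IDs first, then constrains each chunk search with a document_id IN (...) clause, because LanceDB cannot join tables inside a search. A tiny sketch of the clause construction, with hypothetical IDs standing in for _get_filtered_document_ids output:

matching_doc_ids = ["doc-1", "doc-2"]  # hypothetical IDs
id_list = "', '".join(matching_doc_ids)
chunk_where_clause = f"document_id IN ('{id_list}')"
print(chunk_where_clause)  # document_id IN ('doc-1', 'doc-2')

Since the IDs are interpolated directly into the SQL string, the construction assumes document IDs never contain single quotes.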
--- haiku_rag-0.13.2/uv.lock
+++ haiku_rag-0.13.3/uv.lock
@@ -1150,7 +1150,7 @@ wheels = [
 
 [[package]]
 name = "haiku-rag"
-version = "0.13.2"
+version = "0.13.3"
 source = { editable = "." }
 dependencies = [
     { name = "docling" },
@@ -1173,12 +1173,18 @@ dependencies = [
 a2a = [
     { name = "fasta2a" },
 ]
+cohere = [
+    { name = "cohere" },
+]
 mxbai = [
     { name = "mxbai-rerank" },
 ]
 voyageai = [
     { name = "voyageai" },
 ]
+zeroentropy = [
+    { name = "zeroentropy" },
+]
 
 [package.dev-dependencies]
 dev = [
@@ -1197,6 +1203,7 @@ dev = [
 
 [package.metadata]
 requires-dist = [
+    { name = "cohere", marker = "extra == 'cohere'", specifier = ">=5.0.0" },
     { name = "docling", specifier = ">=2.58.0" },
     { name = "fasta2a", marker = "extra == 'a2a'", specifier = ">=0.1.0" },
     { name = "fastmcp", specifier = ">=2.13.0.2" },
@@ -1214,8 +1221,9 @@ requires-dist = [
     { name = "typer", specifier = ">=0.19.2,<0.20.0" },
     { name = "voyageai", marker = "extra == 'voyageai'", specifier = ">=0.3.5" },
     { name = "watchfiles", specifier = ">=1.1.1" },
+    { name = "zeroentropy", marker = "extra == 'zeroentropy'", specifier = ">=0.1.0a6" },
 ]
-provides-extras = ["voyageai", "mxbai", "a2a"]
+provides-extras = ["voyageai", "mxbai", "a2a", "cohere", "zeroentropy"]
 
 [package.metadata.requires-dev]
 dev = [
@@ -4718,6 +4726,23 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b4/2d/2345fce04cfd4bee161bf1e7d9cdc702e3e16109021035dbb24db654a622/yarl-1.20.1-py3-none-any.whl", hash = "sha256:83b8eb083fe4683c6115795d9fc1cfaf2cbbefb19b3a1cb68f6527460f483a77", size = 46542, upload-time = "2025-06-10T00:46:07.521Z" },
 ]
 
+[[package]]
+name = "zeroentropy"
+version = "0.1.0a6"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio" },
+    { name = "distro" },
+    { name = "httpx" },
+    { name = "pydantic" },
+    { name = "sniffio" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/08/14/05c8caaa25ae64008c2f5b021cefd30276f00845d99bab4763be300da93a/zeroentropy-0.1.0a6.tar.gz", hash = "sha256:04f38e7b40f39cfdd4bb16df0ab0b18d8f33d516c1e9a39e0494a5fb7fba358d", size = 112726, upload-time = "2025-07-08T01:51:42.796Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3b/7e/594e9ec5cda6d8f4dc249c5ca0c6b31b19886eafedcea6c41a2400a2a7b9/zeroentropy-0.1.0a6-py3-none-any.whl", hash = "sha256:0caa6c4a450af80892d42848036eea0b766e3fe0bf6a097a613ab9403fdf5ad6", size = 101501, upload-time = "2025-07-08T01:51:41.527Z" },
+]
+
 [[package]]
 name = "zipp"
 version = "3.23.0"