haiku.rag 0.5.1__tar.gz → 0.5.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (81)
  1. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/PKG-INFO +11 -3
  2. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/README.md +7 -0
  3. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/docs/cli.md +6 -1
  4. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/docs/configuration.md +32 -4
  5. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/docs/python.md +28 -1
  6. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/pyproject.toml +3 -3
  7. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/app.py +4 -4
  8. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/cli.py +8 -3
  9. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/client.py +133 -5
  10. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/config.py +3 -3
  11. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/qa/__init__.py +12 -4
  12. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/qa/anthropic.py +16 -14
  13. haiku_rag-0.5.4/src/haiku/rag/qa/base.py +89 -0
  14. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/qa/ollama.py +8 -12
  15. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/qa/openai.py +13 -16
  16. haiku_rag-0.5.4/src/haiku/rag/qa/prompts.py +58 -0
  17. haiku_rag-0.5.4/src/haiku/rag/reranking/__init__.py +40 -0
  18. haiku_rag-0.5.4/src/haiku/rag/reranking/ollama.py +84 -0
  19. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/store/repositories/chunk.py +46 -0
  20. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/tests/test_app.py +35 -1
  21. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/tests/test_chunk.py +53 -0
  22. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/tests/test_cli.py +29 -1
  23. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/tests/test_client.py +261 -0
  24. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/tests/test_reranker.py +25 -8
  25. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/tests/test_search.py +1 -1
  26. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/uv.lock +10 -8
  27. haiku_rag-0.5.1/src/haiku/rag/qa/base.py +0 -41
  28. haiku_rag-0.5.1/src/haiku/rag/qa/prompts.py +0 -21
  29. haiku_rag-0.5.1/src/haiku/rag/reranking/__init__.py +0 -37
  30. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/.github/FUNDING.yml +0 -0
  31. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/.github/workflows/build-docs.yml +0 -0
  32. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/.github/workflows/build-publish.yml +0 -0
  33. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/.gitignore +0 -0
  34. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/.pre-commit-config.yaml +0 -0
  35. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/.python-version +0 -0
  36. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/LICENSE +0 -0
  37. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/docs/benchmarks.md +0 -0
  38. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/docs/index.md +0 -0
  39. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/docs/installation.md +0 -0
  40. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/docs/mcp.md +0 -0
  41. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/docs/server.md +0 -0
  42. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/mkdocs.yml +0 -0
  43. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/__init__.py +0 -0
  44. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/chunker.py +0 -0
  45. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/embeddings/__init__.py +0 -0
  46. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/embeddings/base.py +0 -0
  47. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/embeddings/ollama.py +0 -0
  48. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/embeddings/openai.py +0 -0
  49. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/embeddings/voyageai.py +0 -0
  50. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/logging.py +0 -0
  51. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/mcp.py +0 -0
  52. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/monitor.py +0 -0
  53. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/reader.py +0 -0
  54. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/reranking/base.py +0 -0
  55. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/reranking/cohere.py +0 -0
  56. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/reranking/mxbai.py +0 -0
  57. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/store/__init__.py +0 -0
  58. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/store/engine.py +0 -0
  59. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/store/models/__init__.py +0 -0
  60. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/store/models/chunk.py +0 -0
  61. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/store/models/document.py +0 -0
  62. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/store/repositories/__init__.py +0 -0
  63. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/store/repositories/base.py +0 -0
  64. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/store/repositories/document.py +0 -0
  65. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/store/repositories/settings.py +0 -0
  66. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/store/upgrades/__init__.py +0 -0
  67. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/store/upgrades/v0_3_4.py +0 -0
  68. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/utils.py +0 -0
  69. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/tests/__init__.py +0 -0
  70. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/tests/conftest.py +0 -0
  71. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/tests/generate_benchmark_db.py +0 -0
  72. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/tests/llm_judge.py +0 -0
  73. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/tests/test_chunker.py +0 -0
  74. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/tests/test_document.py +0 -0
  75. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/tests/test_embedder.py +0 -0
  76. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/tests/test_monitor.py +0 -0
  77. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/tests/test_qa.py +0 -0
  78. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/tests/test_reader.py +0 -0
  79. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/tests/test_rebuild.py +0 -0
  80. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/tests/test_settings.py +0 -0
  81. {haiku_rag-0.5.1 → haiku_rag-0.5.4}/tests/test_utils.py +0 -0

{haiku_rag-0.5.1 → haiku_rag-0.5.4}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.5.1
+Version: 0.5.4
 Summary: Retrieval Augmented Generation (RAG) with SQLite
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT
@@ -21,8 +21,7 @@ Requires-Python: >=3.11
 Requires-Dist: docling>=2.15.0
 Requires-Dist: fastmcp>=2.8.1
 Requires-Dist: httpx>=0.28.1
-Requires-Dist: mxbai-rerank>=0.1.6
-Requires-Dist: ollama>=0.5.1
+Requires-Dist: ollama>=0.5.3
 Requires-Dist: pydantic>=2.11.7
 Requires-Dist: python-dotenv>=1.1.0
 Requires-Dist: rich>=14.0.0
@@ -34,6 +33,8 @@ Provides-Extra: anthropic
 Requires-Dist: anthropic>=0.56.0; extra == 'anthropic'
 Provides-Extra: cohere
 Requires-Dist: cohere>=5.16.1; extra == 'cohere'
+Provides-Extra: mxbai
+Requires-Dist: mxbai-rerank>=0.1.6; extra == 'mxbai'
 Provides-Extra: openai
 Requires-Dist: openai>=1.0.0; extra == 'openai'
 Provides-Extra: voyageai
@@ -75,6 +76,9 @@ haiku-rag search "query"
 # Ask questions
 haiku-rag ask "Who is the author of haiku.rag?"

+# Ask questions with citations
+haiku-rag ask "Who is the author of haiku.rag?" --cite
+
 # Rebuild database (re-chunk and re-embed all documents)
 haiku-rag rebuild

@@ -100,6 +104,10 @@ async with HaikuRAG("database.db") as client:
     # Ask questions
     answer = await client.ask("Who is the author of haiku.rag?")
     print(answer)
+
+    # Ask questions with citations
+    answer = await client.ask("Who is the author of haiku.rag?", cite=True)
+    print(answer)
 ```

 ## MCP Server

{haiku_rag-0.5.1 → haiku_rag-0.5.4}/README.md

@@ -33,6 +33,9 @@ haiku-rag search "query"
 # Ask questions
 haiku-rag ask "Who is the author of haiku.rag?"

+# Ask questions with citations
+haiku-rag ask "Who is the author of haiku.rag?" --cite
+
 # Rebuild database (re-chunk and re-embed all documents)
 haiku-rag rebuild

@@ -58,6 +61,10 @@ async with HaikuRAG("database.db") as client:
     # Ask questions
     answer = await client.ask("Who is the author of haiku.rag?")
     print(answer)
+
+    # Ask questions with citations
+    answer = await client.ask("Who is the author of haiku.rag?", cite=True)
+    print(answer)
 ```

 ## MCP Server

{haiku_rag-0.5.1 → haiku_rag-0.5.4}/docs/cli.md

@@ -64,7 +64,12 @@ Ask questions about your documents:
 haiku-rag ask "Who is the author of haiku.rag?"
 ```

-The QA agent will search your documents for relevant information and provide a comprehensive answer.
+Ask questions with citations showing source documents:
+```bash
+haiku-rag ask "Who is the author of haiku.rag?" --cite
+```
+
+The QA agent will search your documents for relevant information and provide a comprehensive answer. With `--cite`, responses include citations showing which documents were used.

 ## Configuration

{haiku_rag-0.5.1 → haiku_rag-0.5.4}/docs/configuration.md

@@ -105,15 +105,37 @@ ANTHROPIC_API_KEY="your-api-key"

 ## Reranking

-Reranking is **enabled by default** and improves search quality by re-ordering the initial search results using specialized models. When enabled, the system retrieves more candidates (3x the requested limit) and then reranks them to return the most relevant results.
+Reranking improves search quality by re-ordering the initial search results using specialized models. When enabled, the system retrieves more candidates (3x the requested limit) and then reranks them to return the most relevant results.

-If you use the default reranked (running locally), it can slow down searching significantly. To disable reranking for faster searches:
+Reranking is **automatically enabled** by default using Ollama, or if you install the appropriate reranking provider package.
+
+### Disabling Reranking
+
+To disable reranking completely for faster searches:
+
+```bash
+RERANK_PROVIDER=""
+```
+
+### Ollama (Default)
+
+Ollama reranking uses LLMs with structured output to rank documents by relevance:

 ```bash
-RERANK=false
+RERANK_PROVIDER="ollama"
+RERANK_MODEL="qwen3:1.7b"  # or any model that supports structured output
+OLLAMA_BASE_URL="http://localhost:11434"
+```
+
+### MixedBread AI
+
+For MxBAI reranking, install with mxbai extras:
+
+```bash
+uv pip install haiku.rag[mxbai]
 ```

-### MixedBread AI (Default)
+Then configure:

 ```bash
 RERANK_PROVIDER="mxbai"
@@ -150,4 +172,10 @@ DEFAULT_DATA_DIR="/path/to/data"
 ```bash
 # Chunk size for document processing
 CHUNK_SIZE=256
+
+# Number of adjacent chunks to include before/after retrieved chunks for context
+# 0 = no expansion (default), 1 = include 1 chunk before and after, etc.
+# When expanded chunks overlap or are adjacent, they are automatically merged
+# into single chunks with continuous content to eliminate duplication
+CONTEXT_CHUNK_RADIUS=0
 ```
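
The configuration above drives a two-stage retrieval: the first stage over-fetches 3x the requested limit, and the reranker then re-orders those candidates and keeps only the top results. Below is a self-contained toy sketch of that shape; the scoring functions are invented stand-ins, not the package's hybrid SQLite search or its Ollama/mxbai/Cohere rerankers:

```python
# Toy sketch of "over-fetch 3x, then rerank" -- stand-in scorers only.
from dataclasses import dataclass


@dataclass
class Hit:
    content: str
    score: float


def hybrid_search(query: str, docs: list[str], limit: int) -> list[Hit]:
    # First stage: cheap term-overlap score over all documents.
    terms = set(query.lower().split())
    hits = [Hit(d, float(len(terms & set(d.lower().split())))) for d in docs]
    return sorted(hits, key=lambda h: -h.score)[:limit]


def rerank(query: str, hits: list[Hit], top_n: int) -> list[Hit]:
    # Second stage: rescore only the candidates, here by match density.
    terms = set(query.lower().split())
    rescored = [
        Hit(h.content, len(terms & set(h.content.lower().split())) / (1 + len(h.content.split())))
        for h in hits
    ]
    return sorted(rescored, key=lambda h: -h.score)[:top_n]


docs = [
    "reranking re-orders search results using specialized models",
    "chunk size controls document processing",
    "ollama reranking uses structured output",
]
limit = 1
candidates = hybrid_search("reranking models", docs, limit * 3)  # 3x over-fetch
print([h.content for h in rerank("reranking models", candidates, top_n=limit)])
```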

{haiku_rag-0.5.1 → haiku_rag-0.5.4}/docs/python.md

@@ -130,6 +130,26 @@ for chunk, relevance_score in results:
     print(f"Document metadata: {chunk.document_meta}")
 ```

+### Expanding Search Context
+
+Expand search results with adjacent chunks for more complete context:
+
+```python
+# Get initial search results
+search_results = await client.search("machine learning", limit=3)
+
+# Expand with adjacent chunks based on CONTEXT_CHUNK_RADIUS setting
+expanded_results = await client.expand_context(search_results)
+
+# The expanded results contain chunks with combined content from adjacent chunks
+for chunk, score in expanded_results:
+    print(f"Expanded content: {chunk.content}")  # Now includes before/after chunks
+```
+
+**Smart Merging**: When expanded chunks overlap or are adjacent within the same document, they are automatically merged into single chunks with continuous content. This eliminates duplication and provides coherent text blocks. The merged chunk uses the highest relevance score from the original chunks.
+
+This is automatically used by the QA system when `CONTEXT_CHUNK_RADIUS > 0` to provide better answers with more complete context.
+
 ## Question Answering

 Ask questions about your documents:
@@ -139,6 +159,13 @@ answer = await client.ask("Who is the author of haiku.rag?")
 print(answer)
 ```

-The QA agent will search your documents for relevant information and use the configured LLM to generate a comprehensive answer.
+Ask questions with citations showing source documents:
+
+```python
+answer = await client.ask("Who is the author of haiku.rag?", cite=True)
+print(answer)
+```
+
+The QA agent will search your documents for relevant information and use the configured LLM to generate a comprehensive answer. With `cite=True`, responses include citations showing which documents were used as sources.

 The QA provider and model can be configured via environment variables (see [Configuration](configuration.md)).

{haiku_rag-0.5.1 → haiku_rag-0.5.4}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "haiku.rag"
-version = "0.5.1"
+version = "0.5.4"
 description = "Retrieval Augmented Generation (RAG) with SQLite"
 authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
 license = { text = "MIT" }
@@ -25,8 +25,7 @@ dependencies = [
     "docling>=2.15.0",
     "fastmcp>=2.8.1",
     "httpx>=0.28.1",
-    "mxbai-rerank>=0.1.6",
-    "ollama>=0.5.1",
+    "ollama>=0.5.3",
     "pydantic>=2.11.7",
     "python-dotenv>=1.1.0",
     "rich>=14.0.0",
@@ -41,6 +40,7 @@ voyageai = ["voyageai>=0.3.2"]
 openai = ["openai>=1.0.0"]
 anthropic = ["anthropic>=0.56.0"]
 cohere = ["cohere>=5.16.1"]
+mxbai = ["mxbai-rerank>=0.1.6"]

 [project.scripts]
 haiku-rag = "haiku.rag.cli:cli"

{haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/app.py

@@ -32,9 +32,9 @@ class HaikuRAGApp:
             f"[b]Document with id [cyan]{doc.id}[/cyan] added successfully.[/b]"
         )

-    async def add_document_from_source(self, file_path: Path):
+    async def add_document_from_source(self, source: str):
         async with HaikuRAG(db_path=self.db_path) as self.client:
-            doc = await self.client.create_document_from_source(file_path)
+            doc = await self.client.create_document_from_source(source)
             self._rich_print_document(doc, truncate=True)
             self.console.print(
                 f"[b]Document with id [cyan]{doc.id}[/cyan] added successfully.[/b]"
@@ -62,10 +62,10 @@ class HaikuRAGApp:
             for chunk, score in results:
                 self._rich_print_search_result(chunk, score)

-    async def ask(self, question: str):
+    async def ask(self, question: str, cite: bool = False):
         async with HaikuRAG(db_path=self.db_path) as self.client:
             try:
-                answer = await self.client.ask(question)
+                answer = await self.client.ask(question, cite=cite)
                 self.console.print(f"[bold blue]Question:[/bold blue] {question}")
                 self.console.print()
                 self.console.print("[bold green]Answer:[/bold green]")

{haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/cli.py

@@ -81,7 +81,7 @@ def add_document_text(

 @cli.command("add-src", help="Add a document from a file path or URL")
 def add_document_src(
-    file_path: Path = typer.Argument(
+    source: str = typer.Argument(
         help="The file path or URL of the document to add",
     ),
     db: Path = typer.Option(
@@ -91,7 +91,7 @@ def add_document_src(
     ),
 ):
     app = HaikuRAGApp(db_path=db)
-    asyncio.run(app.add_document_from_source(file_path=file_path))
+    asyncio.run(app.add_document_from_source(source=source))


 @cli.command("get", help="Get and display a document by its ID")
@@ -160,9 +160,14 @@ def ask(
         "--db",
         help="Path to the SQLite database file",
     ),
+    cite: bool = typer.Option(
+        False,
+        "--cite",
+        help="Include citations in the response",
+    ),
 ):
     app = HaikuRAGApp(db_path=db)
-    asyncio.run(app.ask(question=question))
+    asyncio.run(app.ask(question=question, cite=cite))


 @cli.command("settings", help="Display current configuration settings")

{haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/client.py

@@ -319,7 +319,7 @@ class HaikuRAG:
         return await self.document_repository.list_all(limit=limit, offset=offset)

     async def search(
-        self, query: str, limit: int = 5, k: int = 60, rerank=Config.RERANK
+        self, query: str, limit: int = 5, k: int = 60
     ) -> list[tuple[Chunk, float]]:
         """Search for relevant chunks using hybrid search (vector similarity + full-text search) with reranking.

@@ -331,8 +331,10 @@ class HaikuRAG:
         Returns:
             List of (chunk, score) tuples ordered by relevance.
         """
+        # Get reranker if available
+        reranker = get_reranker()

-        if not rerank:
+        if reranker is None:
             return await self.chunk_repository.search_chunks_hybrid(query, limit, k)

         # Get more initial results (3X) for reranking
@@ -340,25 +342,151 @@ class HaikuRAG:
             query, limit * 3, k
         )
         # Apply reranking
-        reranker = get_reranker()
         chunks = [chunk for chunk, _ in search_results]
         reranked_results = await reranker.rerank(query, chunks, top_n=limit)

         # Return reranked results with scores from reranker
         return reranked_results

-    async def ask(self, question: str) -> str:
+    async def expand_context(
+        self, search_results: list[tuple[Chunk, float]]
+    ) -> list[tuple[Chunk, float]]:
+        """Expand search results with adjacent chunks, merging overlapping chunks.
+
+        Args:
+            search_results: List of (chunk, score) tuples from search.
+
+        Returns:
+            List of (chunk, score) tuples with expanded and merged context chunks.
+        """
+        if Config.CONTEXT_CHUNK_RADIUS == 0:
+            return search_results
+
+        # Group chunks by document_id to handle merging within documents
+        document_groups = {}
+        for chunk, score in search_results:
+            doc_id = chunk.document_id
+            if doc_id not in document_groups:
+                document_groups[doc_id] = []
+            document_groups[doc_id].append((chunk, score))
+
+        results = []
+
+        for doc_id, doc_chunks in document_groups.items():
+            # Get all expanded ranges for this document
+            expanded_ranges = []
+            for chunk, score in doc_chunks:
+                adjacent_chunks = await self.chunk_repository.get_adjacent_chunks(
+                    chunk, Config.CONTEXT_CHUNK_RADIUS
+                )
+
+                all_chunks = adjacent_chunks + [chunk]
+
+                # Get the range of orders for this expanded chunk
+                orders = [c.metadata.get("order", 0) for c in all_chunks]
+                min_order = min(orders)
+                max_order = max(orders)
+
+                expanded_ranges.append(
+                    {
+                        "original_chunk": chunk,
+                        "score": score,
+                        "min_order": min_order,
+                        "max_order": max_order,
+                        "all_chunks": sorted(
+                            all_chunks, key=lambda c: c.metadata.get("order", 0)
+                        ),
+                    }
+                )
+
+            # Merge overlapping/adjacent ranges
+            merged_ranges = self._merge_overlapping_ranges(expanded_ranges)
+
+            # Create merged chunks
+            for merged_range in merged_ranges:
+                combined_content_parts = [c.content for c in merged_range["all_chunks"]]
+
+                # Use the first original chunk for metadata
+                original_chunk = merged_range["original_chunks"][0]
+
+                merged_chunk = Chunk(
+                    id=original_chunk.id,
+                    document_id=original_chunk.document_id,
+                    content="".join(combined_content_parts),
+                    metadata=original_chunk.metadata,
+                    document_uri=original_chunk.document_uri,
+                    document_meta=original_chunk.document_meta,
+                )
+
+                # Use the highest score from merged chunks
+                best_score = max(merged_range["scores"])
+                results.append((merged_chunk, best_score))
+
+        return results
+
+    def _merge_overlapping_ranges(self, expanded_ranges):
+        """Merge overlapping or adjacent expanded ranges."""
+        if not expanded_ranges:
+            return []
+
+        # Sort by min_order
+        sorted_ranges = sorted(expanded_ranges, key=lambda x: x["min_order"])
+        merged = []
+
+        current = {
+            "min_order": sorted_ranges[0]["min_order"],
+            "max_order": sorted_ranges[0]["max_order"],
+            "original_chunks": [sorted_ranges[0]["original_chunk"]],
+            "scores": [sorted_ranges[0]["score"]],
+            "all_chunks": sorted_ranges[0]["all_chunks"],
+        }
+
+        for range_info in sorted_ranges[1:]:
+            # Check if ranges overlap or are adjacent (max_order + 1 >= min_order)
+            if current["max_order"] >= range_info["min_order"] - 1:
+                # Merge ranges
+                current["max_order"] = max(
+                    current["max_order"], range_info["max_order"]
+                )
+                current["original_chunks"].append(range_info["original_chunk"])
+                current["scores"].append(range_info["score"])
+
+                # Merge all_chunks and deduplicate by order
+                all_chunks_dict = {}
+                for chunk in current["all_chunks"] + range_info["all_chunks"]:
+                    order = chunk.metadata.get("order", 0)
+                    all_chunks_dict[order] = chunk
+                current["all_chunks"] = [
+                    all_chunks_dict[order] for order in sorted(all_chunks_dict.keys())
+                ]
+            else:
+                # No overlap, add current to merged and start new
+                merged.append(current)
+                current = {
+                    "min_order": range_info["min_order"],
+                    "max_order": range_info["max_order"],
+                    "original_chunks": [range_info["original_chunk"]],
+                    "scores": [range_info["score"]],
+                    "all_chunks": range_info["all_chunks"],
+                }

+        # Add the last range
+        merged.append(current)
+        return merged
+
+    async def ask(self, question: str, cite: bool = False) -> str:
         """Ask a question using the configured QA agent.

         Args:
             question: The question to ask.
+            cite: Whether to include citations in the response.

         Returns:
             The generated answer as a string.
         """
         from haiku.rag.qa import get_qa_agent

-        qa_agent = get_qa_agent(self)
+        qa_agent = get_qa_agent(self, use_citations=cite)
         return await qa_agent.answer(question)

     async def rebuild_database(self) -> AsyncGenerator[int, None]:
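
At its core, `_merge_overlapping_ranges` is the classic interval-merge algorithm applied to chunk `order` indices, with touching ranges (`max_order >= min_order - 1`) treated as mergeable so adjacent context windows fuse into one span. A standalone sketch of just that rule:

```python
# Standalone illustration of the merge rule in _merge_overlapping_ranges:
# ranges of chunk "order" indices merge when they overlap or touch.
def merge_ranges(ranges: list[tuple[int, int]]) -> list[tuple[int, int]]:
    if not ranges:
        return []
    ranges = sorted(ranges)
    merged = [ranges[0]]
    for lo, hi in ranges[1:]:
        last_lo, last_hi = merged[-1]
        if last_hi >= lo - 1:  # overlapping or adjacent
            merged[-1] = (last_lo, max(last_hi, hi))
        else:
            merged.append((lo, hi))
    return merged


# Chunks 2-4 and 4-6 overlap; 8-9 touches neither: two merged spans remain.
print(merge_ranges([(2, 4), (8, 9), (4, 6)]))  # [(2, 6), (8, 9)]
```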

{haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/config.py

@@ -19,14 +19,14 @@ class AppConfig(BaseModel):
     EMBEDDINGS_MODEL: str = "mxbai-embed-large"
     EMBEDDINGS_VECTOR_DIM: int = 1024

-    RERANK: bool = True
-    RERANK_PROVIDER: str = "mxbai"
-    RERANK_MODEL: str = "mixedbread-ai/mxbai-rerank-base-v2"
+    RERANK_PROVIDER: str = "ollama"
+    RERANK_MODEL: str = "qwen3"

     QA_PROVIDER: str = "ollama"
     QA_MODEL: str = "qwen3"

     CHUNK_SIZE: int = 256
+    CONTEXT_CHUNK_RADIUS: int = 0

     OLLAMA_BASE_URL: str = "http://localhost:11434"

{haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/qa/__init__.py

@@ -4,12 +4,16 @@ from haiku.rag.qa.base import QuestionAnswerAgentBase
 from haiku.rag.qa.ollama import QuestionAnswerOllamaAgent


-def get_qa_agent(client: HaikuRAG, model: str = "") -> QuestionAnswerAgentBase:
+def get_qa_agent(
+    client: HaikuRAG, model: str = "", use_citations: bool = False
+) -> QuestionAnswerAgentBase:
     """
     Factory function to get the appropriate QA agent based on the configuration.
     """
     if Config.QA_PROVIDER == "ollama":
-        return QuestionAnswerOllamaAgent(client, model or Config.QA_MODEL)
+        return QuestionAnswerOllamaAgent(
+            client, model or Config.QA_MODEL, use_citations
+        )

     if Config.QA_PROVIDER == "openai":
         try:
@@ -20,7 +24,9 @@ def get_qa_agent(client: HaikuRAG, model: str = "") -> QuestionAnswerAgentBase:
                 "Please install haiku.rag with the 'openai' extra:"
                 "uv pip install haiku.rag[openai]"
             )
-        return QuestionAnswerOpenAIAgent(client, model or Config.QA_MODEL)
+        return QuestionAnswerOpenAIAgent(
+            client, model or Config.QA_MODEL, use_citations
+        )

     if Config.QA_PROVIDER == "anthropic":
         try:
@@ -31,6 +37,8 @@ def get_qa_agent(client: HaikuRAG, model: str = "") -> QuestionAnswerAgentBase:
                 "Please install haiku.rag with the 'anthropic' extra:"
                 "uv pip install haiku.rag[anthropic]"
             )
-        return QuestionAnswerAnthropicAgent(client, model or Config.QA_MODEL)
+        return QuestionAnswerAnthropicAgent(
+            client, model or Config.QA_MODEL, use_citations
+        )

     raise ValueError(f"Unsupported QA provider: {Config.QA_PROVIDER}")
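
`client.ask(question, cite=True)` goes through this factory internally, but it can also be called directly; a minimal sketch, assuming the default Ollama provider and an existing database:

```python
# Direct use of the factory; client.ask(question, cite=True) does this for you.
import asyncio

from haiku.rag.client import HaikuRAG
from haiku.rag.qa import get_qa_agent


async def main() -> None:
    async with HaikuRAG("database.db") as client:
        # QA_PROVIDER/QA_MODEL select the backend; use_citations swaps the prompt.
        agent = get_qa_agent(client, use_citations=True)
        print(await agent.answer("Who is the author of haiku.rag?"))


asyncio.run(main())
```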

{haiku_rag-0.5.1 → haiku_rag-0.5.4}/src/haiku/rag/qa/anthropic.py

@@ -1,19 +1,29 @@
 from collections.abc import Sequence

 try:
-    from anthropic import AsyncAnthropic
-    from anthropic.types import MessageParam, TextBlock, ToolParam, ToolUseBlock
+    from anthropic import AsyncAnthropic  # type: ignore
+    from anthropic.types import (  # type: ignore
+        MessageParam,
+        TextBlock,
+        ToolParam,
+        ToolUseBlock,
+    )

     from haiku.rag.client import HaikuRAG
     from haiku.rag.qa.base import QuestionAnswerAgentBase

     class QuestionAnswerAnthropicAgent(QuestionAnswerAgentBase):
-        def __init__(self, client: HaikuRAG, model: str = "claude-3-5-haiku-20241022"):
-            super().__init__(client, model or self._model)
+        def __init__(
+            self,
+            client: HaikuRAG,
+            model: str = "claude-3-5-haiku-20241022",
+            use_citations: bool = False,
+        ):
+            super().__init__(client, model or self._model, use_citations)
             self.tools: Sequence[ToolParam] = [
                 ToolParam(
                     name="search_documents",
-                    description="Search the knowledge base for relevant documents",
+                    description="Search the knowledge base for relevant documents. Returns a JSON array with content, score, and document_uri for each result.",
                     input_schema={
                         "type": "object",
                         "properties": {
@@ -69,18 +79,10 @@ try:
                             else 3
                         )

-                        search_results = await self._client.search(
+                        context = await self._search_and_expand(
                             query, limit=limit
                         )

-                        context_chunks = []
-                        for chunk, score in search_results:
-                            context_chunks.append(
-                                f"Content: {chunk.content}\nScore: {score:.4f}"
-                            )
-
-                        context = "\n\n".join(context_chunks)
-
                         tool_results.append(
                             {
                                 "type": "tool_result",

haiku_rag-0.5.4/src/haiku/rag/qa/base.py (new file)

@@ -0,0 +1,89 @@
+import json
+
+from haiku.rag.client import HaikuRAG
+from haiku.rag.qa.prompts import SYSTEM_PROMPT, SYSTEM_PROMPT_WITH_CITATIONS
+
+
+class QuestionAnswerAgentBase:
+    _model: str = ""
+    _system_prompt: str = SYSTEM_PROMPT
+
+    def __init__(self, client: HaikuRAG, model: str = "", use_citations: bool = False):
+        self._model = model
+        self._client = client
+        self._system_prompt = (
+            SYSTEM_PROMPT_WITH_CITATIONS if use_citations else SYSTEM_PROMPT
+        )
+
+    async def answer(self, question: str) -> str:
+        raise NotImplementedError(
+            "QABase is an abstract class. Please implement the answer method in a subclass."
+        )
+
+    async def _search_and_expand(self, query: str, limit: int = 3) -> str:
+        """Search for documents and expand context, then format as JSON"""
+        search_results = await self._client.search(query, limit=limit)
+        expanded_results = await self._client.expand_context(search_results)
+        return self._format_search_results(expanded_results)
+
+    def _format_search_results(self, search_results) -> str:
+        """Format search results as JSON list of {content, score, document_uri}"""
+        formatted_results = []
+        for chunk, score in search_results:
+            formatted_results.append(
+                {
+                    "content": chunk.content,
+                    "score": score,
+                    "document_uri": chunk.document_uri,
+                }
+            )
+        return json.dumps(formatted_results, indent=2)
+
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "search_documents",
+                "description": "Search the knowledge base for relevant documents. Returns a JSON array of search results.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string",
+                            "description": "The search query to find relevant documents",
+                        },
+                        "limit": {
+                            "type": "integer",
+                            "description": "Maximum number of results to return",
+                            "default": 3,
+                        },
+                    },
+                    "required": ["query"],
+                },
+                "returns": {
+                    "type": "string",
+                    "description": "JSON array of search results",
+                    "schema": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "content": {
+                                    "type": "string",
+                                    "description": "The document text content",
+                                },
+                                "score": {
+                                    "type": "number",
+                                    "description": "Relevance score (higher is more relevant)",
+                                },
+                                "document_uri": {
+                                    "type": "string",
+                                    "description": "Source URI/path of the document",
+                                },
+                            },
+                        },
+                    },
+                },
+            },
+        },
+    ]
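
Providers subclass this base and implement `answer`. A hypothetical minimal subclass (illustration only; the real Ollama/OpenAI/Anthropic agents drive `search_documents` through tool calls) shows how `_search_and_expand` and the citation-aware system prompt fit together:

```python
# Hypothetical minimal agent for illustration; not part of the package.
import asyncio

from haiku.rag.client import HaikuRAG
from haiku.rag.qa.base import QuestionAnswerAgentBase


class EchoContextAgent(QuestionAnswerAgentBase):
    async def answer(self, question: str) -> str:
        # One retrieval round: search, expand adjacent chunks, format as JSON.
        context = await self._search_and_expand(question, limit=3)
        # A real agent would send the system prompt, context, and question to
        # an LLM; here we just return the assembled prompt for inspection.
        return f"{self._system_prompt}\n\nContext:\n{context}\n\nQuestion: {question}"


async def main() -> None:
    async with HaikuRAG("database.db") as client:
        agent = EchoContextAgent(client, use_citations=True)
        print(await agent.answer("Who is the author of haiku.rag?"))


asyncio.run(main())
```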