haiku.rag 0.5.2__tar.gz → 0.5.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of haiku.rag might be problematic.
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/PKG-INFO +8 -1
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/README.md +7 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/docs/cli.md +6 -1
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/docs/configuration.md +6 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/docs/python.md +28 -1
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/pyproject.toml +1 -1
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/app.py +2 -2
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/cli.py +6 -1
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/client.py +129 -2
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/config.py +1 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/qa/__init__.py +12 -4
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/qa/anthropic.py +16 -14
- haiku_rag-0.5.4/src/haiku/rag/qa/base.py +89 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/qa/ollama.py +8 -12
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/qa/openai.py +13 -16
- haiku_rag-0.5.4/src/haiku/rag/qa/prompts.py +58 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/store/repositories/chunk.py +46 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/tests/test_app.py +35 -1
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/tests/test_chunk.py +53 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/tests/test_cli.py +29 -1
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/tests/test_client.py +261 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/tests/test_search.py +1 -1
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/uv.lock +1 -1
- haiku_rag-0.5.2/src/haiku/rag/qa/base.py +0 -41
- haiku_rag-0.5.2/src/haiku/rag/qa/prompts.py +0 -21
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/.github/FUNDING.yml +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/.github/workflows/build-docs.yml +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/.github/workflows/build-publish.yml +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/.gitignore +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/.pre-commit-config.yaml +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/.python-version +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/LICENSE +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/docs/benchmarks.md +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/docs/index.md +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/docs/installation.md +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/docs/mcp.md +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/docs/server.md +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/mkdocs.yml +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/__init__.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/chunker.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/embeddings/__init__.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/embeddings/base.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/embeddings/ollama.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/embeddings/openai.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/embeddings/voyageai.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/logging.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/mcp.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/monitor.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/reader.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/reranking/__init__.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/reranking/base.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/reranking/cohere.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/reranking/mxbai.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/reranking/ollama.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/store/__init__.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/store/engine.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/store/models/__init__.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/store/models/chunk.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/store/models/document.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/store/repositories/__init__.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/store/repositories/base.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/store/repositories/document.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/store/repositories/settings.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/store/upgrades/__init__.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/store/upgrades/v0_3_4.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/utils.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/tests/__init__.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/tests/conftest.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/tests/generate_benchmark_db.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/tests/llm_judge.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/tests/test_chunker.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/tests/test_document.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/tests/test_embedder.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/tests/test_monitor.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/tests/test_qa.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/tests/test_reader.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/tests/test_rebuild.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/tests/test_reranker.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/tests/test_settings.py +0 -0
- {haiku_rag-0.5.2 → haiku_rag-0.5.4}/tests/test_utils.py +0 -0
{haiku_rag-0.5.2 → haiku_rag-0.5.4}/PKG-INFO

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.5.2
+Version: 0.5.4
 Summary: Retrieval Augmented Generation (RAG) with SQLite
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT
@@ -76,6 +76,9 @@ haiku-rag search "query"
 # Ask questions
 haiku-rag ask "Who is the author of haiku.rag?"
 
+# Ask questions with citations
+haiku-rag ask "Who is the author of haiku.rag?" --cite
+
 # Rebuild database (re-chunk and re-embed all documents)
 haiku-rag rebuild
 
@@ -101,6 +104,10 @@ async with HaikuRAG("database.db") as client:
     # Ask questions
     answer = await client.ask("Who is the author of haiku.rag?")
     print(answer)
+
+    # Ask questions with citations
+    answer = await client.ask("Who is the author of haiku.rag?", cite=True)
+    print(answer)
 ```
 
 ## MCP Server
````
{haiku_rag-0.5.2 → haiku_rag-0.5.4}/README.md

````diff
@@ -33,6 +33,9 @@ haiku-rag search "query"
 # Ask questions
 haiku-rag ask "Who is the author of haiku.rag?"
 
+# Ask questions with citations
+haiku-rag ask "Who is the author of haiku.rag?" --cite
+
 # Rebuild database (re-chunk and re-embed all documents)
 haiku-rag rebuild
 
@@ -58,6 +61,10 @@ async with HaikuRAG("database.db") as client:
     # Ask questions
     answer = await client.ask("Who is the author of haiku.rag?")
     print(answer)
+
+    # Ask questions with citations
+    answer = await client.ask("Who is the author of haiku.rag?", cite=True)
+    print(answer)
 ```
 
 ## MCP Server
````
{haiku_rag-0.5.2 → haiku_rag-0.5.4}/docs/cli.md

````diff
@@ -64,7 +64,12 @@ Ask questions about your documents:
 haiku-rag ask "Who is the author of haiku.rag?"
 ```
 
-The QA agent will search your documents for relevant information and provide a comprehensive answer.
+Ask questions with citations showing source documents:
+```bash
+haiku-rag ask "Who is the author of haiku.rag?" --cite
+```
+
+The QA agent will search your documents for relevant information and provide a comprehensive answer. With `--cite`, responses include citations showing which documents were used.
 
 ## Configuration
 
````
{haiku_rag-0.5.2 → haiku_rag-0.5.4}/docs/configuration.md

````diff
@@ -172,4 +172,10 @@ DEFAULT_DATA_DIR="/path/to/data"
 ```bash
 # Chunk size for document processing
 CHUNK_SIZE=256
+
+# Number of adjacent chunks to include before/after retrieved chunks for context
+# 0 = no expansion (default), 1 = include 1 chunk before and after, etc.
+# When expanded chunks overlap or are adjacent, they are automatically merged
+# into single chunks with continuous content to eliminate duplication
+CONTEXT_CHUNK_RADIUS=0
 ```
````
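To make the new setting's semantics concrete: a hit on a chunk with position `order` and radius `r` pulls in orders from `order - r` through `order + r`, clipped at the start of the document. A small illustrative sketch (the helper below is hypothetical, not part of the package; the clamping behaviour is an assumption):

```python
# Hypothetical helper illustrating CONTEXT_CHUNK_RADIUS semantics; clamping at
# order 0 is an assumption about how missing neighbors are simply absent.
def expanded_orders(order: int, radius: int) -> list[int]:
    return [o for o in range(order - radius, order + radius + 1) if o >= 0]

assert expanded_orders(5, 1) == [4, 5, 6]  # radius 1: one chunk on each side
assert expanded_orders(0, 2) == [0, 1, 2]  # no chunks exist before the first one
```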
{haiku_rag-0.5.2 → haiku_rag-0.5.4}/docs/python.md

````diff
@@ -130,6 +130,26 @@ for chunk, relevance_score in results:
     print(f"Document metadata: {chunk.document_meta}")
 ```
 
+### Expanding Search Context
+
+Expand search results with adjacent chunks for more complete context:
+
+```python
+# Get initial search results
+search_results = await client.search("machine learning", limit=3)
+
+# Expand with adjacent chunks based on CONTEXT_CHUNK_RADIUS setting
+expanded_results = await client.expand_context(search_results)
+
+# The expanded results contain chunks with combined content from adjacent chunks
+for chunk, score in expanded_results:
+    print(f"Expanded content: {chunk.content}")  # Now includes before/after chunks
+```
+
+**Smart Merging**: When expanded chunks overlap or are adjacent within the same document, they are automatically merged into single chunks with continuous content. This eliminates duplication and provides coherent text blocks. The merged chunk uses the highest relevance score from the original chunks.
+
+This is automatically used by the QA system when `CONTEXT_CHUNK_RADIUS > 0` to provide better answers with more complete context.
+
 ## Question Answering
 
 Ask questions about your documents:
@@ -139,6 +159,13 @@ answer = await client.ask("Who is the author of haiku.rag?")
 print(answer)
 ```
 
-The QA agent will search your documents for relevant information and use the configured LLM to generate a comprehensive answer.
+Ask questions with citations showing source documents:
+
+```python
+answer = await client.ask("Who is the author of haiku.rag?", cite=True)
+print(answer)
+```
+
+The QA agent will search your documents for relevant information and use the configured LLM to generate a comprehensive answer. With `cite=True`, responses include citations showing which documents were used as sources.
 
 The QA provider and model can be configured via environment variables (see [Configuration](configuration.md)).
````
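The documentation snippets above run inside an `async with` block; for completeness, a minimal self-contained script exercising the documented API (the database path and question are placeholders):

```python
import asyncio

from haiku.rag.client import HaikuRAG


async def main() -> None:
    # The async context manager and ask() signature come from the docs above.
    async with HaikuRAG("database.db") as client:
        print(await client.ask("Who is the author of haiku.rag?"))
        # New in this release: the cited variant
        print(await client.ask("Who is the author of haiku.rag?", cite=True))


asyncio.run(main())
```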
{haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/app.py

````diff
@@ -62,10 +62,10 @@ class HaikuRAGApp:
         for chunk, score in results:
             self._rich_print_search_result(chunk, score)
 
-    async def ask(self, question: str):
+    async def ask(self, question: str, cite: bool = False):
         async with HaikuRAG(db_path=self.db_path) as self.client:
             try:
-                answer = await self.client.ask(question)
+                answer = await self.client.ask(question, cite=cite)
                 self.console.print(f"[bold blue]Question:[/bold blue] {question}")
                 self.console.print()
                 self.console.print("[bold green]Answer:[/bold green]")
````
{haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/cli.py

````diff
@@ -160,9 +160,14 @@ def ask(
         "--db",
         help="Path to the SQLite database file",
     ),
+    cite: bool = typer.Option(
+        False,
+        "--cite",
+        help="Include citations in the response",
+    ),
 ):
     app = HaikuRAGApp(db_path=db)
-    asyncio.run(app.ask(question=question))
+    asyncio.run(app.ask(question=question, cite=cite))
 
 
 @cli.command("settings", help="Display current configuration settings")
````
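The change follows the standard Typer pattern: a boolean `--cite` option threaded into an async entry point. A standalone sketch of the same pattern (the `demo` command and its body are hypothetical, not the package's CLI):

```python
import asyncio

import typer

cli = typer.Typer()


@cli.command()
def demo(
    question: str,
    cite: bool = typer.Option(False, "--cite", help="Include citations in the response"),
):
    async def run() -> None:
        print(f"question={question!r} cite={cite}")  # stand-in for app.ask(...)

    asyncio.run(run())


if __name__ == "__main__":
    cli()
```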
{haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/client.py

````diff
@@ -348,18 +348,145 @@ class HaikuRAG:
         # Return reranked results with scores from reranker
         return reranked_results
 
-    async def ask(self, question: str) -> str:
+    async def expand_context(
+        self, search_results: list[tuple[Chunk, float]]
+    ) -> list[tuple[Chunk, float]]:
+        """Expand search results with adjacent chunks, merging overlapping chunks.
+
+        Args:
+            search_results: List of (chunk, score) tuples from search.
+
+        Returns:
+            List of (chunk, score) tuples with expanded and merged context chunks.
+        """
+        if Config.CONTEXT_CHUNK_RADIUS == 0:
+            return search_results
+
+        # Group chunks by document_id to handle merging within documents
+        document_groups = {}
+        for chunk, score in search_results:
+            doc_id = chunk.document_id
+            if doc_id not in document_groups:
+                document_groups[doc_id] = []
+            document_groups[doc_id].append((chunk, score))
+
+        results = []
+
+        for doc_id, doc_chunks in document_groups.items():
+            # Get all expanded ranges for this document
+            expanded_ranges = []
+            for chunk, score in doc_chunks:
+                adjacent_chunks = await self.chunk_repository.get_adjacent_chunks(
+                    chunk, Config.CONTEXT_CHUNK_RADIUS
+                )
+
+                all_chunks = adjacent_chunks + [chunk]
+
+                # Get the range of orders for this expanded chunk
+                orders = [c.metadata.get("order", 0) for c in all_chunks]
+                min_order = min(orders)
+                max_order = max(orders)
+
+                expanded_ranges.append(
+                    {
+                        "original_chunk": chunk,
+                        "score": score,
+                        "min_order": min_order,
+                        "max_order": max_order,
+                        "all_chunks": sorted(
+                            all_chunks, key=lambda c: c.metadata.get("order", 0)
+                        ),
+                    }
+                )
+
+            # Merge overlapping/adjacent ranges
+            merged_ranges = self._merge_overlapping_ranges(expanded_ranges)
+
+            # Create merged chunks
+            for merged_range in merged_ranges:
+                combined_content_parts = [c.content for c in merged_range["all_chunks"]]
+
+                # Use the first original chunk for metadata
+                original_chunk = merged_range["original_chunks"][0]
+
+                merged_chunk = Chunk(
+                    id=original_chunk.id,
+                    document_id=original_chunk.document_id,
+                    content="".join(combined_content_parts),
+                    metadata=original_chunk.metadata,
+                    document_uri=original_chunk.document_uri,
+                    document_meta=original_chunk.document_meta,
+                )
+
+                # Use the highest score from merged chunks
+                best_score = max(merged_range["scores"])
+                results.append((merged_chunk, best_score))
+
+        return results
+
+    def _merge_overlapping_ranges(self, expanded_ranges):
+        """Merge overlapping or adjacent expanded ranges."""
+        if not expanded_ranges:
+            return []
+
+        # Sort by min_order
+        sorted_ranges = sorted(expanded_ranges, key=lambda x: x["min_order"])
+        merged = []
+
+        current = {
+            "min_order": sorted_ranges[0]["min_order"],
+            "max_order": sorted_ranges[0]["max_order"],
+            "original_chunks": [sorted_ranges[0]["original_chunk"]],
+            "scores": [sorted_ranges[0]["score"]],
+            "all_chunks": sorted_ranges[0]["all_chunks"],
+        }
+
+        for range_info in sorted_ranges[1:]:
+            # Check if ranges overlap or are adjacent (max_order + 1 >= min_order)
+            if current["max_order"] >= range_info["min_order"] - 1:
+                # Merge ranges
+                current["max_order"] = max(
+                    current["max_order"], range_info["max_order"]
+                )
+                current["original_chunks"].append(range_info["original_chunk"])
+                current["scores"].append(range_info["score"])
+
+                # Merge all_chunks and deduplicate by order
+                all_chunks_dict = {}
+                for chunk in current["all_chunks"] + range_info["all_chunks"]:
+                    order = chunk.metadata.get("order", 0)
+                    all_chunks_dict[order] = chunk
+                current["all_chunks"] = [
+                    all_chunks_dict[order] for order in sorted(all_chunks_dict.keys())
+                ]
+            else:
+                # No overlap, add current to merged and start new
+                merged.append(current)
+                current = {
+                    "min_order": range_info["min_order"],
+                    "max_order": range_info["max_order"],
+                    "original_chunks": [range_info["original_chunk"]],
+                    "scores": [range_info["score"]],
+                    "all_chunks": range_info["all_chunks"],
+                }
+
+        # Add the last range
+        merged.append(current)
+        return merged
+
+    async def ask(self, question: str, cite: bool = False) -> str:
         """Ask a question using the configured QA agent.
 
         Args:
             question: The question to ask.
+            cite: Whether to include citations in the response.
 
         Returns:
             The generated answer as a string.
         """
         from haiku.rag.qa import get_qa_agent
 
-        qa_agent = get_qa_agent(self)
+        qa_agent = get_qa_agent(self, use_citations=cite)
         return await qa_agent.answer(question)
 
     async def rebuild_database(self) -> AsyncGenerator[int, None]:
````
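The interval arithmetic in `_merge_overlapping_ranges` is easier to see with bare integers. A self-contained sketch of the same merge rule, where `(min_order, max_order)` ranges that overlap or touch collapse into one, mirroring the `current["max_order"] >= range_info["min_order"] - 1` test above:

```python
def merge_ranges(ranges: list[tuple[int, int]]) -> list[tuple[int, int]]:
    """Merge (min_order, max_order) ranges that overlap or are adjacent."""
    merged: list[tuple[int, int]] = []
    for lo, hi in sorted(ranges):
        if merged and merged[-1][1] >= lo - 1:  # overlap, or touching (gap of 0)
            merged[-1] = (merged[-1][0], max(merged[-1][1], hi))
        else:
            merged.append((lo, hi))
    return merged


# Hits on chunks 4 and 6 with radius 1 expand to (3, 5) and (5, 7), which merge:
assert merge_ranges([(3, 5), (5, 7)]) == [(3, 7)]
# Adjacent ranges merge too; distant ones stay separate:
assert merge_ranges([(0, 1), (2, 3), (10, 11)]) == [(0, 3), (10, 11)]
```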
{haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/qa/__init__.py

````diff
@@ -4,12 +4,16 @@ from haiku.rag.qa.base import QuestionAnswerAgentBase
 from haiku.rag.qa.ollama import QuestionAnswerOllamaAgent
 
 
-def get_qa_agent(client: HaikuRAG, model: str = "") -> QuestionAnswerAgentBase:
+def get_qa_agent(
+    client: HaikuRAG, model: str = "", use_citations: bool = False
+) -> QuestionAnswerAgentBase:
     """
     Factory function to get the appropriate QA agent based on the configuration.
     """
     if Config.QA_PROVIDER == "ollama":
-        return QuestionAnswerOllamaAgent(client, model or Config.QA_MODEL)
+        return QuestionAnswerOllamaAgent(
+            client, model or Config.QA_MODEL, use_citations
+        )
 
     if Config.QA_PROVIDER == "openai":
         try:
@@ -20,7 +24,9 @@ def get_qa_agent(client: HaikuRAG, model: str = "") -> QuestionAnswerAgentBase:
                 "Please install haiku.rag with the 'openai' extra:"
                 "uv pip install haiku.rag[openai]"
             )
-        return QuestionAnswerOpenAIAgent(client, model or Config.QA_MODEL)
+        return QuestionAnswerOpenAIAgent(
+            client, model or Config.QA_MODEL, use_citations
+        )
 
     if Config.QA_PROVIDER == "anthropic":
         try:
@@ -31,6 +37,8 @@ def get_qa_agent(client: HaikuRAG, model: str = "") -> QuestionAnswerAgentBase:
                 "Please install haiku.rag with the 'anthropic' extra:"
                 "uv pip install haiku.rag[anthropic]"
             )
-        return QuestionAnswerAnthropicAgent(client, model or Config.QA_MODEL)
+        return QuestionAnswerAnthropicAgent(
+            client, model or Config.QA_MODEL, use_citations
+        )
 
     raise ValueError(f"Unsupported QA provider: {Config.QA_PROVIDER}")
````
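A hedged usage sketch for the extended factory (provider selection still comes from `Config.QA_PROVIDER`; the database path is a placeholder):

```python
from haiku.rag.client import HaikuRAG
from haiku.rag.qa import get_qa_agent


async def answer_with_citations(question: str) -> str:
    async with HaikuRAG("database.db") as client:
        # use_citations=True selects SYSTEM_PROMPT_WITH_CITATIONS in the base class
        agent = get_qa_agent(client, use_citations=True)
        return await agent.answer(question)
```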
{haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/qa/anthropic.py

````diff
@@ -1,19 +1,29 @@
 from collections.abc import Sequence
 
 try:
-    from anthropic import AsyncAnthropic
-    from anthropic.types import MessageParam, TextBlock, ToolParam, ToolUseBlock
+    from anthropic import AsyncAnthropic  # type: ignore
+    from anthropic.types import (  # type: ignore
+        MessageParam,
+        TextBlock,
+        ToolParam,
+        ToolUseBlock,
+    )
 
     from haiku.rag.client import HaikuRAG
     from haiku.rag.qa.base import QuestionAnswerAgentBase
 
     class QuestionAnswerAnthropicAgent(QuestionAnswerAgentBase):
-        def __init__(self, client: HaikuRAG, model: str = "claude-3-5-haiku-20241022"):
-            super().__init__(client, model or self._model)
+        def __init__(
+            self,
+            client: HaikuRAG,
+            model: str = "claude-3-5-haiku-20241022",
+            use_citations: bool = False,
+        ):
+            super().__init__(client, model or self._model, use_citations)
             self.tools: Sequence[ToolParam] = [
                 ToolParam(
                     name="search_documents",
-                    description="Search the knowledge base for relevant documents",
+                    description="Search the knowledge base for relevant documents. Returns a JSON array with content, score, and document_uri for each result.",
                     input_schema={
                         "type": "object",
                         "properties": {
@@ -69,18 +79,10 @@ try:
                             else 3
                         )
 
-                        search_results = await self.client.search(
+                        context = await self._search_and_expand(
                             query, limit=limit
                         )
 
-                        context_chunks = []
-                        for chunk, score in search_results:
-                            context_chunks.append(
-                                f"Content: {chunk.content}\nScore: {score:.4f}"
-                            )
-
-                        context = "\n\n".join(context_chunks)
-
                         tool_results.append(
                             {
                                 "type": "tool_result",
````
haiku_rag-0.5.4/src/haiku/rag/qa/base.py (new file)

````diff
@@ -0,0 +1,89 @@
+import json
+
+from haiku.rag.client import HaikuRAG
+from haiku.rag.qa.prompts import SYSTEM_PROMPT, SYSTEM_PROMPT_WITH_CITATIONS
+
+
+class QuestionAnswerAgentBase:
+    _model: str = ""
+    _system_prompt: str = SYSTEM_PROMPT
+
+    def __init__(self, client: HaikuRAG, model: str = "", use_citations: bool = False):
+        self._model = model
+        self._client = client
+        self._system_prompt = (
+            SYSTEM_PROMPT_WITH_CITATIONS if use_citations else SYSTEM_PROMPT
+        )
+
+    async def answer(self, question: str) -> str:
+        raise NotImplementedError(
+            "QABase is an abstract class. Please implement the answer method in a subclass."
+        )
+
+    async def _search_and_expand(self, query: str, limit: int = 3) -> str:
+        """Search for documents and expand context, then format as JSON"""
+        search_results = await self._client.search(query, limit=limit)
+        expanded_results = await self._client.expand_context(search_results)
+        return self._format_search_results(expanded_results)
+
+    def _format_search_results(self, search_results) -> str:
+        """Format search results as JSON list of {content, score, document_uri}"""
+        formatted_results = []
+        for chunk, score in search_results:
+            formatted_results.append(
+                {
+                    "content": chunk.content,
+                    "score": score,
+                    "document_uri": chunk.document_uri,
+                }
+            )
+        return json.dumps(formatted_results, indent=2)
+
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "search_documents",
+                "description": "Search the knowledge base for relevant documents. Returns a JSON array of search results.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string",
+                            "description": "The search query to find relevant documents",
+                        },
+                        "limit": {
+                            "type": "integer",
+                            "description": "Maximum number of results to return",
+                            "default": 3,
+                        },
+                    },
+                    "required": ["query"],
+                },
+                "returns": {
+                    "type": "string",
+                    "description": "JSON array of search results",
+                    "schema": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "content": {
+                                    "type": "string",
+                                    "description": "The document text content",
+                                },
+                                "score": {
+                                    "type": "number",
+                                    "description": "Relevance score (higher is more relevant)",
+                                },
+                                "document_uri": {
+                                    "type": "string",
+                                    "description": "Source URI/path of the document",
+                                },
+                            },
+                        },
+                    },
+                },
+            },
+        }
+    ]
````
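To show how the shared helpers are meant to be consumed, here is a minimal hypothetical subclass (not one of the package's providers) that skips the LLM entirely and just returns the retrieved context:

```python
from haiku.rag.client import HaikuRAG
from haiku.rag.qa.base import QuestionAnswerAgentBase


class EchoContextAgent(QuestionAnswerAgentBase):
    """Hypothetical agent: answers with the raw retrieved context."""

    async def answer(self, question: str) -> str:
        # _search_and_expand() searches, applies expand_context(), and returns
        # the JSON string built by _format_search_results().
        return await self._search_and_expand(question, limit=3)
```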
{haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/qa/ollama.py

````diff
@@ -8,8 +8,13 @@ OLLAMA_OPTIONS = {"temperature": 0.0, "seed": 42, "num_ctx": 16384}
 
 
 class QuestionAnswerOllamaAgent(QuestionAnswerAgentBase):
-    def __init__(self, client: HaikuRAG, model: str = Config.QA_MODEL):
-        super().__init__(client, model or self._model)
+    def __init__(
+        self,
+        client: HaikuRAG,
+        model: str = Config.QA_MODEL,
+        use_citations: bool = False,
+    ):
+        super().__init__(client, model or self._model, use_citations)
 
     async def answer(self, question: str) -> str:
         ollama_client = AsyncClient(host=Config.OLLAMA_BASE_URL)
@@ -39,16 +44,7 @@ class QuestionAnswerOllamaAgent(QuestionAnswerAgentBase):
                     query = args.get("query", question)
                     limit = int(args.get("limit", 3))
 
-                    search_results = await self.client.search(query, limit=limit)
-
-                    context_chunks = []
-                    for chunk, score in search_results:
-                        context_chunks.append(
-                            f"Content: {chunk.content}\nScore: {score:.4f}"
-                        )
-
-                    context = "\n\n".join(context_chunks)
-
+                    context = await self._search_and_expand(query, limit=limit)
                     messages.append(
                         {
                             "role": "tool",
````
{haiku_rag-0.5.2 → haiku_rag-0.5.4}/src/haiku/rag/qa/openai.py

````diff
@@ -1,22 +1,29 @@
 from collections.abc import Sequence
 
 try:
-    from openai import AsyncOpenAI
-    from openai.types.chat import (
+    from openai import AsyncOpenAI  # type: ignore
+    from openai.types.chat import (  # type: ignore
         ChatCompletionAssistantMessageParam,
         ChatCompletionMessageParam,
         ChatCompletionSystemMessageParam,
         ChatCompletionToolMessageParam,
         ChatCompletionUserMessageParam,
     )
-    from openai.types.chat.chat_completion_tool_param import ChatCompletionToolParam
+    from openai.types.chat.chat_completion_tool_param import (  # type: ignore
+        ChatCompletionToolParam,
+    )
 
     from haiku.rag.client import HaikuRAG
     from haiku.rag.qa.base import QuestionAnswerAgentBase
 
     class QuestionAnswerOpenAIAgent(QuestionAnswerAgentBase):
-        def __init__(self, client: HaikuRAG, model: str = "gpt-4o-mini"):
-            super().__init__(client, model or self._model)
+        def __init__(
+            self,
+            client: HaikuRAG,
+            model: str = "gpt-4o-mini",
+            use_citations: bool = False,
+        ):
+            super().__init__(client, model or self._model, use_citations)
             self.tools: Sequence[ChatCompletionToolParam] = [
                 ChatCompletionToolParam(tool) for tool in self.tools
             ]
@@ -70,17 +77,7 @@ try:
                     query = args.get("query", question)
                     limit = int(args.get("limit", 3))
 
-                    search_results = await self.client.search(
-                        query, limit=limit
-                    )
-
-                    context_chunks = []
-                    for chunk, score in search_results:
-                        context_chunks.append(
-                            f"Content: {chunk.content}\nScore: {score:.4f}"
-                        )
-
-                    context = "\n\n".join(context_chunks)
+                    context = await self._search_and_expand(query, limit=limit)
 
                     messages.append(
                         ChatCompletionToolMessageParam(
````
haiku_rag-0.5.4/src/haiku/rag/qa/prompts.py (new file)

````diff
@@ -0,0 +1,58 @@
+SYSTEM_PROMPT = """
+You are a knowledgeable assistant that helps users find information from a document knowledge base.
+
+Your process:
+1. When a user asks a question, use the search_documents tool to find relevant information
+2. Search with specific keywords and phrases from the user's question
+3. Review the search results and their relevance scores
+4. If you need additional context, perform follow-up searches with different keywords
+5. Provide a short and to the point comprehensive answer based only on the retrieved documents
+
+Guidelines:
+- Base your answers strictly on the provided document content
+- Quote or reference specific information when possible
+- If multiple documents contain relevant information, synthesize them coherently
+- Indicate when information is incomplete or when you need to search for additional context
+- If the retrieved documents don't contain sufficient information, clearly state: "I cannot find enough information in the knowledge base to answer this question."
+- For complex questions, consider breaking them down and performing multiple searches
+- Stick to the answer, do not elaborate or provide context unless explicitly asked for it.
+
+Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
+"""
+
+SYSTEM_PROMPT_WITH_CITATIONS = """
+You are a knowledgeable assistant that helps users find information from a document knowledge base.
+
+IMPORTANT: You MUST use the search_documents tool for every question. Do not answer any question without first searching the knowledge base.
+
+Your process:
+1. IMMEDIATELY call the search_documents tool with relevant keywords from the user's question
+2. Review the search results and their relevance scores
+3. If you need additional context, perform follow-up searches with different keywords
+4. Provide a short and to the point comprehensive answer based only on the retrieved documents
+5. Always include citations for the sources used in your answer
+
+Guidelines:
+- Base your answers strictly on the provided document content
+- If multiple documents contain relevant information, synthesize them coherently
+- Indicate when information is incomplete or when you need to search for additional context
+- If the retrieved documents don't contain sufficient information, clearly state: "I cannot find enough information in the knowledge base to answer this question."
+- For complex questions, consider breaking them down and performing multiple searches
+- Stick to the answer, do not elaborate or provide context unless explicitly asked for it.
+- ALWAYS include citations at the end of your response using the format below
+
+Citation Format:
+After your answer, include a "Citations:" section that lists:
+- The document URI from each search result used
+- A brief excerpt (first 50-100 characters) of the content that supported your answer
+- Format: "Citations:\n- [document_uri]: [content_excerpt]..."
+
+Example response format:
+[Your answer here]
+
+Citations:
+- /path/to/document1.pdf: "This document explains that AFMAN stands for Air Force Manual..."
+- /path/to/document2.pdf: "The manual provides guidance on military procedures and..."
+
+Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
+"""
````