haiku.rag 0.3.0__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of haiku.rag might be problematic. Click here for more details.

Files changed (66) hide show
  1. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/PKG-INFO +7 -1
  2. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/README.md +4 -0
  3. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/docs/cli.md +17 -0
  4. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/docs/configuration.md +16 -0
  5. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/docs/index.md +1 -1
  6. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/docs/installation.md +6 -0
  7. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/docs/python.md +7 -0
  8. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/pyproject.toml +2 -1
  9. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/app.py +51 -0
  10. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/cli.py +21 -0
  11. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/client.py +24 -0
  12. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/config.py +3 -0
  13. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/qa/__init__.py +13 -0
  14. haiku_rag-0.3.1/src/haiku/rag/qa/anthropic.py +112 -0
  15. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/store/repositories/chunk.py +16 -0
  16. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/tests/test_qa.py +34 -2
  17. haiku_rag-0.3.1/tests/test_rebuild.py +52 -0
  18. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/uv.lock +24 -2
  19. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/.github/FUNDING.yml +0 -0
  20. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/.github/workflows/build-docs.yml +0 -0
  21. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/.github/workflows/build-publish.yml +0 -0
  22. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/.gitignore +0 -0
  23. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/.pre-commit-config.yaml +0 -0
  24. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/.python-version +0 -0
  25. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/BENCHMARKS.md +0 -0
  26. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/LICENSE +0 -0
  27. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/docs/mcp.md +0 -0
  28. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/docs/server.md +0 -0
  29. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/mkdocs.yml +0 -0
  30. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/__init__.py +0 -0
  31. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/chunker.py +0 -0
  32. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/embeddings/__init__.py +0 -0
  33. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/embeddings/base.py +0 -0
  34. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/embeddings/ollama.py +0 -0
  35. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/embeddings/openai.py +0 -0
  36. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/embeddings/voyageai.py +0 -0
  37. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/logging.py +0 -0
  38. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/mcp.py +0 -0
  39. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/monitor.py +0 -0
  40. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/qa/base.py +0 -0
  41. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/qa/ollama.py +0 -0
  42. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/qa/openai.py +0 -0
  43. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/qa/prompts.py +0 -0
  44. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/reader.py +0 -0
  45. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/store/__init__.py +0 -0
  46. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/store/engine.py +0 -0
  47. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/store/models/__init__.py +0 -0
  48. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/store/models/chunk.py +0 -0
  49. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/store/models/document.py +0 -0
  50. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/store/repositories/__init__.py +0 -0
  51. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/store/repositories/base.py +0 -0
  52. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/store/repositories/document.py +0 -0
  53. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/src/haiku/rag/utils.py +0 -0
  54. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/tests/__init__.py +0 -0
  55. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/tests/conftest.py +0 -0
  56. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/tests/generate_benchmark_db.py +0 -0
  57. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/tests/llm_judge.py +0 -0
  58. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/tests/test_app.py +0 -0
  59. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/tests/test_chunk.py +0 -0
  60. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/tests/test_chunker.py +0 -0
  61. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/tests/test_cli.py +0 -0
  62. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/tests/test_client.py +0 -0
  63. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/tests/test_document.py +0 -0
  64. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/tests/test_embedder.py +0 -0
  65. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/tests/test_monitor.py +0 -0
  66. {haiku_rag-0.3.0 → haiku_rag-0.3.1}/tests/test_search.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: haiku.rag
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: Retrieval Augmented Generation (RAG) with SQLite
5
5
  Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
6
6
  License: MIT
@@ -29,6 +29,8 @@ Requires-Dist: sqlite-vec>=0.1.6
29
29
  Requires-Dist: tiktoken>=0.9.0
30
30
  Requires-Dist: typer>=0.16.0
31
31
  Requires-Dist: watchfiles>=1.1.0
32
+ Provides-Extra: anthropic
33
+ Requires-Dist: anthropic>=0.56.0; extra == 'anthropic'
32
34
  Provides-Extra: openai
33
35
  Requires-Dist: openai>=1.0.0; extra == 'openai'
34
36
  Provides-Extra: voyageai
@@ -45,6 +47,7 @@ Retrieval-Augmented Generation (RAG) library on SQLite.
45
47
 
46
48
  - **Local SQLite**: No external servers required
47
49
  - **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
50
+ - **Multiple QA providers**: Ollama, OpenAI, Anthropic
48
51
  - **Hybrid search**: Vector + full-text search with Reciprocal Rank Fusion
49
52
  - **Question answering**: Built-in QA agents on your documents
50
53
  - **File monitoring**: Auto-index files when run as server
@@ -68,6 +71,9 @@ haiku-rag search "query"
68
71
  # Ask questions
69
72
  haiku-rag ask "Who is the author of haiku.rag?"
70
73
 
74
+ # Rebuild database (re-chunk and re-embed all documents)
75
+ haiku-rag rebuild
76
+
71
77
  # Start server with file monitoring
72
78
  export MONITOR_DIRECTORIES="/path/to/docs"
73
79
  haiku-rag serve
@@ -8,6 +8,7 @@ Retrieval-Augmented Generation (RAG) library on SQLite.
8
8
 
9
9
  - **Local SQLite**: No external servers required
10
10
  - **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
11
+ - **Multiple QA providers**: Ollama, OpenAI, Anthropic
11
12
  - **Hybrid search**: Vector + full-text search with Reciprocal Rank Fusion
12
13
  - **Question answering**: Built-in QA agents on your documents
13
14
  - **File monitoring**: Auto-index files when run as server
@@ -31,6 +32,9 @@ haiku-rag search "query"
31
32
  # Ask questions
32
33
  haiku-rag ask "Who is the author of haiku.rag?"
33
34
 
35
+ # Rebuild database (re-chunk and re-embed all documents)
36
+ haiku-rag rebuild
37
+
34
38
  # Start server with file monitoring
35
39
  export MONITOR_DIRECTORIES="/path/to/docs"
36
40
  haiku-rag serve
@@ -35,6 +35,16 @@ haiku-rag get 1
35
35
  haiku-rag delete 1
36
36
  ```
37
37
 
38
+ ### Rebuild Database
39
+
40
+ Rebuild the database by deleting all chunks & embeddings and re-indexing all documents:
41
+
42
+ ```bash
43
+ haiku-rag rebuild
44
+ ```
45
+
46
+ Use this after changing settings that affect indexing, such as the embedding model or chunk size.
47
+
38
48
  ## Search
39
49
 
40
50
  Basic search:
@@ -56,6 +66,13 @@ haiku-rag ask "Who is the author of haiku.rag?"
56
66
 
57
67
  The QA agent will search your documents for relevant information and provide a comprehensive answer.
58
68
 
69
+ ## Configuration
70
+
71
+ View current configuration settings:
72
+ ```bash
73
+ haiku-rag settings
74
+ ```
75
+
59
76
  ## Server
60
77
 
61
78
  Start the MCP server:
@@ -84,6 +84,22 @@ QA_MODEL="gpt-4o-mini" # or gpt-4, gpt-3.5-turbo, etc.
84
84
  OPENAI_API_KEY="your-api-key"
85
85
  ```
86
86
 
87
+ ### Anthropic
88
+
89
+ For Anthropic QA, you need to install haiku.rag with Anthropic extras:
90
+
91
+ ```bash
92
+ uv pip install "haiku.rag[anthropic]"
93
+ ```
94
+
95
+ Then configure:
96
+
97
+ ```bash
98
+ QA_PROVIDER="anthropic"
99
+ QA_MODEL="claude-3-5-haiku-20241022" # or claude-3-5-sonnet-20241022, etc.
100
+ ANTHROPIC_API_KEY="your-api-key"
101
+ ```
102
+
87
103
  ## Other Settings
88
104
 
89
105
  ### Database and Storage
@@ -8,7 +8,7 @@
8
8
  - **Local SQLite**: No need to run additional servers
9
9
  - **Support for various embedding providers**: Ollama, VoyageAI, OpenAI or add your own
10
10
  - **Hybrid Search**: Vector search using `sqlite-vec` combined with full-text search `FTS5`, using Reciprocal Rank Fusion
11
- - **Question Answering**: Built-in QA agents using Ollama or OpenAI.
11
+ - **Question Answering**: Built-in QA agents using Ollama, OpenAI, or Anthropic.
12
12
  - **File monitoring**: Automatically index files when run as a server
13
13
  - **Extended file format support**: Parse 40+ file formats including PDF, DOCX, HTML, Markdown, audio and more. Or add a URL!
14
14
  - **MCP server**: Exposes functionality as MCP tools
@@ -24,6 +24,12 @@ uv pip install haiku.rag --extra voyageai
24
24
  uv pip install "haiku.rag[openai]"
25
25
  ```
26
26
 
27
+ ### Anthropic
28
+
29
+ ```bash
30
+ uv pip install "haiku.rag[anthropic]"
31
+ ```
32
+
27
33
  ## Requirements
28
34
 
29
35
  - Python 3.10+
@@ -67,6 +67,13 @@ await client.update_document(doc)
67
67
  await client.delete_document(doc.id)
68
68
  ```
69
69
 
70
+ ### Rebuilding the Database
71
+
72
+ ```python
73
+ async for doc_id in client.rebuild_database():
74
+ print(f"Processed document {doc_id}")
75
+ ```
76
+
70
77
  ## Searching Documents
71
78
 
72
79
  Basic search:
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "haiku.rag"
3
- version = "0.3.0"
3
+ version = "0.3.1"
4
4
  description = "Retrieval Augmented Generation (RAG) with SQLite"
5
5
  authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
6
6
  license = { text = "MIT" }
@@ -38,6 +38,7 @@ dependencies = [
38
38
  [project.optional-dependencies]
39
39
  voyageai = ["voyageai>=0.3.2"]
40
40
  openai = ["openai>=1.0.0"]
41
+ anthropic = ["anthropic>=0.56.0"]
41
42
 
42
43
  [project.scripts]
43
44
  haiku-rag = "haiku.rag.cli:cli"
@@ -3,6 +3,7 @@ from pathlib import Path
3
3
 
4
4
  from rich.console import Console
5
5
  from rich.markdown import Markdown
6
+ from rich.progress import Progress
6
7
 
7
8
  from haiku.rag.client import HaikuRAG
8
9
  from haiku.rag.config import Config
@@ -72,6 +73,50 @@ class HaikuRAGApp:
72
73
  except Exception as e:
73
74
  self.console.print(f"[red]Error: {e}[/red]")
74
75
 
76
+ async def rebuild(self):
77
+ async with HaikuRAG(db_path=self.db_path) as client:
78
+ try:
79
+ documents = await client.list_documents()
80
+ total_docs = len(documents)
81
+
82
+ if total_docs == 0:
83
+ self.console.print(
84
+ "[yellow]No documents found in database.[/yellow]"
85
+ )
86
+ return
87
+
88
+ self.console.print(
89
+ f"[b]Rebuilding database with {total_docs} documents...[/b]"
90
+ )
91
+ with Progress() as progress:
92
+ task = progress.add_task("Rebuilding...", total=total_docs)
93
+ async for _ in client.rebuild_database():
94
+ progress.update(task, advance=1)
95
+
96
+ self.console.print("[b]Database rebuild completed successfully.[/b]")
97
+ except Exception as e:
98
+ self.console.print(f"[red]Error rebuilding database: {e}[/red]")
99
+
100
+ def show_settings(self):
101
+ """Display current configuration settings."""
102
+ self.console.print("[bold]haiku.rag configuration[/bold]")
103
+ self.console.print()
104
+
105
+ # Get all config fields dynamically
106
+ for field_name, field_value in Config.model_dump().items():
107
+ # Format the display value
108
+ if isinstance(field_value, str) and (
109
+ "key" in field_name.lower()
110
+ or "password" in field_name.lower()
111
+ or "token" in field_name.lower()
112
+ ):
113
+ # Hide sensitive values but show if they're set
114
+ display_value = "✓ Set" if field_value else "✗ Not set"
115
+ else:
116
+ display_value = field_value
117
+
118
+ self.console.print(f" [cyan]{field_name}[/cyan]: {display_value}")
119
+
75
120
  def _rich_print_document(self, doc: Document, truncate: bool = False):
76
121
  """Format a document for display."""
77
122
  if truncate:
@@ -99,6 +144,12 @@ class HaikuRAGApp:
99
144
  f"[repr.attrib_name]document_id[/repr.attrib_name]: {chunk.document_id} "
100
145
  f"[repr.attrib_name]score[/repr.attrib_name]: {score:.4f}"
101
146
  )
147
+ if chunk.document_uri:
148
+ self.console.print("[repr.attrib_name]document uri[/repr.attrib_name]:")
149
+ self.console.print(chunk.document_uri)
150
+ if chunk.document_meta:
151
+ self.console.print("[repr.attrib_name]document meta[/repr.attrib_name]:")
152
+ self.console.print(chunk.document_meta)
102
153
  self.console.print("[repr.attrib_name]content[/repr.attrib_name]:")
103
154
  self.console.print(content)
104
155
  self.console.rule()
@@ -128,6 +128,27 @@ def ask(
128
128
  event_loop.run_until_complete(app.ask(question=question))
129
129
 
130
130
 
131
+ @cli.command("settings", help="Display current configuration settings")
132
+ def settings():
133
+ app = HaikuRAGApp(db_path=Path()) # Don't need actual DB for settings
134
+ app.show_settings()
135
+
136
+
137
+ @cli.command(
138
+ "rebuild",
139
+ help="Rebuild the database by deleting all chunks and re-indexing all documents",
140
+ )
141
+ def rebuild(
142
+ db: Path = typer.Option(
143
+ get_default_data_dir() / "haiku.rag.sqlite",
144
+ "--db",
145
+ help="Path to the SQLite database file",
146
+ ),
147
+ ):
148
+ app = HaikuRAGApp(db_path=db)
149
+ event_loop.run_until_complete(app.rebuild())
150
+
151
+
131
152
  @cli.command(
132
153
  "serve", help="Start the haiku.rag MCP server (by default in streamable HTTP mode)"
133
154
  )
@@ -1,6 +1,7 @@
1
1
  import hashlib
2
2
  import mimetypes
3
3
  import tempfile
4
+ from collections.abc import AsyncGenerator
4
5
  from pathlib import Path
5
6
  from typing import Literal
6
7
  from urllib.parse import urlparse
@@ -270,6 +271,29 @@ class HaikuRAG:
270
271
  qa_agent = get_qa_agent(self)
271
272
  return await qa_agent.answer(question)
272
273
 
274
+ async def rebuild_database(self) -> AsyncGenerator[int, None]:
275
+ """Rebuild the database by deleting all chunks and re-indexing all documents.
276
+
277
+ Yields:
278
+ int: The ID of the document currently being processed
279
+ """
280
+ documents = await self.list_documents()
281
+
282
+ if not documents:
283
+ return
284
+
285
+ await self.chunk_repository.delete_all()
286
+
287
+ for doc in documents:
288
+ if doc.id is not None:
289
+ await self.chunk_repository.create_chunks_for_document(
290
+ doc.id, doc.content, commit=False
291
+ )
292
+ yield doc.id
293
+
294
+ if self.store._connection:
295
+ self.store._connection.commit()
296
+
273
297
  def close(self):
274
298
  """Close the underlying store connection."""
275
299
  self.store.close()
@@ -30,6 +30,7 @@ class AppConfig(BaseModel):
30
30
  # Provider keys
31
31
  VOYAGE_API_KEY: str = ""
32
32
  OPENAI_API_KEY: str = ""
33
+ ANTHROPIC_API_KEY: str = ""
33
34
 
34
35
  @field_validator("MONITOR_DIRECTORIES", mode="before")
35
36
  @classmethod
@@ -49,3 +50,5 @@ if Config.OPENAI_API_KEY:
49
50
  os.environ["OPENAI_API_KEY"] = Config.OPENAI_API_KEY
50
51
  if Config.VOYAGE_API_KEY:
51
52
  os.environ["VOYAGE_API_KEY"] = Config.VOYAGE_API_KEY
53
+ if Config.ANTHROPIC_API_KEY:
54
+ os.environ["ANTHROPIC_API_KEY"] = Config.ANTHROPIC_API_KEY
@@ -23,4 +23,17 @@ def get_qa_agent(client: HaikuRAG, model: str = "") -> QuestionAnswerAgentBase:
23
23
  )
24
24
  return QuestionAnswerOpenAIAgent(client, model or "gpt-4o-mini")
25
25
 
26
+ if Config.QA_PROVIDER == "anthropic":
27
+ try:
28
+ from haiku.rag.qa.anthropic import QuestionAnswerAnthropicAgent
29
+ except ImportError:
30
+ raise ImportError(
31
+ "Anthropic QA agent requires the 'anthropic' package. "
32
+ "Please install haiku.rag with the 'anthropic' extra:"
33
+ "uv pip install haiku.rag --extra anthropic"
34
+ )
35
+ return QuestionAnswerAnthropicAgent(
36
+ client, model or "claude-3-5-haiku-20241022"
37
+ )
38
+
26
39
  raise ValueError(f"Unsupported QA provider: {Config.QA_PROVIDER}")
@@ -0,0 +1,112 @@
1
+ from collections.abc import Sequence
2
+
3
+ try:
4
+ from anthropic import AsyncAnthropic
5
+ from anthropic.types import MessageParam, TextBlock, ToolParam, ToolUseBlock
6
+
7
+ from haiku.rag.client import HaikuRAG
8
+ from haiku.rag.qa.base import QuestionAnswerAgentBase
9
+
10
+ class QuestionAnswerAnthropicAgent(QuestionAnswerAgentBase):
11
+ def __init__(self, client: HaikuRAG, model: str = "claude-3-5-haiku-20241022"):
12
+ super().__init__(client, model or self._model)
13
+ self.tools: Sequence[ToolParam] = [
14
+ ToolParam(
15
+ name="search_documents",
16
+ description="Search the knowledge base for relevant documents",
17
+ input_schema={
18
+ "type": "object",
19
+ "properties": {
20
+ "query": {
21
+ "type": "string",
22
+ "description": "The search query to find relevant documents",
23
+ },
24
+ "limit": {
25
+ "type": "integer",
26
+ "description": "Maximum number of results to return",
27
+ "default": 3,
28
+ },
29
+ },
30
+ "required": ["query"],
31
+ },
32
+ )
33
+ ]
34
+
35
+ async def answer(self, question: str) -> str:
36
+ anthropic_client = AsyncAnthropic()
37
+
38
+ messages: list[MessageParam] = [{"role": "user", "content": question}]
39
+
40
+ response = await anthropic_client.messages.create(
41
+ model=self._model,
42
+ max_tokens=4096,
43
+ system=self._system_prompt,
44
+ messages=messages,
45
+ tools=self.tools,
46
+ temperature=0.0,
47
+ )
48
+
49
+ if response.stop_reason == "tool_use":
50
+ messages.append({"role": "assistant", "content": response.content})
51
+
52
+ # Process tool calls
53
+ tool_results = []
54
+ for content_block in response.content:
55
+ if isinstance(content_block, ToolUseBlock):
56
+ if content_block.name == "search_documents":
57
+ args = content_block.input
58
+ query = (
59
+ args.get("query", question)
60
+ if isinstance(args, dict)
61
+ else question
62
+ )
63
+ limit = (
64
+ int(args.get("limit", 3))
65
+ if isinstance(args, dict)
66
+ else 3
67
+ )
68
+
69
+ search_results = await self._client.search(
70
+ query, limit=limit
71
+ )
72
+
73
+ context_chunks = []
74
+ for chunk, score in search_results:
75
+ context_chunks.append(
76
+ f"Content: {chunk.content}\nScore: {score:.4f}"
77
+ )
78
+
79
+ context = "\n\n".join(context_chunks)
80
+
81
+ tool_results.append(
82
+ {
83
+ "type": "tool_result",
84
+ "tool_use_id": content_block.id,
85
+ "content": context,
86
+ }
87
+ )
88
+
89
+ if tool_results:
90
+ messages.append({"role": "user", "content": tool_results})
91
+
92
+ final_response = await anthropic_client.messages.create(
93
+ model=self._model,
94
+ max_tokens=4096,
95
+ system=self._system_prompt,
96
+ messages=messages,
97
+ temperature=0.0,
98
+ )
99
+ if final_response.content:
100
+ first_content = final_response.content[0]
101
+ if isinstance(first_content, TextBlock):
102
+ return first_content.text
103
+ return ""
104
+
105
+ if response.content:
106
+ first_content = response.content[0]
107
+ if isinstance(first_content, TextBlock):
108
+ return first_content.text
109
+ return ""
110
+
111
+ except ImportError:
112
+ pass
@@ -208,6 +208,22 @@ class ChunkRepository(BaseRepository[Chunk]):
208
208
 
209
209
  return created_chunks
210
210
 
211
+ async def delete_all(self, commit: bool = True) -> bool:
212
+ """Delete all chunks from the database."""
213
+ if self.store._connection is None:
214
+ raise ValueError("Store connection is not available")
215
+
216
+ cursor = self.store._connection.cursor()
217
+
218
+ cursor.execute("DELETE FROM chunks_fts")
219
+ cursor.execute("DELETE FROM chunk_embeddings")
220
+ cursor.execute("DELETE FROM chunks")
221
+
222
+ deleted = cursor.rowcount > 0
223
+ if commit:
224
+ self.store._connection.commit()
225
+ return deleted
226
+
211
227
  async def delete_by_document_id(
212
228
  self, document_id: int, commit: bool = True
213
229
  ) -> bool:
@@ -12,11 +12,19 @@ except ImportError:
12
12
  QuestionAnswerOpenAIAgent = None
13
13
  OPENAI_AVAILABLE = False
14
14
 
15
+ try:
16
+ from haiku.rag.qa.anthropic import QuestionAnswerAnthropicAgent
17
+
18
+ ANTHROPIC_AVAILABLE = True
19
+ except ImportError:
20
+ QuestionAnswerAnthropicAgent = None
21
+ ANTHROPIC_AVAILABLE = False
22
+
15
23
  from .llm_judge import LLMJudge
16
24
 
17
25
 
18
26
  @pytest.mark.asyncio
19
- async def test_qa_ollama_with_dataset_question(qa_corpus: Dataset):
27
+ async def test_qa_ollama(qa_corpus: Dataset):
20
28
  """Test QA with actual question from the dataset using LLM judge."""
21
29
  client = HaikuRAG(":memory:")
22
30
  qa = QuestionAnswerOllamaAgent(client)
@@ -40,7 +48,7 @@ async def test_qa_ollama_with_dataset_question(qa_corpus: Dataset):
40
48
 
41
49
  @pytest.mark.asyncio
42
50
  @pytest.mark.skipif(not OPENAI_AVAILABLE, reason="OpenAI not available")
43
- async def test_qa_openai_basic(qa_corpus: Dataset):
51
+ async def test_qa_openai(qa_corpus: Dataset):
44
52
  """Test OpenAI QA basic functionality."""
45
53
  client = HaikuRAG(":memory:")
46
54
  qa = QuestionAnswerOpenAIAgent(client) # type: ignore
@@ -60,3 +68,27 @@ async def test_qa_openai_basic(qa_corpus: Dataset):
60
68
  assert is_equivalent, (
61
69
  f"Generated answer not equivalent to expected answer.\nQuestion: {question}\nGenerated: {answer}\nExpected: {expected_answer}"
62
70
  )
71
+
72
+
73
+ @pytest.mark.asyncio
74
+ @pytest.mark.skipif(not ANTHROPIC_AVAILABLE, reason="Anthropic not available")
75
+ async def test_qa_anthropic(qa_corpus: Dataset):
76
+ """Test Anthropic QA basic functionality."""
77
+ client = HaikuRAG(":memory:")
78
+ qa = QuestionAnswerAnthropicAgent(client) # type: ignore
79
+ llm_judge = LLMJudge()
80
+
81
+ doc = qa_corpus[1]
82
+ await client.create_document(
83
+ content=doc["document_extracted"], uri=doc["document_id"]
84
+ )
85
+
86
+ question = doc["question"]
87
+ expected_answer = doc["answer"]
88
+
89
+ answer = await qa.answer(question)
90
+ is_equivalent = await llm_judge.judge_answers(question, answer, expected_answer)
91
+
92
+ assert is_equivalent, (
93
+ f"Generated answer not equivalent to expected answer.\nQuestion: {question}\nGenerated: {answer}\nExpected: {expected_answer}"
94
+ )
@@ -0,0 +1,52 @@
1
+ import pytest
2
+ from datasets import Dataset
3
+
4
+ from haiku.rag.client import HaikuRAG
5
+ from haiku.rag.store.models.document import Document
6
+
7
+
8
+ @pytest.mark.asyncio
9
+ async def test_rebuild_database(qa_corpus: Dataset):
10
+ """Test rebuild functionality with existing documents."""
11
+ client = HaikuRAG(":memory:")
12
+
13
+ created_docs: list[Document] = []
14
+ for content in qa_corpus["document_extracted"][:3]:
15
+ doc = await client.create_document(
16
+ content=content,
17
+ )
18
+ created_docs.append(doc)
19
+
20
+ documents_before = await client.list_documents()
21
+ assert len(documents_before) == 3
22
+
23
+ chunks_before = []
24
+ for doc in created_docs:
25
+ assert doc.id is not None
26
+ doc_chunks = await client.chunk_repository.get_by_document_id(doc.id)
27
+ chunks_before.extend(doc_chunks)
28
+
29
+ assert len(chunks_before) > 0
30
+
31
+ # Perform rebuild
32
+ processed_doc_ids = []
33
+ async for doc_id in client.rebuild_database():
34
+ processed_doc_ids.append(doc_id)
35
+
36
+ # Verify all documents were processed
37
+ expected_doc_ids = [doc.id for doc in created_docs]
38
+ assert set(processed_doc_ids) == set(expected_doc_ids)
39
+
40
+ documents_after = await client.list_documents()
41
+ assert len(documents_after) == 3
42
+
43
+ # Verify chunks were recreated
44
+ chunks_after = []
45
+ for doc in documents_after:
46
+ if doc.id is not None:
47
+ doc_chunks = await client.chunk_repository.get_by_document_id(doc.id)
48
+ chunks_after.extend(doc_chunks)
49
+
50
+ assert len(chunks_after) > 0
51
+
52
+ client.close()
@@ -133,6 +133,24 @@ wheels = [
133
133
  { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" },
134
134
  ]
135
135
 
136
+ [[package]]
137
+ name = "anthropic"
138
+ version = "0.56.0"
139
+ source = { registry = "https://pypi.org/simple" }
140
+ dependencies = [
141
+ { name = "anyio" },
142
+ { name = "distro" },
143
+ { name = "httpx" },
144
+ { name = "jiter" },
145
+ { name = "pydantic" },
146
+ { name = "sniffio" },
147
+ { name = "typing-extensions" },
148
+ ]
149
+ sdist = { url = "https://files.pythonhosted.org/packages/4d/40/0c4eb5728466849803782c8a86eb315af1a6eb0efea6a751de120ab845c9/anthropic-0.56.0.tar.gz", hash = "sha256:56fa9eb61afa004a1664bc85eed071e77b96c579b77395e9cc893097e599f72e", size = 421538, upload-time = "2025-07-01T19:39:10.805Z" }
150
+ wheels = [
151
+ { url = "https://files.pythonhosted.org/packages/e5/90/7f4d4084f9c35c3ea3e784646ec12f9b2c8cf8743b2bb5489252659b5bda/anthropic-0.56.0-py3-none-any.whl", hash = "sha256:91f1f74abdcf0958d3296b657304588cc244b1107b89f973ff6f511afdacfc56", size = 289603, upload-time = "2025-07-01T19:39:08.794Z" },
152
+ ]
153
+
136
154
  [[package]]
137
155
  name = "anyio"
138
156
  version = "4.9.0"
@@ -798,7 +816,7 @@ wheels = [
798
816
 
799
817
  [[package]]
800
818
  name = "haiku-rag"
801
- version = "0.3.0"
819
+ version = "0.3.1"
802
820
  source = { editable = "." }
803
821
  dependencies = [
804
822
  { name = "fastmcp" },
@@ -815,6 +833,9 @@ dependencies = [
815
833
  ]
816
834
 
817
835
  [package.optional-dependencies]
836
+ anthropic = [
837
+ { name = "anthropic" },
838
+ ]
818
839
  openai = [
819
840
  { name = "openai" },
820
841
  ]
@@ -837,6 +858,7 @@ dev = [
837
858
 
838
859
  [package.metadata]
839
860
  requires-dist = [
861
+ { name = "anthropic", marker = "extra == 'anthropic'", specifier = ">=0.56.0" },
840
862
  { name = "fastmcp", specifier = ">=2.8.1" },
841
863
  { name = "httpx", specifier = ">=0.28.1" },
842
864
  { name = "markitdown", extras = ["audio-transcription", "docx", "pdf", "pptx", "xlsx"], specifier = ">=0.1.2" },
@@ -851,7 +873,7 @@ requires-dist = [
851
873
  { name = "voyageai", marker = "extra == 'voyageai'", specifier = ">=0.3.2" },
852
874
  { name = "watchfiles", specifier = ">=1.1.0" },
853
875
  ]
854
- provides-extras = ["voyageai", "openai"]
876
+ provides-extras = ["voyageai", "openai", "anthropic"]
855
877
 
856
878
  [package.metadata.requires-dev]
857
879
  dev = [
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes