haiku.rag 0.5.1__py3-none-any.whl → 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of haiku.rag might be problematic.

haiku/rag/app.py CHANGED
@@ -32,9 +32,9 @@ class HaikuRAGApp:
  f"[b]Document with id [cyan]{doc.id}[/cyan] added successfully.[/b]"
  )
 
- async def add_document_from_source(self, file_path: Path):
+ async def add_document_from_source(self, source: str):
  async with HaikuRAG(db_path=self.db_path) as self.client:
- doc = await self.client.create_document_from_source(file_path)
+ doc = await self.client.create_document_from_source(source)
  self._rich_print_document(doc, truncate=True)
  self.console.print(
  f"[b]Document with id [cyan]{doc.id}[/cyan] added successfully.[/b]"
@@ -62,10 +62,10 @@ class HaikuRAGApp:
  for chunk, score in results:
  self._rich_print_search_result(chunk, score)
 
- async def ask(self, question: str):
+ async def ask(self, question: str, cite: bool = False):
  async with HaikuRAG(db_path=self.db_path) as self.client:
  try:
- answer = await self.client.ask(question)
+ answer = await self.client.ask(question, cite=cite)
  self.console.print(f"[bold blue]Question:[/bold blue] {question}")
  self.console.print()
  self.console.print("[bold green]Answer:[/bold green]")
haiku/rag/cli.py CHANGED
@@ -81,7 +81,7 @@ def add_document_text(
 
  @cli.command("add-src", help="Add a document from a file path or URL")
  def add_document_src(
- file_path: Path = typer.Argument(
+ source: str = typer.Argument(
  help="The file path or URL of the document to add",
  ),
  db: Path = typer.Option(
@@ -91,7 +91,7 @@ def add_document_src(
  ),
  ):
  app = HaikuRAGApp(db_path=db)
- asyncio.run(app.add_document_from_source(file_path=file_path))
+ asyncio.run(app.add_document_from_source(source=source))
 
 
  @cli.command("get", help="Get and display a document by its ID")
@@ -160,9 +160,14 @@ def ask(
  "--db",
  help="Path to the SQLite database file",
  ),
+ cite: bool = typer.Option(
+ False,
+ "--cite",
+ help="Include citations in the response",
+ ),
  ):
  app = HaikuRAGApp(db_path=db)
- asyncio.run(app.ask(question=question))
+ asyncio.run(app.ask(question=question, cite=cite))
 
 
  @cli.command("settings", help="Display current configuration settings")
haiku/rag/client.py CHANGED
@@ -319,7 +319,7 @@ class HaikuRAG:
  return await self.document_repository.list_all(limit=limit, offset=offset)
 
  async def search(
- self, query: str, limit: int = 5, k: int = 60, rerank=Config.RERANK
+ self, query: str, limit: int = 5, k: int = 60
  ) -> list[tuple[Chunk, float]]:
  """Search for relevant chunks using hybrid search (vector similarity + full-text search) with reranking.
 
@@ -331,8 +331,10 @@ class HaikuRAG:
  Returns:
  List of (chunk, score) tuples ordered by relevance.
  """
+ # Get reranker if available
+ reranker = get_reranker()
 
- if not rerank:
+ if reranker is None:
  return await self.chunk_repository.search_chunks_hybrid(query, limit, k)
 
  # Get more initial results (3X) for reranking
@@ -340,25 +342,151 @@
  query, limit * 3, k
  )
  # Apply reranking
- reranker = get_reranker()
  chunks = [chunk for chunk, _ in search_results]
  reranked_results = await reranker.rerank(query, chunks, top_n=limit)
 
  # Return reranked results with scores from reranker
  return reranked_results
 
- async def ask(self, question: str) -> str:
+ async def expand_context(
+ self, search_results: list[tuple[Chunk, float]]
+ ) -> list[tuple[Chunk, float]]:
+ """Expand search results with adjacent chunks, merging overlapping chunks.
+
+ Args:
+ search_results: List of (chunk, score) tuples from search.
+
+ Returns:
+ List of (chunk, score) tuples with expanded and merged context chunks.
+ """
+ if Config.CONTEXT_CHUNK_RADIUS == 0:
+ return search_results
+
+ # Group chunks by document_id to handle merging within documents
+ document_groups = {}
+ for chunk, score in search_results:
+ doc_id = chunk.document_id
+ if doc_id not in document_groups:
+ document_groups[doc_id] = []
+ document_groups[doc_id].append((chunk, score))
+
+ results = []
+
+ for doc_id, doc_chunks in document_groups.items():
+ # Get all expanded ranges for this document
+ expanded_ranges = []
+ for chunk, score in doc_chunks:
+ adjacent_chunks = await self.chunk_repository.get_adjacent_chunks(
+ chunk, Config.CONTEXT_CHUNK_RADIUS
+ )
+
+ all_chunks = adjacent_chunks + [chunk]
+
+ # Get the range of orders for this expanded chunk
+ orders = [c.metadata.get("order", 0) for c in all_chunks]
+ min_order = min(orders)
+ max_order = max(orders)
+
+ expanded_ranges.append(
+ {
+ "original_chunk": chunk,
+ "score": score,
+ "min_order": min_order,
+ "max_order": max_order,
+ "all_chunks": sorted(
+ all_chunks, key=lambda c: c.metadata.get("order", 0)
+ ),
+ }
+ )
+
+ # Merge overlapping/adjacent ranges
+ merged_ranges = self._merge_overlapping_ranges(expanded_ranges)
+
+ # Create merged chunks
+ for merged_range in merged_ranges:
+ combined_content_parts = [c.content for c in merged_range["all_chunks"]]
+
+ # Use the first original chunk for metadata
+ original_chunk = merged_range["original_chunks"][0]
+
+ merged_chunk = Chunk(
+ id=original_chunk.id,
+ document_id=original_chunk.document_id,
+ content="".join(combined_content_parts),
+ metadata=original_chunk.metadata,
+ document_uri=original_chunk.document_uri,
+ document_meta=original_chunk.document_meta,
+ )
+
+ # Use the highest score from merged chunks
+ best_score = max(merged_range["scores"])
+ results.append((merged_chunk, best_score))
+
+ return results
+
+ def _merge_overlapping_ranges(self, expanded_ranges):
+ """Merge overlapping or adjacent expanded ranges."""
+ if not expanded_ranges:
+ return []
+
+ # Sort by min_order
+ sorted_ranges = sorted(expanded_ranges, key=lambda x: x["min_order"])
+ merged = []
+
+ current = {
+ "min_order": sorted_ranges[0]["min_order"],
+ "max_order": sorted_ranges[0]["max_order"],
+ "original_chunks": [sorted_ranges[0]["original_chunk"]],
+ "scores": [sorted_ranges[0]["score"]],
+ "all_chunks": sorted_ranges[0]["all_chunks"],
+ }
+
+ for range_info in sorted_ranges[1:]:
+ # Check if ranges overlap or are adjacent (max_order + 1 >= min_order)
+ if current["max_order"] >= range_info["min_order"] - 1:
+ # Merge ranges
+ current["max_order"] = max(
+ current["max_order"], range_info["max_order"]
+ )
+ current["original_chunks"].append(range_info["original_chunk"])
+ current["scores"].append(range_info["score"])
+
+ # Merge all_chunks and deduplicate by order
+ all_chunks_dict = {}
+ for chunk in current["all_chunks"] + range_info["all_chunks"]:
+ order = chunk.metadata.get("order", 0)
+ all_chunks_dict[order] = chunk
+ current["all_chunks"] = [
+ all_chunks_dict[order] for order in sorted(all_chunks_dict.keys())
+ ]
+ else:
+ # No overlap, add current to merged and start new
+ merged.append(current)
+ current = {
+ "min_order": range_info["min_order"],
+ "max_order": range_info["max_order"],
+ "original_chunks": [range_info["original_chunk"]],
+ "scores": [range_info["score"]],
+ "all_chunks": range_info["all_chunks"],
+ }
+
+ # Add the last range
+ merged.append(current)
+ return merged
+
+ async def ask(self, question: str, cite: bool = False) -> str:
  """Ask a question using the configured QA agent.
 
  Args:
  question: The question to ask.
+ cite: Whether to include citations in the response.
 
  Returns:
  The generated answer as a string.
  """
  from haiku.rag.qa import get_qa_agent
 
- qa_agent = get_qa_agent(self)
+ qa_agent = get_qa_agent(self, use_citations=cite)
  return await qa_agent.answer(question)
 
  async def rebuild_database(self) -> AsyncGenerator[int, None]:
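
The new `expand_context` / `_merge_overlapping_ranges` pair is essentially an interval merge over chunk `order` windows. A standalone sketch of that merge, with illustrative names and values rather than the library API:

```python
# Illustrative sketch of the interval merge performed by _merge_overlapping_ranges:
# windows that overlap or touch are collapsed into a single context block.
def merge_windows(windows: list[tuple[int, int]]) -> list[tuple[int, int]]:
    merged: list[tuple[int, int]] = []
    for lo, hi in sorted(windows):
        if merged and merged[-1][1] >= lo - 1:  # overlap or adjacency
            merged[-1] = (merged[-1][0], max(merged[-1][1], hi))
        else:
            merged.append((lo, hi))
    return merged


# Hits at orders 4 and 6 with CONTEXT_CHUNK_RADIUS=1 expand to (3, 5) and (5, 7),
# which merge into one block; a distant hit stays separate.
print(merge_windows([(3, 5), (5, 7), (12, 14)]))  # [(3, 7), (12, 14)]
```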
haiku/rag/config.py CHANGED
@@ -19,14 +19,14 @@ class AppConfig(BaseModel):
  EMBEDDINGS_MODEL: str = "mxbai-embed-large"
  EMBEDDINGS_VECTOR_DIM: int = 1024
 
- RERANK: bool = True
- RERANK_PROVIDER: str = "mxbai"
- RERANK_MODEL: str = "mixedbread-ai/mxbai-rerank-base-v2"
+ RERANK_PROVIDER: str = "ollama"
+ RERANK_MODEL: str = "qwen3"
 
  QA_PROVIDER: str = "ollama"
  QA_MODEL: str = "qwen3"
 
  CHUNK_SIZE: int = 256
+ CONTEXT_CHUNK_RADIUS: int = 0
 
  OLLAMA_BASE_URL: str = "http://localhost:11434"
 
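
For anyone relying on the previous defaults: reranking now defaults to Ollama with `qwen3`, and the MxBAI reranker moves behind the `mxbai` extra (see the METADATA changes below). A hedged sketch of restoring the old behaviour, assuming the `mxbai` extra is installed and that the `Config` instance can be mutated before the first reranker is created:

```python
# Sketch only; the exact override mechanics depend on how AppConfig is populated in your setup.
from haiku.rag.config import Config

Config.RERANK_PROVIDER = "mxbai"  # previous default provider
Config.RERANK_MODEL = "mixedbread-ai/mxbai-rerank-base-v2"  # previous default model
Config.CONTEXT_CHUNK_RADIUS = 1  # new knob: also fetch one neighbouring chunk per hit (0 = off)
```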
haiku/rag/qa/__init__.py CHANGED
@@ -4,12 +4,16 @@ from haiku.rag.qa.base import QuestionAnswerAgentBase
  from haiku.rag.qa.ollama import QuestionAnswerOllamaAgent
 
 
- def get_qa_agent(client: HaikuRAG, model: str = "") -> QuestionAnswerAgentBase:
+ def get_qa_agent(
+ client: HaikuRAG, model: str = "", use_citations: bool = False
+ ) -> QuestionAnswerAgentBase:
  """
  Factory function to get the appropriate QA agent based on the configuration.
  """
  if Config.QA_PROVIDER == "ollama":
- return QuestionAnswerOllamaAgent(client, model or Config.QA_MODEL)
+ return QuestionAnswerOllamaAgent(
+ client, model or Config.QA_MODEL, use_citations
+ )
 
  if Config.QA_PROVIDER == "openai":
  try:
@@ -20,7 +24,9 @@ def get_qa_agent(client: HaikuRAG, model: str = "") -> QuestionAnswerAgentBase:
  "Please install haiku.rag with the 'openai' extra:"
  "uv pip install haiku.rag[openai]"
  )
- return QuestionAnswerOpenAIAgent(client, model or Config.QA_MODEL)
+ return QuestionAnswerOpenAIAgent(
+ client, model or Config.QA_MODEL, use_citations
+ )
 
  if Config.QA_PROVIDER == "anthropic":
  try:
@@ -31,6 +37,8 @@ def get_qa_agent(client: HaikuRAG, model: str = "") -> QuestionAnswerAgentBase:
  "Please install haiku.rag with the 'anthropic' extra:"
  "uv pip install haiku.rag[anthropic]"
  )
- return QuestionAnswerAnthropicAgent(client, model or Config.QA_MODEL)
+ return QuestionAnswerAnthropicAgent(
+ client, model or Config.QA_MODEL, use_citations
+ )
 
  raise ValueError(f"Unsupported QA provider: {Config.QA_PROVIDER}")
haiku/rag/qa/anthropic.py CHANGED
@@ -1,19 +1,29 @@
  from collections.abc import Sequence
 
  try:
- from anthropic import AsyncAnthropic
- from anthropic.types import MessageParam, TextBlock, ToolParam, ToolUseBlock
+ from anthropic import AsyncAnthropic # type: ignore
+ from anthropic.types import ( # type: ignore
+ MessageParam,
+ TextBlock,
+ ToolParam,
+ ToolUseBlock,
+ )
 
  from haiku.rag.client import HaikuRAG
  from haiku.rag.qa.base import QuestionAnswerAgentBase
 
  class QuestionAnswerAnthropicAgent(QuestionAnswerAgentBase):
- def __init__(self, client: HaikuRAG, model: str = "claude-3-5-haiku-20241022"):
- super().__init__(client, model or self._model)
+ def __init__(
+ self,
+ client: HaikuRAG,
+ model: str = "claude-3-5-haiku-20241022",
+ use_citations: bool = False,
+ ):
+ super().__init__(client, model or self._model, use_citations)
  self.tools: Sequence[ToolParam] = [
  ToolParam(
  name="search_documents",
- description="Search the knowledge base for relevant documents",
+ description="Search the knowledge base for relevant documents. Returns a JSON array with content, score, and document_uri for each result.",
  input_schema={
  "type": "object",
  "properties": {
@@ -69,18 +79,10 @@ try:
  else 3
  )
 
- search_results = await self._client.search(
+ context = await self._search_and_expand(
  query, limit=limit
  )
 
- context_chunks = []
- for chunk, score in search_results:
- context_chunks.append(
- f"Content: {chunk.content}\nScore: {score:.4f}"
- )
-
- context = "\n\n".join(context_chunks)
-
  tool_results.append(
  {
  "type": "tool_result",
haiku/rag/qa/base.py CHANGED
@@ -1,26 +1,50 @@
+ import json
+
  from haiku.rag.client import HaikuRAG
- from haiku.rag.qa.prompts import SYSTEM_PROMPT
+ from haiku.rag.qa.prompts import SYSTEM_PROMPT, SYSTEM_PROMPT_WITH_CITATIONS
 
 
  class QuestionAnswerAgentBase:
  _model: str = ""
  _system_prompt: str = SYSTEM_PROMPT
 
- def __init__(self, client: HaikuRAG, model: str = ""):
+ def __init__(self, client: HaikuRAG, model: str = "", use_citations: bool = False):
  self._model = model
  self._client = client
+ self._system_prompt = (
+ SYSTEM_PROMPT_WITH_CITATIONS if use_citations else SYSTEM_PROMPT
+ )
 
  async def answer(self, question: str) -> str:
  raise NotImplementedError(
  "QABase is an abstract class. Please implement the answer method in a subclass."
  )
 
+ async def _search_and_expand(self, query: str, limit: int = 3) -> str:
+ """Search for documents and expand context, then format as JSON"""
+ search_results = await self._client.search(query, limit=limit)
+ expanded_results = await self._client.expand_context(search_results)
+ return self._format_search_results(expanded_results)
+
+ def _format_search_results(self, search_results) -> str:
+ """Format search results as JSON list of {content, score, document_uri}"""
+ formatted_results = []
+ for chunk, score in search_results:
+ formatted_results.append(
+ {
+ "content": chunk.content,
+ "score": score,
+ "document_uri": chunk.document_uri,
+ }
+ )
+ return json.dumps(formatted_results, indent=2)
+
  tools = [
  {
  "type": "function",
  "function": {
  "name": "search_documents",
- "description": "Search the knowledge base for relevant documents",
+ "description": "Search the knowledge base for relevant documents. Returns a JSON array of search results.",
  "parameters": {
  "type": "object",
  "properties": {
@@ -36,6 +60,30 @@ class QuestionAnswerAgentBase:
  },
  "required": ["query"],
  },
+ "returns": {
+ "type": "string",
+ "description": "JSON array of search results",
+ "schema": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "content": {
+ "type": "string",
+ "description": "The document text content",
+ },
+ "score": {
+ "type": "number",
+ "description": "Relevance score (higher is more relevant)",
+ },
+ "document_uri": {
+ "type": "string",
+ "description": "Source URI/path of the document",
+ },
+ },
+ },
+ },
+ },
  },
  }
  ]
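
For reference, the JSON that the new `_search_and_expand` helper hands to the model would look roughly like this (values made up for illustration):

```python
# Illustrative output of _format_search_results; the real content, score and
# document_uri come from the (expanded) search results.
import json

results = [
    {
        "content": "haiku.rag is a Retrieval Augmented Generation library built on SQLite...",
        "score": 0.87,
        "document_uri": "/docs/README.md",
    },
    {
        "content": "Configuration is read into AppConfig, including CHUNK_SIZE and QA_MODEL...",
        "score": 0.52,
        "document_uri": "/docs/configuration.md",
    },
]
print(json.dumps(results, indent=2))
```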
haiku/rag/qa/ollama.py CHANGED
@@ -8,8 +8,13 @@ OLLAMA_OPTIONS = {"temperature": 0.0, "seed": 42, "num_ctx": 16384}
 
 
  class QuestionAnswerOllamaAgent(QuestionAnswerAgentBase):
- def __init__(self, client: HaikuRAG, model: str = Config.QA_MODEL):
- super().__init__(client, model or self._model)
+ def __init__(
+ self,
+ client: HaikuRAG,
+ model: str = Config.QA_MODEL,
+ use_citations: bool = False,
+ ):
+ super().__init__(client, model or self._model, use_citations)
 
  async def answer(self, question: str) -> str:
  ollama_client = AsyncClient(host=Config.OLLAMA_BASE_URL)
@@ -39,16 +44,7 @@ class QuestionAnswerOllamaAgent(QuestionAnswerAgentBase):
  query = args.get("query", question)
  limit = int(args.get("limit", 3))
 
- search_results = await self._client.search(query, limit=limit)
-
- context_chunks = []
- for chunk, score in search_results:
- context_chunks.append(
- f"Content: {chunk.content}\nScore: {score:.4f}"
- )
-
- context = "\n\n".join(context_chunks)
-
+ context = await self._search_and_expand(query, limit=limit)
  messages.append(
  {
  "role": "tool",
haiku/rag/qa/openai.py CHANGED
@@ -1,22 +1,29 @@
  from collections.abc import Sequence
 
  try:
- from openai import AsyncOpenAI
- from openai.types.chat import (
+ from openai import AsyncOpenAI # type: ignore
+ from openai.types.chat import ( # type: ignore
  ChatCompletionAssistantMessageParam,
  ChatCompletionMessageParam,
  ChatCompletionSystemMessageParam,
  ChatCompletionToolMessageParam,
  ChatCompletionUserMessageParam,
  )
- from openai.types.chat.chat_completion_tool_param import ChatCompletionToolParam
+ from openai.types.chat.chat_completion_tool_param import ( # type: ignore
+ ChatCompletionToolParam,
+ )
 
  from haiku.rag.client import HaikuRAG
  from haiku.rag.qa.base import QuestionAnswerAgentBase
 
  class QuestionAnswerOpenAIAgent(QuestionAnswerAgentBase):
- def __init__(self, client: HaikuRAG, model: str = "gpt-4o-mini"):
- super().__init__(client, model or self._model)
+ def __init__(
+ self,
+ client: HaikuRAG,
+ model: str = "gpt-4o-mini",
+ use_citations: bool = False,
+ ):
+ super().__init__(client, model or self._model, use_citations)
  self.tools: Sequence[ChatCompletionToolParam] = [
  ChatCompletionToolParam(tool) for tool in self.tools
  ]
@@ -70,17 +77,7 @@ try:
  query = args.get("query", question)
  limit = int(args.get("limit", 3))
 
- search_results = await self._client.search(
- query, limit=limit
- )
-
- context_chunks = []
- for chunk, score in search_results:
- context_chunks.append(
- f"Content: {chunk.content}\nScore: {score:.4f}"
- )
-
- context = "\n\n".join(context_chunks)
+ context = await self._search_and_expand(query, limit=limit)
 
  messages.append(
  ChatCompletionToolMessageParam(
haiku/rag/qa/prompts.py CHANGED
@@ -19,3 +19,40 @@ Guidelines:
 
  Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
  """
+
+ SYSTEM_PROMPT_WITH_CITATIONS = """
+ You are a knowledgeable assistant that helps users find information from a document knowledge base.
+
+ IMPORTANT: You MUST use the search_documents tool for every question. Do not answer any question without first searching the knowledge base.
+
+ Your process:
+ 1. IMMEDIATELY call the search_documents tool with relevant keywords from the user's question
+ 2. Review the search results and their relevance scores
+ 3. If you need additional context, perform follow-up searches with different keywords
+ 4. Provide a short and to the point comprehensive answer based only on the retrieved documents
+ 5. Always include citations for the sources used in your answer
+
+ Guidelines:
+ - Base your answers strictly on the provided document content
+ - If multiple documents contain relevant information, synthesize them coherently
+ - Indicate when information is incomplete or when you need to search for additional context
+ - If the retrieved documents don't contain sufficient information, clearly state: "I cannot find enough information in the knowledge base to answer this question."
+ - For complex questions, consider breaking them down and performing multiple searches
+ - Stick to the answer, do not ellaborate or provide context unless explicitly asked for it.
+ - ALWAYS include citations at the end of your response using the format below
+
+ Citation Format:
+ After your answer, include a "Citations:" section that lists:
+ - The document URI from each search result used
+ - A brief excerpt (first 50-100 characters) of the content that supported your answer
+ - Format: "Citations:\n- [document_uri]: [content_excerpt]..."
+
+ Example response format:
+ [Your answer here]
+
+ Citations:
+ - /path/to/document1.pdf: "This document explains that AFMAN stands for Air Force Manual..."
+ - /path/to/document2.pdf: "The manual provides guidance on military procedures and..."
+
+ Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
+ """
haiku/rag/reranking/__init__.py CHANGED
@@ -1,37 +1,40 @@
  from haiku.rag.config import Config
  from haiku.rag.reranking.base import RerankerBase
 
- try:
- from haiku.rag.reranking.cohere import CohereReranker
- except ImportError:
- pass
-
  _reranker: RerankerBase | None = None
 
 
- def get_reranker() -> RerankerBase:
+ def get_reranker() -> RerankerBase | None:
  """
  Factory function to get the appropriate reranker based on the configuration.
+ Returns None if if reranking is disabled.
  """
  global _reranker
  if _reranker is not None:
  return _reranker
+
  if Config.RERANK_PROVIDER == "mxbai":
- from haiku.rag.reranking.mxbai import MxBAIReranker
+ try:
+ from haiku.rag.reranking.mxbai import MxBAIReranker
 
- _reranker = MxBAIReranker()
- return _reranker
+ _reranker = MxBAIReranker()
+ return _reranker
+ except ImportError:
+ return None
 
  if Config.RERANK_PROVIDER == "cohere":
  try:
  from haiku.rag.reranking.cohere import CohereReranker
+
+ _reranker = CohereReranker()
+ return _reranker
  except ImportError:
- raise ImportError(
- "Cohere reranker requires the 'cohere' package. "
- "Please install haiku.rag with the 'cohere' extra:"
- "uv pip install haiku.rag[cohere]"
- )
- _reranker = CohereReranker()
+ return None
+
+ if Config.RERANK_PROVIDER == "ollama":
+ from haiku.rag.reranking.ollama import OllamaReranker
+
+ _reranker = OllamaReranker()
  return _reranker
 
- raise ValueError(f"Unsupported reranker provider: {Config.RERANK_PROVIDER}")
+ return None
haiku/rag/reranking/ollama.py ADDED
@@ -0,0 +1,84 @@
+ import json
+
+ from ollama import AsyncClient
+ from pydantic import BaseModel
+
+ from haiku.rag.config import Config
+ from haiku.rag.reranking.base import RerankerBase
+ from haiku.rag.store.models.chunk import Chunk
+
+ OLLAMA_OPTIONS = {"temperature": 0.0, "seed": 42, "num_ctx": 16384}
+
+
+ class RerankResult(BaseModel):
+ """Individual rerank result with index and relevance score."""
+
+ index: int
+ relevance_score: float
+
+
+ class RerankResponse(BaseModel):
+ """Response from the reranking model containing ranked results."""
+
+ results: list[RerankResult]
+
+
+ class OllamaReranker(RerankerBase):
+ def __init__(self, model: str = Config.RERANK_MODEL):
+ self._model = model
+ self._client = AsyncClient(host=Config.OLLAMA_BASE_URL)
+
+ async def rerank(
+ self, query: str, chunks: list[Chunk], top_n: int = 10
+ ) -> list[tuple[Chunk, float]]:
+ if not chunks:
+ return []
+
+ documents = []
+ for i, chunk in enumerate(chunks):
+ documents.append({"index": i, "content": chunk.content})
+
+ # Create the prompt for reranking
+ system_prompt = """You are a document reranking assistant. Given a query and a list of document chunks, you must rank them by relevance to the query.
+
+ Return your response as a JSON object with a "results" array. Each result should have:
+ - "index": the original index of the document (integer)
+ - "relevance_score": a score between 0.0 and 1.0 indicating relevance (float, where 1.0 is most relevant)
+
+ Only return the top documents up to the requested limit, ordered by decreasing relevance score."""
+
+ documents_text = ""
+ for doc in documents:
+ documents_text += f"Index {doc['index']}: {doc['content']}\n\n"
+
+ user_prompt = f"""Query: {query}
+
+ Documents to rerank:
+ {documents_text.strip()}
+
+ Please rank these documents by relevance to the query and return the top {top_n} results as JSON."""
+
+ messages = [
+ {"role": "system", "content": system_prompt},
+ {"role": "user", "content": user_prompt},
+ ]
+
+ try:
+ response = await self._client.chat(
+ model=self._model,
+ messages=messages,
+ format=RerankResponse.model_json_schema(),
+ options=OLLAMA_OPTIONS,
+ )
+
+ content = response["message"]["content"]
+
+ parsed_response = RerankResponse.model_validate(json.loads(content))
+ return [
+ (chunks[result.index], result.relevance_score)
+ for result in parsed_response.results[:top_n]
+ ]
+
+ except Exception:
+ # Fallback: return chunks in original order with same score
+ return [(chunks[i], 1.0) for i in range(min(top_n, len(chunks)))]
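
A hypothetical usage sketch for the new reranker; it assumes a local Ollama server with the configured model pulled, and that `Chunk` can be constructed with just `content`, `document_id` and `metadata` as in the client code above.

```python
# Sketch only: not part of the package.
import asyncio

from haiku.rag.reranking.ollama import OllamaReranker
from haiku.rag.store.models.chunk import Chunk


async def main():
    chunks = [
        Chunk(content="SQLite backs the haiku.rag store.", document_id=1, metadata={"order": 0}),
        Chunk(content="Ollama serves local models for QA and reranking.", document_id=1, metadata={"order": 1}),
    ]
    reranker = OllamaReranker()  # uses Config.RERANK_MODEL and Config.OLLAMA_BASE_URL
    ranked = await reranker.rerank("Which database does haiku.rag use?", chunks, top_n=1)
    for chunk, score in ranked:
        print(f"{score:.2f} {chunk.content}")


asyncio.run(main())
```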
haiku/rag/store/repositories/chunk.py CHANGED
@@ -468,3 +468,49 @@ class ChunkRepository(BaseRepository[Chunk]):
  )
  for chunk_id, document_id, content, metadata_json, document_uri, document_metadata_json in rows
  ]
+
+ async def get_adjacent_chunks(self, chunk: Chunk, num_adjacent: int) -> list[Chunk]:
+ """Get adjacent chunks before and after the given chunk within the same document."""
+ if self.store._connection is None:
+ raise ValueError("Store connection is not available")
+ if chunk.document_id is None:
+ return []
+
+ cursor = self.store._connection.cursor()
+ chunk_order = chunk.metadata.get("order")
+ if chunk_order is None:
+ return []
+
+ # Get adjacent chunks within the same document
+ cursor.execute(
+ """
+ SELECT c.id, c.document_id, c.content, c.metadata, d.uri, d.metadata as document_metadata
+ FROM chunks c
+ JOIN documents d ON c.document_id = d.id
+ WHERE c.document_id = :document_id
+ AND JSON_EXTRACT(c.metadata, '$.order') BETWEEN :start_order AND :end_order
+ AND c.id != :chunk_id
+ ORDER BY JSON_EXTRACT(c.metadata, '$.order')
+ """,
+ {
+ "document_id": chunk.document_id,
+ "start_order": max(0, chunk_order - num_adjacent),
+ "end_order": chunk_order + num_adjacent,
+ "chunk_id": chunk.id,
+ },
+ )
+
+ rows = cursor.fetchall()
+ return [
+ Chunk(
+ id=chunk_id,
+ document_id=document_id,
+ content=content,
+ metadata=json.loads(metadata_json) if metadata_json else {},
+ document_uri=document_uri,
+ document_meta=json.loads(document_metadata_json)
+ if document_metadata_json
+ else {},
+ )
+ for chunk_id, document_id, content, metadata_json, document_uri, document_metadata_json in rows
+ ]
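
The `BETWEEN` clause above selects a symmetric window of chunk orders around each hit (the hit itself is excluded by id and re-added by `expand_context`). A tiny sketch of the window it computes:

```python
# Illustrative helper mirroring the start_order/end_order parameters above.
def adjacent_order_window(chunk_order: int, num_adjacent: int) -> range:
    start = max(0, chunk_order - num_adjacent)
    return range(start, chunk_order + num_adjacent + 1)


# With CONTEXT_CHUNK_RADIUS=2, a hit at order 7 pulls orders 5 through 9.
print(list(adjacent_order_window(7, 2)))  # [5, 6, 7, 8, 9]
```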
haiku_rag-0.5.1.dist-info/METADATA → haiku_rag-0.5.4.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: haiku.rag
- Version: 0.5.1
+ Version: 0.5.4
  Summary: Retrieval Augmented Generation (RAG) with SQLite
  Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
  License: MIT
@@ -21,8 +21,7 @@ Requires-Python: >=3.11
  Requires-Dist: docling>=2.15.0
  Requires-Dist: fastmcp>=2.8.1
  Requires-Dist: httpx>=0.28.1
- Requires-Dist: mxbai-rerank>=0.1.6
- Requires-Dist: ollama>=0.5.1
+ Requires-Dist: ollama>=0.5.3
  Requires-Dist: pydantic>=2.11.7
  Requires-Dist: python-dotenv>=1.1.0
  Requires-Dist: rich>=14.0.0
@@ -34,6 +33,8 @@ Provides-Extra: anthropic
  Requires-Dist: anthropic>=0.56.0; extra == 'anthropic'
  Provides-Extra: cohere
  Requires-Dist: cohere>=5.16.1; extra == 'cohere'
+ Provides-Extra: mxbai
+ Requires-Dist: mxbai-rerank>=0.1.6; extra == 'mxbai'
  Provides-Extra: openai
  Requires-Dist: openai>=1.0.0; extra == 'openai'
  Provides-Extra: voyageai
@@ -75,6 +76,9 @@ haiku-rag search "query"
  # Ask questions
  haiku-rag ask "Who is the author of haiku.rag?"
 
+ # Ask questions with citations
+ haiku-rag ask "Who is the author of haiku.rag?" --cite
+
  # Rebuild database (re-chunk and re-embed all documents)
  haiku-rag rebuild
 
@@ -100,6 +104,10 @@ async with HaikuRAG("database.db") as client:
  # Ask questions
  answer = await client.ask("Who is the author of haiku.rag?")
  print(answer)
+
+ # Ask questions with citations
+ answer = await client.ask("Who is the author of haiku.rag?", cite=True)
+ print(answer)
  ```
 
  ## MCP Server
haiku_rag-0.5.1.dist-info/RECORD → haiku_rag-0.5.4.dist-info/RECORD CHANGED
@@ -1,9 +1,9 @@
  haiku/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- haiku/rag/app.py,sha256=FpLVyP1-zAq_XPmU8CPVLkuIAeuhBOGvMqhYS8RbN40,7649
+ haiku/rag/app.py,sha256=k45EOz-rbYg_8RSII3btqsZo2TpGqj3ysamFehhaCGo,7673
  haiku/rag/chunker.py,sha256=PVe6ysv8UlacUd4Zb3_8RFWIaWDXnzBAy2VDJ4TaUsE,1555
- haiku/rag/cli.py,sha256=rk4uUwN_FdMC-rai9_R2sgXXMI3TIWKRtdWWHg_WoWM,5865
- haiku/rag/client.py,sha256=pFcrPkQo1h1zJ76jts-72goP_kGVtnJNfLuoT8qpsb8,15795
- haiku/rag/config.py,sha256=8mlQ8gYFxxq1q9gi9tjY9StjqhfhiHkO1FvS4b0et0E,1633
+ haiku/rag/cli.py,sha256=mGpdnEH8rS-rZLGmE4MbcDci8uexci7UkGTdCxrz1Lg,5987
+ haiku/rag/client.py,sha256=CTc4OJ-rnAI3pcjQgazK7B06wkNLP6wYXD1spQtXXzg,20961
+ haiku/rag/config.py,sha256=oLrmwGp1OjcKPpJFnf9GgTpoBSOXalFWO6PCKFwQe0w,1615
  haiku/rag/logging.py,sha256=zTTGpGq5tPdcd7RpCbd9EGw1IZlQDbYkrCg9t9pqRc4,580
  haiku/rag/mcp.py,sha256=tMN6fNX7ZtAER1R6DL1GkC9HZozTC4HzuQs199p7icI,4551
  haiku/rag/monitor.py,sha256=r386nkhdlsU8UECwIuVwnrSlgMk3vNIuUZGNIzkZuec,2770
@@ -14,16 +14,17 @@ haiku/rag/embeddings/base.py,sha256=NTQvuzbZPu0LBo5wAu3qGyJ4xXUaRAt1fjBO0ygWn_Y,
  haiku/rag/embeddings/ollama.py,sha256=y6-lp0XpbnyIjoOEdtSzMdEVkU5glOwnWQ1FkpUZnpI,370
  haiku/rag/embeddings/openai.py,sha256=i4Ui5hAJkcKqJkH9L3jJo7fuGYHn07td532w-ksg_T8,431
  haiku/rag/embeddings/voyageai.py,sha256=0hiRTIqu-bpl-4OaCtMHvWfPdgbrzhnfZJowSV8pLRA,415
- haiku/rag/qa/__init__.py,sha256=f9ZU7YDzJJoyglV1hGja1j9B6NcWerAImuKO1gFP-qs,1487
- haiku/rag/qa/anthropic.py,sha256=6I6cf6ySNkYbmDFdy22sA8r3GO5moiiH75tJnHcgJQA,4448
- haiku/rag/qa/base.py,sha256=4ZTM_l5FAZ9cA0f8NeqRJiUAmjatwCTmSoclFw0gTFQ,1349
- haiku/rag/qa/ollama.py,sha256=EGUi4urSx9nrnsr5j-qHVDVOnvRTbSMKUbMvXEMIcxM,2381
- haiku/rag/qa/openai.py,sha256=dF32sGgVt8mZi5oVxByaeECs9NqLjvDiZnnpJBsrHm8,3968
- haiku/rag/qa/prompts.py,sha256=8uYMxHzbzI9vo2FPkCSSNTh_RNL96WkBbUWPCMBlLpo,1315
- haiku/rag/reranking/__init__.py,sha256=DsPCdU94wRzDCYl6hz2DySOMWwOvNxKviqKAUfyykK8,1118
+ haiku/rag/qa/__init__.py,sha256=vC9S6cvZtPz-UfA_v4DMwI7eam6567BXNrUwHsMo_i8,1633
+ haiku/rag/qa/anthropic.py,sha256=o0RVn7lcdYvoCUGXh551jeuoB3ANJSZ7uz2R_h_pZ2w,4321
+ haiku/rag/qa/base.py,sha256=dCX14ifJW4QMCNFP_pmss9SYWM9Qm1cSWZrMl6A-2C8,3541
+ haiku/rag/qa/ollama.py,sha256=3T9ciKWpCIY7jejvdrsMC_wIvGRWQEWA0AwKjOlX35M,2131
+ haiku/rag/qa/openai.py,sha256=4BFc8pzFI-CTDxxKMskMxMKkacvUoRTVWI8kKntl3Jw,3718
+ haiku/rag/qa/prompts.py,sha256=WTA66brySfzIkuDZ_hRQQKGx12ngIu9nUDKMNGg2-Bg,3321
+ haiku/rag/reranking/__init__.py,sha256=fwC3pauteJwh9Ulm2270QvwAdwr4NMr4RUEuolC-wKU,1063
  haiku/rag/reranking/base.py,sha256=LM9yUSSJ414UgBZhFTgxGprlRqzfTe4I1vgjricz2JY,405
  haiku/rag/reranking/cohere.py,sha256=1iTdiaa8vvb6oHVB2qpWzUOVkyfUcimVSZp6Qr4aq4c,1049
  haiku/rag/reranking/mxbai.py,sha256=46sVTsTIkzIX9THgM3u8HaEmgY7evvEyB-N54JTHvK8,867
+ haiku/rag/reranking/ollama.py,sha256=tCrLlNNDBCZu7J3to1gvBq-sOvN1flYEA7E3H3Jq0mU,2790
  haiku/rag/store/__init__.py,sha256=hq0W0DAC7ysqhWSP2M2uHX8cbG6kbr-sWHxhq6qQcY0,103
  haiku/rag/store/engine.py,sha256=cOMBToLilI1Di1qQrFzGLqtRMsuvtiX0Q5RNIEzQy9w,6232
  haiku/rag/store/models/__init__.py,sha256=s0E72zneGlowvZrFWaNxHYjOAUjgWdLxzdYsnvNRVlY,88
@@ -31,13 +32,13 @@ haiku/rag/store/models/chunk.py,sha256=9-vIxW75-kMTelIhgVIMd_WhP-Drc1q65vjaWMP8w
  haiku/rag/store/models/document.py,sha256=TVXVY-nQs-1vCORQEs9rA7zOtndeGC4dgCoujLAS054,396
  haiku/rag/store/repositories/__init__.py,sha256=uIBhxjQh-4o3O-ck8b7BQ58qXQTuJdPvrDIHVhY5T1A,263
  haiku/rag/store/repositories/base.py,sha256=cm3VyQXhtxvRfk1uJHpA0fDSxMpYN-mjQmRiDiLsQ68,1008
- haiku/rag/store/repositories/chunk.py,sha256=DIIdpHVemvxZOPHOLBL7pJGWY4VyNrUiQSWPWt24BYo,16974
+ haiku/rag/store/repositories/chunk.py,sha256=R8dvNy3po2FspZvkWKZTGlqccbekLjY39GroXRfAU18,18808
  haiku/rag/store/repositories/document.py,sha256=ki8LiDukwU1469Yw51i0rQFvBzUQeYkFYWs3Ly83akc,8815
  haiku/rag/store/repositories/settings.py,sha256=qZLXvLsErnCWL0nBQQNfRnatHzCKhtUDLvUK9k-W_fU,2463
  haiku/rag/store/upgrades/__init__.py,sha256=kKS1YWT_P-CYKhKtokOLTIFNKf9jlfjFFr8lyIMeogM,100
  haiku/rag/store/upgrades/v0_3_4.py,sha256=GLogKZdZ40NX1vBHKdOJju7fFzNUCHoEnjSZg17Hm2U,663
- haiku_rag-0.5.1.dist-info/METADATA,sha256=X4r-1CBCTef3_T9HWPgCHi5XumqOSF4tlHfUpxO533E,4198
- haiku_rag-0.5.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- haiku_rag-0.5.1.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
- haiku_rag-0.5.1.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
- haiku_rag-0.5.1.dist-info/RECORD,,
+ haiku_rag-0.5.4.dist-info/METADATA,sha256=hUovrigbcJX6I3vewMVXut3QaI-PXe5BiDzs84noBts,4455
+ haiku_rag-0.5.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ haiku_rag-0.5.4.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
+ haiku_rag-0.5.4.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
+ haiku_rag-0.5.4.dist-info/RECORD,,