haiku_rag-0.3.2-py3-none-any.whl → haiku_rag-0.3.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


haiku/rag/qa/anthropic.py CHANGED
@@ -37,75 +37,69 @@ try:
 
             messages: list[MessageParam] = [{"role": "user", "content": question}]
 
-            response = await anthropic_client.messages.create(
-                model=self._model,
-                max_tokens=4096,
-                system=self._system_prompt,
-                messages=messages,
-                tools=self.tools,
-                temperature=0.0,
-            )
-
-            if response.stop_reason == "tool_use":
-                messages.append({"role": "assistant", "content": response.content})
-
-                # Process tool calls
-                tool_results = []
-                for content_block in response.content:
-                    if isinstance(content_block, ToolUseBlock):
-                        if content_block.name == "search_documents":
-                            args = content_block.input
-                            query = (
-                                args.get("query", question)
-                                if isinstance(args, dict)
-                                else question
-                            )
-                            limit = (
-                                int(args.get("limit", 3))
-                                if isinstance(args, dict)
-                                else 3
-                            )
-
-                            search_results = await self._client.search(
-                                query, limit=limit
-                            )
-
-                            context_chunks = []
-                            for chunk, score in search_results:
-                                context_chunks.append(
-                                    f"Content: {chunk.content}\nScore: {score:.4f}"
+            max_rounds = 5  # Prevent infinite loops
+
+            for _ in range(max_rounds):
+                response = await anthropic_client.messages.create(
+                    model=self._model,
+                    max_tokens=4096,
+                    system=self._system_prompt,
+                    messages=messages,
+                    tools=self.tools,
+                    temperature=0.0,
+                )
+
+                if response.stop_reason == "tool_use":
+                    messages.append({"role": "assistant", "content": response.content})
+
+                    # Process tool calls
+                    tool_results = []
+                    for content_block in response.content:
+                        if isinstance(content_block, ToolUseBlock):
+                            if content_block.name == "search_documents":
+                                args = content_block.input
+                                query = (
+                                    args.get("query", question)
+                                    if isinstance(args, dict)
+                                    else question
+                                )
+                                limit = (
+                                    int(args.get("limit", 3))
+                                    if isinstance(args, dict)
+                                    else 3
+                                )
+
+                                search_results = await self._client.search(
+                                    query, limit=limit
+                                )
+
+                                context_chunks = []
+                                for chunk, score in search_results:
+                                    context_chunks.append(
+                                        f"Content: {chunk.content}\nScore: {score:.4f}"
+                                    )
+
+                                context = "\n\n".join(context_chunks)
+
+                                tool_results.append(
+                                    {
+                                        "type": "tool_result",
+                                        "tool_use_id": content_block.id,
+                                        "content": context,
+                                    }
                                 )
 
-                            context = "\n\n".join(context_chunks)
-
-                            tool_results.append(
-                                {
-                                    "type": "tool_result",
-                                    "tool_use_id": content_block.id,
-                                    "content": context,
-                                }
-                            )
-
-                if tool_results:
-                    messages.append({"role": "user", "content": tool_results})
-
-                    final_response = await anthropic_client.messages.create(
-                        model=self._model,
-                        max_tokens=4096,
-                        system=self._system_prompt,
-                        messages=messages,
-                        temperature=0.0,
-                    )
-                    if final_response.content:
-                        first_content = final_response.content[0]
+                    if tool_results:
+                        messages.append({"role": "user", "content": tool_results})
+                else:
+                    # No tool use, return the response
+                    if response.content:
+                        first_content = response.content[0]
                         if isinstance(first_content, TextBlock):
                             return first_content.text
                     return ""
 
-            if response.content:
-                first_content = response.content[0]
-                if isinstance(first_content, TextBlock):
-                    return first_content.text
+            # If we've exhausted max rounds, return empty string
             return ""
 
 except ImportError:
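
All three QA backends get the same restructuring in this release: the old single round of tool calling (one create() call, at most one search, one follow-up create() call) is replaced by a loop of up to max_rounds = 5 rounds, so the model can issue follow-up searches before it answers. A minimal, self-contained sketch of that control flow, assuming a hypothetical fake_model stand-in rather than the real Anthropic client (Turn, fake_model, and answer are illustrative names, not haiku.rag API):

from dataclasses import dataclass


@dataclass
class Turn:
    # Stand-in for a model response: either a tool request or a final answer.
    stop_reason: str  # "tool_use" or "end_turn"
    text: str = ""
    tool_query: str | None = None


def fake_model(messages: list[dict]) -> Turn:
    # Hypothetical model: asks for one search, then answers from the result.
    if not any(m["role"] == "tool" for m in messages):
        return Turn(stop_reason="tool_use", tool_query="haiku")
    return Turn(stop_reason="end_turn", text="Answer grounded in search results.")


def answer(question: str, max_rounds: int = 5) -> str:
    messages = [{"role": "user", "content": question}]
    for _ in range(max_rounds):
        turn = fake_model(messages)
        if turn.stop_reason != "tool_use":
            return turn.text  # No tool use: return the response.
        # Record the tool call and its result, then give the model another round.
        messages.append({"role": "assistant", "content": f"search: {turn.tool_query}"})
        messages.append({"role": "tool", "content": f"results for: {turn.tool_query}"})
    return ""  # Rounds exhausted: fall back to an empty answer, as the diff does.


print(answer("What is haiku.rag?"))  # -> "Answer grounded in search results."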
haiku/rag/qa/ollama.py CHANGED
@@ -14,54 +14,51 @@ class QuestionAnswerOllamaAgent(QuestionAnswerAgentBase):
     async def answer(self, question: str) -> str:
         ollama_client = AsyncClient(host=Config.OLLAMA_BASE_URL)
 
-        # Define the search tool
-
         messages = [
             {"role": "system", "content": self._system_prompt},
             {"role": "user", "content": question},
         ]
 
-        # Initial response with tool calling
-        response = await ollama_client.chat(
-            model=self._model,
-            messages=messages,
-            tools=self.tools,
-            options=OLLAMA_OPTIONS,
-            think=False,
-        )
+        max_rounds = 5  # Prevent infinite loops
 
-        if response.get("message", {}).get("tool_calls"):
-            for tool_call in response["message"]["tool_calls"]:
-                if tool_call["function"]["name"] == "search_documents":
-                    args = tool_call["function"]["arguments"]
-                    query = args.get("query", question)
-                    limit = int(args.get("limit", 3))
+        for _ in range(max_rounds):
+            response = await ollama_client.chat(
+                model=self._model,
+                messages=messages,
+                tools=self.tools,
+                options=OLLAMA_OPTIONS,
+                think=False,
+            )
 
-                    search_results = await self._client.search(query, limit=limit)
+            if response.get("message", {}).get("tool_calls"):
+                messages.append(response["message"])
 
-                    context_chunks = []
-                    for chunk, score in search_results:
-                        context_chunks.append(
-                            f"Content: {chunk.content}\nScore: {score:.4f}"
-                        )
+                for tool_call in response["message"]["tool_calls"]:
+                    if tool_call["function"]["name"] == "search_documents":
+                        args = tool_call["function"]["arguments"]
+                        query = args.get("query", question)
+                        limit = int(args.get("limit", 3))
 
-                    context = "\n\n".join(context_chunks)
+                        search_results = await self._client.search(query, limit=limit)
 
-                    messages.append(response["message"])
-                    messages.append(
-                        {
-                            "role": "tool",
-                            "content": context,
-                            "tool_call_id": tool_call.get("id", "search_tool"),
-                        }
-                    )
+                        context_chunks = []
+                        for chunk, score in search_results:
+                            context_chunks.append(
+                                f"Content: {chunk.content}\nScore: {score:.4f}"
+                            )
 
-            final_response = await ollama_client.chat(
-                model=self._model,
-                messages=messages,
-                think=False,
-                options=OLLAMA_OPTIONS,
-            )
-            return final_response["message"]["content"]
-        else:
-            return response["message"]["content"]
+                        context = "\n\n".join(context_chunks)
+
+                        messages.append(
+                            {
+                                "role": "tool",
+                                "content": context,
+                                "tool_call_id": tool_call.get("id", "search_tool"),
+                            }
+                        )
+            else:
+                # No tool calls, return the response
+                return response["message"]["content"]
+
+        # If we've exhausted max rounds, return empty string
+        return ""
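
The Ollama variant runs the same bounded loop; note that messages.append(response["message"]) now records the model's tool-call message once per round, before the tool results. A sketch of the history after one search round, assuming invented values (the field names follow the diff, the contents do not come from real output):

# Illustrative message history after one round of the Ollama loop.
messages = [
    {"role": "system", "content": "<system prompt>"},
    {"role": "user", "content": "Which embedders does haiku.rag support?"},
    # The model's tool-call message, appended verbatim:
    {
        "role": "assistant",
        "tool_calls": [
            {
                "function": {
                    "name": "search_documents",
                    "arguments": {"query": "embedders", "limit": 3},
                }
            }
        ],
    },
    # The search result the next chat() call grounds its answer on:
    {
        "role": "tool",
        "content": "Content: ...\nScore: 0.8123",
        "tool_call_id": "search_tool",  # default id, per tool_call.get("id", "search_tool")
    },
]

for message in messages:
    print(message["role"])  # system, user, assistant, tool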
haiku/rag/qa/openai.py CHANGED
@@ -24,8 +24,6 @@ try:
         async def answer(self, question: str) -> str:
             openai_client = AsyncOpenAI()
 
-            # Define the search tool
-
             messages: list[ChatCompletionMessageParam] = [
                 ChatCompletionSystemMessageParam(
                     role="system", content=self._system_prompt
@@ -33,69 +31,70 @@ try:
                 ChatCompletionUserMessageParam(role="user", content=question),
             ]
 
-            # Initial response with tool calling
-            response = await openai_client.chat.completions.create(
-                model=self._model,
-                messages=messages,
-                tools=self.tools,
-                temperature=0.0,
-            )
-
-            response_message = response.choices[0].message
-
-            if response_message.tool_calls:
-                messages.append(
-                    ChatCompletionAssistantMessageParam(
-                        role="assistant",
-                        content=response_message.content,
-                        tool_calls=[
-                            {
-                                "id": tc.id,
-                                "type": "function",
-                                "function": {
-                                    "name": tc.function.name,
-                                    "arguments": tc.function.arguments,
-                                },
-                            }
-                            for tc in response_message.tool_calls
-                        ],
-                    )
+            max_rounds = 5  # Prevent infinite loops
+
+            for _ in range(max_rounds):
+                response = await openai_client.chat.completions.create(
+                    model=self._model,
+                    messages=messages,
+                    tools=self.tools,
+                    temperature=0.0,
                 )
 
-                for tool_call in response_message.tool_calls:
-                    if tool_call.function.name == "search_documents":
-                        import json
+                response_message = response.choices[0].message
+
+                if response_message.tool_calls:
+                    messages.append(
+                        ChatCompletionAssistantMessageParam(
+                            role="assistant",
+                            content=response_message.content,
+                            tool_calls=[
+                                {
+                                    "id": tc.id,
+                                    "type": "function",
+                                    "function": {
+                                        "name": tc.function.name,
+                                        "arguments": tc.function.arguments,
+                                    },
+                                }
+                                for tc in response_message.tool_calls
+                            ],
+                        )
+                    )
 
-                        args = json.loads(tool_call.function.arguments)
-                        query = args.get("query", question)
-                        limit = int(args.get("limit", 3))
+                    for tool_call in response_message.tool_calls:
+                        if tool_call.function.name == "search_documents":
+                            import json
 
-                        search_results = await self._client.search(query, limit=limit)
+                            args = json.loads(tool_call.function.arguments)
+                            query = args.get("query", question)
+                            limit = int(args.get("limit", 3))
 
-                        context_chunks = []
-                        for chunk, score in search_results:
-                            context_chunks.append(
-                                f"Content: {chunk.content}\nScore: {score:.4f}"
+                            search_results = await self._client.search(
+                                query, limit=limit
                             )
 
-                        context = "\n\n".join(context_chunks)
+                            context_chunks = []
+                            for chunk, score in search_results:
+                                context_chunks.append(
+                                    f"Content: {chunk.content}\nScore: {score:.4f}"
+                                )
+
+                            context = "\n\n".join(context_chunks)
 
-                        messages.append(
-                            ChatCompletionToolMessageParam(
-                                role="tool",
-                                content=context,
-                                tool_call_id=tool_call.id,
+                            messages.append(
+                                ChatCompletionToolMessageParam(
+                                    role="tool",
+                                    content=context,
+                                    tool_call_id=tool_call.id,
+                                )
                             )
-                        )
+                else:
+                    # No tool calls, return the response
+                    return response_message.content or ""
 
-                final_response = await openai_client.chat.completions.create(
-                    model=self._model,
-                    messages=messages,
-                    temperature=0.0,
-                )
-                return final_response.choices[0].message.content or ""
-            else:
-                return response_message.content or ""
+            # If we've exhausted max rounds, return empty string
+            return ""
 
 except ImportError:
     pass
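
One provider-specific wrinkle: OpenAI delivers tool-call arguments as a JSON string (hence the inline import json above), whereas Ollama hands over a dict and Anthropic an already-parsed input. A small sketch of the parse with the same defaults the diff uses; raw_arguments is a made-up value:

import json

# OpenAI tool calls carry their arguments as a JSON-encoded string.
raw_arguments = '{"query": "sqlite vector search", "limit": 3}'

try:
    args = json.loads(raw_arguments)
except json.JSONDecodeError:
    args = {}  # Malformed arguments: fall back to the defaults below.

query = args.get("query", "<the original question>")
limit = int(args.get("limit", 3))  # coerce, matching the diff's int(...)

print(query, limit)  # -> sqlite vector search 3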
haiku/rag/qa/prompts.py CHANGED
@@ -1,7 +1,20 @@
 SYSTEM_PROMPT = """
-You are a helpful assistant that uses a RAG library to answer the user's prompt.
-Your task is to provide a concise and accurate answer based on the provided context.
-You should ask the provided tools to find relevant documents and then use the content of those documents to answer the question.
-Never make up information, always use the context to answer the question.
-If the context does not contain enough information to answer the question, respond with "I cannot answer that based on the provided context."
+You are a knowledgeable assistant that helps users find information from a document knowledge base.
+
+Your process:
+1. When a user asks a question, use the search_documents tool to find relevant information
+2. Search with specific keywords and phrases from the user's question
+3. Review the search results and their relevance scores
+4. If you need additional context, perform follow-up searches with different keywords
+5. Provide a comprehensive answer based only on the retrieved documents
+
+Guidelines:
+- Base your answers strictly on the provided document content
+- Quote or reference specific information when possible
+- If multiple documents contain relevant information, synthesize them coherently
+- Indicate when information is incomplete or when you need to search for additional context
+- If the retrieved documents don't contain sufficient information, clearly state: "I cannot find enough information in the knowledge base to answer this question."
+- For complex questions, consider breaking them down and performing multiple searches
+
+Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
 """
{haiku_rag-0.3.2.dist-info → haiku_rag-0.3.3.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.3.2
+Version: 0.3.3
 Summary: Retrieval Augmented Generation (RAG) with SQLite
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT
@@ -116,3 +116,4 @@ Full documentation at: https://ggozad.github.io/haiku.rag/
 - [Configuration](https://ggozad.github.io/haiku.rag/configuration/) - Environment variables
 - [CLI](https://ggozad.github.io/haiku.rag/cli/) - Command reference
 - [Python API](https://ggozad.github.io/haiku.rag/python/) - Complete API docs
+- [Benchmarks](https://ggozad.github.io/haiku.rag/benchmarks/) - Performance Benchmarks
{haiku_rag-0.3.2.dist-info → haiku_rag-0.3.3.dist-info}/RECORD RENAMED
@@ -15,11 +15,11 @@ haiku/rag/embeddings/ollama.py,sha256=hWdrTiuJwNSRYCqP0WP-z6XXA3RBGkAiknZMsPLH0q
 haiku/rag/embeddings/openai.py,sha256=reh8AykG2f9f5hhRDmqSsjiuCPi9SsXfe2YEZFlxXk8,550
 haiku/rag/embeddings/voyageai.py,sha256=jc0JywdLJD3Ee1MUv1m8MhWCEo0enNnVcrIBtUvD-Ss,534
 haiku/rag/qa/__init__.py,sha256=oso98Ypti7mBLTJ6Zk71YaSJ9Rgc89QXp9RSB6zSpYs,1501
-haiku/rag/qa/anthropic.py,sha256=lzHRQxpEv6Qd6iBIqexUgWnq-ITqytppwkfOuRGWdDs,4556
+haiku/rag/qa/anthropic.py,sha256=6I6cf6ySNkYbmDFdy22sA8r3GO5moiiH75tJnHcgJQA,4448
 haiku/rag/qa/base.py,sha256=4ZTM_l5FAZ9cA0f8NeqRJiUAmjatwCTmSoclFw0gTFQ,1349
-haiku/rag/qa/ollama.py,sha256=poShrse-RgLTwa5gbVzoERNTrn5QRpovJCZKYkIpOZI,2393
-haiku/rag/qa/openai.py,sha256=yBbSjGlG4Lo5p2B2NOTa5C6JceX0OJ1jXar_ABFZYYI,3849
-haiku/rag/qa/prompts.py,sha256=dAz2HjD4eJ8tcW534Tx7EuFOs6pSv2kPr7yrHnHtS0E,535
+haiku/rag/qa/ollama.py,sha256=-UtNFErYlA_66g3WLU6lK38a1Y5zhAL6s_uZ5AP0TFs,2381
+haiku/rag/qa/openai.py,sha256=dF32sGgVt8mZi5oVxByaeECs9NqLjvDiZnnpJBsrHm8,3968
+haiku/rag/qa/prompts.py,sha256=578LJGZJ0LQ_q7ccyj5hLabtHo8Zcfw5-DiLGN9lC-w,1200
 haiku/rag/store/__init__.py,sha256=hq0W0DAC7ysqhWSP2M2uHX8cbG6kbr-sWHxhq6qQcY0,103
 haiku/rag/store/engine.py,sha256=BeYZRZ08zaYeeu375ysnAL3tGz4roA3GzP7WRNwznCo,2603
 haiku/rag/store/models/__init__.py,sha256=s0E72zneGlowvZrFWaNxHYjOAUjgWdLxzdYsnvNRVlY,88
@@ -29,8 +29,8 @@ haiku/rag/store/repositories/__init__.py,sha256=uIBhxjQh-4o3O-ck8b7BQ58qXQTuJdPv
 haiku/rag/store/repositories/base.py,sha256=cm3VyQXhtxvRfk1uJHpA0fDSxMpYN-mjQmRiDiLsQ68,1008
 haiku/rag/store/repositories/chunk.py,sha256=gik7ZPOK3gCoG6tU1pGueAZBPmJxIb7obYFUhwINrYg,16497
 haiku/rag/store/repositories/document.py,sha256=xpWOpjHFbhVwNJ1gpusEKNY6l_Qyibg9y_bdHCwcfpk,7133
-haiku_rag-0.3.2.dist-info/METADATA,sha256=0A8BVZDp38i_xLznvkrZBq3f3OYtWPtqBx_U2eHRIow,3931
-haiku_rag-0.3.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-haiku_rag-0.3.2.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
-haiku_rag-0.3.2.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
-haiku_rag-0.3.2.dist-info/RECORD,,
+haiku_rag-0.3.3.dist-info/METADATA,sha256=nDI-sy2F8h7qr9hK1S7VQLOMRcWYP1clxJYxNVB1AaA,4019
+haiku_rag-0.3.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+haiku_rag-0.3.3.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
+haiku_rag-0.3.3.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
+haiku_rag-0.3.3.dist-info/RECORD,,