haiku_rag-0.3.2-py3-none-any.whl → haiku_rag-0.3.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


haiku/rag/qa/anthropic.py CHANGED
@@ -37,75 +37,69 @@ try:
 
             messages: list[MessageParam] = [{"role": "user", "content": question}]
 
-            response = await anthropic_client.messages.create(
-                model=self._model,
-                max_tokens=4096,
-                system=self._system_prompt,
-                messages=messages,
-                tools=self.tools,
-                temperature=0.0,
-            )
-
-            if response.stop_reason == "tool_use":
-                messages.append({"role": "assistant", "content": response.content})
-
-                # Process tool calls
-                tool_results = []
-                for content_block in response.content:
-                    if isinstance(content_block, ToolUseBlock):
-                        if content_block.name == "search_documents":
-                            args = content_block.input
-                            query = (
-                                args.get("query", question)
-                                if isinstance(args, dict)
-                                else question
-                            )
-                            limit = (
-                                int(args.get("limit", 3))
-                                if isinstance(args, dict)
-                                else 3
-                            )
-
-                            search_results = await self._client.search(
-                                query, limit=limit
-                            )
-
-                            context_chunks = []
-                            for chunk, score in search_results:
-                                context_chunks.append(
-                                    f"Content: {chunk.content}\nScore: {score:.4f}"
+            max_rounds = 5  # Prevent infinite loops
+
+            for _ in range(max_rounds):
+                response = await anthropic_client.messages.create(
+                    model=self._model,
+                    max_tokens=4096,
+                    system=self._system_prompt,
+                    messages=messages,
+                    tools=self.tools,
+                    temperature=0.0,
+                )
+
+                if response.stop_reason == "tool_use":
+                    messages.append({"role": "assistant", "content": response.content})
+
+                    # Process tool calls
+                    tool_results = []
+                    for content_block in response.content:
+                        if isinstance(content_block, ToolUseBlock):
+                            if content_block.name == "search_documents":
+                                args = content_block.input
+                                query = (
+                                    args.get("query", question)
+                                    if isinstance(args, dict)
+                                    else question
+                                )
+                                limit = (
+                                    int(args.get("limit", 3))
+                                    if isinstance(args, dict)
+                                    else 3
+                                )
+
+                                search_results = await self._client.search(
+                                    query, limit=limit
+                                )
+
+                                context_chunks = []
+                                for chunk, score in search_results:
+                                    context_chunks.append(
+                                        f"Content: {chunk.content}\nScore: {score:.4f}"
+                                    )
+
+                                context = "\n\n".join(context_chunks)
+
+                                tool_results.append(
+                                    {
+                                        "type": "tool_result",
+                                        "tool_use_id": content_block.id,
+                                        "content": context,
+                                    }
                                 )
 
-                            context = "\n\n".join(context_chunks)
-
-                            tool_results.append(
-                                {
-                                    "type": "tool_result",
-                                    "tool_use_id": content_block.id,
-                                    "content": context,
-                                }
-                            )
-
-                if tool_results:
-                    messages.append({"role": "user", "content": tool_results})
-
-                    final_response = await anthropic_client.messages.create(
-                        model=self._model,
-                        max_tokens=4096,
-                        system=self._system_prompt,
-                        messages=messages,
-                        temperature=0.0,
-                    )
-                    if final_response.content:
-                        first_content = final_response.content[0]
+                    if tool_results:
+                        messages.append({"role": "user", "content": tool_results})
+                else:
+                    # No tool use, return the response
+                    if response.content:
+                        first_content = response.content[0]
                         if isinstance(first_content, TextBlock):
                             return first_content.text
                     return ""
 
-            if response.content:
-                first_content = response.content[0]
-                if isinstance(first_content, TextBlock):
-                    return first_content.text
+            # If we've exhausted max rounds, return empty string
             return ""
 
 except ImportError:
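
All three QA backends get the same restructuring in this release: the old single round of tool calling (one create() call, at most one search, one follow-up create() call) is replaced by a loop of up to max_rounds = 5 rounds, so the model can issue follow-up searches before it answers. A minimal, self-contained sketch of that control flow, assuming a hypothetical fake_model stand-in rather than the real Anthropic client (Turn, fake_model, and answer are illustrative names, not haiku.rag API):

from dataclasses import dataclass


@dataclass
class Turn:
    # Stand-in for a model response: either a tool request or a final answer.
    stop_reason: str  # "tool_use" or "end_turn"
    text: str = ""
    tool_query: str | None = None


def fake_model(messages: list[dict]) -> Turn:
    # Hypothetical model: asks for one search, then answers from the result.
    if not any(m["role"] == "tool" for m in messages):
        return Turn(stop_reason="tool_use", tool_query="haiku")
    return Turn(stop_reason="end_turn", text="Answer grounded in search results.")


def answer(question: str, max_rounds: int = 5) -> str:
    messages = [{"role": "user", "content": question}]
    for _ in range(max_rounds):
        turn = fake_model(messages)
        if turn.stop_reason != "tool_use":
            return turn.text  # No tool use: return the response.
        # Record the tool call and its result, then give the model another round.
        messages.append({"role": "assistant", "content": f"search: {turn.tool_query}"})
        messages.append({"role": "tool", "content": f"results for: {turn.tool_query}"})
    return ""  # Rounds exhausted: fall back to an empty answer, as the diff does.


print(answer("What is haiku.rag?"))  # -> "Answer grounded in search results."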
haiku/rag/qa/ollama.py CHANGED
@@ -14,54 +14,51 @@ class QuestionAnswerOllamaAgent(QuestionAnswerAgentBase):
     async def answer(self, question: str) -> str:
         ollama_client = AsyncClient(host=Config.OLLAMA_BASE_URL)
 
-        # Define the search tool
-
         messages = [
             {"role": "system", "content": self._system_prompt},
             {"role": "user", "content": question},
         ]
 
-        # Initial response with tool calling
-        response = await ollama_client.chat(
-            model=self._model,
-            messages=messages,
-            tools=self.tools,
-            options=OLLAMA_OPTIONS,
-            think=False,
-        )
+        max_rounds = 5  # Prevent infinite loops
 
-        if response.get("message", {}).get("tool_calls"):
-            for tool_call in response["message"]["tool_calls"]:
-                if tool_call["function"]["name"] == "search_documents":
-                    args = tool_call["function"]["arguments"]
-                    query = args.get("query", question)
-                    limit = int(args.get("limit", 3))
+        for _ in range(max_rounds):
+            response = await ollama_client.chat(
+                model=self._model,
+                messages=messages,
+                tools=self.tools,
+                options=OLLAMA_OPTIONS,
+                think=False,
+            )
 
-                    search_results = await self._client.search(query, limit=limit)
+            if response.get("message", {}).get("tool_calls"):
+                messages.append(response["message"])
 
-                    context_chunks = []
-                    for chunk, score in search_results:
-                        context_chunks.append(
-                            f"Content: {chunk.content}\nScore: {score:.4f}"
-                        )
+                for tool_call in response["message"]["tool_calls"]:
+                    if tool_call["function"]["name"] == "search_documents":
+                        args = tool_call["function"]["arguments"]
+                        query = args.get("query", question)
+                        limit = int(args.get("limit", 3))
 
-                    context = "\n\n".join(context_chunks)
+                        search_results = await self._client.search(query, limit=limit)
 
-                    messages.append(response["message"])
-                    messages.append(
-                        {
-                            "role": "tool",
-                            "content": context,
-                            "tool_call_id": tool_call.get("id", "search_tool"),
-                        }
-                    )
+                        context_chunks = []
+                        for chunk, score in search_results:
+                            context_chunks.append(
+                                f"Content: {chunk.content}\nScore: {score:.4f}"
+                            )
 
-            final_response = await ollama_client.chat(
-                model=self._model,
-                messages=messages,
-                think=False,
-                options=OLLAMA_OPTIONS,
-            )
-            return final_response["message"]["content"]
-        else:
-            return response["message"]["content"]
+                        context = "\n\n".join(context_chunks)
+
+                        messages.append(
+                            {
+                                "role": "tool",
+                                "content": context,
+                                "tool_call_id": tool_call.get("id", "search_tool"),
+                            }
+                        )
+            else:
+                # No tool calls, return the response
+                return response["message"]["content"]
+
+        # If we've exhausted max rounds, return empty string
+        return ""
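
The Ollama variant runs the same bounded loop; note that messages.append(response["message"]) now records the model's tool-call message once per round, before the tool results. A sketch of the history after one search round, assuming invented values (the field names follow the diff, the contents do not come from real output):

# Illustrative message history after one round of the Ollama loop.
messages = [
    {"role": "system", "content": "<system prompt>"},
    {"role": "user", "content": "Which embedders does haiku.rag support?"},
    # The model's tool-call message, appended verbatim:
    {
        "role": "assistant",
        "tool_calls": [
            {
                "function": {
                    "name": "search_documents",
                    "arguments": {"query": "embedders", "limit": 3},
                }
            }
        ],
    },
    # The search result the next chat() call grounds its answer on:
    {
        "role": "tool",
        "content": "Content: ...\nScore: 0.8123",
        "tool_call_id": "search_tool",  # default id, per tool_call.get("id", "search_tool")
    },
]

for message in messages:
    print(message["role"])  # system, user, assistant, tool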
haiku/rag/qa/openai.py CHANGED
@@ -24,8 +24,6 @@ try:
         async def answer(self, question: str) -> str:
             openai_client = AsyncOpenAI()
 
-            # Define the search tool
-
             messages: list[ChatCompletionMessageParam] = [
                 ChatCompletionSystemMessageParam(
                     role="system", content=self._system_prompt
@@ -33,69 +31,70 @@ try:
                 ChatCompletionUserMessageParam(role="user", content=question),
             ]
 
-            # Initial response with tool calling
-            response = await openai_client.chat.completions.create(
-                model=self._model,
-                messages=messages,
-                tools=self.tools,
-                temperature=0.0,
-            )
-
-            response_message = response.choices[0].message
-
-            if response_message.tool_calls:
-                messages.append(
-                    ChatCompletionAssistantMessageParam(
-                        role="assistant",
-                        content=response_message.content,
-                        tool_calls=[
-                            {
-                                "id": tc.id,
-                                "type": "function",
-                                "function": {
-                                    "name": tc.function.name,
-                                    "arguments": tc.function.arguments,
-                                },
-                            }
-                            for tc in response_message.tool_calls
-                        ],
-                    )
+            max_rounds = 5  # Prevent infinite loops
+
+            for _ in range(max_rounds):
+                response = await openai_client.chat.completions.create(
+                    model=self._model,
+                    messages=messages,
+                    tools=self.tools,
+                    temperature=0.0,
                 )
 
-                for tool_call in response_message.tool_calls:
-                    if tool_call.function.name == "search_documents":
-                        import json
+                response_message = response.choices[0].message
+
+                if response_message.tool_calls:
+                    messages.append(
+                        ChatCompletionAssistantMessageParam(
+                            role="assistant",
+                            content=response_message.content,
+                            tool_calls=[
+                                {
+                                    "id": tc.id,
+                                    "type": "function",
+                                    "function": {
+                                        "name": tc.function.name,
+                                        "arguments": tc.function.arguments,
+                                    },
+                                }
+                                for tc in response_message.tool_calls
+                            ],
+                        )
+                    )
 
-                        args = json.loads(tool_call.function.arguments)
-                        query = args.get("query", question)
-                        limit = int(args.get("limit", 3))
+                    for tool_call in response_message.tool_calls:
+                        if tool_call.function.name == "search_documents":
+                            import json
 
-                        search_results = await self._client.search(query, limit=limit)
+                            args = json.loads(tool_call.function.arguments)
+                            query = args.get("query", question)
+                            limit = int(args.get("limit", 3))
 
-                        context_chunks = []
-                        for chunk, score in search_results:
-                            context_chunks.append(
-                                f"Content: {chunk.content}\nScore: {score:.4f}"
+                            search_results = await self._client.search(
+                                query, limit=limit
                             )
 
-                        context = "\n\n".join(context_chunks)
+                            context_chunks = []
+                            for chunk, score in search_results:
+                                context_chunks.append(
+                                    f"Content: {chunk.content}\nScore: {score:.4f}"
+                                )
+
+                            context = "\n\n".join(context_chunks)
 
-                        messages.append(
-                            ChatCompletionToolMessageParam(
-                                role="tool",
-                                content=context,
-                                tool_call_id=tool_call.id,
+                            messages.append(
+                                ChatCompletionToolMessageParam(
+                                    role="tool",
+                                    content=context,
+                                    tool_call_id=tool_call.id,
+                                )
                             )
-                        )
+                else:
+                    # No tool calls, return the response
+                    return response_message.content or ""
 
-                final_response = await openai_client.chat.completions.create(
-                    model=self._model,
-                    messages=messages,
-                    temperature=0.0,
-                )
-                return final_response.choices[0].message.content or ""
-            else:
-                return response_message.content or ""
+            # If we've exhausted max rounds, return empty string
+            return ""
 
 except ImportError:
     pass
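
One provider-specific wrinkle: OpenAI delivers tool-call arguments as a JSON string (hence the inline import json above), whereas Ollama hands over a dict and Anthropic an already-parsed input. A small sketch of the parse with the same defaults the diff uses; raw_arguments is a made-up value:

import json

# OpenAI tool calls carry their arguments as a JSON-encoded string.
raw_arguments = '{"query": "sqlite vector search", "limit": 3}'

try:
    args = json.loads(raw_arguments)
except json.JSONDecodeError:
    args = {}  # Malformed arguments: fall back to the defaults below.

query = args.get("query", "<the original question>")
limit = int(args.get("limit", 3))  # coerce, matching the diff's int(...)

print(query, limit)  # -> sqlite vector search 3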
haiku/rag/qa/prompts.py CHANGED
@@ -1,7 +1,20 @@
 SYSTEM_PROMPT = """
-You are a helpful assistant that uses a RAG library to answer the user's prompt.
-Your task is to provide a concise and accurate answer based on the provided context.
-You should ask the provided tools to find relevant documents and then use the content of those documents to answer the question.
-Never make up information, always use the context to answer the question.
-If the context does not contain enough information to answer the question, respond with "I cannot answer that based on the provided context."
+You are a knowledgeable assistant that helps users find information from a document knowledge base.
+
+Your process:
+1. When a user asks a question, use the search_documents tool to find relevant information
+2. Search with specific keywords and phrases from the user's question
+3. Review the search results and their relevance scores
+4. If you need additional context, perform follow-up searches with different keywords
+5. Provide a comprehensive answer based only on the retrieved documents
+
+Guidelines:
+- Base your answers strictly on the provided document content
+- Quote or reference specific information when possible
+- If multiple documents contain relevant information, synthesize them coherently
+- Indicate when information is incomplete or when you need to search for additional context
+- If the retrieved documents don't contain sufficient information, clearly state: "I cannot find enough information in the knowledge base to answer this question."
+- For complex questions, consider breaking them down and performing multiple searches
+
+Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
 """
{haiku_rag-0.3.2.dist-info → haiku_rag-0.3.3.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.3.2
+Version: 0.3.3
 Summary: Retrieval Augmented Generation (RAG) with SQLite
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT
@@ -116,3 +116,4 @@ Full documentation at: https://ggozad.github.io/haiku.rag/
 - [Configuration](https://ggozad.github.io/haiku.rag/configuration/) - Environment variables
 - [CLI](https://ggozad.github.io/haiku.rag/cli/) - Command reference
 - [Python API](https://ggozad.github.io/haiku.rag/python/) - Complete API docs
+- [Benchmarks](https://ggozad.github.io/haiku.rag/benchmarks/) - Performance Benchmarks
{haiku_rag-0.3.2.dist-info → haiku_rag-0.3.3.dist-info}/RECORD RENAMED
@@ -15,11 +15,11 @@ haiku/rag/embeddings/ollama.py,sha256=hWdrTiuJwNSRYCqP0WP-z6XXA3RBGkAiknZMsPLH0q
 haiku/rag/embeddings/openai.py,sha256=reh8AykG2f9f5hhRDmqSsjiuCPi9SsXfe2YEZFlxXk8,550
 haiku/rag/embeddings/voyageai.py,sha256=jc0JywdLJD3Ee1MUv1m8MhWCEo0enNnVcrIBtUvD-Ss,534
 haiku/rag/qa/__init__.py,sha256=oso98Ypti7mBLTJ6Zk71YaSJ9Rgc89QXp9RSB6zSpYs,1501
-haiku/rag/qa/anthropic.py,sha256=lzHRQxpEv6Qd6iBIqexUgWnq-ITqytppwkfOuRGWdDs,4556
+haiku/rag/qa/anthropic.py,sha256=6I6cf6ySNkYbmDFdy22sA8r3GO5moiiH75tJnHcgJQA,4448
 haiku/rag/qa/base.py,sha256=4ZTM_l5FAZ9cA0f8NeqRJiUAmjatwCTmSoclFw0gTFQ,1349
-haiku/rag/qa/ollama.py,sha256=poShrse-RgLTwa5gbVzoERNTrn5QRpovJCZKYkIpOZI,2393
-haiku/rag/qa/openai.py,sha256=yBbSjGlG4Lo5p2B2NOTa5C6JceX0OJ1jXar_ABFZYYI,3849
-haiku/rag/qa/prompts.py,sha256=dAz2HjD4eJ8tcW534Tx7EuFOs6pSv2kPr7yrHnHtS0E,535
+haiku/rag/qa/ollama.py,sha256=-UtNFErYlA_66g3WLU6lK38a1Y5zhAL6s_uZ5AP0TFs,2381
+haiku/rag/qa/openai.py,sha256=dF32sGgVt8mZi5oVxByaeECs9NqLjvDiZnnpJBsrHm8,3968
+haiku/rag/qa/prompts.py,sha256=578LJGZJ0LQ_q7ccyj5hLabtHo8Zcfw5-DiLGN9lC-w,1200
 haiku/rag/store/__init__.py,sha256=hq0W0DAC7ysqhWSP2M2uHX8cbG6kbr-sWHxhq6qQcY0,103
 haiku/rag/store/engine.py,sha256=BeYZRZ08zaYeeu375ysnAL3tGz4roA3GzP7WRNwznCo,2603
 haiku/rag/store/models/__init__.py,sha256=s0E72zneGlowvZrFWaNxHYjOAUjgWdLxzdYsnvNRVlY,88
@@ -29,8 +29,8 @@ haiku/rag/store/repositories/__init__.py,sha256=uIBhxjQh-4o3O-ck8b7BQ58qXQTuJdPv
 haiku/rag/store/repositories/base.py,sha256=cm3VyQXhtxvRfk1uJHpA0fDSxMpYN-mjQmRiDiLsQ68,1008
 haiku/rag/store/repositories/chunk.py,sha256=gik7ZPOK3gCoG6tU1pGueAZBPmJxIb7obYFUhwINrYg,16497
 haiku/rag/store/repositories/document.py,sha256=xpWOpjHFbhVwNJ1gpusEKNY6l_Qyibg9y_bdHCwcfpk,7133
-haiku_rag-0.3.2.dist-info/METADATA,sha256=0A8BVZDp38i_xLznvkrZBq3f3OYtWPtqBx_U2eHRIow,3931
-haiku_rag-0.3.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-haiku_rag-0.3.2.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
-haiku_rag-0.3.2.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
-haiku_rag-0.3.2.dist-info/RECORD,,
+haiku_rag-0.3.3.dist-info/METADATA,sha256=nDI-sy2F8h7qr9hK1S7VQLOMRcWYP1clxJYxNVB1AaA,4019
+haiku_rag-0.3.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+haiku_rag-0.3.3.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
+haiku_rag-0.3.3.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
+haiku_rag-0.3.3.dist-info/RECORD,,