haiku.rag 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff shows the content of publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of haiku.rag might be problematic.
- haiku/rag/qa/anthropic.py +57 -63
- haiku/rag/qa/ollama.py +37 -40
- haiku/rag/qa/openai.py +54 -55
- haiku/rag/qa/prompts.py +18 -5
- {haiku_rag-0.3.2.dist-info → haiku_rag-0.3.3.dist-info}/METADATA +2 -1
- {haiku_rag-0.3.2.dist-info → haiku_rag-0.3.3.dist-info}/RECORD +9 -9
- {haiku_rag-0.3.2.dist-info → haiku_rag-0.3.3.dist-info}/WHEEL +0 -0
- {haiku_rag-0.3.2.dist-info → haiku_rag-0.3.3.dist-info}/entry_points.txt +0 -0
- {haiku_rag-0.3.2.dist-info → haiku_rag-0.3.3.dist-info}/licenses/LICENSE +0 -0
haiku/rag/qa/anthropic.py  CHANGED

@@ -37,75 +37,69 @@ try:
 
         messages: list[MessageParam] = [{"role": "user", "content": question}]
 
-… (removed lines 40-84 only partially captured in this view)
-                    "content": context,
-                }
-            )
-
-        if tool_results:
-            messages.append({"role": "user", "content": tool_results})
-
-        final_response = await anthropic_client.messages.create(
-            model=self._model,
-            max_tokens=4096,
-            system=self._system_prompt,
-            messages=messages,
-            temperature=0.0,
-        )
-        if final_response.content:
-            first_content = final_response.content[0]
-            if isinstance(first_content, TextBlock):
-                return first_content.text
-            return ""
-
-        first_content = response.content[0]
-        if isinstance(first_content, TextBlock):
-            return first_content.text
-        return ""
+        max_rounds = 5  # Prevent infinite loops
+
+        for _ in range(max_rounds):
+            response = await anthropic_client.messages.create(
+                model=self._model,
+                max_tokens=4096,
+                system=self._system_prompt,
+                messages=messages,
+                tools=self.tools,
+                temperature=0.0,
+            )
+
+            if response.stop_reason == "tool_use":
+                messages.append({"role": "assistant", "content": response.content})
+
+                # Process tool calls
+                tool_results = []
+                for content_block in response.content:
+                    if isinstance(content_block, ToolUseBlock):
+                        if content_block.name == "search_documents":
+                            args = content_block.input
+                            query = (
+                                args.get("query", question)
+                                if isinstance(args, dict)
+                                else question
+                            )
+                            limit = (
+                                int(args.get("limit", 3))
+                                if isinstance(args, dict)
+                                else 3
+                            )
+
+                            search_results = await self._client.search(
+                                query, limit=limit
+                            )
+
+                            context_chunks = []
+                            for chunk, score in search_results:
+                                context_chunks.append(
+                                    f"Content: {chunk.content}\nScore: {score:.4f}"
+                                )
+
+                            context = "\n\n".join(context_chunks)
+
+                            tool_results.append(
+                                {
+                                    "type": "tool_result",
+                                    "tool_use_id": content_block.id,
+                                    "content": context,
+                                }
+                            )
+
+                if tool_results:
+                    messages.append({"role": "user", "content": tool_results})
+            else:
+                # No tool use, return the response
+                if response.content:
+                    first_content = response.content[0]
+                    if isinstance(first_content, TextBlock):
+                        return first_content.text
+                return ""
+
+        # If we've exhausted max rounds, return empty string
+        return ""
 
 except ImportError:
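All three QA backends move from a single tool-call round to the same bounded agentic loop: call the model with the search tool attached, feed any tool results back, and retry for at most five rounds before giving up. Below is a minimal, self-contained sketch of that loop against the Anthropic Messages API; the model name and the `stub_search` function are illustrative assumptions, not part of haiku.rag.

```python
import asyncio
from anthropic import AsyncAnthropic

async def stub_search(query: str, limit: int = 3) -> list[tuple[str, float]]:
    # Hypothetical stand-in for haiku.rag's client.search();
    # returns (content, score) pairs like the real search does.
    return [(f"Example chunk about {query}", 0.9)][:limit]

TOOLS = [
    {
        "name": "search_documents",
        "description": "Search the document knowledge base.",
        "input_schema": {
            "type": "object",
            "properties": {
                "query": {"type": "string"},
                "limit": {"type": "integer"},
            },
            "required": ["query"],
        },
    }
]

async def answer(question: str, model: str = "claude-3-5-haiku-latest") -> str:
    client = AsyncAnthropic()
    messages = [{"role": "user", "content": question}]
    for _ in range(5):  # max_rounds: prevents infinite tool-call loops
        response = await client.messages.create(
            model=model, max_tokens=4096, messages=messages, tools=TOOLS
        )
        if response.stop_reason != "tool_use":
            # No tool call: return the first text block, if any.
            return next((b.text for b in response.content if b.type == "text"), "")
        # Echo the assistant turn, run each requested search, and send the
        # results back as tool_result blocks in a user turn.
        messages.append({"role": "assistant", "content": response.content})
        results = []
        for block in response.content:
            if block.type == "tool_use" and block.name == "search_documents":
                hits = await stub_search(
                    block.input.get("query", question),
                    int(block.input.get("limit", 3)),
                )
                context = "\n\n".join(f"Content: {c}\nScore: {s:.4f}" for c, s in hits)
                results.append(
                    {"type": "tool_result", "tool_use_id": block.id, "content": context}
                )
        if results:
            messages.append({"role": "user", "content": results})
    return ""  # rounds exhausted

if __name__ == "__main__":
    print(asyncio.run(answer("What does haiku.rag store?")))
```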
haiku/rag/qa/ollama.py  CHANGED

@@ -14,54 +14,51 @@ class QuestionAnswerOllamaAgent(QuestionAnswerAgentBase):
     async def answer(self, question: str) -> str:
         ollama_client = AsyncClient(host=Config.OLLAMA_BASE_URL)
 
-        # Define the search tool
-
         messages = [
             {"role": "system", "content": self._system_prompt},
             {"role": "user", "content": question},
         ]
 
-        response = await ollama_client.chat(
-            model=self._model,
-            messages=messages,
-            tools=self.tools,
-            options=OLLAMA_OPTIONS,
-            think=False,
-        )
-
-… (removed lines 33-54 only partially captured in this view)
-                "tool_call_id": tool_call.get("id", "search_tool"),
-            }
-        )
-
-… (removed lines 59-67 not captured in this view)
+        max_rounds = 5  # Prevent infinite loops
+
+        for _ in range(max_rounds):
+            response = await ollama_client.chat(
+                model=self._model,
+                messages=messages,
+                tools=self.tools,
+                options=OLLAMA_OPTIONS,
+                think=False,
+            )
+
+            if response.get("message", {}).get("tool_calls"):
+                messages.append(response["message"])
+
+                for tool_call in response["message"]["tool_calls"]:
+                    if tool_call["function"]["name"] == "search_documents":
+                        args = tool_call["function"]["arguments"]
+                        query = args.get("query", question)
+                        limit = int(args.get("limit", 3))
+
+                        search_results = await self._client.search(query, limit=limit)
+
+                        context_chunks = []
+                        for chunk, score in search_results:
+                            context_chunks.append(
+                                f"Content: {chunk.content}\nScore: {score:.4f}"
+                            )
+
+                        context = "\n\n".join(context_chunks)
+
+                        messages.append(
+                            {
+                                "role": "tool",
+                                "content": context,
+                                "tool_call_id": tool_call.get("id", "search_tool"),
+                            }
+                        )
+            else:
+                # No tool calls, return the response
+                return response["message"]["content"]
+
+        # If we've exhausted max rounds, return empty string
+        return ""
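The Ollama variant runs the same loop with dict-style access to the chat response; unlike OpenAI, Ollama hands tool arguments over as an already-parsed dict, so no JSON decoding is needed. A rough sketch under those assumptions (the model name and the inline search stub are hypothetical):

```python
import asyncio
from ollama import AsyncClient

TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "search_documents",
            "description": "Search the document knowledge base.",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string"},
                    "limit": {"type": "integer"},
                },
                "required": ["query"],
            },
        },
    }
]

async def answer(question: str, model: str = "llama3.1") -> str:
    client = AsyncClient()
    messages = [{"role": "user", "content": question}]
    for _ in range(5):  # bounded rounds, as in the diff
        response = await client.chat(model=model, messages=messages, tools=TOOLS)
        if not response.get("message", {}).get("tool_calls"):
            # No tool calls, the model answered directly.
            return response["message"]["content"] or ""
        messages.append(response["message"])
        for tool_call in response["message"]["tool_calls"]:
            if tool_call["function"]["name"] == "search_documents":
                args = tool_call["function"]["arguments"]  # already a dict
                # Hypothetical stand-in for haiku.rag's client.search().
                context = f"Content: results for {args.get('query', question)!r}\nScore: 0.9000"
                messages.append({"role": "tool", "content": context})
    return ""  # rounds exhausted

if __name__ == "__main__":
    print(asyncio.run(answer("What does haiku.rag store?")))
```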
haiku/rag/qa/openai.py  CHANGED

@@ -24,8 +24,6 @@ try:
     async def answer(self, question: str) -> str:
         openai_client = AsyncOpenAI()
 
-        # Define the search tool
-
         messages: list[ChatCompletionMessageParam] = [
             ChatCompletionSystemMessageParam(
                 role="system", content=self._system_prompt

@@ -33,69 +31,70 @@ try:
             ChatCompletionUserMessageParam(role="user", content=question),
         ]
 
-… (removed lines 36-43 not captured in this view)
-        response_message = response.choices[0].message
-
-        if response_message.tool_calls:
-            messages.append(
-                ChatCompletionAssistantMessageParam(
-                    role="assistant",
-                    content=response_message.content,
-                    tool_calls=[
-                        {
-                            "id": tc.id,
-                            "type": "function",
-                            "function": {
-                                "name": tc.function.name,
-                                "arguments": tc.function.arguments,
-                            },
-                        }
-                        for tc in response_message.tool_calls
-                    ],
-                )
-            )
-
-… (removed lines 65-87 only partially captured in this view)
-            context_chunks.append(
-                f"Content: {chunk.content}\nScore: {score:.4f}"
-            )
-
-… (removed lines 91-92 not captured in this view)
-                messages=messages,
-                temperature=0.0,
-            )
-            return final_response.choices[0].message.content or ""
-        else:
-            return response_message.content or ""
+        max_rounds = 5  # Prevent infinite loops
+
+        for _ in range(max_rounds):
+            response = await openai_client.chat.completions.create(
+                model=self._model,
+                messages=messages,
+                tools=self.tools,
+                temperature=0.0,
+            )
+
+            response_message = response.choices[0].message
+
+            if response_message.tool_calls:
+                messages.append(
+                    ChatCompletionAssistantMessageParam(
+                        role="assistant",
+                        content=response_message.content,
+                        tool_calls=[
+                            {
+                                "id": tc.id,
+                                "type": "function",
+                                "function": {
+                                    "name": tc.function.name,
+                                    "arguments": tc.function.arguments,
+                                },
+                            }
+                            for tc in response_message.tool_calls
+                        ],
+                    )
+                )
+
+                for tool_call in response_message.tool_calls:
+                    if tool_call.function.name == "search_documents":
+                        import json
+
+                        args = json.loads(tool_call.function.arguments)
+                        query = args.get("query", question)
+                        limit = int(args.get("limit", 3))
+
+                        search_results = await self._client.search(
+                            query, limit=limit
+                        )
+
+                        context_chunks = []
+                        for chunk, score in search_results:
+                            context_chunks.append(
+                                f"Content: {chunk.content}\nScore: {score:.4f}"
+                            )
+
+                        context = "\n\n".join(context_chunks)
+
+                        messages.append(
+                            ChatCompletionToolMessageParam(
+                                role="tool",
+                                content=context,
+                                tool_call_id=tool_call.id,
+                            )
+                        )
+            else:
+                # No tool calls, return the response
+                return response_message.content or ""
+
+        # If we've exhausted max rounds, return empty string
+        return ""
 
 except ImportError:
     pass
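The OpenAI variant differs in one detail the diff highlights: `tool_call.function.arguments` arrives as a JSON string, hence the `json.loads`, and the tool output goes back as a `role="tool"` message keyed by `tool_call_id`. A compact sketch of that round trip (the model name and the inline search stub are illustrative assumptions):

```python
import asyncio
import json
from openai import AsyncOpenAI

TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "search_documents",
            "description": "Search the document knowledge base.",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string"},
                    "limit": {"type": "integer"},
                },
                "required": ["query"],
            },
        },
    }
]

async def answer(question: str, model: str = "gpt-4o-mini") -> str:
    client = AsyncOpenAI()
    messages = [{"role": "user", "content": question}]
    for _ in range(5):  # bounded rounds, as in the diff
        response = await client.chat.completions.create(
            model=model, messages=messages, tools=TOOLS, temperature=0.0
        )
        msg = response.choices[0].message
        if not msg.tool_calls:
            return msg.content or ""
        messages.append(msg)  # the SDK accepts its own message objects back
        for tc in msg.tool_calls:
            if tc.function.name == "search_documents":
                args = json.loads(tc.function.arguments)  # JSON string -> dict
                # Hypothetical stand-in for haiku.rag's client.search().
                context = f"Content: results for {args.get('query', question)!r}\nScore: 0.9000"
                messages.append(
                    {"role": "tool", "content": context, "tool_call_id": tc.id}
                )
    return ""  # rounds exhausted

if __name__ == "__main__":
    print(asyncio.run(answer("What does haiku.rag store?")))
```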
haiku/rag/qa/prompts.py  CHANGED

@@ -1,7 +1,20 @@
 SYSTEM_PROMPT = """
-You are a … (rest of the old prompt not captured in this view)
+You are a knowledgeable assistant that helps users find information from a document knowledge base.
+
+Your process:
+1. When a user asks a question, use the search_documents tool to find relevant information
+2. Search with specific keywords and phrases from the user's question
+3. Review the search results and their relevance scores
+4. If you need additional context, perform follow-up searches with different keywords
+5. Provide a comprehensive answer based only on the retrieved documents
+
+Guidelines:
+- Base your answers strictly on the provided document content
+- Quote or reference specific information when possible
+- If multiple documents contain relevant information, synthesize them coherently
+- Indicate when information is incomplete or when you need to search for additional context
+- If the retrieved documents don't contain sufficient information, clearly state: "I cannot find enough information in the knowledge base to answer this question."
+- For complex questions, consider breaking them down and performing multiple searches
+
+Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
 """
{haiku_rag-0.3.2.dist-info → haiku_rag-0.3.3.dist-info}/METADATA  CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.3.2
+Version: 0.3.3
 Summary: Retrieval Augmented Generation (RAG) with SQLite
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT

@@ -116,3 +116,4 @@ Full documentation at: https://ggozad.github.io/haiku.rag/
 - [Configuration](https://ggozad.github.io/haiku.rag/configuration/) - Environment variables
 - [CLI](https://ggozad.github.io/haiku.rag/cli/) - Command reference
 - [Python API](https://ggozad.github.io/haiku.rag/python/) - Complete API docs
+- [Benchmarks](https://ggozad.github.io/haiku.rag/benchmarks/) - Performance Benchmarks
{haiku_rag-0.3.2.dist-info → haiku_rag-0.3.3.dist-info}/RECORD  CHANGED

@@ -15,11 +15,11 @@ haiku/rag/embeddings/ollama.py,sha256=hWdrTiuJwNSRYCqP0WP-z6XXA3RBGkAiknZMsPLH0q
 haiku/rag/embeddings/openai.py,sha256=reh8AykG2f9f5hhRDmqSsjiuCPi9SsXfe2YEZFlxXk8,550
 haiku/rag/embeddings/voyageai.py,sha256=jc0JywdLJD3Ee1MUv1m8MhWCEo0enNnVcrIBtUvD-Ss,534
 haiku/rag/qa/__init__.py,sha256=oso98Ypti7mBLTJ6Zk71YaSJ9Rgc89QXp9RSB6zSpYs,1501
-haiku/rag/qa/anthropic.py,sha256=…
+haiku/rag/qa/anthropic.py,sha256=6I6cf6ySNkYbmDFdy22sA8r3GO5moiiH75tJnHcgJQA,4448
 haiku/rag/qa/base.py,sha256=4ZTM_l5FAZ9cA0f8NeqRJiUAmjatwCTmSoclFw0gTFQ,1349
-haiku/rag/qa/ollama.py,sha256=…
-haiku/rag/qa/openai.py,sha256=…
-haiku/rag/qa/prompts.py,sha256=…
+haiku/rag/qa/ollama.py,sha256=-UtNFErYlA_66g3WLU6lK38a1Y5zhAL6s_uZ5AP0TFs,2381
+haiku/rag/qa/openai.py,sha256=dF32sGgVt8mZi5oVxByaeECs9NqLjvDiZnnpJBsrHm8,3968
+haiku/rag/qa/prompts.py,sha256=578LJGZJ0LQ_q7ccyj5hLabtHo8Zcfw5-DiLGN9lC-w,1200
 haiku/rag/store/__init__.py,sha256=hq0W0DAC7ysqhWSP2M2uHX8cbG6kbr-sWHxhq6qQcY0,103
 haiku/rag/store/engine.py,sha256=BeYZRZ08zaYeeu375ysnAL3tGz4roA3GzP7WRNwznCo,2603
 haiku/rag/store/models/__init__.py,sha256=s0E72zneGlowvZrFWaNxHYjOAUjgWdLxzdYsnvNRVlY,88

@@ -29,8 +29,8 @@ haiku/rag/store/repositories/__init__.py,sha256=uIBhxjQh-4o3O-ck8b7BQ58qXQTuJdPv
 haiku/rag/store/repositories/base.py,sha256=cm3VyQXhtxvRfk1uJHpA0fDSxMpYN-mjQmRiDiLsQ68,1008
 haiku/rag/store/repositories/chunk.py,sha256=gik7ZPOK3gCoG6tU1pGueAZBPmJxIb7obYFUhwINrYg,16497
 haiku/rag/store/repositories/document.py,sha256=xpWOpjHFbhVwNJ1gpusEKNY6l_Qyibg9y_bdHCwcfpk,7133
-haiku_rag-0.3.2.dist-info/… (five removed entries truncated in this view)
+haiku_rag-0.3.3.dist-info/METADATA,sha256=nDI-sy2F8h7qr9hK1S7VQLOMRcWYP1clxJYxNVB1AaA,4019
+haiku_rag-0.3.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+haiku_rag-0.3.3.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
+haiku_rag-0.3.3.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
+haiku_rag-0.3.3.dist-info/RECORD,,
Files without changes: WHEEL, entry_points.txt, licenses/LICENSE