haiku.rag 0.9.2__py3-none-any.whl → 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- README.md +205 -0
- haiku_rag-0.14.0.dist-info/METADATA +227 -0
- haiku_rag-0.14.0.dist-info/RECORD +6 -0
- haiku/rag/__init__.py +0 -0
- haiku/rag/app.py +0 -267
- haiku/rag/chunker.py +0 -51
- haiku/rag/cli.py +0 -359
- haiku/rag/client.py +0 -565
- haiku/rag/config.py +0 -77
- haiku/rag/embeddings/__init__.py +0 -35
- haiku/rag/embeddings/base.py +0 -15
- haiku/rag/embeddings/ollama.py +0 -17
- haiku/rag/embeddings/openai.py +0 -16
- haiku/rag/embeddings/vllm.py +0 -19
- haiku/rag/embeddings/voyageai.py +0 -17
- haiku/rag/logging.py +0 -56
- haiku/rag/mcp.py +0 -144
- haiku/rag/migration.py +0 -316
- haiku/rag/monitor.py +0 -73
- haiku/rag/qa/__init__.py +0 -15
- haiku/rag/qa/agent.py +0 -89
- haiku/rag/qa/prompts.py +0 -60
- haiku/rag/reader.py +0 -115
- haiku/rag/reranking/__init__.py +0 -34
- haiku/rag/reranking/base.py +0 -13
- haiku/rag/reranking/cohere.py +0 -34
- haiku/rag/reranking/mxbai.py +0 -28
- haiku/rag/reranking/vllm.py +0 -44
- haiku/rag/research/__init__.py +0 -37
- haiku/rag/research/base.py +0 -130
- haiku/rag/research/dependencies.py +0 -45
- haiku/rag/research/evaluation_agent.py +0 -42
- haiku/rag/research/orchestrator.py +0 -300
- haiku/rag/research/presearch_agent.py +0 -34
- haiku/rag/research/prompts.py +0 -129
- haiku/rag/research/search_agent.py +0 -65
- haiku/rag/research/synthesis_agent.py +0 -40
- haiku/rag/store/__init__.py +0 -4
- haiku/rag/store/engine.py +0 -230
- haiku/rag/store/models/__init__.py +0 -4
- haiku/rag/store/models/chunk.py +0 -15
- haiku/rag/store/models/document.py +0 -16
- haiku/rag/store/repositories/__init__.py +0 -9
- haiku/rag/store/repositories/chunk.py +0 -399
- haiku/rag/store/repositories/document.py +0 -234
- haiku/rag/store/repositories/settings.py +0 -148
- haiku/rag/store/upgrades/__init__.py +0 -1
- haiku/rag/utils.py +0 -162
- haiku_rag-0.9.2.dist-info/METADATA +0 -131
- haiku_rag-0.9.2.dist-info/RECORD +0 -50
- {haiku_rag-0.9.2.dist-info → haiku_rag-0.14.0.dist-info}/WHEEL +0 -0
- {haiku_rag-0.9.2.dist-info → haiku_rag-0.14.0.dist-info}/entry_points.txt +0 -0
- {haiku_rag-0.9.2.dist-info → haiku_rag-0.14.0.dist-info}/licenses/LICENSE +0 -0
haiku/rag/qa/agent.py
DELETED
@@ -1,89 +0,0 @@
-from pydantic import BaseModel, Field
-from pydantic_ai import Agent, RunContext
-from pydantic_ai.models.openai import OpenAIChatModel
-from pydantic_ai.providers.ollama import OllamaProvider
-from pydantic_ai.providers.openai import OpenAIProvider
-
-from haiku.rag.client import HaikuRAG
-from haiku.rag.config import Config
-from haiku.rag.qa.prompts import QA_SYSTEM_PROMPT, QA_SYSTEM_PROMPT_WITH_CITATIONS
-
-
-class SearchResult(BaseModel):
-    content: str = Field(description="The document text content")
-    score: float = Field(description="Relevance score (higher is more relevant)")
-    document_uri: str = Field(description="Source URI/path of the document")
-
-
-class Dependencies(BaseModel):
-    model_config = {"arbitrary_types_allowed": True}
-    client: HaikuRAG
-
-
-class QuestionAnswerAgent:
-    def __init__(
-        self,
-        client: HaikuRAG,
-        provider: str,
-        model: str,
-        use_citations: bool = False,
-        q: float = 0.0,
-    ):
-        self._client = client
-
-        system_prompt = (
-            QA_SYSTEM_PROMPT_WITH_CITATIONS if use_citations else QA_SYSTEM_PROMPT
-        )
-        model_obj = self._get_model(provider, model)
-
-        self._agent = Agent(
-            model=model_obj,
-            deps_type=Dependencies,
-            system_prompt=system_prompt,
-        )
-
-        @self._agent.tool
-        async def search_documents(
-            ctx: RunContext[Dependencies],
-            query: str,
-            limit: int = 3,
-        ) -> list[SearchResult]:
-            """Search the knowledge base for relevant documents."""
-
-            # Remove quotes from queries as this requires positional indexing in lancedb
-            query = query.replace('"', "")
-            search_results = await ctx.deps.client.search(query, limit=limit)
-            expanded_results = await ctx.deps.client.expand_context(search_results)
-
-            return [
-                SearchResult(
-                    content=chunk.content,
-                    score=score,
-                    document_uri=chunk.document_uri or "",
-                )
-                for chunk, score in expanded_results
-            ]
-
-    def _get_model(self, provider: str, model: str):
-        """Get the appropriate model object for the provider."""
-        if provider == "ollama":
-            return OpenAIChatModel(
-                model_name=model,
-                provider=OllamaProvider(base_url=f"{Config.OLLAMA_BASE_URL}/v1"),
-            )
-        elif provider == "vllm":
-            return OpenAIChatModel(
-                model_name=model,
-                provider=OpenAIProvider(
-                    base_url=f"{Config.VLLM_QA_BASE_URL}/v1", api_key="none"
-                ),
-            )
-        else:
-            # For all other providers, use the provider:model format
-            return f"{provider}:{model}"
-
-    async def answer(self, question: str) -> str:
-        """Answer a question using the RAG system."""
-        deps = Dependencies(client=self._client)
-        result = await self._agent.run(question, deps=deps)
-        return result.output
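For reference, a minimal usage sketch of the removed `QuestionAnswerAgent`. The `HaikuRAG("docs.lancedb")` constructor call and the model name are assumptions (client.py is deleted in this diff and its signature is not shown in this hunk); only the agent's own signature comes from the code above.

```python
import asyncio

from haiku.rag.client import HaikuRAG
from haiku.rag.qa.agent import QuestionAnswerAgent


async def main() -> None:
    # Assumed constructor call; see the deleted client.py for the real signature.
    client = HaikuRAG("docs.lancedb")
    agent = QuestionAnswerAgent(
        client, provider="ollama", model="qwen3", use_citations=True
    )
    # answer() builds Dependencies and runs the agent, which calls the
    # search_documents tool before responding.
    print(await agent.answer("What does AFMAN stand for?"))


asyncio.run(main())
```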
haiku/rag/qa/prompts.py
DELETED
@@ -1,60 +0,0 @@
-QA_SYSTEM_PROMPT = """
-You are a knowledgeable assistant that helps users find information from a document knowledge base.
-
-Your process:
-1. When a user asks a question, use the search_documents tool to find relevant information
-2. Search with specific keywords and phrases from the user's question
-3. Review the search results and their relevance scores
-4. If you need additional context, perform follow-up searches with different keywords
-5. Provide a short, to-the-point, comprehensive answer based only on the retrieved documents
-
-Guidelines:
-- Base your answers strictly on the provided document content
-- Quote or reference specific information when possible
-- If multiple documents contain relevant information, synthesize them coherently
-- Indicate when information is incomplete or when you need to search for additional context
-- If the retrieved documents don't contain sufficient information, clearly state: "I cannot find enough information in the knowledge base to answer this question."
-- For complex questions, consider breaking them down and performing multiple searches
-- Stick to the answer; do not elaborate or provide context unless explicitly asked for it.
-
-Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
-/no_think
-"""
-
-QA_SYSTEM_PROMPT_WITH_CITATIONS = """
-You are a knowledgeable assistant that helps users find information from a document knowledge base.
-
-IMPORTANT: You MUST use the search_documents tool for every question. Do not answer any question without first searching the knowledge base.
-
-Your process:
-1. IMMEDIATELY call the search_documents tool with relevant keywords from the user's question
-2. Review the search results and their relevance scores
-3. If you need additional context, perform follow-up searches with different keywords
-4. Provide a short, to-the-point, comprehensive answer based only on the retrieved documents
-5. Always include citations for the sources used in your answer
-
-Guidelines:
-- Base your answers strictly on the provided document content
-- If multiple documents contain relevant information, synthesize them coherently
-- Indicate when information is incomplete or when you need to search for additional context
-- If the retrieved documents don't contain sufficient information, clearly state: "I cannot find enough information in the knowledge base to answer this question."
-- For complex questions, consider breaking them down and performing multiple searches
-- Stick to the answer; do not elaborate or provide context unless explicitly asked for it.
-- ALWAYS include citations at the end of your response using the format below
-
-Citation Format:
-After your answer, include a "Citations:" section that lists:
-- The document URI from each search result used
-- A brief excerpt (first 50-100 characters) of the content that supported your answer
-- Format: "Citations:\n- [document_uri]: [content_excerpt]..."
-
-Example response format:
-[Your answer here]
-
-Citations:
-- /path/to/document1.pdf: "This document explains that AFMAN stands for Air Force Manual..."
-- /path/to/document2.pdf: "The manual provides guidance on military procedures and..."
-
-Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
-/no_think
-"""
haiku/rag/reader.py
DELETED
@@ -1,115 +0,0 @@
-from pathlib import Path
-from typing import ClassVar
-
-from docling.document_converter import DocumentConverter
-from docling_core.types.doc.document import DoclingDocument
-
-from haiku.rag.utils import text_to_docling_document
-
-
-class FileReader:
-    # Extensions supported by docling
-    docling_extensions: ClassVar[list[str]] = [
-        ".asciidoc",
-        ".bmp",
-        ".csv",
-        ".docx",
-        ".html",
-        ".xhtml",
-        ".jpeg",
-        ".jpg",
-        ".md",
-        ".pdf",
-        ".png",
-        ".pptx",
-        ".tiff",
-        ".xlsx",
-        ".xml",
-        ".webp",
-    ]
-
-    # Plain text extensions that we'll read directly
-    text_extensions: ClassVar[list[str]] = [
-        ".astro",
-        ".c",
-        ".cpp",
-        ".css",
-        ".go",
-        ".h",
-        ".hpp",
-        ".java",
-        ".js",
-        ".json",
-        ".kt",
-        ".mdx",
-        ".mjs",
-        ".php",
-        ".py",
-        ".rb",
-        ".rs",
-        ".svelte",
-        ".swift",
-        ".ts",
-        ".tsx",
-        ".txt",
-        ".vue",
-        ".yaml",
-        ".yml",
-    ]
-
-    # Code file extensions with their markdown language identifiers for syntax highlighting
-    code_markdown_identifier: ClassVar[dict[str, str]] = {
-        ".astro": "astro",
-        ".c": "c",
-        ".cpp": "cpp",
-        ".css": "css",
-        ".go": "go",
-        ".h": "c",
-        ".hpp": "cpp",
-        ".java": "java",
-        ".js": "javascript",
-        ".json": "json",
-        ".kt": "kotlin",
-        ".mjs": "javascript",
-        ".php": "php",
-        ".py": "python",
-        ".rb": "ruby",
-        ".rs": "rust",
-        ".svelte": "svelte",
-        ".swift": "swift",
-        ".ts": "typescript",
-        ".tsx": "tsx",
-        ".vue": "vue",
-        ".yaml": "yaml",
-        ".yml": "yaml",
-    }
-
-    extensions: ClassVar[list[str]] = docling_extensions + text_extensions
-
-    @staticmethod
-    def parse_file(path: Path) -> DoclingDocument:
-        try:
-            file_extension = path.suffix.lower()
-
-            if file_extension in FileReader.docling_extensions:
-                # Use docling for complex document formats
-                converter = DocumentConverter()
-                result = converter.convert(path)
-                return result.document
-            elif file_extension in FileReader.text_extensions:
-                # Read plain text files directly
-                content = path.read_text(encoding="utf-8")
-
-                # Wrap code files (but not plain txt) in markdown code blocks for better presentation
-                if file_extension in FileReader.code_markdown_identifier:
-                    language = FileReader.code_markdown_identifier[file_extension]
-                    content = f"```{language}\n{content}\n```"
-
-                # Convert text to DoclingDocument by wrapping as markdown
-                return text_to_docling_document(content, name=f"{path.stem}.md")
-            else:
-                # Fallback: try to read as text and convert to DoclingDocument
-                content = path.read_text(encoding="utf-8")
-                return text_to_docling_document(content, name=f"{path.stem}.md")
-        except Exception:
-            raise ValueError(f"Failed to parse file: {path}")
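A sketch of how the removed reader was typically invoked. The file paths are placeholders, and `export_to_markdown()` is the docling-core `DoclingDocument` serialization method, not part of this diff:

```python
from pathlib import Path

from haiku.rag.reader import FileReader

# Complex formats are routed through docling; code and plain text are
# wrapped as markdown before conversion.
doc = FileReader.parse_file(Path("report.pdf"))   # placeholder path
code = FileReader.parse_file(Path("app.py"))      # fenced as a python code block
print(code.export_to_markdown())
```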
haiku/rag/reranking/__init__.py
DELETED
@@ -1,34 +0,0 @@
-from haiku.rag.config import Config
-from haiku.rag.reranking.base import RerankerBase
-
-_reranker: RerankerBase | None = None
-
-
-def get_reranker() -> RerankerBase | None:
-    """
-    Factory function to get the appropriate reranker based on the configuration.
-    Returns None if reranking is disabled.
-    """
-    global _reranker
-    if _reranker is not None:
-        return _reranker
-
-    if Config.RERANK_PROVIDER == "mxbai":
-        try:
-            from haiku.rag.reranking.mxbai import MxBAIReranker
-
-            _reranker = MxBAIReranker()
-            return _reranker
-        except ImportError:
-            return None
-
-    if Config.RERANK_PROVIDER == "cohere":
-        try:
-            from haiku.rag.reranking.cohere import CohereReranker
-
-            _reranker = CohereReranker()
-            return _reranker
-        except ImportError:
-            return None
-
-    return None
haiku/rag/reranking/base.py
DELETED
@@ -1,13 +0,0 @@
-from haiku.rag.config import Config
-from haiku.rag.store.models.chunk import Chunk
-
-
-class RerankerBase:
-    _model: str = Config.RERANK_MODEL
-
-    async def rerank(
-        self, query: str, chunks: list[Chunk], top_n: int = 10
-    ) -> list[tuple[Chunk, float]]:
-        raise NotImplementedError(
-            "Reranker is an abstract class. Please implement the rerank method in a subclass."
-        )
haiku/rag/reranking/cohere.py
DELETED
@@ -1,34 +0,0 @@
-from haiku.rag.config import Config
-from haiku.rag.reranking.base import RerankerBase
-from haiku.rag.store.models.chunk import Chunk
-
-try:
-    import cohere
-except ImportError as e:
-    raise ImportError(
-        "cohere is not installed. Please install it with `pip install cohere` or use the cohere optional dependency."
-    ) from e
-
-
-class CohereReranker(RerankerBase):
-    def __init__(self):
-        self._client = cohere.ClientV2(api_key=Config.COHERE_API_KEY)
-
-    async def rerank(
-        self, query: str, chunks: list[Chunk], top_n: int = 10
-    ) -> list[tuple[Chunk, float]]:
-        if not chunks:
-            return []
-
-        documents = [chunk.content for chunk in chunks]
-
-        response = self._client.rerank(
-            model=self._model, query=query, documents=documents, top_n=top_n
-        )
-
-        reranked_chunks = []
-        for result in response.results:
-            original_chunk = chunks[result.index]
-            reranked_chunks.append((original_chunk, result.relevance_score))
-
-        return reranked_chunks
haiku/rag/reranking/mxbai.py
DELETED
@@ -1,28 +0,0 @@
-from mxbai_rerank import MxbaiRerankV2
-
-from haiku.rag.config import Config
-from haiku.rag.reranking.base import RerankerBase
-from haiku.rag.store.models.chunk import Chunk
-
-
-class MxBAIReranker(RerankerBase):
-    def __init__(self):
-        self._client = MxbaiRerankV2(
-            Config.RERANK_MODEL, disable_transformers_warnings=True
-        )
-
-    async def rerank(
-        self, query: str, chunks: list[Chunk], top_n: int = 10
-    ) -> list[tuple[Chunk, float]]:
-        if not chunks:
-            return []
-
-        documents = [chunk.content for chunk in chunks]
-
-        results = self._client.rank(query=query, documents=documents, top_k=top_n)
-        reranked_chunks = []
-        for result in results:
-            original_chunk = chunks[result.index]
-            reranked_chunks.append((original_chunk, result.score))
-
-        return reranked_chunks
haiku/rag/reranking/vllm.py
DELETED
@@ -1,44 +0,0 @@
-import httpx
-
-from haiku.rag.config import Config
-from haiku.rag.reranking.base import RerankerBase
-from haiku.rag.store.models.chunk import Chunk
-
-
-class VLLMReranker(RerankerBase):
-    def __init__(self, model: str):
-        self._model = model
-        self._base_url = Config.VLLM_RERANK_BASE_URL
-
-    async def rerank(
-        self, query: str, chunks: list[Chunk], top_n: int = 10
-    ) -> list[tuple[Chunk, float]]:
-        if not chunks:
-            return []
-
-        # Prepare documents for reranking
-        documents = [chunk.content for chunk in chunks]
-
-        async with httpx.AsyncClient() as client:
-            response = await client.post(
-                f"{self._base_url}/v1/rerank",
-                json={"model": self._model, "query": query, "documents": documents},
-                headers={
-                    "accept": "application/json",
-                    "Content-Type": "application/json",
-                },
-            )
-            response.raise_for_status()
-
-        result = response.json()
-
-        # Extract scores and pair with chunks
-        scored_chunks = []
-        for item in result.get("results", []):
-            index = item["index"]
-            score = item["relevance_score"]
-            scored_chunks.append((chunks[index], score))
-
-        # Sort by score (descending) and return top_n
-        scored_chunks.sort(key=lambda x: x[1], reverse=True)
-        return scored_chunks[:top_n]
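All three implementations honor the `RerankerBase.rerank` contract, so they are interchangeable behind the factory. A hedged sketch against a local vLLM server; the model name is an assumption:

```python
from haiku.rag.reranking.vllm import VLLMReranker
from haiku.rag.store.models.chunk import Chunk


async def demo(chunks: list[Chunk]) -> None:
    # Assumed model name; any model served behind vLLM's /v1/rerank
    # endpoint at Config.VLLM_RERANK_BASE_URL would do.
    reranker = VLLMReranker(model="BAAI/bge-reranker-v2-m3")
    for chunk, score in await reranker.rerank("vector search", chunks, top_n=3):
        print(f"{score:.3f}  {chunk.content[:60]}")
```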
haiku/rag/research/__init__.py
DELETED
@@ -1,37 +0,0 @@
-"""Multi-agent research workflow for advanced RAG queries."""
-
-from haiku.rag.research.base import (
-    BaseResearchAgent,
-    ResearchOutput,
-    SearchAnswer,
-    SearchResult,
-)
-from haiku.rag.research.dependencies import ResearchContext, ResearchDependencies
-from haiku.rag.research.evaluation_agent import (
-    AnalysisEvaluationAgent,
-    EvaluationResult,
-)
-from haiku.rag.research.orchestrator import ResearchOrchestrator, ResearchPlan
-from haiku.rag.research.presearch_agent import PresearchSurveyAgent
-from haiku.rag.research.search_agent import SearchSpecialistAgent
-from haiku.rag.research.synthesis_agent import ResearchReport, SynthesisAgent
-
-__all__ = [
-    # Base classes
-    "BaseResearchAgent",
-    "ResearchDependencies",
-    "ResearchContext",
-    "SearchResult",
-    "ResearchOutput",
-    # Specialized agents
-    "SearchAnswer",
-    "SearchSpecialistAgent",
-    "PresearchSurveyAgent",
-    "AnalysisEvaluationAgent",
-    "EvaluationResult",
-    "SynthesisAgent",
-    "ResearchReport",
-    # Orchestrator
-    "ResearchOrchestrator",
-    "ResearchPlan",
-]
haiku/rag/research/base.py
DELETED
@@ -1,130 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any
-
-from pydantic import BaseModel, Field
-from pydantic_ai import Agent
-from pydantic_ai.models.openai import OpenAIChatModel
-from pydantic_ai.output import ToolOutput
-from pydantic_ai.providers.ollama import OllamaProvider
-from pydantic_ai.providers.openai import OpenAIProvider
-from pydantic_ai.run import AgentRunResult
-
-from haiku.rag.config import Config
-
-if TYPE_CHECKING:
-    from haiku.rag.research.dependencies import ResearchDependencies
-
-
-class BaseResearchAgent[T](ABC):
-    """Base class for all research agents."""
-
-    def __init__(
-        self,
-        provider: str,
-        model: str,
-        output_type: type[T],
-    ):
-        self.provider = provider
-        self.model = model
-        self.output_type = output_type
-
-        model_obj = self._get_model(provider, model)
-
-        # Import deps type lazily to avoid circular import during module load
-        from haiku.rag.research.dependencies import ResearchDependencies
-
-        # If the agent is expected to return plain text, pass `str` directly.
-        # Otherwise, wrap the model with ToolOutput for robust tool-handling retries.
-        agent_output_type: Any
-        if self.output_type is str:  # plain text output
-            agent_output_type = str
-        else:
-            agent_output_type = ToolOutput(self.output_type, max_retries=3)
-
-        self._agent = Agent(
-            model=model_obj,
-            deps_type=ResearchDependencies,
-            output_type=agent_output_type,
-            system_prompt=self.get_system_prompt(),
-        )
-
-        # Register tools
-        self.register_tools()
-
-    def _get_model(self, provider: str, model: str):
-        """Get the appropriate model object for the provider."""
-        if provider == "ollama":
-            return OpenAIChatModel(
-                model_name=model,
-                provider=OllamaProvider(base_url=f"{Config.OLLAMA_BASE_URL}/v1"),
-            )
-        elif provider == "vllm":
-            return OpenAIChatModel(
-                model_name=model,
-                provider=OpenAIProvider(
-                    base_url=f"{Config.VLLM_RESEARCH_BASE_URL or Config.VLLM_QA_BASE_URL}/v1",
-                    api_key="none",
-                ),
-            )
-        else:
-            # For all other providers, use the provider:model format
-            return f"{provider}:{model}"
-
-    @abstractmethod
-    def get_system_prompt(self) -> str:
-        """Return the system prompt for this agent."""
-        pass
-
-    @abstractmethod
-    def register_tools(self) -> None:
-        """Register agent-specific tools."""
-        pass
-
-    async def run(
-        self, prompt: str, deps: "ResearchDependencies", **kwargs
-    ) -> AgentRunResult[T]:
-        """Execute the agent."""
-        return await self._agent.run(prompt, deps=deps, **kwargs)
-
-    @property
-    def agent(self) -> Agent[Any, T]:
-        """Access the underlying Pydantic AI agent."""
-        return self._agent
-
-
-class SearchResult(BaseModel):
-    """Standard search result format."""
-
-    content: str
-    score: float
-    document_uri: str
-    metadata: dict[str, Any] = Field(default_factory=dict)
-
-
-class ResearchOutput(BaseModel):
-    """Standard research output format."""
-
-    summary: str
-    detailed_findings: list[str]
-    sources: list[str]
-    confidence: float
-
-
-class SearchAnswer(BaseModel):
-    """Structured output for the SearchSpecialist agent."""
-
-    query: str = Field(description="The search query that was performed")
-    answer: str = Field(description="The answer generated based on the context")
-    context: list[str] = Field(
-        description=(
-            "Only the minimal set of relevant snippets (verbatim) that directly "
-            "support the answer"
-        )
-    )
-    sources: list[str] = Field(
-        description=(
-            "Document URIs corresponding to the snippets actually used in the"
-            " answer (one URI per snippet; omit if none)"
-        ),
-        default_factory=list,
-    )
haiku/rag/research/dependencies.py
DELETED
@@ -1,45 +0,0 @@
-from pydantic import BaseModel, Field
-
-from haiku.rag.client import HaikuRAG
-from haiku.rag.research.base import SearchAnswer
-
-
-class ResearchContext(BaseModel):
-    """Context shared across research agents."""
-
-    original_question: str = Field(description="The original research question")
-    sub_questions: list[str] = Field(
-        default_factory=list, description="Decomposed sub-questions"
-    )
-    qa_responses: list["SearchAnswer"] = Field(
-        default_factory=list, description="Structured QA pairs used during research"
-    )
-    insights: list[str] = Field(
-        default_factory=list, description="Key insights discovered"
-    )
-    gaps: list[str] = Field(
-        default_factory=list, description="Identified information gaps"
-    )
-
-    def add_qa_response(self, qa: "SearchAnswer") -> None:
-        """Add a structured QA response (minimal context already included)."""
-        self.qa_responses.append(qa)
-
-    def add_insight(self, insight: str) -> None:
-        """Add a key insight."""
-        if insight not in self.insights:
-            self.insights.append(insight)
-
-    def add_gap(self, gap: str) -> None:
-        """Identify an information gap."""
-        if gap not in self.gaps:
-            self.gaps.append(gap)
-
-
-class ResearchDependencies(BaseModel):
-    """Dependencies for research agents with multi-agent context."""
-
-    model_config = {"arbitrary_types_allowed": True}
-
-    client: HaikuRAG = Field(description="RAG client for document operations")
-    context: ResearchContext = Field(description="Shared research context")
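A small sketch of how this shared state was presumably assembled before a research run. The `HaikuRAG("docs.lancedb")` call is an assumption, since client.py's interface is not shown in this hunk:

```python
from haiku.rag.client import HaikuRAG
from haiku.rag.research.dependencies import ResearchContext, ResearchDependencies

context = ResearchContext(original_question="How are documents chunked?")
context.add_insight("Chunks carry a document URI")  # deduplicated on insert
context.add_gap("No data on chunk overlap yet")

deps = ResearchDependencies(
    client=HaikuRAG("docs.lancedb"),  # assumed constructor call
    context=context,
)
```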
haiku/rag/research/evaluation_agent.py
DELETED
@@ -1,42 +0,0 @@
-from pydantic import BaseModel, Field
-
-from haiku.rag.research.base import BaseResearchAgent
-from haiku.rag.research.prompts import EVALUATION_AGENT_PROMPT
-
-
-class EvaluationResult(BaseModel):
-    """Result of analysis and evaluation."""
-
-    key_insights: list[str] = Field(
-        description="Main insights extracted from the research so far"
-    )
-    new_questions: list[str] = Field(
-        description="New sub-questions to add to the research (max 3)",
-        max_length=3,
-        default=[],
-    )
-    confidence_score: float = Field(
-        description="Confidence level in the completeness of research (0-1)",
-        ge=0.0,
-        le=1.0,
-    )
-    is_sufficient: bool = Field(
-        description="Whether the research is sufficient to answer the original question"
-    )
-    reasoning: str = Field(
-        description="Explanation of why the research is or isn't complete"
-    )
-
-
-class AnalysisEvaluationAgent(BaseResearchAgent[EvaluationResult]):
-    """Agent that analyzes findings and evaluates research completeness."""
-
-    def __init__(self, provider: str, model: str) -> None:
-        super().__init__(provider, model, output_type=EvaluationResult)
-
-    def get_system_prompt(self) -> str:
-        return EVALUATION_AGENT_PROMPT
-
-    def register_tools(self) -> None:
-        """No additional tools needed - uses LLM capabilities directly."""
-        pass
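Finally, a hedged sketch of driving the evaluation step with dependencies like those built above. The provider/model pair, the run prompt, and the 0.7 threshold are assumptions for illustration:

```python
from haiku.rag.research.dependencies import ResearchDependencies
from haiku.rag.research.evaluation_agent import AnalysisEvaluationAgent


async def is_research_done(deps: ResearchDependencies) -> bool:
    # Assumed provider/model; any pydantic-ai "provider:model" pair works here.
    agent = AnalysisEvaluationAgent(provider="openai", model="gpt-4o-mini")
    result = await agent.run(
        "Evaluate whether the collected findings answer the original question.",
        deps=deps,
    )
    evaluation = result.output  # EvaluationResult, validated via ToolOutput
    return evaluation.is_sufficient and evaluation.confidence_score > 0.7
```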