haiku.rag 0.10.2__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. README.md +205 -0
  2. {haiku_rag-0.10.2.dist-info → haiku_rag-0.14.0.dist-info}/METADATA +100 -41
  3. haiku_rag-0.14.0.dist-info/RECORD +6 -0
  4. haiku/rag/__init__.py +0 -0
  5. haiku/rag/app.py +0 -437
  6. haiku/rag/chunker.py +0 -51
  7. haiku/rag/cli.py +0 -466
  8. haiku/rag/client.py +0 -605
  9. haiku/rag/config.py +0 -81
  10. haiku/rag/embeddings/__init__.py +0 -35
  11. haiku/rag/embeddings/base.py +0 -15
  12. haiku/rag/embeddings/ollama.py +0 -17
  13. haiku/rag/embeddings/openai.py +0 -16
  14. haiku/rag/embeddings/vllm.py +0 -19
  15. haiku/rag/embeddings/voyageai.py +0 -17
  16. haiku/rag/logging.py +0 -56
  17. haiku/rag/mcp.py +0 -156
  18. haiku/rag/migration.py +0 -316
  19. haiku/rag/monitor.py +0 -73
  20. haiku/rag/qa/__init__.py +0 -15
  21. haiku/rag/qa/agent.py +0 -91
  22. haiku/rag/qa/prompts.py +0 -60
  23. haiku/rag/reader.py +0 -115
  24. haiku/rag/reranking/__init__.py +0 -34
  25. haiku/rag/reranking/base.py +0 -13
  26. haiku/rag/reranking/cohere.py +0 -34
  27. haiku/rag/reranking/mxbai.py +0 -28
  28. haiku/rag/reranking/vllm.py +0 -44
  29. haiku/rag/research/__init__.py +0 -20
  30. haiku/rag/research/common.py +0 -53
  31. haiku/rag/research/dependencies.py +0 -47
  32. haiku/rag/research/graph.py +0 -29
  33. haiku/rag/research/models.py +0 -70
  34. haiku/rag/research/nodes/evaluate.py +0 -80
  35. haiku/rag/research/nodes/plan.py +0 -63
  36. haiku/rag/research/nodes/search.py +0 -93
  37. haiku/rag/research/nodes/synthesize.py +0 -51
  38. haiku/rag/research/prompts.py +0 -114
  39. haiku/rag/research/state.py +0 -25
  40. haiku/rag/store/__init__.py +0 -4
  41. haiku/rag/store/engine.py +0 -269
  42. haiku/rag/store/models/__init__.py +0 -4
  43. haiku/rag/store/models/chunk.py +0 -17
  44. haiku/rag/store/models/document.py +0 -17
  45. haiku/rag/store/repositories/__init__.py +0 -9
  46. haiku/rag/store/repositories/chunk.py +0 -424
  47. haiku/rag/store/repositories/document.py +0 -237
  48. haiku/rag/store/repositories/settings.py +0 -155
  49. haiku/rag/store/upgrades/__init__.py +0 -62
  50. haiku/rag/store/upgrades/v0_10_1.py +0 -64
  51. haiku/rag/store/upgrades/v0_9_3.py +0 -112
  52. haiku/rag/utils.py +0 -199
  53. haiku_rag-0.10.2.dist-info/RECORD +0 -54
  54. {haiku_rag-0.10.2.dist-info → haiku_rag-0.14.0.dist-info}/WHEEL +0 -0
  55. {haiku_rag-0.10.2.dist-info → haiku_rag-0.14.0.dist-info}/entry_points.txt +0 -0
  56. {haiku_rag-0.10.2.dist-info → haiku_rag-0.14.0.dist-info}/licenses/LICENSE +0 -0
haiku/rag/monitor.py DELETED
@@ -1,73 +0,0 @@
- from pathlib import Path
-
- from watchfiles import Change, DefaultFilter, awatch
-
- from haiku.rag.client import HaikuRAG
- from haiku.rag.logging import get_logger
- from haiku.rag.reader import FileReader
- from haiku.rag.store.models.document import Document
-
- logger = get_logger()
-
-
- class FileFilter(DefaultFilter):
-     def __init__(self, *, ignore_paths: list[Path] | None = None) -> None:
-         self.extensions = tuple(FileReader.extensions)
-         super().__init__(ignore_paths=ignore_paths)
-
-     def __call__(self, change: "Change", path: str) -> bool:
-         return path.endswith(self.extensions) and super().__call__(change, path)
-
-
- class FileWatcher:
-     def __init__(self, paths: list[Path], client: HaikuRAG):
-         self.paths = paths
-         self.client = client
-
-     async def observe(self):
-         logger.info(f"Watching files in {self.paths}")
-         filter = FileFilter()
-         await self.refresh()
-
-         async for changes in awatch(*self.paths, watch_filter=filter):
-             await self.handler(changes)
-
-     async def handler(self, changes: set[tuple[Change, str]]):
-         for change, path in changes:
-             if change == Change.added or change == Change.modified:
-                 await self._upsert_document(Path(path))
-             elif change == Change.deleted:
-                 await self._delete_document(Path(path))
-
-     async def refresh(self):
-         for path in self.paths:
-             for f in Path(path).rglob("**/*"):
-                 if f.is_file() and f.suffix in FileReader.extensions:
-                     await self._upsert_document(f)
-
-     async def _upsert_document(self, file: Path) -> Document | None:
-         try:
-             uri = file.as_uri()
-             existing_doc = await self.client.get_document_by_uri(uri)
-             if existing_doc:
-                 doc = await self.client.create_document_from_source(str(file))
-                 logger.info(f"Updated document {existing_doc.id} from {file}")
-                 return doc
-             else:
-                 doc = await self.client.create_document_from_source(str(file))
-                 logger.info(f"Created new document {doc.id} from {file}")
-                 return doc
-         except Exception as e:
-             logger.error(f"Failed to upsert document from {file}: {e}")
-             return None
-
-     async def _delete_document(self, file: Path):
-         try:
-             uri = file.as_uri()
-             existing_doc = await self.client.get_document_by_uri(uri)
-
-             if existing_doc and existing_doc.id:
-                 await self.client.delete_document(existing_doc.id)
-                 logger.info(f"Deleted document {existing_doc.id} for {file}")
-         except Exception as e:
-             logger.error(f"Failed to delete document for {file}: {e}")
haiku/rag/qa/__init__.py DELETED
@@ -1,15 +0,0 @@
- from haiku.rag.client import HaikuRAG
- from haiku.rag.config import Config
- from haiku.rag.qa.agent import QuestionAnswerAgent
-
-
- def get_qa_agent(client: HaikuRAG, use_citations: bool = False) -> QuestionAnswerAgent:
-     provider = Config.QA_PROVIDER
-     model_name = Config.QA_MODEL
-
-     return QuestionAnswerAgent(
-         client=client,
-         provider=provider,
-         model=model_name,
-         use_citations=use_citations,
-     )
haiku/rag/qa/agent.py DELETED
@@ -1,91 +0,0 @@
- from pydantic import BaseModel, Field
- from pydantic_ai import Agent, RunContext
- from pydantic_ai.models.openai import OpenAIChatModel
- from pydantic_ai.providers.ollama import OllamaProvider
- from pydantic_ai.providers.openai import OpenAIProvider
-
- from haiku.rag.client import HaikuRAG
- from haiku.rag.config import Config
- from haiku.rag.qa.prompts import QA_SYSTEM_PROMPT, QA_SYSTEM_PROMPT_WITH_CITATIONS
-
-
- class SearchResult(BaseModel):
-     content: str = Field(description="The document text content")
-     score: float = Field(description="Relevance score (higher is more relevant)")
-     document_uri: str = Field(
-         description="Source title (if available) or URI/path of the document"
-     )
-
-
- class Dependencies(BaseModel):
-     model_config = {"arbitrary_types_allowed": True}
-     client: HaikuRAG
-
-
- class QuestionAnswerAgent:
-     def __init__(
-         self,
-         client: HaikuRAG,
-         provider: str,
-         model: str,
-         use_citations: bool = False,
-         q: float = 0.0,
-     ):
-         self._client = client
-
-         system_prompt = (
-             QA_SYSTEM_PROMPT_WITH_CITATIONS if use_citations else QA_SYSTEM_PROMPT
-         )
-         model_obj = self._get_model(provider, model)
-
-         self._agent = Agent(
-             model=model_obj,
-             deps_type=Dependencies,
-             system_prompt=system_prompt,
-         )
-
-         @self._agent.tool
-         async def search_documents(
-             ctx: RunContext[Dependencies],
-             query: str,
-             limit: int = 3,
-         ) -> list[SearchResult]:
-             """Search the knowledge base for relevant documents."""
-
-             # Remove quotes from queries, since quoted phrases require positional indexing in LanceDB
-             query = query.replace('"', "")
-             search_results = await ctx.deps.client.search(query, limit=limit)
-             expanded_results = await ctx.deps.client.expand_context(search_results)
-
-             return [
-                 SearchResult(
-                     content=chunk.content,
-                     score=score,
-                     document_uri=(chunk.document_title or chunk.document_uri or ""),
-                 )
-                 for chunk, score in expanded_results
-             ]
-
-     def _get_model(self, provider: str, model: str):
-         """Get the appropriate model object for the provider."""
-         if provider == "ollama":
-             return OpenAIChatModel(
-                 model_name=model,
-                 provider=OllamaProvider(base_url=f"{Config.OLLAMA_BASE_URL}/v1"),
-             )
-         elif provider == "vllm":
-             return OpenAIChatModel(
-                 model_name=model,
-                 provider=OpenAIProvider(
-                     base_url=f"{Config.VLLM_QA_BASE_URL}/v1", api_key="none"
-                 ),
-             )
-         else:
-             # For all other providers, use the provider:model format
-             return f"{provider}:{model}"
-
-     async def answer(self, question: str) -> str:
-         """Answer a question using the RAG system."""
-         deps = Dependencies(client=self._client)
-         result = await self._agent.run(question, deps=deps)
-         return result.output
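A hedged usage sketch for the deleted QA entry point. Only get_qa_agent() and answer() are confirmed by this diff; the HaikuRAG constructor arguments and context-manager behavior are assumptions.

```python
import asyncio

from haiku.rag.client import HaikuRAG
from haiku.rag.qa import get_qa_agent

async def main():
    async with HaikuRAG("store.lancedb") as client:  # hypothetical store path
        qa = get_qa_agent(client, use_citations=True)
        # answer() runs the pydantic-ai agent, which calls the
        # search_documents tool against the knowledge base.
        print(await qa.answer("What does AFMAN stand for?"))

asyncio.run(main())
```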
haiku/rag/qa/prompts.py DELETED
@@ -1,60 +0,0 @@
- QA_SYSTEM_PROMPT = """
- You are a knowledgeable assistant that helps users find information from a document knowledge base.
-
- Your process:
- 1. When a user asks a question, use the search_documents tool to find relevant information
- 2. Search with specific keywords and phrases from the user's question
- 3. Review the search results and their relevance scores
- 4. If you need additional context, perform follow-up searches with different keywords
- 5. Provide a short, to-the-point, comprehensive answer based only on the retrieved documents
-
- Guidelines:
- - Base your answers strictly on the provided document content
- - Quote or reference specific information when possible
- - If multiple documents contain relevant information, synthesize them coherently
- - Indicate when information is incomplete or when you need to search for additional context
- - If the retrieved documents don't contain sufficient information, clearly state: "I cannot find enough information in the knowledge base to answer this question."
- - For complex questions, consider breaking them down and performing multiple searches
- - Stick to the answer; do not elaborate or provide context unless explicitly asked for it.
-
- Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
- /no_think
- """
-
- QA_SYSTEM_PROMPT_WITH_CITATIONS = """
- You are a knowledgeable assistant that helps users find information from a document knowledge base.
-
- IMPORTANT: You MUST use the search_documents tool for every question. Do not answer any question without first searching the knowledge base.
-
- Your process:
- 1. IMMEDIATELY call the search_documents tool with relevant keywords from the user's question
- 2. Review the search results and their relevance scores
- 3. If you need additional context, perform follow-up searches with different keywords
- 4. Provide a short, to-the-point, comprehensive answer based only on the retrieved documents
- 5. Always include citations for the sources used in your answer
-
- Guidelines:
- - Base your answers strictly on the provided document content
- - If multiple documents contain relevant information, synthesize them coherently
- - Indicate when information is incomplete or when you need to search for additional context
- - If the retrieved documents don't contain sufficient information, clearly state: "I cannot find enough information in the knowledge base to answer this question."
- - For complex questions, consider breaking them down and performing multiple searches
- - Stick to the answer; do not elaborate or provide context unless explicitly asked for it.
- - ALWAYS include citations at the end of your response using the format below
-
- Citation Format:
- After your answer, include a "Citations:" section that lists:
- - The document title (if available) or URI from each search result used
- - A brief excerpt (first 50-100 characters) of the content that supported your answer
- - Format: "Citations:\n- [document title or URI]: [content_excerpt]..."
-
- Example response format:
- [Your answer here]
-
- Citations:
- - /path/to/document1.pdf: "This document explains that AFMAN stands for Air Force Manual..."
- - /path/to/document2.pdf: "The manual provides guidance on military procedures and..."
-
- Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
- /no_think
- """
haiku/rag/reader.py DELETED
@@ -1,115 +0,0 @@
- from pathlib import Path
- from typing import ClassVar
-
- from docling.document_converter import DocumentConverter
- from docling_core.types.doc.document import DoclingDocument
-
- from haiku.rag.utils import text_to_docling_document
-
-
- class FileReader:
-     # Extensions supported by docling
-     docling_extensions: ClassVar[list[str]] = [
-         ".asciidoc",
-         ".bmp",
-         ".csv",
-         ".docx",
-         ".html",
-         ".xhtml",
-         ".jpeg",
-         ".jpg",
-         ".md",
-         ".pdf",
-         ".png",
-         ".pptx",
-         ".tiff",
-         ".xlsx",
-         ".xml",
-         ".webp",
-     ]
-
-     # Plain text extensions that we'll read directly
-     text_extensions: ClassVar[list[str]] = [
-         ".astro",
-         ".c",
-         ".cpp",
-         ".css",
-         ".go",
-         ".h",
-         ".hpp",
-         ".java",
-         ".js",
-         ".json",
-         ".kt",
-         ".mdx",
-         ".mjs",
-         ".php",
-         ".py",
-         ".rb",
-         ".rs",
-         ".svelte",
-         ".swift",
-         ".ts",
-         ".tsx",
-         ".txt",
-         ".vue",
-         ".yaml",
-         ".yml",
-     ]
-
-     # Code file extensions with their markdown language identifiers for syntax highlighting
-     code_markdown_identifier: ClassVar[dict[str, str]] = {
-         ".astro": "astro",
-         ".c": "c",
-         ".cpp": "cpp",
-         ".css": "css",
-         ".go": "go",
-         ".h": "c",
-         ".hpp": "cpp",
-         ".java": "java",
-         ".js": "javascript",
-         ".json": "json",
-         ".kt": "kotlin",
-         ".mjs": "javascript",
-         ".php": "php",
-         ".py": "python",
-         ".rb": "ruby",
-         ".rs": "rust",
-         ".svelte": "svelte",
-         ".swift": "swift",
-         ".ts": "typescript",
-         ".tsx": "tsx",
-         ".vue": "vue",
-         ".yaml": "yaml",
-         ".yml": "yaml",
-     }
-
-     extensions: ClassVar[list[str]] = docling_extensions + text_extensions
-
-     @staticmethod
-     def parse_file(path: Path) -> DoclingDocument:
-         try:
-             file_extension = path.suffix.lower()
-
-             if file_extension in FileReader.docling_extensions:
-                 # Use docling for complex document formats
-                 converter = DocumentConverter()
-                 result = converter.convert(path)
-                 return result.document
-             elif file_extension in FileReader.text_extensions:
-                 # Read plain text files directly
-                 content = path.read_text(encoding="utf-8")
-
-                 # Wrap code files (but not plain txt) in markdown code blocks for better presentation
-                 if file_extension in FileReader.code_markdown_identifier:
-                     language = FileReader.code_markdown_identifier[file_extension]
-                     content = f"```{language}\n{content}\n```"
-
-                 # Convert text to DoclingDocument by wrapping as markdown
-                 return text_to_docling_document(content, name=f"{path.stem}.md")
-             else:
-                 # Fallback: try to read as text and convert to DoclingDocument
-                 content = path.read_text(encoding="utf-8")
-                 return text_to_docling_document(content, name=f"{path.stem}.md")
-         except Exception as e:
-             raise ValueError(f"Failed to parse file: {path}") from e
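A minimal sketch of the deleted reader's entry point. parse_file() is fully shown above; the file paths are hypothetical, and export_to_markdown() is standard docling_core DoclingDocument API rather than anything defined in this package.

```python
from pathlib import Path

from haiku.rag.reader import FileReader

# Docling formats (e.g. .pdf) go through DocumentConverter; code files
# are wrapped in a fenced markdown block before conversion.
doc = FileReader.parse_file(Path("manual.pdf"))       # hypothetical file
code_doc = FileReader.parse_file(Path("watcher.py"))  # wrapped as ```python ... ```
print(doc.export_to_markdown())
```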
haiku/rag/reranking/__init__.py DELETED
@@ -1,34 +0,0 @@
- from haiku.rag.config import Config
- from haiku.rag.reranking.base import RerankerBase
-
- _reranker: RerankerBase | None = None
-
-
- def get_reranker() -> RerankerBase | None:
-     """
-     Factory function to get the appropriate reranker based on the configuration.
-     Returns None if reranking is disabled.
-     """
-     global _reranker
-     if _reranker is not None:
-         return _reranker
-
-     if Config.RERANK_PROVIDER == "mxbai":
-         try:
-             from haiku.rag.reranking.mxbai import MxBAIReranker
-
-             _reranker = MxBAIReranker()
-             return _reranker
-         except ImportError:
-             return None
-
-     if Config.RERANK_PROVIDER == "cohere":
-         try:
-             from haiku.rag.reranking.cohere import CohereReranker
-
-             _reranker = CohereReranker()
-             return _reranker
-         except ImportError:
-             return None
-
-     return None
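A hedged sketch of how this factory was meant to be consumed. get_reranker() and the rerank() signature are confirmed by this diff; the no-reranker fallback behavior is an assumption.

```python
from haiku.rag.reranking import get_reranker
from haiku.rag.store.models.chunk import Chunk

async def rerank_hits(query: str, chunks: list[Chunk]) -> list[tuple[Chunk, float]]:
    reranker = get_reranker()  # None when no provider is configured/installed
    if reranker is None:
        # Assumption: without a reranker, callers keep the original order.
        return [(chunk, 0.0) for chunk in chunks]
    return await reranker.rerank(query, chunks, top_n=10)
```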
haiku/rag/reranking/base.py DELETED
@@ -1,13 +0,0 @@
- from haiku.rag.config import Config
- from haiku.rag.store.models.chunk import Chunk
-
-
- class RerankerBase:
-     _model: str = Config.RERANK_MODEL
-
-     async def rerank(
-         self, query: str, chunks: list[Chunk], top_n: int = 10
-     ) -> list[tuple[Chunk, float]]:
-         raise NotImplementedError(
-             "Reranker is an abstract class. Please implement the rerank method in a subclass."
-         )
haiku/rag/reranking/cohere.py DELETED
@@ -1,34 +0,0 @@
- from haiku.rag.config import Config
- from haiku.rag.reranking.base import RerankerBase
- from haiku.rag.store.models.chunk import Chunk
-
- try:
-     import cohere
- except ImportError as e:
-     raise ImportError(
-         "cohere is not installed. Please install it with `pip install cohere` or use the cohere optional dependency."
-     ) from e
-
-
- class CohereReranker(RerankerBase):
-     def __init__(self):
-         self._client = cohere.ClientV2(api_key=Config.COHERE_API_KEY)
-
-     async def rerank(
-         self, query: str, chunks: list[Chunk], top_n: int = 10
-     ) -> list[tuple[Chunk, float]]:
-         if not chunks:
-             return []
-
-         documents = [chunk.content for chunk in chunks]
-
-         response = self._client.rerank(
-             model=self._model, query=query, documents=documents, top_n=top_n
-         )
-
-         reranked_chunks = []
-         for result in response.results:
-             original_chunk = chunks[result.index]
-             reranked_chunks.append((original_chunk, result.relevance_score))
-
-         return reranked_chunks
haiku/rag/reranking/mxbai.py DELETED
@@ -1,28 +0,0 @@
- from mxbai_rerank import MxbaiRerankV2  # pyright: ignore[reportMissingImports]
-
- from haiku.rag.config import Config
- from haiku.rag.reranking.base import RerankerBase
- from haiku.rag.store.models.chunk import Chunk
-
-
- class MxBAIReranker(RerankerBase):
-     def __init__(self):
-         self._client = MxbaiRerankV2(
-             Config.RERANK_MODEL, disable_transformers_warnings=True
-         )
-
-     async def rerank(
-         self, query: str, chunks: list[Chunk], top_n: int = 10
-     ) -> list[tuple[Chunk, float]]:
-         if not chunks:
-             return []
-
-         documents = [chunk.content for chunk in chunks]
-
-         results = self._client.rank(query=query, documents=documents, top_k=top_n)
-         reranked_chunks = []
-         for result in results:
-             original_chunk = chunks[result.index]
-             reranked_chunks.append((original_chunk, result.score))
-
-         return reranked_chunks
haiku/rag/reranking/vllm.py DELETED
@@ -1,44 +0,0 @@
- import httpx
-
- from haiku.rag.config import Config
- from haiku.rag.reranking.base import RerankerBase
- from haiku.rag.store.models.chunk import Chunk
-
-
- class VLLMReranker(RerankerBase):
-     def __init__(self, model: str):
-         self._model = model
-         self._base_url = Config.VLLM_RERANK_BASE_URL
-
-     async def rerank(
-         self, query: str, chunks: list[Chunk], top_n: int = 10
-     ) -> list[tuple[Chunk, float]]:
-         if not chunks:
-             return []
-
-         # Prepare documents for reranking
-         documents = [chunk.content for chunk in chunks]
-
-         async with httpx.AsyncClient() as client:
-             response = await client.post(
-                 f"{self._base_url}/v1/rerank",
-                 json={"model": self._model, "query": query, "documents": documents},
-                 headers={
-                     "accept": "application/json",
-                     "Content-Type": "application/json",
-                 },
-             )
-             response.raise_for_status()
-
-             result = response.json()
-
-         # Extract scores and pair with chunks
-         scored_chunks = []
-         for item in result.get("results", []):
-             index = item["index"]
-             score = item["relevance_score"]
-             scored_chunks.append((chunks[index], score))
-
-         # Sort by score (descending) and return top_n
-         scored_chunks.sort(key=lambda x: x[1], reverse=True)
-         return scored_chunks[:top_n]
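For reference, the JSON shape this client expects back from the /v1/rerank endpoint, inferred from the parsing code above rather than from vLLM documentation:

```python
# Inferred response shape: a "results" list of {index, relevance_score} objects,
# where index points back into the submitted documents list.
example_response = {
    "results": [
        {"index": 2, "relevance_score": 0.91},  # chunks[2] would rank first
        {"index": 0, "relevance_score": 0.47},
    ]
}
```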
haiku/rag/research/__init__.py DELETED
@@ -1,20 +0,0 @@
- from haiku.rag.research.dependencies import ResearchContext, ResearchDependencies
- from haiku.rag.research.graph import (
-     PlanNode,
-     ResearchDeps,
-     ResearchState,
-     build_research_graph,
- )
- from haiku.rag.research.models import EvaluationResult, ResearchReport, SearchAnswer
-
- __all__ = [
-     "ResearchDependencies",
-     "ResearchContext",
-     "SearchAnswer",
-     "EvaluationResult",
-     "ResearchReport",
-     "ResearchDeps",
-     "ResearchState",
-     "PlanNode",
-     "build_research_graph",
- ]
haiku/rag/research/common.py DELETED
@@ -1,53 +0,0 @@
- from typing import Any
-
- from pydantic_ai import format_as_xml
- from pydantic_ai.models.openai import OpenAIChatModel
- from pydantic_ai.providers.ollama import OllamaProvider
- from pydantic_ai.providers.openai import OpenAIProvider
-
- from haiku.rag.config import Config
- from haiku.rag.research.dependencies import ResearchContext
-
-
- def get_model(provider: str, model: str) -> Any:
-     if provider == "ollama":
-         return OpenAIChatModel(
-             model_name=model,
-             provider=OllamaProvider(base_url=f"{Config.OLLAMA_BASE_URL}/v1"),
-         )
-     elif provider == "vllm":
-         return OpenAIChatModel(
-             model_name=model,
-             provider=OpenAIProvider(
-                 base_url=f"{Config.VLLM_RESEARCH_BASE_URL or Config.VLLM_QA_BASE_URL}/v1",
-                 api_key="none",
-             ),
-         )
-     else:
-         return f"{provider}:{model}"
-
-
- def log(console, msg: str) -> None:
-     if console:
-         console.print(msg)
-
-
- def format_context_for_prompt(context: ResearchContext) -> str:
-     """Format the research context as XML for inclusion in prompts."""
-
-     context_data = {
-         "original_question": context.original_question,
-         "unanswered_questions": context.sub_questions,
-         "qa_responses": [
-             {
-                 "question": qa.query,
-                 "answer": qa.answer,
-                 "context_snippets": qa.context,
-                 "sources": qa.sources,  # pyright: ignore[reportAttributeAccessIssue]
-             }
-             for qa in context.qa_responses
-         ],
-         "insights": context.insights,
-         "gaps": context.gaps,
-     }
-     return format_as_xml(context_data, root_tag="research_context")
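A hedged illustration of this helper in use. The ResearchContext fields and add_gap() come from dependencies.py below; the exact XML layout is decided by pydantic_ai's format_as_xml, so the commented output is only indicative.

```python
from haiku.rag.research.common import format_context_for_prompt
from haiku.rag.research.dependencies import ResearchContext

ctx = ResearchContext(original_question="How does haiku.rag rerank results?")
ctx.add_gap("No coverage of the vLLM reranker yet")
xml = format_context_for_prompt(ctx)
# Roughly:
# <research_context>
#   <original_question>How does haiku.rag rerank results?</original_question>
#   ...
#   <gaps>No coverage of the vLLM reranker yet</gaps>
# </research_context>
```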
haiku/rag/research/dependencies.py DELETED
@@ -1,47 +0,0 @@
- from pydantic import BaseModel, Field
- from rich.console import Console
-
- from haiku.rag.client import HaikuRAG
- from haiku.rag.research.models import SearchAnswer
-
-
- class ResearchContext(BaseModel):
-     """Context shared across research agents."""
-
-     original_question: str = Field(description="The original research question")
-     sub_questions: list[str] = Field(
-         default_factory=list, description="Decomposed sub-questions"
-     )
-     qa_responses: list[SearchAnswer] = Field(
-         default_factory=list, description="Structured QA pairs used during research"
-     )
-     insights: list[str] = Field(
-         default_factory=list, description="Key insights discovered"
-     )
-     gaps: list[str] = Field(
-         default_factory=list, description="Identified information gaps"
-     )
-
-     def add_qa_response(self, qa: SearchAnswer) -> None:
-         """Add a structured QA response (minimal context already included)."""
-         self.qa_responses.append(qa)
-
-     def add_insight(self, insight: str) -> None:
-         """Add a key insight."""
-         if insight not in self.insights:
-             self.insights.append(insight)
-
-     def add_gap(self, gap: str) -> None:
-         """Identify an information gap."""
-         if gap not in self.gaps:
-             self.gaps.append(gap)
-
-
- class ResearchDependencies(BaseModel):
-     """Dependencies for research agents with multi-agent context."""
-
-     model_config = {"arbitrary_types_allowed": True}
-
-     client: HaikuRAG = Field(description="RAG client for document operations")
-     context: ResearchContext = Field(description="Shared research context")
-     console: Console | None = None
haiku/rag/research/graph.py DELETED
@@ -1,29 +0,0 @@
- from pydantic_graph import Graph
-
- from haiku.rag.research.models import ResearchReport
- from haiku.rag.research.nodes.evaluate import EvaluateNode
- from haiku.rag.research.nodes.plan import PlanNode
- from haiku.rag.research.nodes.search import SearchDispatchNode
- from haiku.rag.research.nodes.synthesize import SynthesizeNode
- from haiku.rag.research.state import ResearchDeps, ResearchState
-
- __all__ = [
-     "PlanNode",
-     "SearchDispatchNode",
-     "EvaluateNode",
-     "SynthesizeNode",
-     "ResearchState",
-     "ResearchDeps",
-     "build_research_graph",
- ]
-
-
- def build_research_graph() -> Graph[ResearchState, ResearchDeps, ResearchReport]:
-     return Graph(
-         nodes=[
-             PlanNode,
-             SearchDispatchNode,
-             EvaluateNode,
-             SynthesizeNode,
-         ]
-     )
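A hedged sketch of driving the deleted graph. The diff confirms only the node set and the Graph[ResearchState, ResearchDeps, ResearchReport] type parameters; the constructor arguments for PlanNode, ResearchState, and ResearchDeps are assumptions, since state.py and the node modules are deleted but not shown here.

```python
import asyncio

from haiku.rag.research.graph import build_research_graph
from haiku.rag.research.nodes.plan import PlanNode
from haiku.rag.research.state import ResearchDeps, ResearchState

async def main():
    graph = build_research_graph()
    # Assumed field names; the real definitions live in
    # haiku/rag/research/state.py, which this diff does not show.
    state = ResearchState(question="How is reranking configured?")
    deps = ResearchDeps(client=None)  # a real HaikuRAG client would go here
    # pydantic_graph runs from a start node until a node returns End;
    # PlanNode -> SearchDispatchNode -> EvaluateNode -> SynthesizeNode
    # ultimately yields a ResearchReport.
    result = await graph.run(PlanNode(), state=state, deps=deps)
    print(result.output)

asyncio.run(main())
```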