haiku.rag 0.9.2__py3-none-any.whl → 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- README.md +205 -0
- haiku_rag-0.14.0.dist-info/METADATA +227 -0
- haiku_rag-0.14.0.dist-info/RECORD +6 -0
- haiku/rag/__init__.py +0 -0
- haiku/rag/app.py +0 -267
- haiku/rag/chunker.py +0 -51
- haiku/rag/cli.py +0 -359
- haiku/rag/client.py +0 -565
- haiku/rag/config.py +0 -77
- haiku/rag/embeddings/__init__.py +0 -35
- haiku/rag/embeddings/base.py +0 -15
- haiku/rag/embeddings/ollama.py +0 -17
- haiku/rag/embeddings/openai.py +0 -16
- haiku/rag/embeddings/vllm.py +0 -19
- haiku/rag/embeddings/voyageai.py +0 -17
- haiku/rag/logging.py +0 -56
- haiku/rag/mcp.py +0 -144
- haiku/rag/migration.py +0 -316
- haiku/rag/monitor.py +0 -73
- haiku/rag/qa/__init__.py +0 -15
- haiku/rag/qa/agent.py +0 -89
- haiku/rag/qa/prompts.py +0 -60
- haiku/rag/reader.py +0 -115
- haiku/rag/reranking/__init__.py +0 -34
- haiku/rag/reranking/base.py +0 -13
- haiku/rag/reranking/cohere.py +0 -34
- haiku/rag/reranking/mxbai.py +0 -28
- haiku/rag/reranking/vllm.py +0 -44
- haiku/rag/research/__init__.py +0 -37
- haiku/rag/research/base.py +0 -130
- haiku/rag/research/dependencies.py +0 -45
- haiku/rag/research/evaluation_agent.py +0 -42
- haiku/rag/research/orchestrator.py +0 -300
- haiku/rag/research/presearch_agent.py +0 -34
- haiku/rag/research/prompts.py +0 -129
- haiku/rag/research/search_agent.py +0 -65
- haiku/rag/research/synthesis_agent.py +0 -40
- haiku/rag/store/__init__.py +0 -4
- haiku/rag/store/engine.py +0 -230
- haiku/rag/store/models/__init__.py +0 -4
- haiku/rag/store/models/chunk.py +0 -15
- haiku/rag/store/models/document.py +0 -16
- haiku/rag/store/repositories/__init__.py +0 -9
- haiku/rag/store/repositories/chunk.py +0 -399
- haiku/rag/store/repositories/document.py +0 -234
- haiku/rag/store/repositories/settings.py +0 -148
- haiku/rag/store/upgrades/__init__.py +0 -1
- haiku/rag/utils.py +0 -162
- haiku_rag-0.9.2.dist-info/METADATA +0 -131
- haiku_rag-0.9.2.dist-info/RECORD +0 -50
- {haiku_rag-0.9.2.dist-info → haiku_rag-0.14.0.dist-info}/WHEEL +0 -0
- {haiku_rag-0.9.2.dist-info → haiku_rag-0.14.0.dist-info}/entry_points.txt +0 -0
- {haiku_rag-0.9.2.dist-info → haiku_rag-0.14.0.dist-info}/licenses/LICENSE +0 -0
haiku/rag/app.py
DELETED
|
@@ -1,267 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
from rich.console import Console
|
|
5
|
-
from rich.markdown import Markdown
|
|
6
|
-
from rich.progress import Progress
|
|
7
|
-
|
|
8
|
-
from haiku.rag.client import HaikuRAG
|
|
9
|
-
from haiku.rag.config import Config
|
|
10
|
-
from haiku.rag.mcp import create_mcp_server
|
|
11
|
-
from haiku.rag.monitor import FileWatcher
|
|
12
|
-
from haiku.rag.research.orchestrator import ResearchOrchestrator
|
|
13
|
-
from haiku.rag.store.models.chunk import Chunk
|
|
14
|
-
from haiku.rag.store.models.document import Document
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
class HaikuRAGApp:
    """Console front-end that runs haiku.rag operations and renders them with rich.

    Every coroutine opens its own ``HaikuRAG`` client for ``db_path`` and leaves
    the ``async with`` block (closing the client) when the operation is done.

    Args:
        db_path: Database location handed to ``HaikuRAG``.
    """

    def __init__(self, db_path: Path):
        self.db_path = db_path
        self.console = Console()

    @staticmethod
    def _plain(value: object) -> str:
        """Return ``str(value)`` with rich markup escaped so it prints literally.

        User input and exception messages may contain ``[...]`` sequences that
        rich would otherwise interpret as (possibly invalid) style tags.
        """
        # Function-scope import: keeps the module's import block untouched.
        from rich.markup import escape

        return escape(str(value))

    def _print_bullets(self, heading: str, items) -> None:
        """Print ``heading``, one bullet per item and a blank line; no-op if empty."""
        if not items:
            return
        self.console.print(heading)
        for item in items:
            self.console.print(f"• {item}")
        self.console.print()

    async def list_documents(self):
        """Print every stored document in truncated form."""
        async with HaikuRAG(db_path=self.db_path) as self.client:
            documents = await self.client.list_documents()
            for doc in documents:
                self._rich_print_document(doc, truncate=True)

    async def add_document_from_text(self, text: str):
        """Create a document from ``text`` and print it with a confirmation."""
        async with HaikuRAG(db_path=self.db_path) as self.client:
            doc = await self.client.create_document(text)
            self._rich_print_document(doc, truncate=True)
            self.console.print(
                f"[b]Document with id [cyan]{doc.id}[/cyan] added successfully.[/b]"
            )

    async def add_document_from_source(self, source: str):
        """Create a document from ``source`` and print it with a confirmation."""
        async with HaikuRAG(db_path=self.db_path) as self.client:
            doc = await self.client.create_document_from_source(source)
            self._rich_print_document(doc, truncate=True)
            self.console.print(
                f"[b]Document with id [cyan]{doc.id}[/cyan] added successfully.[/b]"
            )

    async def get_document(self, doc_id: str):
        """Print the full document with ``doc_id``, or a not-found message."""
        async with HaikuRAG(db_path=self.db_path) as self.client:
            doc = await self.client.get_document_by_id(doc_id)
            if doc is None:
                self.console.print(
                    f"[red]Document with id {self._plain(doc_id)} not found.[/red]"
                )
                return
            self._rich_print_document(doc, truncate=False)

    async def delete_document(self, doc_id: str):
        """Delete the document with ``doc_id`` and report whether it existed."""
        async with HaikuRAG(db_path=self.db_path) as self.client:
            deleted = await self.client.delete_document(doc_id)
            if deleted:
                self.console.print(
                    f"[b]Document {self._plain(doc_id)} deleted successfully.[/b]"
                )
            else:
                self.console.print(
                    f"[yellow]Document with id {self._plain(doc_id)} not found.[/yellow]"
                )

    async def search(self, query: str, limit: int = 5):
        """Run ``query`` through the client (passing ``limit``) and print each hit."""
        async with HaikuRAG(db_path=self.db_path) as self.client:
            results = await self.client.search(query, limit=limit)
            if not results:
                self.console.print("[red]No results found.[/red]")
                return
            for chunk, score in results:
                self._rich_print_search_result(chunk, score)

    async def ask(self, question: str, cite: bool = False):
        """Ask ``question`` via the client and print the answer as Markdown.

        Any exception is reported on the console instead of propagating.
        """
        async with HaikuRAG(db_path=self.db_path) as self.client:
            try:
                answer = await self.client.ask(question, cite=cite)
                self.console.print(
                    f"[bold blue]Question:[/bold blue] {self._plain(question)}"
                )
                self.console.print()
                self.console.print("[bold green]Answer:[/bold green]")
                self.console.print(Markdown(answer))
            except Exception as e:
                self.console.print(f"[red]Error: {self._plain(e)}[/red]")

    async def research(
        self, question: str, max_iterations: int = 3, verbose: bool = False
    ):
        """Run multi-agent research on a question.

        Prints the resulting report section by section. Any exception is
        reported on the console instead of propagating.
        """
        async with HaikuRAG(db_path=self.db_path) as client:
            try:
                orchestrator = ResearchOrchestrator()

                if verbose:
                    self.console.print(
                        f"[bold cyan]Starting research with {orchestrator.provider}:{orchestrator.model}[/bold cyan]"
                    )
                    self.console.print(
                        f"[bold blue]Question:[/bold blue] {self._plain(question)}"
                    )
                    self.console.print()

                report = await orchestrator.conduct_research(
                    question=question,
                    client=client,
                    max_iterations=max_iterations,
                    verbose=verbose,
                    # The orchestrator only receives a console in verbose mode.
                    console=self.console if verbose else None,
                )

                self.console.print("[bold green]Research Report[/bold green]")
                self.console.rule()

                # Title and executive summary are always printed.
                self.console.print(f"[bold]{report.title}[/bold]")
                self.console.print()
                self.console.print("[bold cyan]Executive Summary:[/bold cyan]")
                self.console.print(report.executive_summary)
                self.console.print()

                # Optional list sections; each is skipped when empty.
                self._print_bullets(
                    "[bold cyan]Main Findings:[/bold cyan]", report.main_findings
                )
                self._print_bullets(
                    "[bold cyan]Conclusions:[/bold cyan]", report.conclusions
                )
                self._print_bullets(
                    "[bold cyan]Recommendations:[/bold cyan]", report.recommendations
                )
                self._print_bullets(
                    "[bold yellow]Limitations:[/bold yellow]", report.limitations
                )

                if report.sources_summary:
                    self.console.print("[bold cyan]Sources:[/bold cyan]")
                    self.console.print(report.sources_summary)

            except Exception as e:
                self.console.print(
                    f"[red]Error during research: {self._plain(e)}[/red]"
                )

    async def rebuild(self):
        """Rebuild the database, showing one progress step per item yielded.

        The client is opened with ``skip_validation=True``. Any exception is
        reported on the console instead of propagating.
        """
        async with HaikuRAG(db_path=self.db_path, skip_validation=True) as client:
            try:
                documents = await client.list_documents()
                total_docs = len(documents)

                if total_docs == 0:
                    self.console.print(
                        "[yellow]No documents found in database.[/yellow]"
                    )
                    return

                self.console.print(
                    f"[b]Rebuilding database with {total_docs} documents...[/b]"
                )
                with Progress() as progress:
                    task = progress.add_task("Rebuilding...", total=total_docs)
                    async for _ in client.rebuild_database():
                        progress.update(task, advance=1)

                self.console.print("[b]Database rebuild completed successfully.[/b]")
            except Exception as e:
                self.console.print(
                    f"[red]Error rebuilding database: {self._plain(e)}[/red]"
                )

    async def vacuum(self):
        """Run database maintenance: optimize and cleanup table history."""
        # The try wraps the client context too, so failures while opening the
        # database are reported the same way as failures during vacuum.
        try:
            async with HaikuRAG(db_path=self.db_path, skip_validation=True) as client:
                await client.vacuum()
            self.console.print("[b]Vacuum completed successfully.[/b]")
        except Exception as e:
            self.console.print(f"[red]Error during vacuum: {self._plain(e)}[/red]")

    def show_settings(self):
        """Display current configuration settings."""
        self.console.print("[bold]haiku.rag configuration[/bold]")
        self.console.print()

        for field_name, field_value in Config.model_dump().items():
            lowered = field_name.lower()
            is_secret = isinstance(field_value, str) and (
                "key" in lowered or "password" in lowered or "token" in lowered
            )
            if is_secret:
                # Never echo secrets; only report whether a value is present.
                display_value = "✓ Set" if field_value else "✗ Not set"
            else:
                display_value = field_value

            self.console.print(f" [cyan]{field_name}[/cyan]: {display_value}")

    def _rich_print_document(self, doc: Document, truncate: bool = False):
        """Print a document's id, uri, metadata, timestamps and Markdown content.

        With ``truncate`` set, only the first three content lines are shown,
        followed by an ellipsis marker when more lines exist.
        """
        if truncate:
            content = doc.content.splitlines()
            if len(content) > 3:
                content = content[:3] + ["\n…"]
            content = "\n".join(content)
            content = Markdown(content)
        else:
            content = Markdown(doc.content)
        self.console.print(
            f"[repr.attrib_name]id[/repr.attrib_name]: {doc.id} [repr.attrib_name]uri[/repr.attrib_name]: {doc.uri} [repr.attrib_name]meta[/repr.attrib_name]: {doc.metadata}"
        )
        self.console.print(
            f"[repr.attrib_name]created at[/repr.attrib_name]: {doc.created_at} [repr.attrib_name]updated at[/repr.attrib_name]: {doc.updated_at}"
        )
        self.console.print("[repr.attrib_name]content[/repr.attrib_name]:")
        self.console.print(content)
        self.console.rule()

    def _rich_print_search_result(self, chunk: Chunk, score: float):
        """Print one search hit: document id, score, optional uri/meta, content."""
        content = Markdown(chunk.content)
        self.console.print(
            f"[repr.attrib_name]document_id[/repr.attrib_name]: {chunk.document_id} "
            f"[repr.attrib_name]score[/repr.attrib_name]: {score:.4f}"
        )
        if chunk.document_uri:
            self.console.print("[repr.attrib_name]document uri[/repr.attrib_name]:")
            self.console.print(chunk.document_uri)
        if chunk.document_meta:
            self.console.print("[repr.attrib_name]document meta[/repr.attrib_name]:")
            self.console.print(chunk.document_meta)
        self.console.print("[repr.attrib_name]content[/repr.attrib_name]:")
        self.console.print(content)
        self.console.rule()

    async def serve(self, transport: str | None = None):
        """Start the MCP server.

        Args:
            transport: ``"stdio"`` or ``"sse"``; any other value (including
                ``None``) runs the streamable HTTP transport.
        """
        async with HaikuRAG(self.db_path) as client:
            # Build the server before spawning the watcher so a construction
            # failure cannot leave a background task that nobody cancels.
            server = create_mcp_server(self.db_path)
            monitor = FileWatcher(paths=Config.MONITOR_DIRECTORIES, client=client)
            monitor_task = asyncio.create_task(monitor.observe())

            try:
                if transport == "stdio":
                    await server.run_stdio_async()
                elif transport == "sse":
                    await server.run_sse_async()
                else:
                    await server.run_http_async(transport="streamable-http")
            except KeyboardInterrupt:
                pass
            finally:
                # Stop the watcher and wait for the cancellation to finish.
                monitor_task.cancel()
                try:
                    await monitor_task
                except asyncio.CancelledError:
                    pass
|
haiku/rag/chunker.py
DELETED
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
from typing import ClassVar
|
|
2
|
-
|
|
3
|
-
import tiktoken
|
|
4
|
-
from docling.chunking import HybridChunker # type: ignore
|
|
5
|
-
from docling_core.transforms.chunker.tokenizer.openai import OpenAITokenizer
|
|
6
|
-
from docling_core.types.doc.document import DoclingDocument
|
|
7
|
-
|
|
8
|
-
from haiku.rag.config import Config
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class Chunker:
    """Split a DoclingDocument into text chunks for embedding and retrieval.

    Wraps docling's ``HybridChunker`` with an OpenAI-style tokenizer built on
    the class-level tiktoken encoding.

    Args:
        chunk_size: The maximum size of a chunk in tokens (passed to the
            tokenizer as ``max_tokens``).
    """

    # Single shared encoding; looked up once when the class is defined.
    encoder: ClassVar[tiktoken.Encoding] = tiktoken.encoding_for_model("gpt-4o")

    def __init__(
        self,
        chunk_size: int = Config.CHUNK_SIZE,
    ):
        self.chunk_size = chunk_size
        # Reuse the class-level encoder instead of looking the encoding up again.
        tokenizer = OpenAITokenizer(tokenizer=self.encoder, max_tokens=chunk_size)

        self.chunker = HybridChunker(tokenizer=tokenizer)  # type: ignore

    async def chunk(self, document: DoclingDocument) -> list[str]:
        """Split the document into chunks using docling's hybrid chunker.

        Args:
            document: The DoclingDocument to be split into chunks.

        Returns:
            One contextualized text per chunk, or an empty list when
            ``document`` is ``None``.
        """
        if document is None:
            return []

        chunks = list(self.chunker.chunk(document))
        return [self.chunker.contextualize(chunk) for chunk in chunks]


# Module-level instance built with the default chunk size.
chunker = Chunker()
|
haiku/rag/cli.py
DELETED
|
@@ -1,359 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
import warnings
|
|
3
|
-
from importlib.metadata import version
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
|
|
6
|
-
import logfire
|
|
7
|
-
import typer
|
|
8
|
-
from rich.console import Console
|
|
9
|
-
|
|
10
|
-
from haiku.rag.app import HaikuRAGApp
|
|
11
|
-
from haiku.rag.config import Config
|
|
12
|
-
from haiku.rag.logging import configure_cli_logging
|
|
13
|
-
from haiku.rag.migration import migrate_sqlite_to_lancedb
|
|
14
|
-
from haiku.rag.utils import is_up_to_date
|
|
15
|
-
|
|
16
|
-
# Configure Logfire with "if-token-present" sending and instrument pydantic-ai.
logfire.configure(send_to_logfire="if-token-present")
logfire.instrument_pydantic_ai()

# Outside the development environment, silence all Python warnings.
if Config.ENV != "development":
    warnings.filterwarnings("ignore")

# Root Typer application; -h is accepted as an alias for --help and running
# with no arguments shows the help screen.
cli = typer.Typer(
    context_settings={"help_option_names": ["-h", "--help"]}, no_args_is_help=True
)

# Shared console for messages printed directly by this module.
console = Console()
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def complete_document_ids(ctx: typer.Context, incomplete: str):
    """Completion callback: stored document IDs that start with ``incomplete``.

    The database path comes from the already-parsed ``db`` parameter, falling
    back to the default LanceDB location. Any failure yields no suggestions.
    """
    selected_db = ctx.params.get("db")
    if not selected_db:
        selected_db = Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb"

    try:
        from haiku.rag.client import HaikuRAG

        async def _fetch_ids():
            async with HaikuRAG(selected_db) as rag:
                stored = await rag.list_documents()
                return [doc.id for doc in stored if doc.id]

        known_ids = asyncio.run(_fetch_ids())
    except Exception:
        # Completion must never break the shell; fall back to no suggestions.
        return []

    matches = []
    for doc_id in known_ids:
        if doc_id and doc_id.startswith(incomplete):
            matches.append(doc_id)
    return matches
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
def complete_local_paths(ctx: typer.Context, incomplete: str) -> list[str]:
    """Completion callback suggesting local filesystem entries.

    Lists the directory implied by ``incomplete`` (after ``~`` expansion) and
    keeps entries whose name starts with the partially typed final component.
    Directories get a trailing ``/``. No extension filtering is applied, and
    any error yields no suggestions.
    """
    try:
        from os.path import expanduser

        typed = incomplete or ""
        target = Path(expanduser(typed))

        # Empty input or a trailing separator means "list this directory";
        # otherwise the last component is a name prefix inside its parent.
        lists_whole_dir = typed == "" or typed.endswith(("/", "\\"))
        base_dir = target if lists_whole_dir else target.parent
        stem = "" if lists_whole_dir else target.name

        if not base_dir.exists():
            return []

        found: list[str] = []
        for child in base_dir.iterdir():
            if stem and not child.name.startswith(stem):
                continue
            candidate = str(base_dir / child.name)
            found.append(candidate + "/" if child.is_dir() else candidate)
        return found
    except Exception:
        return []
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
async def check_version():
    """Print an update warning when ``is_up_to_date()`` reports a stale install."""
    is_current, installed, newest = await is_up_to_date()
    if is_current:
        return

    console.print(
        f"[yellow]Warning: haiku.rag is outdated. Current: {installed}, Latest: {newest}[/yellow]"
    )
    console.print("[yellow]Please update.[/yellow]")
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
def version_callback(value: bool):
    """Option callback: when the flag is set, print the installed version and exit."""
    if not value:
        return

    installed = version("haiku.rag")
    console.print(f"haiku.rag version {installed}")
    raise typer.Exit()
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
# Root callback: runs before every sub-command and owns the -v/--version flag.
@cli.callback()
def main(
    _version: bool = typer.Option(
        False,
        "-v",
        "--version",
        callback=version_callback,
        help="Show version and exit",
    ),
):
    """haiku.rag CLI - Vector database RAG system"""
    # NOTE(review): the docstring above is presumably surfaced by Typer as the
    # CLI help text, so it is left verbatim — confirm before rewording.
    # Ensure only haiku.rag logs are emitted in CLI context
    configure_cli_logging()
    # Run version check before any command
    asyncio.run(check_version())
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
@cli.command("list", help="List all stored documents")
def list_documents(
    db: Path = typer.Option(
        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
        "--db",
        help="Path to the LanceDB database file",
    ),
):
    # Thin wrapper: build the app for the chosen database and run the coroutine.
    asyncio.run(HaikuRAGApp(db_path=db).list_documents())
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
@cli.command("add", help="Add a document from text input")
def add_document_text(
    text: str = typer.Argument(
        help="The text content of the document to add",
    ),
    db: Path = typer.Option(
        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
        "--db",
        help="Path to the LanceDB database file",
    ),
):
    # Thin wrapper: store the given text as a new document.
    asyncio.run(HaikuRAGApp(db_path=db).add_document_from_text(text=text))
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
@cli.command("add-src", help="Add a document from a file path or URL")
def add_document_src(
    source: str = typer.Argument(
        help="The file path or URL of the document to add",
        autocompletion=complete_local_paths,
    ),
    db: Path = typer.Option(
        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
        "--db",
        help="Path to the LanceDB database file",
    ),
):
    # Thin wrapper: hand the path or URL to the app unchanged.
    asyncio.run(HaikuRAGApp(db_path=db).add_document_from_source(source=source))
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
@cli.command("get", help="Get and display a document by its ID")
def get_document(
    doc_id: str = typer.Argument(
        help="The ID of the document to get",
        autocompletion=complete_document_ids,
    ),
    db: Path = typer.Option(
        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
        "--db",
        help="Path to the LanceDB database file",
    ),
):
    # Thin wrapper: look the document up and print it via the app.
    asyncio.run(HaikuRAGApp(db_path=db).get_document(doc_id=doc_id))
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
@cli.command("delete", help="Delete a document by its ID")
def delete_document(
    doc_id: str = typer.Argument(
        help="The ID of the document to delete",
        autocompletion=complete_document_ids,
    ),
    db: Path = typer.Option(
        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
        "--db",
        help="Path to the LanceDB database file",
    ),
):
    # Thin wrapper: the app reports whether the document existed.
    asyncio.run(HaikuRAGApp(db_path=db).delete_document(doc_id=doc_id))


# Register the same function a second time under the shorter name `rm`.
cli.command("rm", help="Alias for delete: remove a document by its ID")(delete_document)
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
@cli.command("search", help="Search for documents by a query")
def search(
    query: str = typer.Argument(
        help="The search query to use",
    ),
    limit: int = typer.Option(
        5,
        "--limit",
        "-l",
        help="Maximum number of results to return",
    ),
    db: Path = typer.Option(
        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
        "--db",
        help="Path to the LanceDB database file",
    ),
):
    # Thin wrapper: forward the query and result limit to the app.
    asyncio.run(HaikuRAGApp(db_path=db).search(query=query, limit=limit))
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
@cli.command("ask", help="Ask a question using the QA agent")
def ask(
    question: str = typer.Argument(
        help="The question to ask",
    ),
    db: Path = typer.Option(
        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
        "--db",
        help="Path to the LanceDB database file",
    ),
    cite: bool = typer.Option(
        False,
        "--cite",
        help="Include citations in the response",
    ),
):
    # Thin wrapper: forward the question and citation flag to the app.
    asyncio.run(HaikuRAGApp(db_path=db).ask(question=question, cite=cite))
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
@cli.command("research", help="Run multi-agent research and output a concise report")
def research(
    question: str = typer.Argument(
        help="The research question to investigate",
    ),
    max_iterations: int = typer.Option(
        3,
        "--max-iterations",
        "-n",
        help="Maximum search/analyze iterations",
    ),
    db: Path = typer.Option(
        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
        "--db",
        help="Path to the LanceDB database file",
    ),
    verbose: bool = typer.Option(
        False,
        "--verbose",
        help="Show verbose progress output",
    ),
):
    # Thin wrapper: build the research coroutine, then drive it to completion.
    rag_app = HaikuRAGApp(db_path=db)
    job = rag_app.research(
        question=question,
        max_iterations=max_iterations,
        verbose=verbose,
    )
    asyncio.run(job)
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
@cli.command("settings", help="Display current configuration settings")
def settings():
    # Showing settings never opens the database, so an empty Path is passed
    # only to satisfy the constructor.
    HaikuRAGApp(db_path=Path()).show_settings()
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
@cli.command(
    "rebuild",
    help="Rebuild the database by deleting all chunks and re-indexing all documents",
)
def rebuild(
    db: Path = typer.Option(
        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
        "--db",
        help="Path to the LanceDB database file",
    ),
):
    # Thin wrapper: progress and errors are reported by the app itself.
    asyncio.run(HaikuRAGApp(db_path=db).rebuild())
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
@cli.command("vacuum", help="Optimize and clean up all tables to reduce disk usage")
def vacuum(
    db: Path = typer.Option(
        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
        "--db",
        help="Path to the LanceDB database file",
    ),
):
    # Thin wrapper: run the app's maintenance coroutine for this database.
    asyncio.run(HaikuRAGApp(db_path=db).vacuum())
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
@cli.command(
    "serve", help="Start the haiku.rag MCP server (by default in streamable HTTP mode)"
)
def serve(
    db: Path = typer.Option(
        Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
        "--db",
        help="Path to the LanceDB database file",
    ),
    stdio: bool = typer.Option(
        False,
        "--stdio",
        help="Run MCP server on stdio Transport",
    ),
    sse: bool = typer.Option(
        False,
        "--sse",
        help="Run MCP server on SSE transport",
    ),
) -> None:
    """Start the MCP server."""
    # The two transport flags are mutually exclusive; the message names the
    # flags that actually exist (there is no --http option on this command).
    if stdio and sse:
        console.print("[red]Error: Cannot use both --stdio and --sse options[/red]")
        raise typer.Exit(1)

    app = HaikuRAGApp(db_path=db)

    # None is passed when neither flag is given; the app then picks its default.
    transport = None
    if stdio:
        transport = "stdio"
    elif sse:
        transport = "sse"

    asyncio.run(app.serve(transport=transport))
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
@cli.command("migrate", help="Migrate an SQLite database to LanceDB")
def migrate(
    sqlite_path: Path = typer.Argument(
        help="Path to the SQLite database file to migrate",
    ),
):
    # The target sits next to the source: same stem, ".lancedb" suffix.
    target = sqlite_path.parent.joinpath(sqlite_path.stem + ".lancedb")

    # A falsy result from the migration becomes exit status 1.
    if not asyncio.run(migrate_sqlite_to_lancedb(sqlite_path, target)):
        raise typer.Exit(1)
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
# Run the Typer application when this module is executed as a script.
if __name__ == "__main__":
    cli()
|