haiku.rag 0.9.2__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. README.md +205 -0
  2. haiku_rag-0.14.0.dist-info/METADATA +227 -0
  3. haiku_rag-0.14.0.dist-info/RECORD +6 -0
  4. haiku/rag/__init__.py +0 -0
  5. haiku/rag/app.py +0 -267
  6. haiku/rag/chunker.py +0 -51
  7. haiku/rag/cli.py +0 -359
  8. haiku/rag/client.py +0 -565
  9. haiku/rag/config.py +0 -77
  10. haiku/rag/embeddings/__init__.py +0 -35
  11. haiku/rag/embeddings/base.py +0 -15
  12. haiku/rag/embeddings/ollama.py +0 -17
  13. haiku/rag/embeddings/openai.py +0 -16
  14. haiku/rag/embeddings/vllm.py +0 -19
  15. haiku/rag/embeddings/voyageai.py +0 -17
  16. haiku/rag/logging.py +0 -56
  17. haiku/rag/mcp.py +0 -144
  18. haiku/rag/migration.py +0 -316
  19. haiku/rag/monitor.py +0 -73
  20. haiku/rag/qa/__init__.py +0 -15
  21. haiku/rag/qa/agent.py +0 -89
  22. haiku/rag/qa/prompts.py +0 -60
  23. haiku/rag/reader.py +0 -115
  24. haiku/rag/reranking/__init__.py +0 -34
  25. haiku/rag/reranking/base.py +0 -13
  26. haiku/rag/reranking/cohere.py +0 -34
  27. haiku/rag/reranking/mxbai.py +0 -28
  28. haiku/rag/reranking/vllm.py +0 -44
  29. haiku/rag/research/__init__.py +0 -37
  30. haiku/rag/research/base.py +0 -130
  31. haiku/rag/research/dependencies.py +0 -45
  32. haiku/rag/research/evaluation_agent.py +0 -42
  33. haiku/rag/research/orchestrator.py +0 -300
  34. haiku/rag/research/presearch_agent.py +0 -34
  35. haiku/rag/research/prompts.py +0 -129
  36. haiku/rag/research/search_agent.py +0 -65
  37. haiku/rag/research/synthesis_agent.py +0 -40
  38. haiku/rag/store/__init__.py +0 -4
  39. haiku/rag/store/engine.py +0 -230
  40. haiku/rag/store/models/__init__.py +0 -4
  41. haiku/rag/store/models/chunk.py +0 -15
  42. haiku/rag/store/models/document.py +0 -16
  43. haiku/rag/store/repositories/__init__.py +0 -9
  44. haiku/rag/store/repositories/chunk.py +0 -399
  45. haiku/rag/store/repositories/document.py +0 -234
  46. haiku/rag/store/repositories/settings.py +0 -148
  47. haiku/rag/store/upgrades/__init__.py +0 -1
  48. haiku/rag/utils.py +0 -162
  49. haiku_rag-0.9.2.dist-info/METADATA +0 -131
  50. haiku_rag-0.9.2.dist-info/RECORD +0 -50
  51. {haiku_rag-0.9.2.dist-info → haiku_rag-0.14.0.dist-info}/WHEEL +0 -0
  52. {haiku_rag-0.9.2.dist-info → haiku_rag-0.14.0.dist-info}/entry_points.txt +0 -0
  53. {haiku_rag-0.9.2.dist-info → haiku_rag-0.14.0.dist-info}/licenses/LICENSE +0 -0
haiku/rag/app.py DELETED
@@ -1,267 +0,0 @@
1
- import asyncio
2
- from pathlib import Path
3
-
4
- from rich.console import Console
5
- from rich.markdown import Markdown
6
- from rich.progress import Progress
7
-
8
- from haiku.rag.client import HaikuRAG
9
- from haiku.rag.config import Config
10
- from haiku.rag.mcp import create_mcp_server
11
- from haiku.rag.monitor import FileWatcher
12
- from haiku.rag.research.orchestrator import ResearchOrchestrator
13
- from haiku.rag.store.models.chunk import Chunk
14
- from haiku.rag.store.models.document import Document
15
-
16
-
17
- class HaikuRAGApp:
18
- def __init__(self, db_path: Path):
19
- self.db_path = db_path
20
- self.console = Console()
21
-
22
- async def list_documents(self):
23
- async with HaikuRAG(db_path=self.db_path) as self.client:
24
- documents = await self.client.list_documents()
25
- for doc in documents:
26
- self._rich_print_document(doc, truncate=True)
27
-
28
- async def add_document_from_text(self, text: str):
29
- async with HaikuRAG(db_path=self.db_path) as self.client:
30
- doc = await self.client.create_document(text)
31
- self._rich_print_document(doc, truncate=True)
32
- self.console.print(
33
- f"[b]Document with id [cyan]{doc.id}[/cyan] added successfully.[/b]"
34
- )
35
-
36
- async def add_document_from_source(self, source: str):
37
- async with HaikuRAG(db_path=self.db_path) as self.client:
38
- doc = await self.client.create_document_from_source(source)
39
- self._rich_print_document(doc, truncate=True)
40
- self.console.print(
41
- f"[b]Document with id [cyan]{doc.id}[/cyan] added successfully.[/b]"
42
- )
43
-
44
- async def get_document(self, doc_id: str):
45
- async with HaikuRAG(db_path=self.db_path) as self.client:
46
- doc = await self.client.get_document_by_id(doc_id)
47
- if doc is None:
48
- self.console.print(f"[red]Document with id {doc_id} not found.[/red]")
49
- return
50
- self._rich_print_document(doc, truncate=False)
51
-
52
- async def delete_document(self, doc_id: str):
53
- async with HaikuRAG(db_path=self.db_path) as self.client:
54
- deleted = await self.client.delete_document(doc_id)
55
- if deleted:
56
- self.console.print(f"[b]Document {doc_id} deleted successfully.[/b]")
57
- else:
58
- self.console.print(
59
- f"[yellow]Document with id {doc_id} not found.[/yellow]"
60
- )
61
-
62
- async def search(self, query: str, limit: int = 5):
63
- async with HaikuRAG(db_path=self.db_path) as self.client:
64
- results = await self.client.search(query, limit=limit)
65
- if not results:
66
- self.console.print("[red]No results found.[/red]")
67
- return
68
- for chunk, score in results:
69
- self._rich_print_search_result(chunk, score)
70
-
71
- async def ask(self, question: str, cite: bool = False):
72
- async with HaikuRAG(db_path=self.db_path) as self.client:
73
- try:
74
- answer = await self.client.ask(question, cite=cite)
75
- self.console.print(f"[bold blue]Question:[/bold blue] {question}")
76
- self.console.print()
77
- self.console.print("[bold green]Answer:[/bold green]")
78
- self.console.print(Markdown(answer))
79
- except Exception as e:
80
- self.console.print(f"[red]Error: {e}[/red]")
81
-
82
- async def research(
83
- self, question: str, max_iterations: int = 3, verbose: bool = False
84
- ):
85
- """Run multi-agent research on a question."""
86
- async with HaikuRAG(db_path=self.db_path) as client:
87
- try:
88
- # Create orchestrator with default config or fallback to QA
89
- orchestrator = ResearchOrchestrator()
90
-
91
- if verbose:
92
- self.console.print(
93
- f"[bold cyan]Starting research with {orchestrator.provider}:{orchestrator.model}[/bold cyan]"
94
- )
95
- self.console.print(f"[bold blue]Question:[/bold blue] {question}")
96
- self.console.print()
97
-
98
- # Conduct research
99
- report = await orchestrator.conduct_research(
100
- question=question,
101
- client=client,
102
- max_iterations=max_iterations,
103
- verbose=verbose,
104
- console=self.console if verbose else None,
105
- )
106
-
107
- # Display the report
108
- self.console.print("[bold green]Research Report[/bold green]")
109
- self.console.rule()
110
-
111
- # Title and Executive Summary
112
- self.console.print(f"[bold]{report.title}[/bold]")
113
- self.console.print()
114
- self.console.print("[bold cyan]Executive Summary:[/bold cyan]")
115
- self.console.print(report.executive_summary)
116
- self.console.print()
117
-
118
- # Main Findings
119
- if report.main_findings:
120
- self.console.print("[bold cyan]Main Findings:[/bold cyan]")
121
- for finding in report.main_findings:
122
- self.console.print(f"• {finding}")
123
- self.console.print()
124
-
125
- # (Themes section removed)
126
-
127
- # Conclusions
128
- if report.conclusions:
129
- self.console.print("[bold cyan]Conclusions:[/bold cyan]")
130
- for conclusion in report.conclusions:
131
- self.console.print(f"• {conclusion}")
132
- self.console.print()
133
-
134
- # Recommendations
135
- if report.recommendations:
136
- self.console.print("[bold cyan]Recommendations:[/bold cyan]")
137
- for rec in report.recommendations:
138
- self.console.print(f"• {rec}")
139
- self.console.print()
140
-
141
- # Limitations
142
- if report.limitations:
143
- self.console.print("[bold yellow]Limitations:[/bold yellow]")
144
- for limitation in report.limitations:
145
- self.console.print(f"• {limitation}")
146
- self.console.print()
147
-
148
- # Sources Summary
149
- if report.sources_summary:
150
- self.console.print("[bold cyan]Sources:[/bold cyan]")
151
- self.console.print(report.sources_summary)
152
-
153
- except Exception as e:
154
- self.console.print(f"[red]Error during research: {e}[/red]")
155
-
156
- async def rebuild(self):
157
- async with HaikuRAG(db_path=self.db_path, skip_validation=True) as client:
158
- try:
159
- documents = await client.list_documents()
160
- total_docs = len(documents)
161
-
162
- if total_docs == 0:
163
- self.console.print(
164
- "[yellow]No documents found in database.[/yellow]"
165
- )
166
- return
167
-
168
- self.console.print(
169
- f"[b]Rebuilding database with {total_docs} documents...[/b]"
170
- )
171
- with Progress() as progress:
172
- task = progress.add_task("Rebuilding...", total=total_docs)
173
- async for _ in client.rebuild_database():
174
- progress.update(task, advance=1)
175
-
176
- self.console.print("[b]Database rebuild completed successfully.[/b]")
177
- except Exception as e:
178
- self.console.print(f"[red]Error rebuilding database: {e}[/red]")
179
-
180
- async def vacuum(self):
181
- """Run database maintenance: optimize and cleanup table history."""
182
- try:
183
- async with HaikuRAG(db_path=self.db_path, skip_validation=True) as client:
184
- await client.vacuum()
185
- self.console.print("[b]Vacuum completed successfully.[/b]")
186
- except Exception as e:
187
- self.console.print(f"[red]Error during vacuum: {e}[/red]")
188
-
189
- def show_settings(self):
190
- """Display current configuration settings."""
191
- self.console.print("[bold]haiku.rag configuration[/bold]")
192
- self.console.print()
193
-
194
- # Get all config fields dynamically
195
- for field_name, field_value in Config.model_dump().items():
196
- # Format the display value
197
- if isinstance(field_value, str) and (
198
- "key" in field_name.lower()
199
- or "password" in field_name.lower()
200
- or "token" in field_name.lower()
201
- ):
202
- # Hide sensitive values but show if they're set
203
- display_value = "✓ Set" if field_value else "✗ Not set"
204
- else:
205
- display_value = field_value
206
-
207
- self.console.print(f" [cyan]{field_name}[/cyan]: {display_value}")
208
-
209
- def _rich_print_document(self, doc: Document, truncate: bool = False):
210
- """Format a document for display."""
211
- if truncate:
212
- content = doc.content.splitlines()
213
- if len(content) > 3:
214
- content = content[:3] + ["\n…"]
215
- content = "\n".join(content)
216
- content = Markdown(content)
217
- else:
218
- content = Markdown(doc.content)
219
- self.console.print(
220
- f"[repr.attrib_name]id[/repr.attrib_name]: {doc.id} [repr.attrib_name]uri[/repr.attrib_name]: {doc.uri} [repr.attrib_name]meta[/repr.attrib_name]: {doc.metadata}"
221
- )
222
- self.console.print(
223
- f"[repr.attrib_name]created at[/repr.attrib_name]: {doc.created_at} [repr.attrib_name]updated at[/repr.attrib_name]: {doc.updated_at}"
224
- )
225
- self.console.print("[repr.attrib_name]content[/repr.attrib_name]:")
226
- self.console.print(content)
227
- self.console.rule()
228
-
229
- def _rich_print_search_result(self, chunk: Chunk, score: float):
230
- """Format a search result chunk for display."""
231
- content = Markdown(chunk.content)
232
- self.console.print(
233
- f"[repr.attrib_name]document_id[/repr.attrib_name]: {chunk.document_id} "
234
- f"[repr.attrib_name]score[/repr.attrib_name]: {score:.4f}"
235
- )
236
- if chunk.document_uri:
237
- self.console.print("[repr.attrib_name]document uri[/repr.attrib_name]:")
238
- self.console.print(chunk.document_uri)
239
- if chunk.document_meta:
240
- self.console.print("[repr.attrib_name]document meta[/repr.attrib_name]:")
241
- self.console.print(chunk.document_meta)
242
- self.console.print("[repr.attrib_name]content[/repr.attrib_name]:")
243
- self.console.print(content)
244
- self.console.rule()
245
-
246
- async def serve(self, transport: str | None = None):
247
- """Start the MCP server."""
248
- async with HaikuRAG(self.db_path) as client:
249
- monitor = FileWatcher(paths=Config.MONITOR_DIRECTORIES, client=client)
250
- monitor_task = asyncio.create_task(monitor.observe())
251
- server = create_mcp_server(self.db_path)
252
-
253
- try:
254
- if transport == "stdio":
255
- await server.run_stdio_async()
256
- elif transport == "sse":
257
- await server.run_sse_async()
258
- else:
259
- await server.run_http_async(transport="streamable-http")
260
- except KeyboardInterrupt:
261
- pass
262
- finally:
263
- monitor_task.cancel()
264
- try:
265
- await monitor_task
266
- except asyncio.CancelledError:
267
- pass
haiku/rag/chunker.py DELETED
@@ -1,51 +0,0 @@
1
- from typing import ClassVar
2
-
3
- import tiktoken
4
- from docling.chunking import HybridChunker # type: ignore
5
- from docling_core.transforms.chunker.tokenizer.openai import OpenAITokenizer
6
- from docling_core.types.doc.document import DoclingDocument
7
-
8
- from haiku.rag.config import Config
9
-
10
-
11
- class Chunker:
12
- """A class that chunks text into smaller pieces for embedding and retrieval.
13
-
14
- Uses docling's structure-aware chunking to create semantically meaningful chunks
15
- that respect document boundaries.
16
-
17
- Args:
18
- chunk_size: The maximum size of a chunk in tokens.
19
- """
20
-
21
- encoder: ClassVar[tiktoken.Encoding] = tiktoken.encoding_for_model("gpt-4o")
22
-
23
- def __init__(
24
- self,
25
- chunk_size: int = Config.CHUNK_SIZE,
26
- ):
27
- self.chunk_size = chunk_size
28
- tokenizer = OpenAITokenizer(
29
- tokenizer=tiktoken.encoding_for_model("gpt-4o"), max_tokens=chunk_size
30
- )
31
-
32
- self.chunker = HybridChunker(tokenizer=tokenizer) # type: ignore
33
-
34
- async def chunk(self, document: DoclingDocument) -> list[str]:
35
- """Split the document into chunks using docling's structure-aware chunking.
36
-
37
- Args:
38
- document: The DoclingDocument to be split into chunks.
39
-
40
- Returns:
41
- A list of text chunks with semantic boundaries.
42
- """
43
- if document is None:
44
- return []
45
-
46
- # Chunk using docling's hybrid chunker
47
- chunks = list(self.chunker.chunk(document))
48
- return [self.chunker.contextualize(chunk) for chunk in chunks]
49
-
50
-
51
- chunker = Chunker()
haiku/rag/cli.py DELETED
@@ -1,359 +0,0 @@
1
- import asyncio
2
- import warnings
3
- from importlib.metadata import version
4
- from pathlib import Path
5
-
6
- import logfire
7
- import typer
8
- from rich.console import Console
9
-
10
- from haiku.rag.app import HaikuRAGApp
11
- from haiku.rag.config import Config
12
- from haiku.rag.logging import configure_cli_logging
13
- from haiku.rag.migration import migrate_sqlite_to_lancedb
14
- from haiku.rag.utils import is_up_to_date
15
-
16
- logfire.configure(send_to_logfire="if-token-present")
17
- logfire.instrument_pydantic_ai()
18
-
19
- if not Config.ENV == "development":
20
- warnings.filterwarnings("ignore")
21
-
22
- cli = typer.Typer(
23
- context_settings={"help_option_names": ["-h", "--help"]}, no_args_is_help=True
24
- )
25
-
26
- console = Console()
27
-
28
-
29
- def complete_document_ids(ctx: typer.Context, incomplete: str):
30
- """Autocomplete document IDs from the selected DB."""
31
- db_path = ctx.params.get("db") or (Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb")
32
-
33
- try:
34
- from haiku.rag.client import HaikuRAG
35
-
36
- async def _list_ids():
37
- async with HaikuRAG(db_path) as client:
38
- docs = await client.list_documents()
39
- return [d.id for d in docs if d.id]
40
-
41
- ids = asyncio.run(_list_ids())
42
- except Exception:
43
- return []
44
-
45
- return [i for i in ids if i and i.startswith(incomplete)]
46
-
47
-
48
- def complete_local_paths(ctx: typer.Context, incomplete: str) -> list[str]:
49
- """Autocomplete local filesystem paths.
50
-
51
- Provides directory/file suggestions based on the current incomplete input.
52
- Does not validate or restrict to specific extensions to keep it flexible
53
- (URLs are still allowed to be typed manually).
54
- """
55
- try:
56
- text = incomplete or ""
57
-
58
- # Expand user home
59
- from os.path import expanduser
60
-
61
- expanded = expanduser(text)
62
- p = Path(expanded)
63
-
64
- # Choose directory to list and prefix to filter
65
- if text == "" or text.endswith(("/", "\\")):
66
- directory = p
67
- prefix = ""
68
- else:
69
- directory = p.parent
70
- prefix = p.name
71
-
72
- if not directory.exists():
73
- return []
74
-
75
- suggestions: list[str] = []
76
- for entry in directory.iterdir():
77
- name = entry.name
78
- if not prefix or name.startswith(prefix):
79
- suggestion = str(directory / name)
80
- if entry.is_dir():
81
- suggestion += "/"
82
- suggestions.append(suggestion)
83
- return suggestions
84
- except Exception:
85
- return []
86
-
87
-
88
- async def check_version():
89
- """Check if haiku.rag is up to date and show warning if not."""
90
- up_to_date, current_version, latest_version = await is_up_to_date()
91
- if not up_to_date:
92
- console.print(
93
- f"[yellow]Warning: haiku.rag is outdated. Current: {current_version}, Latest: {latest_version}[/yellow]"
94
- )
95
- console.print("[yellow]Please update.[/yellow]")
96
-
97
-
98
- def version_callback(value: bool):
99
- if value:
100
- v = version("haiku.rag")
101
- console.print(f"haiku.rag version {v}")
102
- raise typer.Exit()
103
-
104
-
105
- @cli.callback()
106
- def main(
107
- _version: bool = typer.Option(
108
- False,
109
- "-v",
110
- "--version",
111
- callback=version_callback,
112
- help="Show version and exit",
113
- ),
114
- ):
115
- """haiku.rag CLI - Vector database RAG system"""
116
- # Ensure only haiku.rag logs are emitted in CLI context
117
- configure_cli_logging()
118
- # Run version check before any command
119
- asyncio.run(check_version())
120
-
121
-
122
- @cli.command("list", help="List all stored documents")
123
- def list_documents(
124
- db: Path = typer.Option(
125
- Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
126
- "--db",
127
- help="Path to the LanceDB database file",
128
- ),
129
- ):
130
- app = HaikuRAGApp(db_path=db)
131
- asyncio.run(app.list_documents())
132
-
133
-
134
- @cli.command("add", help="Add a document from text input")
135
- def add_document_text(
136
- text: str = typer.Argument(
137
- help="The text content of the document to add",
138
- ),
139
- db: Path = typer.Option(
140
- Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
141
- "--db",
142
- help="Path to the LanceDB database file",
143
- ),
144
- ):
145
- app = HaikuRAGApp(db_path=db)
146
- asyncio.run(app.add_document_from_text(text=text))
147
-
148
-
149
- @cli.command("add-src", help="Add a document from a file path or URL")
150
- def add_document_src(
151
- source: str = typer.Argument(
152
- help="The file path or URL of the document to add",
153
- autocompletion=complete_local_paths,
154
- ),
155
- db: Path = typer.Option(
156
- Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
157
- "--db",
158
- help="Path to the LanceDB database file",
159
- ),
160
- ):
161
- app = HaikuRAGApp(db_path=db)
162
- asyncio.run(app.add_document_from_source(source=source))
163
-
164
-
165
- @cli.command("get", help="Get and display a document by its ID")
166
- def get_document(
167
- doc_id: str = typer.Argument(
168
- help="The ID of the document to get",
169
- autocompletion=complete_document_ids,
170
- ),
171
- db: Path = typer.Option(
172
- Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
173
- "--db",
174
- help="Path to the LanceDB database file",
175
- ),
176
- ):
177
- app = HaikuRAGApp(db_path=db)
178
- asyncio.run(app.get_document(doc_id=doc_id))
179
-
180
-
181
- @cli.command("delete", help="Delete a document by its ID")
182
- def delete_document(
183
- doc_id: str = typer.Argument(
184
- help="The ID of the document to delete",
185
- autocompletion=complete_document_ids,
186
- ),
187
- db: Path = typer.Option(
188
- Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
189
- "--db",
190
- help="Path to the LanceDB database file",
191
- ),
192
- ):
193
- app = HaikuRAGApp(db_path=db)
194
- asyncio.run(app.delete_document(doc_id=doc_id))
195
-
196
-
197
- # Add alias `rm` for delete
198
- cli.command("rm", help="Alias for delete: remove a document by its ID")(delete_document)
199
-
200
-
201
- @cli.command("search", help="Search for documents by a query")
202
- def search(
203
- query: str = typer.Argument(
204
- help="The search query to use",
205
- ),
206
- limit: int = typer.Option(
207
- 5,
208
- "--limit",
209
- "-l",
210
- help="Maximum number of results to return",
211
- ),
212
- db: Path = typer.Option(
213
- Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
214
- "--db",
215
- help="Path to the LanceDB database file",
216
- ),
217
- ):
218
- app = HaikuRAGApp(db_path=db)
219
- asyncio.run(app.search(query=query, limit=limit))
220
-
221
-
222
- @cli.command("ask", help="Ask a question using the QA agent")
223
- def ask(
224
- question: str = typer.Argument(
225
- help="The question to ask",
226
- ),
227
- db: Path = typer.Option(
228
- Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
229
- "--db",
230
- help="Path to the LanceDB database file",
231
- ),
232
- cite: bool = typer.Option(
233
- False,
234
- "--cite",
235
- help="Include citations in the response",
236
- ),
237
- ):
238
- app = HaikuRAGApp(db_path=db)
239
- asyncio.run(app.ask(question=question, cite=cite))
240
-
241
-
242
- @cli.command("research", help="Run multi-agent research and output a concise report")
243
- def research(
244
- question: str = typer.Argument(
245
- help="The research question to investigate",
246
- ),
247
- max_iterations: int = typer.Option(
248
- 3,
249
- "--max-iterations",
250
- "-n",
251
- help="Maximum search/analyze iterations",
252
- ),
253
- db: Path = typer.Option(
254
- Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
255
- "--db",
256
- help="Path to the LanceDB database file",
257
- ),
258
- verbose: bool = typer.Option(
259
- False,
260
- "--verbose",
261
- help="Show verbose progress output",
262
- ),
263
- ):
264
- app = HaikuRAGApp(db_path=db)
265
- asyncio.run(
266
- app.research(
267
- question=question,
268
- max_iterations=max_iterations,
269
- verbose=verbose,
270
- )
271
- )
272
-
273
-
274
- @cli.command("settings", help="Display current configuration settings")
275
- def settings():
276
- app = HaikuRAGApp(db_path=Path()) # Don't need actual DB for settings
277
- app.show_settings()
278
-
279
-
280
- @cli.command(
281
- "rebuild",
282
- help="Rebuild the database by deleting all chunks and re-indexing all documents",
283
- )
284
- def rebuild(
285
- db: Path = typer.Option(
286
- Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
287
- "--db",
288
- help="Path to the LanceDB database file",
289
- ),
290
- ):
291
- app = HaikuRAGApp(db_path=db)
292
- asyncio.run(app.rebuild())
293
-
294
-
295
- @cli.command("vacuum", help="Optimize and clean up all tables to reduce disk usage")
296
- def vacuum(
297
- db: Path = typer.Option(
298
- Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
299
- "--db",
300
- help="Path to the LanceDB database file",
301
- ),
302
- ):
303
- app = HaikuRAGApp(db_path=db)
304
- asyncio.run(app.vacuum())
305
-
306
-
307
- @cli.command(
308
- "serve", help="Start the haiku.rag MCP server (by default in streamable HTTP mode)"
309
- )
310
- def serve(
311
- db: Path = typer.Option(
312
- Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
313
- "--db",
314
- help="Path to the LanceDB database file",
315
- ),
316
- stdio: bool = typer.Option(
317
- False,
318
- "--stdio",
319
- help="Run MCP server on stdio Transport",
320
- ),
321
- sse: bool = typer.Option(
322
- False,
323
- "--sse",
324
- help="Run MCP server on SSE transport",
325
- ),
326
- ) -> None:
327
- """Start the MCP server."""
328
- if stdio and sse:
329
- console.print("[red]Error: Cannot use both --stdio and --http options[/red]")
330
- raise typer.Exit(1)
331
-
332
- app = HaikuRAGApp(db_path=db)
333
-
334
- transport = None
335
- if stdio:
336
- transport = "stdio"
337
- elif sse:
338
- transport = "sse"
339
-
340
- asyncio.run(app.serve(transport=transport))
341
-
342
-
343
- @cli.command("migrate", help="Migrate an SQLite database to LanceDB")
344
- def migrate(
345
- sqlite_path: Path = typer.Argument(
346
- help="Path to the SQLite database file to migrate",
347
- ),
348
- ):
349
- # Generate LanceDB path in same parent directory
350
- lancedb_path = sqlite_path.parent / (sqlite_path.stem + ".lancedb")
351
-
352
- success = asyncio.run(migrate_sqlite_to_lancedb(sqlite_path, lancedb_path))
353
-
354
- if not success:
355
- raise typer.Exit(1)
356
-
357
-
358
- if __name__ == "__main__":
359
- cli()