haiku.rag 0.10.0__py3-none-any.whl → 0.10.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- haiku/rag/app.py +149 -15
- haiku/rag/cli.py +126 -31
- haiku/rag/client.py +63 -21
- haiku/rag/config.py +4 -0
- haiku/rag/mcp.py +18 -6
- haiku/rag/migration.py +2 -2
- haiku/rag/qa/agent.py +4 -2
- haiku/rag/qa/prompts.py +2 -2
- haiku/rag/research/models.py +2 -2
- haiku/rag/research/nodes/search.py +3 -1
- haiku/rag/research/prompts.py +4 -3
- haiku/rag/store/__init__.py +1 -1
- haiku/rag/store/engine.py +14 -0
- haiku/rag/store/models/__init__.py +1 -1
- haiku/rag/store/models/chunk.py +1 -0
- haiku/rag/store/models/document.py +1 -0
- haiku/rag/store/repositories/chunk.py +4 -0
- haiku/rag/store/repositories/document.py +3 -0
- haiku/rag/store/upgrades/__init__.py +2 -0
- haiku/rag/store/upgrades/v0_10_1.py +64 -0
- haiku/rag/utils.py +42 -5
- {haiku_rag-0.10.0.dist-info → haiku_rag-0.10.2.dist-info}/METADATA +3 -2
- {haiku_rag-0.10.0.dist-info → haiku_rag-0.10.2.dist-info}/RECORD +26 -25
- {haiku_rag-0.10.0.dist-info → haiku_rag-0.10.2.dist-info}/WHEEL +0 -0
- {haiku_rag-0.10.0.dist-info → haiku_rag-0.10.2.dist-info}/entry_points.txt +0 -0
- {haiku_rag-0.10.0.dist-info → haiku_rag-0.10.2.dist-info}/licenses/LICENSE +0 -0
haiku/rag/app.py
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
+
import json
|
|
3
|
+
from importlib.metadata import version as pkg_version
|
|
2
4
|
from pathlib import Path
|
|
3
5
|
|
|
4
6
|
from rich.console import Console
|
|
@@ -25,26 +27,141 @@ class HaikuRAGApp:
|
|
|
25
27
|
self.db_path = db_path
|
|
26
28
|
self.console = Console()
|
|
27
29
|
|
|
30
|
+
async def info(self):
|
|
31
|
+
"""Display read-only information about the database without modifying it."""
|
|
32
|
+
|
|
33
|
+
import lancedb
|
|
34
|
+
|
|
35
|
+
# Basic: show path
|
|
36
|
+
self.console.print("[bold]haiku.rag database info[/bold]")
|
|
37
|
+
self.console.print(
|
|
38
|
+
f" [repr.attrib_name]path[/repr.attrib_name]: {self.db_path}"
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
if not self.db_path.exists():
|
|
42
|
+
self.console.print("[red]Database path does not exist.[/red]")
|
|
43
|
+
return
|
|
44
|
+
|
|
45
|
+
# Connect without going through Store to avoid upgrades/validation writes
|
|
46
|
+
try:
|
|
47
|
+
db = lancedb.connect(self.db_path)
|
|
48
|
+
table_names = set(db.table_names())
|
|
49
|
+
except Exception as e:
|
|
50
|
+
self.console.print(f"[red]Failed to open database: {e}[/red]")
|
|
51
|
+
return
|
|
52
|
+
|
|
53
|
+
try:
|
|
54
|
+
ldb_version = pkg_version("lancedb")
|
|
55
|
+
except Exception:
|
|
56
|
+
ldb_version = "unknown"
|
|
57
|
+
try:
|
|
58
|
+
hr_version = pkg_version("haiku.rag")
|
|
59
|
+
except Exception:
|
|
60
|
+
hr_version = "unknown"
|
|
61
|
+
try:
|
|
62
|
+
docling_version = pkg_version("docling")
|
|
63
|
+
except Exception:
|
|
64
|
+
docling_version = "unknown"
|
|
65
|
+
|
|
66
|
+
# Read settings (if present) to find stored haiku.rag version and embedding config
|
|
67
|
+
stored_version = "unknown"
|
|
68
|
+
embed_provider: str | None = None
|
|
69
|
+
embed_model: str | None = None
|
|
70
|
+
vector_dim: int | None = None
|
|
71
|
+
|
|
72
|
+
if "settings" in table_names:
|
|
73
|
+
settings_tbl = db.open_table("settings")
|
|
74
|
+
arrow = settings_tbl.search().where("id = 'settings'").limit(1).to_arrow()
|
|
75
|
+
rows = arrow.to_pylist() if arrow is not None else []
|
|
76
|
+
if rows:
|
|
77
|
+
raw = rows[0].get("settings") or "{}"
|
|
78
|
+
data = json.loads(raw) if isinstance(raw, str) else (raw or {})
|
|
79
|
+
stored_version = str(data.get("version", stored_version))
|
|
80
|
+
embed_provider = data.get("EMBEDDINGS_PROVIDER")
|
|
81
|
+
embed_model = data.get("EMBEDDINGS_MODEL")
|
|
82
|
+
vector_dim = (
|
|
83
|
+
int(data.get("EMBEDDINGS_VECTOR_DIM")) # pyright: ignore[reportArgumentType]
|
|
84
|
+
if data.get("EMBEDDINGS_VECTOR_DIM") is not None
|
|
85
|
+
else None
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
num_docs = 0
|
|
89
|
+
if "documents" in table_names:
|
|
90
|
+
docs_tbl = db.open_table("documents")
|
|
91
|
+
num_docs = int(docs_tbl.count_rows()) # type: ignore[attr-defined]
|
|
92
|
+
|
|
93
|
+
# Table versions per table (direct API)
|
|
94
|
+
doc_versions = (
|
|
95
|
+
len(list(db.open_table("documents").list_versions()))
|
|
96
|
+
if "documents" in table_names
|
|
97
|
+
else 0
|
|
98
|
+
)
|
|
99
|
+
chunk_versions = (
|
|
100
|
+
len(list(db.open_table("chunks").list_versions()))
|
|
101
|
+
if "chunks" in table_names
|
|
102
|
+
else 0
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
self.console.print(
|
|
106
|
+
f" [repr.attrib_name]haiku.rag version (db)[/repr.attrib_name]: {stored_version}"
|
|
107
|
+
)
|
|
108
|
+
if embed_provider or embed_model or vector_dim:
|
|
109
|
+
provider_part = embed_provider or "unknown"
|
|
110
|
+
model_part = embed_model or "unknown"
|
|
111
|
+
dim_part = f"{vector_dim}" if vector_dim is not None else "unknown"
|
|
112
|
+
self.console.print(
|
|
113
|
+
" [repr.attrib_name]embeddings[/repr.attrib_name]: "
|
|
114
|
+
f"{provider_part}/{model_part} (dim: {dim_part})"
|
|
115
|
+
)
|
|
116
|
+
else:
|
|
117
|
+
self.console.print(
|
|
118
|
+
" [repr.attrib_name]embeddings[/repr.attrib_name]: unknown"
|
|
119
|
+
)
|
|
120
|
+
self.console.print(
|
|
121
|
+
f" [repr.attrib_name]documents[/repr.attrib_name]: {num_docs}"
|
|
122
|
+
)
|
|
123
|
+
self.console.print(
|
|
124
|
+
f" [repr.attrib_name]versions (documents)[/repr.attrib_name]: {doc_versions}"
|
|
125
|
+
)
|
|
126
|
+
self.console.print(
|
|
127
|
+
f" [repr.attrib_name]versions (chunks)[/repr.attrib_name]: {chunk_versions}"
|
|
128
|
+
)
|
|
129
|
+
self.console.rule()
|
|
130
|
+
self.console.print("[bold]Versions[/bold]")
|
|
131
|
+
self.console.print(
|
|
132
|
+
f" [repr.attrib_name]haiku.rag[/repr.attrib_name]: {hr_version}"
|
|
133
|
+
)
|
|
134
|
+
self.console.print(
|
|
135
|
+
f" [repr.attrib_name]lancedb[/repr.attrib_name]: {ldb_version}"
|
|
136
|
+
)
|
|
137
|
+
self.console.print(
|
|
138
|
+
f" [repr.attrib_name]docling[/repr.attrib_name]: {docling_version}"
|
|
139
|
+
)
|
|
140
|
+
|
|
28
141
|
async def list_documents(self):
|
|
29
142
|
async with HaikuRAG(db_path=self.db_path) as self.client:
|
|
30
143
|
documents = await self.client.list_documents()
|
|
31
144
|
for doc in documents:
|
|
32
145
|
self._rich_print_document(doc, truncate=True)
|
|
33
146
|
|
|
34
|
-
async def add_document_from_text(self, text: str):
|
|
147
|
+
async def add_document_from_text(self, text: str, metadata: dict | None = None):
|
|
35
148
|
async with HaikuRAG(db_path=self.db_path) as self.client:
|
|
36
|
-
doc = await self.client.create_document(text)
|
|
149
|
+
doc = await self.client.create_document(text, metadata=metadata)
|
|
37
150
|
self._rich_print_document(doc, truncate=True)
|
|
38
151
|
self.console.print(
|
|
39
|
-
f"[
|
|
152
|
+
f"[bold green]Document {doc.id} added successfully.[/bold green]"
|
|
40
153
|
)
|
|
41
154
|
|
|
42
|
-
async def add_document_from_source(
|
|
155
|
+
async def add_document_from_source(
|
|
156
|
+
self, source: str, title: str | None = None, metadata: dict | None = None
|
|
157
|
+
):
|
|
43
158
|
async with HaikuRAG(db_path=self.db_path) as self.client:
|
|
44
|
-
doc = await self.client.create_document_from_source(
|
|
159
|
+
doc = await self.client.create_document_from_source(
|
|
160
|
+
source, title=title, metadata=metadata
|
|
161
|
+
)
|
|
45
162
|
self._rich_print_document(doc, truncate=True)
|
|
46
163
|
self.console.print(
|
|
47
|
-
f"[
|
|
164
|
+
f"[bold green]Document {doc.id} added successfully.[/bold green]"
|
|
48
165
|
)
|
|
49
166
|
|
|
50
167
|
async def get_document(self, doc_id: str):
|
|
@@ -59,7 +176,9 @@ class HaikuRAGApp:
|
|
|
59
176
|
async with HaikuRAG(db_path=self.db_path) as self.client:
|
|
60
177
|
deleted = await self.client.delete_document(doc_id)
|
|
61
178
|
if deleted:
|
|
62
|
-
self.console.print(
|
|
179
|
+
self.console.print(
|
|
180
|
+
f"[bold green]Document {doc_id} deleted successfully.[/bold green]"
|
|
181
|
+
)
|
|
63
182
|
else:
|
|
64
183
|
self.console.print(
|
|
65
184
|
f"[yellow]Document with id {doc_id} not found.[/yellow]"
|
|
@@ -69,7 +188,7 @@ class HaikuRAGApp:
|
|
|
69
188
|
async with HaikuRAG(db_path=self.db_path) as self.client:
|
|
70
189
|
results = await self.client.search(query, limit=limit)
|
|
71
190
|
if not results:
|
|
72
|
-
self.console.print("[
|
|
191
|
+
self.console.print("[yellow]No results found.[/yellow]")
|
|
73
192
|
return
|
|
74
193
|
for chunk, score in results:
|
|
75
194
|
self._rich_print_search_result(chunk, score)
|
|
@@ -202,14 +321,16 @@ class HaikuRAGApp:
|
|
|
202
321
|
return
|
|
203
322
|
|
|
204
323
|
self.console.print(
|
|
205
|
-
f"[
|
|
324
|
+
f"[bold cyan]Rebuilding database with {total_docs} documents...[/bold cyan]"
|
|
206
325
|
)
|
|
207
326
|
with Progress() as progress:
|
|
208
327
|
task = progress.add_task("Rebuilding...", total=total_docs)
|
|
209
328
|
async for _ in client.rebuild_database():
|
|
210
329
|
progress.update(task, advance=1)
|
|
211
330
|
|
|
212
|
-
self.console.print(
|
|
331
|
+
self.console.print(
|
|
332
|
+
"[bold green]Database rebuild completed successfully.[/bold green]"
|
|
333
|
+
)
|
|
213
334
|
except Exception as e:
|
|
214
335
|
self.console.print(f"[red]Error rebuilding database: {e}[/red]")
|
|
215
336
|
|
|
@@ -218,7 +339,9 @@ class HaikuRAGApp:
|
|
|
218
339
|
try:
|
|
219
340
|
async with HaikuRAG(db_path=self.db_path, skip_validation=True) as client:
|
|
220
341
|
await client.vacuum()
|
|
221
|
-
self.console.print(
|
|
342
|
+
self.console.print(
|
|
343
|
+
"[bold green]Vacuum completed successfully.[/bold green]"
|
|
344
|
+
)
|
|
222
345
|
except Exception as e:
|
|
223
346
|
self.console.print(f"[red]Error during vacuum: {e}[/red]")
|
|
224
347
|
|
|
@@ -240,7 +363,9 @@ class HaikuRAGApp:
|
|
|
240
363
|
else:
|
|
241
364
|
display_value = field_value
|
|
242
365
|
|
|
243
|
-
self.console.print(
|
|
366
|
+
self.console.print(
|
|
367
|
+
f" [repr.attrib_name]{field_name}[/repr.attrib_name]: {display_value}"
|
|
368
|
+
)
|
|
244
369
|
|
|
245
370
|
def _rich_print_document(self, doc: Document, truncate: bool = False):
|
|
246
371
|
"""Format a document for display."""
|
|
@@ -252,8 +377,16 @@ class HaikuRAGApp:
|
|
|
252
377
|
content = Markdown(content)
|
|
253
378
|
else:
|
|
254
379
|
content = Markdown(doc.content)
|
|
380
|
+
title_part = (
|
|
381
|
+
f" [repr.attrib_name]title[/repr.attrib_name]: {doc.title}"
|
|
382
|
+
if doc.title
|
|
383
|
+
else ""
|
|
384
|
+
)
|
|
255
385
|
self.console.print(
|
|
256
|
-
f"[repr.attrib_name]id[/repr.attrib_name]: {doc.id}
|
|
386
|
+
f"[repr.attrib_name]id[/repr.attrib_name]: {doc.id} "
|
|
387
|
+
f"[repr.attrib_name]uri[/repr.attrib_name]: {doc.uri}"
|
|
388
|
+
+ title_part
|
|
389
|
+
+ f" [repr.attrib_name]meta[/repr.attrib_name]: {doc.metadata}"
|
|
257
390
|
)
|
|
258
391
|
self.console.print(
|
|
259
392
|
f"[repr.attrib_name]created at[/repr.attrib_name]: {doc.created_at} [repr.attrib_name]updated at[/repr.attrib_name]: {doc.updated_at}"
|
|
@@ -272,6 +405,9 @@ class HaikuRAGApp:
|
|
|
272
405
|
if chunk.document_uri:
|
|
273
406
|
self.console.print("[repr.attrib_name]document uri[/repr.attrib_name]:")
|
|
274
407
|
self.console.print(chunk.document_uri)
|
|
408
|
+
if chunk.document_title:
|
|
409
|
+
self.console.print("[repr.attrib_name]document title[/repr.attrib_name]:")
|
|
410
|
+
self.console.print(chunk.document_title)
|
|
275
411
|
if chunk.document_meta:
|
|
276
412
|
self.console.print("[repr.attrib_name]document meta[/repr.attrib_name]:")
|
|
277
413
|
self.console.print(chunk.document_meta)
|
|
@@ -289,8 +425,6 @@ class HaikuRAGApp:
|
|
|
289
425
|
try:
|
|
290
426
|
if transport == "stdio":
|
|
291
427
|
await server.run_stdio_async()
|
|
292
|
-
elif transport == "sse":
|
|
293
|
-
await server.run_sse_async()
|
|
294
428
|
else:
|
|
295
429
|
await server.run_http_async(transport="streamable-http")
|
|
296
430
|
except KeyboardInterrupt:
|
haiku/rag/cli.py
CHANGED
|
@@ -1,30 +1,20 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
+
import json
|
|
2
3
|
import warnings
|
|
3
4
|
from importlib.metadata import version
|
|
4
5
|
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
5
7
|
|
|
6
|
-
import logfire
|
|
7
8
|
import typer
|
|
8
|
-
from rich.console import Console
|
|
9
9
|
|
|
10
|
-
from haiku.rag.app import HaikuRAGApp
|
|
11
10
|
from haiku.rag.config import Config
|
|
12
11
|
from haiku.rag.logging import configure_cli_logging
|
|
13
|
-
from haiku.rag.migration import migrate_sqlite_to_lancedb
|
|
14
12
|
from haiku.rag.utils import is_up_to_date
|
|
15
13
|
|
|
16
|
-
if Config.ENV == "development":
|
|
17
|
-
logfire.configure(send_to_logfire="if-token-present")
|
|
18
|
-
logfire.instrument_pydantic_ai()
|
|
19
|
-
else:
|
|
20
|
-
warnings.filterwarnings("ignore")
|
|
21
|
-
|
|
22
14
|
cli = typer.Typer(
|
|
23
15
|
context_settings={"help_option_names": ["-h", "--help"]}, no_args_is_help=True
|
|
24
16
|
)
|
|
25
17
|
|
|
26
|
-
console = Console()
|
|
27
|
-
|
|
28
18
|
|
|
29
19
|
def complete_document_ids(ctx: typer.Context, incomplete: str):
|
|
30
20
|
"""Autocomplete document IDs from the selected DB."""
|
|
@@ -89,16 +79,16 @@ async def check_version():
|
|
|
89
79
|
"""Check if haiku.rag is up to date and show warning if not."""
|
|
90
80
|
up_to_date, current_version, latest_version = await is_up_to_date()
|
|
91
81
|
if not up_to_date:
|
|
92
|
-
|
|
93
|
-
f"
|
|
82
|
+
typer.echo(
|
|
83
|
+
f"Warning: haiku.rag is outdated. Current: {current_version}, Latest: {latest_version}",
|
|
94
84
|
)
|
|
95
|
-
|
|
85
|
+
typer.echo("Please update.")
|
|
96
86
|
|
|
97
87
|
|
|
98
88
|
def version_callback(value: bool):
|
|
99
89
|
if value:
|
|
100
90
|
v = version("haiku.rag")
|
|
101
|
-
|
|
91
|
+
typer.echo(f"haiku.rag version {v}")
|
|
102
92
|
raise typer.Exit()
|
|
103
93
|
|
|
104
94
|
|
|
@@ -113,10 +103,26 @@ def main(
|
|
|
113
103
|
),
|
|
114
104
|
):
|
|
115
105
|
"""haiku.rag CLI - Vector database RAG system"""
|
|
116
|
-
#
|
|
117
|
-
|
|
106
|
+
# Configure logging minimally for CLI context
|
|
107
|
+
if Config.ENV == "development":
|
|
108
|
+
# Lazy import logfire only in development
|
|
109
|
+
try:
|
|
110
|
+
import logfire # type: ignore
|
|
111
|
+
|
|
112
|
+
logfire.configure(send_to_logfire="if-token-present")
|
|
113
|
+
logfire.instrument_pydantic_ai()
|
|
114
|
+
except Exception:
|
|
115
|
+
pass
|
|
116
|
+
else:
|
|
117
|
+
configure_cli_logging()
|
|
118
|
+
warnings.filterwarnings("ignore")
|
|
119
|
+
|
|
118
120
|
# Run version check before any command
|
|
119
|
-
|
|
121
|
+
try:
|
|
122
|
+
asyncio.run(check_version())
|
|
123
|
+
except Exception:
|
|
124
|
+
# Do not block CLI on version check issues
|
|
125
|
+
pass
|
|
120
126
|
|
|
121
127
|
|
|
122
128
|
@cli.command("list", help="List all stored documents")
|
|
@@ -127,23 +133,58 @@ def list_documents(
|
|
|
127
133
|
help="Path to the LanceDB database file",
|
|
128
134
|
),
|
|
129
135
|
):
|
|
136
|
+
from haiku.rag.app import HaikuRAGApp
|
|
137
|
+
|
|
130
138
|
app = HaikuRAGApp(db_path=db)
|
|
131
139
|
asyncio.run(app.list_documents())
|
|
132
140
|
|
|
133
141
|
|
|
142
|
+
def _parse_meta_options(meta: list[str] | None) -> dict[str, Any]:
|
|
143
|
+
"""Parse repeated --meta KEY=VALUE options into a dictionary.
|
|
144
|
+
|
|
145
|
+
Raises a Typer error if any entry is malformed.
|
|
146
|
+
"""
|
|
147
|
+
result: dict[str, Any] = {}
|
|
148
|
+
if not meta:
|
|
149
|
+
return result
|
|
150
|
+
for item in meta:
|
|
151
|
+
if "=" not in item:
|
|
152
|
+
raise typer.BadParameter("--meta must be in KEY=VALUE format")
|
|
153
|
+
key, value = item.split("=", 1)
|
|
154
|
+
if not key:
|
|
155
|
+
raise typer.BadParameter("--meta key cannot be empty")
|
|
156
|
+
# Best-effort JSON coercion: numbers, booleans, null, arrays/objects
|
|
157
|
+
try:
|
|
158
|
+
parsed = json.loads(value)
|
|
159
|
+
result[key] = parsed
|
|
160
|
+
except Exception:
|
|
161
|
+
# Leave as string if not valid JSON literal
|
|
162
|
+
result[key] = value
|
|
163
|
+
return result
|
|
164
|
+
|
|
165
|
+
|
|
134
166
|
@cli.command("add", help="Add a document from text input")
|
|
135
167
|
def add_document_text(
|
|
136
168
|
text: str = typer.Argument(
|
|
137
169
|
help="The text content of the document to add",
|
|
138
170
|
),
|
|
171
|
+
meta: list[str] | None = typer.Option(
|
|
172
|
+
None,
|
|
173
|
+
"--meta",
|
|
174
|
+
help="Metadata entries as KEY=VALUE (repeatable)",
|
|
175
|
+
metavar="KEY=VALUE",
|
|
176
|
+
),
|
|
139
177
|
db: Path = typer.Option(
|
|
140
178
|
Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
|
|
141
179
|
"--db",
|
|
142
180
|
help="Path to the LanceDB database file",
|
|
143
181
|
),
|
|
144
182
|
):
|
|
183
|
+
from haiku.rag.app import HaikuRAGApp
|
|
184
|
+
|
|
145
185
|
app = HaikuRAGApp(db_path=db)
|
|
146
|
-
|
|
186
|
+
metadata = _parse_meta_options(meta)
|
|
187
|
+
asyncio.run(app.add_document_from_text(text=text, metadata=metadata or None))
|
|
147
188
|
|
|
148
189
|
|
|
149
190
|
@cli.command("add-src", help="Add a document from a file path or URL")
|
|
@@ -152,14 +193,32 @@ def add_document_src(
|
|
|
152
193
|
help="The file path or URL of the document to add",
|
|
153
194
|
autocompletion=complete_local_paths,
|
|
154
195
|
),
|
|
196
|
+
title: str | None = typer.Option(
|
|
197
|
+
None,
|
|
198
|
+
"--title",
|
|
199
|
+
help="Optional human-readable title to store with the document",
|
|
200
|
+
),
|
|
201
|
+
meta: list[str] | None = typer.Option(
|
|
202
|
+
None,
|
|
203
|
+
"--meta",
|
|
204
|
+
help="Metadata entries as KEY=VALUE (repeatable)",
|
|
205
|
+
metavar="KEY=VALUE",
|
|
206
|
+
),
|
|
155
207
|
db: Path = typer.Option(
|
|
156
208
|
Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
|
|
157
209
|
"--db",
|
|
158
210
|
help="Path to the LanceDB database file",
|
|
159
211
|
),
|
|
160
212
|
):
|
|
213
|
+
from haiku.rag.app import HaikuRAGApp
|
|
214
|
+
|
|
161
215
|
app = HaikuRAGApp(db_path=db)
|
|
162
|
-
|
|
216
|
+
metadata = _parse_meta_options(meta)
|
|
217
|
+
asyncio.run(
|
|
218
|
+
app.add_document_from_source(
|
|
219
|
+
source=source, title=title, metadata=metadata or None
|
|
220
|
+
)
|
|
221
|
+
)
|
|
163
222
|
|
|
164
223
|
|
|
165
224
|
@cli.command("get", help="Get and display a document by its ID")
|
|
@@ -174,6 +233,8 @@ def get_document(
|
|
|
174
233
|
help="Path to the LanceDB database file",
|
|
175
234
|
),
|
|
176
235
|
):
|
|
236
|
+
from haiku.rag.app import HaikuRAGApp
|
|
237
|
+
|
|
177
238
|
app = HaikuRAGApp(db_path=db)
|
|
178
239
|
asyncio.run(app.get_document(doc_id=doc_id))
|
|
179
240
|
|
|
@@ -190,6 +251,8 @@ def delete_document(
|
|
|
190
251
|
help="Path to the LanceDB database file",
|
|
191
252
|
),
|
|
192
253
|
):
|
|
254
|
+
from haiku.rag.app import HaikuRAGApp
|
|
255
|
+
|
|
193
256
|
app = HaikuRAGApp(db_path=db)
|
|
194
257
|
asyncio.run(app.delete_document(doc_id=doc_id))
|
|
195
258
|
|
|
@@ -215,6 +278,8 @@ def search(
|
|
|
215
278
|
help="Path to the LanceDB database file",
|
|
216
279
|
),
|
|
217
280
|
):
|
|
281
|
+
from haiku.rag.app import HaikuRAGApp
|
|
282
|
+
|
|
218
283
|
app = HaikuRAGApp(db_path=db)
|
|
219
284
|
asyncio.run(app.search(query=query, limit=limit))
|
|
220
285
|
|
|
@@ -235,6 +300,8 @@ def ask(
|
|
|
235
300
|
help="Include citations in the response",
|
|
236
301
|
),
|
|
237
302
|
):
|
|
303
|
+
from haiku.rag.app import HaikuRAGApp
|
|
304
|
+
|
|
238
305
|
app = HaikuRAGApp(db_path=db)
|
|
239
306
|
asyncio.run(app.ask(question=question, cite=cite))
|
|
240
307
|
|
|
@@ -271,6 +338,8 @@ def research(
|
|
|
271
338
|
help="Show verbose progress output",
|
|
272
339
|
),
|
|
273
340
|
):
|
|
341
|
+
from haiku.rag.app import HaikuRAGApp
|
|
342
|
+
|
|
274
343
|
app = HaikuRAGApp(db_path=db)
|
|
275
344
|
asyncio.run(
|
|
276
345
|
app.research(
|
|
@@ -285,6 +354,8 @@ def research(
|
|
|
285
354
|
|
|
286
355
|
@cli.command("settings", help="Display current configuration settings")
|
|
287
356
|
def settings():
|
|
357
|
+
from haiku.rag.app import HaikuRAGApp
|
|
358
|
+
|
|
288
359
|
app = HaikuRAGApp(db_path=Path()) # Don't need actual DB for settings
|
|
289
360
|
app.show_settings()
|
|
290
361
|
|
|
@@ -300,6 +371,8 @@ def rebuild(
|
|
|
300
371
|
help="Path to the LanceDB database file",
|
|
301
372
|
),
|
|
302
373
|
):
|
|
374
|
+
from haiku.rag.app import HaikuRAGApp
|
|
375
|
+
|
|
303
376
|
app = HaikuRAGApp(db_path=db)
|
|
304
377
|
asyncio.run(app.rebuild())
|
|
305
378
|
|
|
@@ -312,10 +385,38 @@ def vacuum(
|
|
|
312
385
|
help="Path to the LanceDB database file",
|
|
313
386
|
),
|
|
314
387
|
):
|
|
388
|
+
from haiku.rag.app import HaikuRAGApp
|
|
389
|
+
|
|
315
390
|
app = HaikuRAGApp(db_path=db)
|
|
316
391
|
asyncio.run(app.vacuum())
|
|
317
392
|
|
|
318
393
|
|
|
394
|
+
@cli.command("info", help="Show read-only database info (no upgrades or writes)")
|
|
395
|
+
def info(
|
|
396
|
+
db: Path = typer.Option(
|
|
397
|
+
Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
|
|
398
|
+
"--db",
|
|
399
|
+
help="Path to the LanceDB database file",
|
|
400
|
+
),
|
|
401
|
+
):
|
|
402
|
+
from haiku.rag.app import HaikuRAGApp
|
|
403
|
+
|
|
404
|
+
app = HaikuRAGApp(db_path=db)
|
|
405
|
+
asyncio.run(app.info())
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
@cli.command("download-models", help="Download Docling and Ollama models per config")
|
|
409
|
+
def download_models_cmd():
|
|
410
|
+
from haiku.rag.utils import prefetch_models
|
|
411
|
+
|
|
412
|
+
try:
|
|
413
|
+
prefetch_models()
|
|
414
|
+
typer.echo("Models downloaded successfully.")
|
|
415
|
+
except Exception as e:
|
|
416
|
+
typer.echo(f"Error downloading models: {e}")
|
|
417
|
+
raise typer.Exit(1)
|
|
418
|
+
|
|
419
|
+
|
|
319
420
|
@cli.command(
|
|
320
421
|
"serve", help="Start the haiku.rag MCP server (by default in streamable HTTP mode)"
|
|
321
422
|
)
|
|
@@ -330,24 +431,15 @@ def serve(
|
|
|
330
431
|
"--stdio",
|
|
331
432
|
help="Run MCP server on stdio Transport",
|
|
332
433
|
),
|
|
333
|
-
sse: bool = typer.Option(
|
|
334
|
-
False,
|
|
335
|
-
"--sse",
|
|
336
|
-
help="Run MCP server on SSE transport",
|
|
337
|
-
),
|
|
338
434
|
) -> None:
|
|
339
435
|
"""Start the MCP server."""
|
|
340
|
-
|
|
341
|
-
console.print("[red]Error: Cannot use both --stdio and --http options[/red]")
|
|
342
|
-
raise typer.Exit(1)
|
|
436
|
+
from haiku.rag.app import HaikuRAGApp
|
|
343
437
|
|
|
344
438
|
app = HaikuRAGApp(db_path=db)
|
|
345
439
|
|
|
346
440
|
transport = None
|
|
347
441
|
if stdio:
|
|
348
442
|
transport = "stdio"
|
|
349
|
-
elif sse:
|
|
350
|
-
transport = "sse"
|
|
351
443
|
|
|
352
444
|
asyncio.run(app.serve(transport=transport))
|
|
353
445
|
|
|
@@ -361,6 +453,9 @@ def migrate(
|
|
|
361
453
|
# Generate LanceDB path in same parent directory
|
|
362
454
|
lancedb_path = sqlite_path.parent / (sqlite_path.stem + ".lancedb")
|
|
363
455
|
|
|
456
|
+
# Lazy import to avoid heavy deps on simple invocations
|
|
457
|
+
from haiku.rag.migration import migrate_sqlite_to_lancedb
|
|
458
|
+
|
|
364
459
|
success = asyncio.run(migrate_sqlite_to_lancedb(sqlite_path, lancedb_path))
|
|
365
460
|
|
|
366
461
|
if not success:
|