haiku.rag: haiku_rag-0.7.3-py3-none-any.whl → haiku_rag-0.7.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of haiku.rag might be problematic. More details are linked from the original diff page.
- haiku/rag/app.py +7 -2
- haiku/rag/cli.py +69 -0
- haiku/rag/logging.py +30 -6
- haiku/rag/migration.py +24 -24
- {haiku_rag-0.7.3.dist-info → haiku_rag-0.7.5.dist-info}/METADATA +1 -1
- {haiku_rag-0.7.3.dist-info → haiku_rag-0.7.5.dist-info}/RECORD +9 -9
- {haiku_rag-0.7.3.dist-info → haiku_rag-0.7.5.dist-info}/WHEEL +0 -0
- {haiku_rag-0.7.3.dist-info → haiku_rag-0.7.5.dist-info}/entry_points.txt +0 -0
- {haiku_rag-0.7.3.dist-info → haiku_rag-0.7.5.dist-info}/licenses/LICENSE +0 -0
haiku/rag/app.py
CHANGED
|
@@ -50,8 +50,13 @@ class HaikuRAGApp:
|
|
|
50
50
|
|
|
51
51
|
async def delete_document(self, doc_id: str):
|
|
52
52
|
async with HaikuRAG(db_path=self.db_path) as self.client:
|
|
53
|
-
await self.client.delete_document(doc_id)
|
|
54
|
-
|
|
53
|
+
deleted = await self.client.delete_document(doc_id)
|
|
54
|
+
if deleted:
|
|
55
|
+
self.console.print(f"[b]Document {doc_id} deleted successfully.[/b]")
|
|
56
|
+
else:
|
|
57
|
+
self.console.print(
|
|
58
|
+
f"[yellow]Document with id {doc_id} not found.[/yellow]"
|
|
59
|
+
)
|
|
55
60
|
|
|
56
61
|
async def search(self, query: str, limit: int = 5):
|
|
57
62
|
async with HaikuRAG(db_path=self.db_path) as self.client:
|
haiku/rag/cli.py
CHANGED
|
@@ -8,6 +8,7 @@ from rich.console import Console
|
|
|
8
8
|
|
|
9
9
|
from haiku.rag.app import HaikuRAGApp
|
|
10
10
|
from haiku.rag.config import Config
|
|
11
|
+
from haiku.rag.logging import configure_cli_logging
|
|
11
12
|
from haiku.rag.migration import migrate_sqlite_to_lancedb
|
|
12
13
|
from haiku.rag.utils import is_up_to_date
|
|
13
14
|
|
|
@@ -21,6 +22,65 @@ cli = typer.Typer(
|
|
|
21
22
|
console = Console()
|
|
22
23
|
|
|
23
24
|
|
|
25
|
+
def complete_document_ids(ctx: typer.Context, incomplete: str):
|
|
26
|
+
"""Autocomplete document IDs from the selected DB."""
|
|
27
|
+
db_path = ctx.params.get("db") or (Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb")
|
|
28
|
+
|
|
29
|
+
try:
|
|
30
|
+
from haiku.rag.client import HaikuRAG
|
|
31
|
+
|
|
32
|
+
async def _list_ids():
|
|
33
|
+
async with HaikuRAG(db_path) as client:
|
|
34
|
+
docs = await client.list_documents()
|
|
35
|
+
return [d.id for d in docs if d.id]
|
|
36
|
+
|
|
37
|
+
ids = asyncio.run(_list_ids())
|
|
38
|
+
except Exception:
|
|
39
|
+
return []
|
|
40
|
+
|
|
41
|
+
return [i for i in ids if i and i.startswith(incomplete)]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def complete_local_paths(ctx: typer.Context, incomplete: str) -> list[str]:
|
|
45
|
+
"""Autocomplete local filesystem paths.
|
|
46
|
+
|
|
47
|
+
Provides directory/file suggestions based on the current incomplete input.
|
|
48
|
+
Does not validate or restrict to specific extensions to keep it flexible
|
|
49
|
+
(URLs are still allowed to be typed manually).
|
|
50
|
+
"""
|
|
51
|
+
try:
|
|
52
|
+
text = incomplete or ""
|
|
53
|
+
|
|
54
|
+
# Expand user home
|
|
55
|
+
from os.path import expanduser
|
|
56
|
+
|
|
57
|
+
expanded = expanduser(text)
|
|
58
|
+
p = Path(expanded)
|
|
59
|
+
|
|
60
|
+
# Choose directory to list and prefix to filter
|
|
61
|
+
if text == "" or text.endswith(("/", "\\")):
|
|
62
|
+
directory = p
|
|
63
|
+
prefix = ""
|
|
64
|
+
else:
|
|
65
|
+
directory = p.parent
|
|
66
|
+
prefix = p.name
|
|
67
|
+
|
|
68
|
+
if not directory.exists():
|
|
69
|
+
return []
|
|
70
|
+
|
|
71
|
+
suggestions: list[str] = []
|
|
72
|
+
for entry in directory.iterdir():
|
|
73
|
+
name = entry.name
|
|
74
|
+
if not prefix or name.startswith(prefix):
|
|
75
|
+
suggestion = str(directory / name)
|
|
76
|
+
if entry.is_dir():
|
|
77
|
+
suggestion += "/"
|
|
78
|
+
suggestions.append(suggestion)
|
|
79
|
+
return suggestions
|
|
80
|
+
except Exception:
|
|
81
|
+
return []
|
|
82
|
+
|
|
83
|
+
|
|
24
84
|
async def check_version():
|
|
25
85
|
"""Check if haiku.rag is up to date and show warning if not."""
|
|
26
86
|
up_to_date, current_version, latest_version = await is_up_to_date()
|
|
@@ -49,6 +109,8 @@ def main(
|
|
|
49
109
|
),
|
|
50
110
|
):
|
|
51
111
|
"""haiku.rag CLI - Vector database RAG system"""
|
|
112
|
+
# Ensure only haiku.rag logs are emitted in CLI context
|
|
113
|
+
configure_cli_logging()
|
|
52
114
|
# Run version check before any command
|
|
53
115
|
asyncio.run(check_version())
|
|
54
116
|
|
|
@@ -84,6 +146,7 @@ def add_document_text(
|
|
|
84
146
|
def add_document_src(
|
|
85
147
|
source: str = typer.Argument(
|
|
86
148
|
help="The file path or URL of the document to add",
|
|
149
|
+
autocompletion=complete_local_paths,
|
|
87
150
|
),
|
|
88
151
|
db: Path = typer.Option(
|
|
89
152
|
Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
|
|
@@ -99,6 +162,7 @@ def add_document_src(
|
|
|
99
162
|
def get_document(
|
|
100
163
|
doc_id: str = typer.Argument(
|
|
101
164
|
help="The ID of the document to get",
|
|
165
|
+
autocompletion=complete_document_ids,
|
|
102
166
|
),
|
|
103
167
|
db: Path = typer.Option(
|
|
104
168
|
Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
|
|
@@ -114,6 +178,7 @@ def get_document(
|
|
|
114
178
|
def delete_document(
|
|
115
179
|
doc_id: str = typer.Argument(
|
|
116
180
|
help="The ID of the document to delete",
|
|
181
|
+
autocompletion=complete_document_ids,
|
|
117
182
|
),
|
|
118
183
|
db: Path = typer.Option(
|
|
119
184
|
Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
|
|
@@ -125,6 +190,10 @@ def delete_document(
|
|
|
125
190
|
asyncio.run(app.delete_document(doc_id=doc_id))
|
|
126
191
|
|
|
127
192
|
|
|
193
|
+
# Add alias `rm` for delete
|
|
194
|
+
cli.command("rm", help="Alias for delete: remove a document by its ID")(delete_document)
|
|
195
|
+
|
|
196
|
+
|
|
128
197
|
@cli.command("search", help="Search for documents by a query")
|
|
129
198
|
def search(
|
|
130
199
|
query: str = typer.Argument(
|
haiku/rag/logging.py
CHANGED
|
@@ -3,13 +3,9 @@ import logging
|
|
|
3
3
|
from rich.console import Console
|
|
4
4
|
from rich.logging import RichHandler
|
|
5
5
|
|
|
6
|
-
logging.basicConfig(level=logging.DEBUG)
|
|
7
|
-
logging.getLogger("httpx").setLevel(logging.WARNING)
|
|
8
|
-
logging.getLogger("httpcore").setLevel(logging.WARNING)
|
|
9
|
-
logging.getLogger("docling").setLevel(logging.WARNING)
|
|
10
|
-
|
|
11
6
|
|
|
12
7
|
def get_logger() -> logging.Logger:
|
|
8
|
+
"""Return the library logger configured with a Rich handler."""
|
|
13
9
|
logger = logging.getLogger("haiku.rag")
|
|
14
10
|
|
|
15
11
|
handler = RichHandler(
|
|
@@ -19,11 +15,39 @@ def get_logger() -> logging.Logger:
|
|
|
19
15
|
formatter = logging.Formatter("%(message)s")
|
|
20
16
|
handler.setFormatter(formatter)
|
|
21
17
|
|
|
22
|
-
logger.setLevel(
|
|
18
|
+
logger.setLevel(logging.INFO)
|
|
23
19
|
|
|
24
20
|
# Remove any existing handlers to avoid duplicates on reconfiguration
|
|
25
21
|
for hdlr in logger.handlers[:]:
|
|
26
22
|
logger.removeHandler(hdlr)
|
|
27
23
|
|
|
28
24
|
logger.addHandler(handler)
|
|
25
|
+
# Do not let messages propagate to the root logger
|
|
26
|
+
logger.propagate = False
|
|
27
|
+
return logger
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def configure_cli_logging(level: int = logging.INFO) -> logging.Logger:
|
|
31
|
+
"""Configure logging for CLI runs.
|
|
32
|
+
|
|
33
|
+
- Silence ALL non-haiku.rag loggers by detaching root handlers and setting
|
|
34
|
+
their level to ERROR.
|
|
35
|
+
- Attach a Rich handler only to the "haiku.rag" logger.
|
|
36
|
+
- Prevent propagation so only our logger prints in the CLI.
|
|
37
|
+
"""
|
|
38
|
+
# Silence root logger completely
|
|
39
|
+
root = logging.getLogger()
|
|
40
|
+
for hdlr in root.handlers[:]:
|
|
41
|
+
root.removeHandler(hdlr)
|
|
42
|
+
root.setLevel(logging.ERROR)
|
|
43
|
+
|
|
44
|
+
# Optionally silence some commonly noisy libraries explicitly as a safeguard
|
|
45
|
+
for noisy in ("httpx", "httpcore", "docling", "urllib3", "asyncio"):
|
|
46
|
+
logging.getLogger(noisy).setLevel(logging.ERROR)
|
|
47
|
+
logging.getLogger(noisy).propagate = False
|
|
48
|
+
|
|
49
|
+
# Configure and return our app logger
|
|
50
|
+
logger = get_logger()
|
|
51
|
+
logger.setLevel(level)
|
|
52
|
+
logger.propagate = False
|
|
29
53
|
return logger
|
haiku/rag/migration.py
CHANGED
|
@@ -1,13 +1,3 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
Migration script to migrate from SQLite to LanceDB.
|
|
4
|
-
|
|
5
|
-
This script will:
|
|
6
|
-
1. Read data from an existing SQLite database
|
|
7
|
-
2. Create a new LanceDB database with the same data
|
|
8
|
-
3. Preserve all documents, chunks, embeddings, and settings
|
|
9
|
-
"""
|
|
10
|
-
|
|
11
1
|
import json
|
|
12
2
|
import sqlite3
|
|
13
3
|
import struct
|
|
@@ -55,6 +45,22 @@ class SQLiteToLanceDBMigrator:
|
|
|
55
45
|
sqlite_conn = sqlite3.connect(self.sqlite_path)
|
|
56
46
|
sqlite_conn.row_factory = sqlite3.Row
|
|
57
47
|
|
|
48
|
+
# Load the sqlite-vec extension
|
|
49
|
+
try:
|
|
50
|
+
import sqlite_vec
|
|
51
|
+
|
|
52
|
+
sqlite_conn.enable_load_extension(True)
|
|
53
|
+
sqlite_vec.load(sqlite_conn)
|
|
54
|
+
self.console.print("[blue]Loaded sqlite-vec extension[/blue]")
|
|
55
|
+
except Exception as e:
|
|
56
|
+
self.console.print(
|
|
57
|
+
f"[yellow]Warning: Could not load sqlite-vec extension: {e}[/yellow]"
|
|
58
|
+
)
|
|
59
|
+
self.console.print(
|
|
60
|
+
"[yellow]Install sqlite-vec with[/yellow]\n[green]uv pip install sqlite-vec [/green]"
|
|
61
|
+
)
|
|
62
|
+
exit(1)
|
|
63
|
+
|
|
58
64
|
# Create LanceDB store
|
|
59
65
|
lance_store = Store(self.lancedb_path, skip_validation=True)
|
|
60
66
|
|
|
@@ -180,30 +186,24 @@ class SQLiteToLanceDBMigrator:
|
|
|
180
186
|
|
|
181
187
|
chunks_data = cursor.fetchall()
|
|
182
188
|
|
|
183
|
-
# Get embeddings
|
|
189
|
+
# Get embeddings using the sqlite-vec virtual table
|
|
184
190
|
embeddings_map = {}
|
|
185
191
|
try:
|
|
186
|
-
#
|
|
192
|
+
# Use the virtual table to get embeddings properly
|
|
187
193
|
cursor.execute("""
|
|
188
|
-
SELECT
|
|
189
|
-
|
|
190
|
-
v.vectors
|
|
191
|
-
FROM chunk_embeddings_rowids r
|
|
192
|
-
JOIN chunk_embeddings_vector_chunks00 v ON r.rowid = v.rowid
|
|
194
|
+
SELECT chunk_id, embedding
|
|
195
|
+
FROM chunk_embeddings
|
|
193
196
|
""")
|
|
194
197
|
|
|
195
198
|
for row in cursor.fetchall():
|
|
196
199
|
chunk_id = row[0]
|
|
197
|
-
|
|
198
|
-
if
|
|
199
|
-
embeddings_map[chunk_id] =
|
|
200
|
+
embedding_blob = row[1]
|
|
201
|
+
if embedding_blob and chunk_id not in embeddings_map:
|
|
202
|
+
embeddings_map[chunk_id] = embedding_blob
|
|
200
203
|
|
|
201
204
|
except sqlite3.OperationalError as e:
|
|
202
205
|
self.console.print(
|
|
203
|
-
f"[yellow]Warning: Could not extract embeddings: {e}[/yellow]"
|
|
204
|
-
)
|
|
205
|
-
self.console.print(
|
|
206
|
-
"[yellow]Continuing migration without embeddings...[/yellow]"
|
|
206
|
+
f"[yellow]Warning: Could not extract embeddings from virtual table: {e}[/yellow]"
|
|
207
207
|
)
|
|
208
208
|
|
|
209
209
|
chunks = []
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
haiku/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
haiku/rag/app.py,sha256=
|
|
2
|
+
haiku/rag/app.py,sha256=S19UWA6dxl9NayL9hOZKC5D7fjNat4n0mUOw2PAF9u8,7842
|
|
3
3
|
haiku/rag/chunker.py,sha256=PVe6ysv8UlacUd4Zb3_8RFWIaWDXnzBAy2VDJ4TaUsE,1555
|
|
4
|
-
haiku/rag/cli.py,sha256=
|
|
4
|
+
haiku/rag/cli.py,sha256=HqFHU9x2tR1yTR74V3NPndqE4R2Yn-ohASyHp334pAg,8597
|
|
5
5
|
haiku/rag/client.py,sha256=N4zkWjE9Rsw9YgPvNo83xptHUQR2ognfOnjkoV_w6hc,20999
|
|
6
6
|
haiku/rag/config.py,sha256=3H41da9BU1R1y2JJHD0cOSErX_VSM1UXA7M2JSOxFXE,1795
|
|
7
|
-
haiku/rag/logging.py,sha256=
|
|
7
|
+
haiku/rag/logging.py,sha256=a0ELyeMqb85ebeOTN8OQCTL1PiMWiiV9R_OOH-VZoA8,1665
|
|
8
8
|
haiku/rag/mcp.py,sha256=bR9Y-Nz-hvjiql20Y0KE0hwNGwyjmPGX8K9d-qmXptY,4683
|
|
9
|
-
haiku/rag/migration.py,sha256=
|
|
9
|
+
haiku/rag/migration.py,sha256=n5G6SDhTo8wTf0uCYbWGegq1LqIgILDLNjWcGvSj-SQ,11053
|
|
10
10
|
haiku/rag/monitor.py,sha256=r386nkhdlsU8UECwIuVwnrSlgMk3vNIuUZGNIzkZuec,2770
|
|
11
11
|
haiku/rag/reader.py,sha256=qkPTMJuQ_o4sK-8zpDl9WFYe_MJ7aL_gUw6rczIpW-g,3274
|
|
12
12
|
haiku/rag/utils.py,sha256=c8F0ECsFSqvQxzxINAOAnvShoOnJPLsOaNE3JEY2JSc,3230
|
|
@@ -34,8 +34,8 @@ haiku/rag/store/repositories/chunk.py,sha256=v4y4eh4yIf6zJaWfHxljvnmb12dmvwdinzm
|
|
|
34
34
|
haiku/rag/store/repositories/document.py,sha256=lP8Lo82KTP-qwXFRpYZ46WjeAdAsHwZ5pJcrXdz4g0U,6988
|
|
35
35
|
haiku/rag/store/repositories/settings.py,sha256=dqnAvm-98nQrWpLBbf9QghJw673QD80-iqQhRMP5t0c,5025
|
|
36
36
|
haiku/rag/store/upgrades/__init__.py,sha256=wUiEoSiHTahvuagx93E4FB07v123AhdbOjwUkPusiIg,14
|
|
37
|
-
haiku_rag-0.7.
|
|
38
|
-
haiku_rag-0.7.
|
|
39
|
-
haiku_rag-0.7.
|
|
40
|
-
haiku_rag-0.7.
|
|
41
|
-
haiku_rag-0.7.
|
|
37
|
+
haiku_rag-0.7.5.dist-info/METADATA,sha256=URf4qAZuzeL3OpTKuF6IhOeSmfRWlph-1pLboZTQUnw,4610
|
|
38
|
+
haiku_rag-0.7.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
39
|
+
haiku_rag-0.7.5.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
|
|
40
|
+
haiku_rag-0.7.5.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
|
|
41
|
+
haiku_rag-0.7.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|