haiku.rag 0.7.3__py3-none-any.whl → 0.7.5__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of haiku.rag might be problematic. Click here for more details.

haiku/rag/app.py CHANGED
@@ -50,8 +50,13 @@ class HaikuRAGApp:
50
50
 
51
51
  async def delete_document(self, doc_id: str):
52
52
  async with HaikuRAG(db_path=self.db_path) as self.client:
53
- await self.client.delete_document(doc_id)
54
- self.console.print(f"[b]Document {doc_id} deleted successfully.[/b]")
53
+ deleted = await self.client.delete_document(doc_id)
54
+ if deleted:
55
+ self.console.print(f"[b]Document {doc_id} deleted successfully.[/b]")
56
+ else:
57
+ self.console.print(
58
+ f"[yellow]Document with id {doc_id} not found.[/yellow]"
59
+ )
55
60
 
56
61
  async def search(self, query: str, limit: int = 5):
57
62
  async with HaikuRAG(db_path=self.db_path) as self.client:
haiku/rag/cli.py CHANGED
@@ -8,6 +8,7 @@ from rich.console import Console
8
8
 
9
9
  from haiku.rag.app import HaikuRAGApp
10
10
  from haiku.rag.config import Config
11
+ from haiku.rag.logging import configure_cli_logging
11
12
  from haiku.rag.migration import migrate_sqlite_to_lancedb
12
13
  from haiku.rag.utils import is_up_to_date
13
14
 
@@ -21,6 +22,65 @@ cli = typer.Typer(
21
22
  console = Console()
22
23
 
23
24
 
25
+ def complete_document_ids(ctx: typer.Context, incomplete: str):
26
+ """Autocomplete document IDs from the selected DB."""
27
+ db_path = ctx.params.get("db") or (Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb")
28
+
29
+ try:
30
+ from haiku.rag.client import HaikuRAG
31
+
32
+ async def _list_ids():
33
+ async with HaikuRAG(db_path) as client:
34
+ docs = await client.list_documents()
35
+ return [d.id for d in docs if d.id]
36
+
37
+ ids = asyncio.run(_list_ids())
38
+ except Exception:
39
+ return []
40
+
41
+ return [i for i in ids if i and i.startswith(incomplete)]
42
+
43
+
44
+ def complete_local_paths(ctx: typer.Context, incomplete: str) -> list[str]:
45
+ """Autocomplete local filesystem paths.
46
+
47
+ Provides directory/file suggestions based on the current incomplete input.
48
+ Does not validate or restrict to specific extensions to keep it flexible
49
+ (URLs are still allowed to be typed manually).
50
+ """
51
+ try:
52
+ text = incomplete or ""
53
+
54
+ # Expand user home
55
+ from os.path import expanduser
56
+
57
+ expanded = expanduser(text)
58
+ p = Path(expanded)
59
+
60
+ # Choose directory to list and prefix to filter
61
+ if text == "" or text.endswith(("/", "\\")):
62
+ directory = p
63
+ prefix = ""
64
+ else:
65
+ directory = p.parent
66
+ prefix = p.name
67
+
68
+ if not directory.exists():
69
+ return []
70
+
71
+ suggestions: list[str] = []
72
+ for entry in directory.iterdir():
73
+ name = entry.name
74
+ if not prefix or name.startswith(prefix):
75
+ suggestion = str(directory / name)
76
+ if entry.is_dir():
77
+ suggestion += "/"
78
+ suggestions.append(suggestion)
79
+ return suggestions
80
+ except Exception:
81
+ return []
82
+
83
+
24
84
  async def check_version():
25
85
  """Check if haiku.rag is up to date and show warning if not."""
26
86
  up_to_date, current_version, latest_version = await is_up_to_date()
@@ -49,6 +109,8 @@ def main(
49
109
  ),
50
110
  ):
51
111
  """haiku.rag CLI - Vector database RAG system"""
112
+ # Ensure only haiku.rag logs are emitted in CLI context
113
+ configure_cli_logging()
52
114
  # Run version check before any command
53
115
  asyncio.run(check_version())
54
116
 
@@ -84,6 +146,7 @@ def add_document_text(
84
146
  def add_document_src(
85
147
  source: str = typer.Argument(
86
148
  help="The file path or URL of the document to add",
149
+ autocompletion=complete_local_paths,
87
150
  ),
88
151
  db: Path = typer.Option(
89
152
  Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
@@ -99,6 +162,7 @@ def add_document_src(
99
162
  def get_document(
100
163
  doc_id: str = typer.Argument(
101
164
  help="The ID of the document to get",
165
+ autocompletion=complete_document_ids,
102
166
  ),
103
167
  db: Path = typer.Option(
104
168
  Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
@@ -114,6 +178,7 @@ def get_document(
114
178
  def delete_document(
115
179
  doc_id: str = typer.Argument(
116
180
  help="The ID of the document to delete",
181
+ autocompletion=complete_document_ids,
117
182
  ),
118
183
  db: Path = typer.Option(
119
184
  Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
@@ -125,6 +190,10 @@ def delete_document(
125
190
  asyncio.run(app.delete_document(doc_id=doc_id))
126
191
 
127
192
 
193
+ # Add alias `rm` for delete
194
+ cli.command("rm", help="Alias for delete: remove a document by its ID")(delete_document)
195
+
196
+
128
197
  @cli.command("search", help="Search for documents by a query")
129
198
  def search(
130
199
  query: str = typer.Argument(
haiku/rag/logging.py CHANGED
@@ -3,13 +3,9 @@ import logging
3
3
  from rich.console import Console
4
4
  from rich.logging import RichHandler
5
5
 
6
- logging.basicConfig(level=logging.DEBUG)
7
- logging.getLogger("httpx").setLevel(logging.WARNING)
8
- logging.getLogger("httpcore").setLevel(logging.WARNING)
9
- logging.getLogger("docling").setLevel(logging.WARNING)
10
-
11
6
 
12
7
  def get_logger() -> logging.Logger:
8
+ """Return the library logger configured with a Rich handler."""
13
9
  logger = logging.getLogger("haiku.rag")
14
10
 
15
11
  handler = RichHandler(
@@ -19,11 +15,39 @@ def get_logger() -> logging.Logger:
19
15
  formatter = logging.Formatter("%(message)s")
20
16
  handler.setFormatter(formatter)
21
17
 
22
- logger.setLevel("INFO")
18
+ logger.setLevel(logging.INFO)
23
19
 
24
20
  # Remove any existing handlers to avoid duplicates on reconfiguration
25
21
  for hdlr in logger.handlers[:]:
26
22
  logger.removeHandler(hdlr)
27
23
 
28
24
  logger.addHandler(handler)
25
+ # Do not let messages propagate to the root logger
26
+ logger.propagate = False
27
+ return logger
28
+
29
+
30
+ def configure_cli_logging(level: int = logging.INFO) -> logging.Logger:
31
+ """Configure logging for CLI runs.
32
+
33
+ - Silence ALL non-haiku.rag loggers by detaching root handlers and setting
34
+ their level to ERROR.
35
+ - Attach a Rich handler only to the "haiku.rag" logger.
36
+ - Prevent propagation so only our logger prints in the CLI.
37
+ """
38
+ # Silence root logger completely
39
+ root = logging.getLogger()
40
+ for hdlr in root.handlers[:]:
41
+ root.removeHandler(hdlr)
42
+ root.setLevel(logging.ERROR)
43
+
44
+ # Optionally silence some commonly noisy libraries explicitly as a safeguard
45
+ for noisy in ("httpx", "httpcore", "docling", "urllib3", "asyncio"):
46
+ logging.getLogger(noisy).setLevel(logging.ERROR)
47
+ logging.getLogger(noisy).propagate = False
48
+
49
+ # Configure and return our app logger
50
+ logger = get_logger()
51
+ logger.setLevel(level)
52
+ logger.propagate = False
29
53
  return logger
haiku/rag/migration.py CHANGED
@@ -1,13 +1,3 @@
1
- #!/usr/bin/env python3
2
- """
3
- Migration script to migrate from SQLite to LanceDB.
4
-
5
- This script will:
6
- 1. Read data from an existing SQLite database
7
- 2. Create a new LanceDB database with the same data
8
- 3. Preserve all documents, chunks, embeddings, and settings
9
- """
10
-
11
1
  import json
12
2
  import sqlite3
13
3
  import struct
@@ -55,6 +45,22 @@ class SQLiteToLanceDBMigrator:
55
45
  sqlite_conn = sqlite3.connect(self.sqlite_path)
56
46
  sqlite_conn.row_factory = sqlite3.Row
57
47
 
48
+ # Load the sqlite-vec extension
49
+ try:
50
+ import sqlite_vec
51
+
52
+ sqlite_conn.enable_load_extension(True)
53
+ sqlite_vec.load(sqlite_conn)
54
+ self.console.print("[blue]Loaded sqlite-vec extension[/blue]")
55
+ except Exception as e:
56
+ self.console.print(
57
+ f"[yellow]Warning: Could not load sqlite-vec extension: {e}[/yellow]"
58
+ )
59
+ self.console.print(
60
+ "[yellow]Install sqlite-vec with[/yellow]\n[green]uv pip install sqlite-vec [/green]"
61
+ )
62
+ exit(1)
63
+
58
64
  # Create LanceDB store
59
65
  lance_store = Store(self.lancedb_path, skip_validation=True)
60
66
 
@@ -180,30 +186,24 @@ class SQLiteToLanceDBMigrator:
180
186
 
181
187
  chunks_data = cursor.fetchall()
182
188
 
183
- # Get embeddings separately to avoid vec0 virtual table issues
189
+ # Get embeddings using the sqlite-vec virtual table
184
190
  embeddings_map = {}
185
191
  try:
186
- # Try to get embeddings from the vec0 tables directly
192
+ # Use the virtual table to get embeddings properly
187
193
  cursor.execute("""
188
- SELECT
189
- r.chunk_id,
190
- v.vectors
191
- FROM chunk_embeddings_rowids r
192
- JOIN chunk_embeddings_vector_chunks00 v ON r.rowid = v.rowid
194
+ SELECT chunk_id, embedding
195
+ FROM chunk_embeddings
193
196
  """)
194
197
 
195
198
  for row in cursor.fetchall():
196
199
  chunk_id = row[0]
197
- vectors_blob = row[1]
198
- if vectors_blob and chunk_id not in embeddings_map:
199
- embeddings_map[chunk_id] = vectors_blob
200
+ embedding_blob = row[1]
201
+ if embedding_blob and chunk_id not in embeddings_map:
202
+ embeddings_map[chunk_id] = embedding_blob
200
203
 
201
204
  except sqlite3.OperationalError as e:
202
205
  self.console.print(
203
- f"[yellow]Warning: Could not extract embeddings: {e}[/yellow]"
204
- )
205
- self.console.print(
206
- "[yellow]Continuing migration without embeddings...[/yellow]"
206
+ f"[yellow]Warning: Could not extract embeddings from virtual table: {e}[/yellow]"
207
207
  )
208
208
 
209
209
  chunks = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: haiku.rag
3
- Version: 0.7.3
3
+ Version: 0.7.5
4
4
  Summary: Retrieval Augmented Generation (RAG) with LanceDB
5
5
  Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
6
6
  License: MIT
@@ -1,12 +1,12 @@
1
1
  haiku/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- haiku/rag/app.py,sha256=GmuZxH7BMutWt8Mdu0RSateRBaKiqXh7Z9tV7cZX6n0,7655
2
+ haiku/rag/app.py,sha256=S19UWA6dxl9NayL9hOZKC5D7fjNat4n0mUOw2PAF9u8,7842
3
3
  haiku/rag/chunker.py,sha256=PVe6ysv8UlacUd4Zb3_8RFWIaWDXnzBAy2VDJ4TaUsE,1555
4
- haiku/rag/cli.py,sha256=UY9Vh5RsIxSCV14eQbNOiwToKmbFAvqTOAnxjieaYBs,6399
4
+ haiku/rag/cli.py,sha256=HqFHU9x2tR1yTR74V3NPndqE4R2Yn-ohASyHp334pAg,8597
5
5
  haiku/rag/client.py,sha256=N4zkWjE9Rsw9YgPvNo83xptHUQR2ognfOnjkoV_w6hc,20999
6
6
  haiku/rag/config.py,sha256=3H41da9BU1R1y2JJHD0cOSErX_VSM1UXA7M2JSOxFXE,1795
7
- haiku/rag/logging.py,sha256=DOQi9QMpQRl8h17Vu4nQh8HxpHdeIu29n8-HZaT3SRQ,786
7
+ haiku/rag/logging.py,sha256=a0ELyeMqb85ebeOTN8OQCTL1PiMWiiV9R_OOH-VZoA8,1665
8
8
  haiku/rag/mcp.py,sha256=bR9Y-Nz-hvjiql20Y0KE0hwNGwyjmPGX8K9d-qmXptY,4683
9
- haiku/rag/migration.py,sha256=gWxQwiKo0YulRhogYz4K8N98kHN9LQXIx9FeTmT24v4,10915
9
+ haiku/rag/migration.py,sha256=n5G6SDhTo8wTf0uCYbWGegq1LqIgILDLNjWcGvSj-SQ,11053
10
10
  haiku/rag/monitor.py,sha256=r386nkhdlsU8UECwIuVwnrSlgMk3vNIuUZGNIzkZuec,2770
11
11
  haiku/rag/reader.py,sha256=qkPTMJuQ_o4sK-8zpDl9WFYe_MJ7aL_gUw6rczIpW-g,3274
12
12
  haiku/rag/utils.py,sha256=c8F0ECsFSqvQxzxINAOAnvShoOnJPLsOaNE3JEY2JSc,3230
@@ -34,8 +34,8 @@ haiku/rag/store/repositories/chunk.py,sha256=v4y4eh4yIf6zJaWfHxljvnmb12dmvwdinzm
34
34
  haiku/rag/store/repositories/document.py,sha256=lP8Lo82KTP-qwXFRpYZ46WjeAdAsHwZ5pJcrXdz4g0U,6988
35
35
  haiku/rag/store/repositories/settings.py,sha256=dqnAvm-98nQrWpLBbf9QghJw673QD80-iqQhRMP5t0c,5025
36
36
  haiku/rag/store/upgrades/__init__.py,sha256=wUiEoSiHTahvuagx93E4FB07v123AhdbOjwUkPusiIg,14
37
- haiku_rag-0.7.3.dist-info/METADATA,sha256=PAvA6VZuyZp9IekXhYCLWDxM1wMZMmujtntxZE2lBoE,4610
38
- haiku_rag-0.7.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
39
- haiku_rag-0.7.3.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
40
- haiku_rag-0.7.3.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
41
- haiku_rag-0.7.3.dist-info/RECORD,,
37
+ haiku_rag-0.7.5.dist-info/METADATA,sha256=URf4qAZuzeL3OpTKuF6IhOeSmfRWlph-1pLboZTQUnw,4610
38
+ haiku_rag-0.7.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
39
+ haiku_rag-0.7.5.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
40
+ haiku_rag-0.7.5.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
41
+ haiku_rag-0.7.5.dist-info/RECORD,,