haiku.rag 0.7.6__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of haiku.rag might be problematic. Click here for more details.

haiku/rag/app.py CHANGED
@@ -102,6 +102,15 @@ class HaikuRAGApp:
102
102
  except Exception as e:
103
103
  self.console.print(f"[red]Error rebuilding database: {e}[/red]")
104
104
 
105
+ async def vacuum(self):
106
+ """Run database maintenance: optimize and cleanup table history."""
107
+ try:
108
+ async with HaikuRAG(db_path=self.db_path, skip_validation=True) as client:
109
+ await client.vacuum()
110
+ self.console.print("[b]Vacuum completed successfully.[/b]")
111
+ except Exception as e:
112
+ self.console.print(f"[red]Error during vacuum: {e}[/red]")
113
+
105
114
  def show_settings(self):
106
115
  """Display current configuration settings."""
107
116
  self.console.print("[bold]haiku.rag configuration[/bold]")
haiku/rag/cli.py CHANGED
@@ -256,6 +256,18 @@ def rebuild(
256
256
  asyncio.run(app.rebuild())
257
257
 
258
258
 
259
+ @cli.command("vacuum", help="Optimize and clean up all tables to reduce disk usage")
260
+ def vacuum(
261
+ db: Path = typer.Option(
262
+ Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
263
+ "--db",
264
+ help="Path to the LanceDB database file",
265
+ ),
266
+ ):
267
+ app = HaikuRAGApp(db_path=db)
268
+ asyncio.run(app.vacuum())
269
+
270
+
259
271
  @cli.command(
260
272
  "serve", help="Start the haiku.rag MCP server (by default in streamable HTTP mode)"
261
273
  )
haiku/rag/client.py CHANGED
@@ -550,6 +550,16 @@ class HaikuRAG:
550
550
  )
551
551
  yield doc.id
552
552
 
553
+ # Final maintenance: centralized vacuum to curb disk usage
554
+ try:
555
+ self.store.vacuum()
556
+ except Exception:
557
+ pass
558
+
559
+ async def vacuum(self) -> None:
560
+ """Optimize and clean up old versions across all tables."""
561
+ self.store.vacuum()
562
+
553
563
  def close(self):
554
564
  """Close the underlying store connection."""
555
565
  self.store.close()
@@ -27,4 +27,9 @@ def get_embedder() -> EmbedderBase:
27
27
 
28
28
  return OpenAIEmbedder(Config.EMBEDDINGS_MODEL, Config.EMBEDDINGS_VECTOR_DIM)
29
29
 
30
+ if Config.EMBEDDINGS_PROVIDER == "vllm":
31
+ from haiku.rag.embeddings.vllm import Embedder as VllmEmbedder
32
+
33
+ return VllmEmbedder(Config.EMBEDDINGS_MODEL, Config.EMBEDDINGS_VECTOR_DIM)
34
+
30
35
  raise ValueError(f"Unsupported embedding provider: {Config.EMBEDDINGS_PROVIDER}")
haiku/rag/logging.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import warnings
2
3
 
3
4
  from rich.console import Console
4
5
  from rich.logging import RichHandler
@@ -50,4 +51,6 @@ def configure_cli_logging(level: int = logging.INFO) -> logging.Logger:
50
51
  logger = get_logger()
51
52
  logger.setLevel(level)
52
53
  logger.propagate = False
54
+
55
+ warnings.filterwarnings("ignore")
53
56
  return logger
haiku/rag/migration.py CHANGED
@@ -47,7 +47,7 @@ class SQLiteToLanceDBMigrator:
47
47
 
48
48
  # Load the sqlite-vec extension
49
49
  try:
50
- import sqlite_vec
50
+ import sqlite_vec # type: ignore
51
51
 
52
52
  sqlite_conn.enable_load_extension(True)
53
53
  sqlite_vec.load(sqlite_conn)
@@ -91,10 +91,10 @@ class SQLiteToLanceDBMigrator:
91
91
 
92
92
  sqlite_conn.close()
93
93
 
94
- # Optimize the chunks table after migration
94
+ # Optimize and cleanup using centralized vacuum
95
95
  self.console.print("[blue]Optimizing LanceDB...[/blue]")
96
96
  try:
97
- lance_store.chunks_table.optimize()
97
+ lance_store.vacuum()
98
98
  self.console.print("[green]✅ Optimization completed[/green]")
99
99
  except Exception as e:
100
100
  self.console.print(
haiku/rag/store/engine.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import json
2
2
  import logging
3
+ from datetime import timedelta
3
4
  from importlib import metadata
4
5
  from pathlib import Path
5
6
  from uuid import uuid4
@@ -62,6 +63,15 @@ class Store:
62
63
  if not skip_validation:
63
64
  self._validate_configuration()
64
65
 
66
+ def vacuum(self) -> None:
67
+ """Optimize and clean up old versions across all tables to reduce disk usage."""
68
+ if self._has_cloud_config() and str(Config.LANCEDB_URI).startswith("db://"):
69
+ return
70
+
71
+ # Perform maintenance per table using optimize() with cleanup_older_than 0
72
+ for table in [self.documents_table, self.chunks_table, self.settings_table]:
73
+ table.optimize(cleanup_older_than=timedelta(0))
74
+
65
75
  def _connect_to_lancedb(self, db_path: Path):
66
76
  """Establish connection to LanceDB (local, cloud, or object storage)."""
67
77
  # Check if we have cloud configuration
@@ -159,16 +169,18 @@ class Store:
159
169
  self.settings_table.search().limit(1).to_pydantic(SettingsRecord)
160
170
  )
161
171
  if settings_records:
162
- settings = (
172
+ # Only write if version actually changes to avoid creating new table versions
173
+ current = (
163
174
  json.loads(settings_records[0].settings)
164
175
  if settings_records[0].settings
165
176
  else {}
166
177
  )
167
- settings["version"] = version
168
- # Update the record
169
- self.settings_table.update(
170
- where="id = 'settings'", values={"settings": json.dumps(settings)}
171
- )
178
+ if current.get("version") != version:
179
+ current["version"] = version
180
+ self.settings_table.update(
181
+ where="id = 'settings'",
182
+ values={"settings": json.dumps(current)},
183
+ )
172
184
  else:
173
185
  # Create new settings record
174
186
  settings_data = Config.model_dump(mode="json")
@@ -84,10 +84,15 @@ class SettingsRepository:
84
84
  )
85
85
 
86
86
  if existing:
87
- # Update existing settings
88
- self.store.settings_table.update(
89
- where="id = 'settings'", values={"settings": json.dumps(current_config)}
87
+ # Only update when configuration actually changed to avoid needless new versions
88
+ existing_payload = (
89
+ json.loads(existing[0].settings) if existing[0].settings else {}
90
90
  )
91
+ if existing_payload != current_config:
92
+ self.store.settings_table.update(
93
+ where="id = 'settings'",
94
+ values={"settings": json.dumps(current_config)},
95
+ )
91
96
  else:
92
97
  # Create new settings
93
98
  settings_record = SettingsRecord(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: haiku.rag
3
- Version: 0.7.6
3
+ Version: 0.8.0
4
4
  Summary: Retrieval Augmented Generation (RAG) with LanceDB
5
5
  Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
6
6
  License: MIT
@@ -1,16 +1,16 @@
1
1
  haiku/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- haiku/rag/app.py,sha256=n9Y-4wHnxGeatCE-scqbv5JBnPo-5WOJ6URdfZveHC8,7837
2
+ haiku/rag/app.py,sha256=XlL6PNPSqeBKF6bemvdSfXEnQghywudwZv-C116NuZU,8254
3
3
  haiku/rag/chunker.py,sha256=PVe6ysv8UlacUd4Zb3_8RFWIaWDXnzBAy2VDJ4TaUsE,1555
4
- haiku/rag/cli.py,sha256=HqFHU9x2tR1yTR74V3NPndqE4R2Yn-ohASyHp334pAg,8597
5
- haiku/rag/client.py,sha256=N4zkWjE9Rsw9YgPvNo83xptHUQR2ognfOnjkoV_w6hc,20999
4
+ haiku/rag/cli.py,sha256=houkHTeVc89BA3zPksCjUooEnScSg1Ez_BIHBH6cmJQ,8920
5
+ haiku/rag/client.py,sha256=NJVGXzVzpoVy1sttz_xEU7mXWtObKT8pGpvo5pZyzwc,21288
6
6
  haiku/rag/config.py,sha256=3H41da9BU1R1y2JJHD0cOSErX_VSM1UXA7M2JSOxFXE,1795
7
- haiku/rag/logging.py,sha256=a0ELyeMqb85ebeOTN8OQCTL1PiMWiiV9R_OOH-VZoA8,1665
7
+ haiku/rag/logging.py,sha256=dm65AwADpcQsH5OAPtRA-4hsw0w5DK-sGOvzYkj6jzw,1720
8
8
  haiku/rag/mcp.py,sha256=bR9Y-Nz-hvjiql20Y0KE0hwNGwyjmPGX8K9d-qmXptY,4683
9
- haiku/rag/migration.py,sha256=n5G6SDhTo8wTf0uCYbWGegq1LqIgILDLNjWcGvSj-SQ,11053
9
+ haiku/rag/migration.py,sha256=M--KnSF3lxgKjxmokb4vuzGH-pV8eg0C_8e7jvPqW8Y,11058
10
10
  haiku/rag/monitor.py,sha256=r386nkhdlsU8UECwIuVwnrSlgMk3vNIuUZGNIzkZuec,2770
11
11
  haiku/rag/reader.py,sha256=qkPTMJuQ_o4sK-8zpDl9WFYe_MJ7aL_gUw6rczIpW-g,3274
12
12
  haiku/rag/utils.py,sha256=c8F0ECsFSqvQxzxINAOAnvShoOnJPLsOaNE3JEY2JSc,3230
13
- haiku/rag/embeddings/__init__.py,sha256=n7aHW3BxHlpGxU4ze4YYDOsljzFpEep8dwVE2n45JoE,1218
13
+ haiku/rag/embeddings/__init__.py,sha256=44IfDITGIFTflGT6UEmiYOwpWFVbYv5smLY59D0YeCs,1419
14
14
  haiku/rag/embeddings/base.py,sha256=BnSviKrlzjv3L0sZJs_T-pxfawd-bcTak-rsX-D2f3A,497
15
15
  haiku/rag/embeddings/ollama.py,sha256=LuLlHH6RGoO9_gFCIlbmesuXOj017gTw6z-p8Ez0CfE,595
16
16
  haiku/rag/embeddings/openai.py,sha256=fIFCk-jpUtaW0xsnrQnJ824O0UCjaGG2sgvBzREhilc,503
@@ -25,17 +25,17 @@ haiku/rag/reranking/cohere.py,sha256=1iTdiaa8vvb6oHVB2qpWzUOVkyfUcimVSZp6Qr4aq4c
25
25
  haiku/rag/reranking/mxbai.py,sha256=46sVTsTIkzIX9THgM3u8HaEmgY7evvEyB-N54JTHvK8,867
26
26
  haiku/rag/reranking/vllm.py,sha256=xVGH9ss-ISWdJ5SKUUHUbTqBo7PIEmA_SQv0ScdJ6XA,1479
27
27
  haiku/rag/store/__init__.py,sha256=hq0W0DAC7ysqhWSP2M2uHX8cbG6kbr-sWHxhq6qQcY0,103
28
- haiku/rag/store/engine.py,sha256=XHGo5Xl-dCFdQHrOdMo64xVK5n0k8-LoUl5V-tlA0HI,7131
28
+ haiku/rag/store/engine.py,sha256=uzw09IOebaKo8b_FyvVHMUQMDVKfBpN7WGfuY3fKiEE,7757
29
29
  haiku/rag/store/models/__init__.py,sha256=s0E72zneGlowvZrFWaNxHYjOAUjgWdLxzdYsnvNRVlY,88
30
30
  haiku/rag/store/models/chunk.py,sha256=ZNyTfO6lh3rXWLVYO3TZcitbL4LSUGr42fR6jQQ5iQc,364
31
31
  haiku/rag/store/models/document.py,sha256=zSSpt6pyrMJAIXGQvIcqojcqUzwZnhp3WxVokaWxNRc,396
32
32
  haiku/rag/store/repositories/__init__.py,sha256=Olv5dLfBQINRV3HrsfUpjzkZ7Qm7goEYyMNykgo_DaY,291
33
33
  haiku/rag/store/repositories/chunk.py,sha256=v4y4eh4yIf6zJaWfHxljvnmb12dmvwdinzmxQt8Lvhs,13343
34
34
  haiku/rag/store/repositories/document.py,sha256=lP8Lo82KTP-qwXFRpYZ46WjeAdAsHwZ5pJcrXdz4g0U,6988
35
- haiku/rag/store/repositories/settings.py,sha256=dqnAvm-98nQrWpLBbf9QghJw673QD80-iqQhRMP5t0c,5025
35
+ haiku/rag/store/repositories/settings.py,sha256=wx3fuP_5CpPflZHRrIkeoer6ml-iD0qXERh5k6MQRzI,5291
36
36
  haiku/rag/store/upgrades/__init__.py,sha256=wUiEoSiHTahvuagx93E4FB07v123AhdbOjwUkPusiIg,14
37
- haiku_rag-0.7.6.dist-info/METADATA,sha256=lJ64GebtYSRr0ld0jXF-jvnpraumPOde2A0w8JVpXsc,4610
38
- haiku_rag-0.7.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
39
- haiku_rag-0.7.6.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
40
- haiku_rag-0.7.6.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
41
- haiku_rag-0.7.6.dist-info/RECORD,,
37
+ haiku_rag-0.8.0.dist-info/METADATA,sha256=OZfvP7S7MBndpjjTg59UaD9JgB_W39pXpYAjyULjn8A,4610
38
+ haiku_rag-0.8.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
39
+ haiku_rag-0.8.0.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
40
+ haiku_rag-0.8.0.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
41
+ haiku_rag-0.8.0.dist-info/RECORD,,