haiku.rag-slim 0.16.0__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of haiku.rag-slim has been flagged as possibly problematic.

Files changed (71)
  1. haiku/rag/__init__.py +0 -0
  2. haiku/rag/app.py +542 -0
  3. haiku/rag/chunker.py +65 -0
  4. haiku/rag/cli.py +466 -0
  5. haiku/rag/client.py +731 -0
  6. haiku/rag/config/__init__.py +74 -0
  7. haiku/rag/config/loader.py +94 -0
  8. haiku/rag/config/models.py +99 -0
  9. haiku/rag/embeddings/__init__.py +49 -0
  10. haiku/rag/embeddings/base.py +25 -0
  11. haiku/rag/embeddings/ollama.py +28 -0
  12. haiku/rag/embeddings/openai.py +26 -0
  13. haiku/rag/embeddings/vllm.py +29 -0
  14. haiku/rag/embeddings/voyageai.py +27 -0
  15. haiku/rag/graph/__init__.py +26 -0
  16. haiku/rag/graph/agui/__init__.py +53 -0
  17. haiku/rag/graph/agui/cli_renderer.py +135 -0
  18. haiku/rag/graph/agui/emitter.py +197 -0
  19. haiku/rag/graph/agui/events.py +254 -0
  20. haiku/rag/graph/agui/server.py +310 -0
  21. haiku/rag/graph/agui/state.py +34 -0
  22. haiku/rag/graph/agui/stream.py +86 -0
  23. haiku/rag/graph/common/__init__.py +5 -0
  24. haiku/rag/graph/common/models.py +42 -0
  25. haiku/rag/graph/common/nodes.py +265 -0
  26. haiku/rag/graph/common/prompts.py +46 -0
  27. haiku/rag/graph/common/utils.py +44 -0
  28. haiku/rag/graph/deep_qa/__init__.py +1 -0
  29. haiku/rag/graph/deep_qa/dependencies.py +27 -0
  30. haiku/rag/graph/deep_qa/graph.py +243 -0
  31. haiku/rag/graph/deep_qa/models.py +20 -0
  32. haiku/rag/graph/deep_qa/prompts.py +59 -0
  33. haiku/rag/graph/deep_qa/state.py +56 -0
  34. haiku/rag/graph/research/__init__.py +3 -0
  35. haiku/rag/graph/research/common.py +87 -0
  36. haiku/rag/graph/research/dependencies.py +151 -0
  37. haiku/rag/graph/research/graph.py +295 -0
  38. haiku/rag/graph/research/models.py +166 -0
  39. haiku/rag/graph/research/prompts.py +107 -0
  40. haiku/rag/graph/research/state.py +85 -0
  41. haiku/rag/logging.py +56 -0
  42. haiku/rag/mcp.py +245 -0
  43. haiku/rag/monitor.py +194 -0
  44. haiku/rag/qa/__init__.py +33 -0
  45. haiku/rag/qa/agent.py +93 -0
  46. haiku/rag/qa/prompts.py +60 -0
  47. haiku/rag/reader.py +135 -0
  48. haiku/rag/reranking/__init__.py +63 -0
  49. haiku/rag/reranking/base.py +13 -0
  50. haiku/rag/reranking/cohere.py +34 -0
  51. haiku/rag/reranking/mxbai.py +28 -0
  52. haiku/rag/reranking/vllm.py +44 -0
  53. haiku/rag/reranking/zeroentropy.py +59 -0
  54. haiku/rag/store/__init__.py +4 -0
  55. haiku/rag/store/engine.py +309 -0
  56. haiku/rag/store/models/__init__.py +4 -0
  57. haiku/rag/store/models/chunk.py +17 -0
  58. haiku/rag/store/models/document.py +17 -0
  59. haiku/rag/store/repositories/__init__.py +9 -0
  60. haiku/rag/store/repositories/chunk.py +442 -0
  61. haiku/rag/store/repositories/document.py +261 -0
  62. haiku/rag/store/repositories/settings.py +165 -0
  63. haiku/rag/store/upgrades/__init__.py +62 -0
  64. haiku/rag/store/upgrades/v0_10_1.py +64 -0
  65. haiku/rag/store/upgrades/v0_9_3.py +112 -0
  66. haiku/rag/utils.py +211 -0
  67. haiku_rag_slim-0.16.0.dist-info/METADATA +128 -0
  68. haiku_rag_slim-0.16.0.dist-info/RECORD +71 -0
  69. haiku_rag_slim-0.16.0.dist-info/WHEEL +4 -0
  70. haiku_rag_slim-0.16.0.dist-info/entry_points.txt +2 -0
  71. haiku_rag_slim-0.16.0.dist-info/licenses/LICENSE +7 -0
haiku/rag/reranking/vllm.py
@@ -0,0 +1,44 @@
+ import httpx
+
+ from haiku.rag.config import Config
+ from haiku.rag.reranking.base import RerankerBase
+ from haiku.rag.store.models.chunk import Chunk
+
+
+ class VLLMReranker(RerankerBase):
+     def __init__(self, model: str):
+         self._model = model
+         self._base_url = Config.providers.vllm.rerank_base_url
+
+     async def rerank(
+         self, query: str, chunks: list[Chunk], top_n: int = 10
+     ) -> list[tuple[Chunk, float]]:
+         if not chunks:
+             return []
+
+         # Prepare documents for reranking
+         documents = [chunk.content for chunk in chunks]
+
+         async with httpx.AsyncClient() as client:
+             response = await client.post(
+                 f"{self._base_url}/v1/rerank",
+                 json={"model": self._model, "query": query, "documents": documents},
+                 headers={
+                     "accept": "application/json",
+                     "Content-Type": "application/json",
+                 },
+             )
+             response.raise_for_status()
+
+             result = response.json()
+
+         # Extract scores and pair with chunks
+         scored_chunks = []
+         for item in result.get("results", []):
+             index = item["index"]
+             score = item["relevance_score"]
+             scored_chunks.append((chunks[index], score))
+
+         # Sort by score (descending) and return top_n
+         scored_chunks.sort(key=lambda x: x[1], reverse=True)
+         return scored_chunks[:top_n]
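
For orientation, a minimal usage sketch of the reranker above, assuming a vLLM server exposes the /v1/rerank endpoint at the configured Config.providers.vllm.rerank_base_url; the model name and chunk texts are placeholders, not project defaults:

import asyncio

from haiku.rag.reranking.vllm import VLLMReranker
from haiku.rag.store.models.chunk import Chunk


async def main() -> None:
    # Hypothetical chunks; in practice these come from the store's search results.
    chunks = [
        Chunk(content="LanceDB is an embedded vector database."),
        Chunk(content="A haiku is a three-line poem."),
    ]
    reranker = VLLMReranker(model="BAAI/bge-reranker-v2-m3")  # placeholder model name
    ranked = await reranker.rerank("what is lancedb?", chunks, top_n=1)
    for chunk, score in ranked:
        print(f"{score:.3f}  {chunk.content}")


asyncio.run(main())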
haiku/rag/reranking/zeroentropy.py
@@ -0,0 +1,59 @@
+ from zeroentropy import ZeroEntropy
+
+ from haiku.rag.reranking.base import RerankerBase
+ from haiku.rag.store.models.chunk import Chunk
+
+
+ class ZeroEntropyReranker(RerankerBase):
+     """Zero Entropy reranker implementation using the zerank-1 model."""
+
+     def __init__(self, model: str = "zerank-1"):
+         """Initialize the Zero Entropy reranker.
+
+         Args:
+             model: The Zero Entropy model to use (default: "zerank-1")
+         """
+         self._model = model
+         # Zero Entropy SDK reads ZEROENTROPY_API_KEY from environment by default
+         self._client = ZeroEntropy()
+
+     async def rerank(
+         self, query: str, chunks: list[Chunk], top_n: int = 10
+     ) -> list[tuple[Chunk, float]]:
+         """Rerank the given chunks based on relevance to the query.
+
+         Args:
+             query: The query to rank against
+             chunks: The chunks to rerank
+             top_n: The number of top results to return
+
+         Returns:
+             A list of (chunk, score) tuples, sorted by relevance
+         """
+         if not chunks:
+             return []
+
+         # Prepare documents for Zero Entropy API
+         documents = [chunk.content for chunk in chunks]
+
+         # Call Zero Entropy reranking API
+         response = self._client.models.rerank(
+             model=self._model,
+             query=query,
+             documents=documents,
+         )
+
+         # Extract results and map back to chunks
+         # Zero Entropy returns results sorted by relevance with scores
+         reranked_results = []
+
+         # Get top_n results
+         for i, result in enumerate(response.results[:top_n]):
+             # Zero Entropy returns index and score for each document
+             chunk_index = result.index
+             score = result.relevance_score
+
+             if chunk_index < len(chunks):
+                 reranked_results.append((chunks[chunk_index], score))
+
+         return reranked_results
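
The ZeroEntropy variant follows the same contract; the sketch below assumes ZEROENTROPY_API_KEY is set in the environment, since the constructor above relies on the SDK reading it implicitly. Note that if ZeroEntropy is the SDK's synchronous client, the rerank() call above blocks the event loop while the request is in flight.

import asyncio

from haiku.rag.reranking.zeroentropy import ZeroEntropyReranker
from haiku.rag.store.models.chunk import Chunk


async def main() -> None:
    # Placeholder passages to rank against a query.
    chunks = [Chunk(content=text) for text in ("first passage", "second passage")]
    reranker = ZeroEntropyReranker()  # defaults to the "zerank-1" model
    for chunk, score in await reranker.rerank("example query", chunks, top_n=2):
        print(f"{score:.3f}  {chunk.content}")


asyncio.run(main())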
haiku/rag/store/__init__.py
@@ -0,0 +1,4 @@
+ from .engine import Store
+ from .models import Chunk, Document
+
+ __all__ = ["Store", "Chunk", "Document"]
haiku/rag/store/engine.py
@@ -0,0 +1,309 @@
+ import asyncio
+ import json
+ import logging
+ from datetime import timedelta
+ from importlib import metadata
+ from pathlib import Path
+ from uuid import uuid4
+
+ import lancedb
+ from lancedb.pydantic import LanceModel, Vector
+ from pydantic import Field
+
+ from haiku.rag.config import AppConfig, Config
+ from haiku.rag.embeddings import get_embedder
+
+ logger = logging.getLogger(__name__)
+
+
+ class DocumentRecord(LanceModel):
+     id: str = Field(default_factory=lambda: str(uuid4()))
+     content: str
+     uri: str | None = None
+     title: str | None = None
+     metadata: str = Field(default="{}")
+     created_at: str = Field(default_factory=lambda: "")
+     updated_at: str = Field(default_factory=lambda: "")
+
+
+ def create_chunk_model(vector_dim: int):
+     """Create a ChunkRecord model with the specified vector dimension.
+
+     This creates a model with proper vector typing for LanceDB.
+     """
+
+     class ChunkRecord(LanceModel):
+         id: str = Field(default_factory=lambda: str(uuid4()))
+         document_id: str
+         content: str
+         metadata: str = Field(default="{}")
+         order: int = Field(default=0)
+         vector: Vector(vector_dim) = Field(default_factory=lambda: [0.0] * vector_dim)  # type: ignore
+
+     return ChunkRecord
+
+
+ class SettingsRecord(LanceModel):
+     id: str = Field(default="settings")
+     settings: str = Field(default="{}")
+
+
+ class Store:
+     def __init__(
+         self,
+         db_path: Path,
+         config: AppConfig = Config,
+         skip_validation: bool = False,
+         allow_create: bool = True,
+     ):
+         self.db_path: Path = db_path
+         self._config = config
+         self.embedder = get_embedder(config=self._config)
+         self._vacuum_lock = asyncio.Lock()
+
+         # Create the ChunkRecord model with the correct vector dimension
+         self.ChunkRecord = create_chunk_model(self.embedder._vector_dim)
+
+         # Local filesystem handling for DB directory
+         if not self._has_cloud_config():
+             if not allow_create:
+                 # Read operations should not create the database
+                 if not db_path.exists():
+                     raise FileNotFoundError(
+                         f"Database does not exist: {db_path}. Use a write operation (add, add-src) to create it."
+                     )
+             else:
+                 # Write operations - ensure parent directories exist
+                 if not db_path.parent.exists():
+                     Path.mkdir(db_path.parent, parents=True)
+
+         # Connect to LanceDB
+         self.db = self._connect_to_lancedb(db_path)
+
+         # Initialize tables
+         self.create_or_update_db()
+
+         # Validate config compatibility after connection is established
+         if not skip_validation:
+             self._validate_configuration()
+
+     async def vacuum(self, retention_seconds: int | None = None) -> None:
+         """Optimize and clean up old versions across all tables to reduce disk usage.
+
+         Args:
+             retention_seconds: Retention threshold in seconds. Only versions older
+                 than this will be removed. If None, uses config.storage.vacuum_retention_seconds.
+
+         Note:
+             If vacuum is already running, this method returns immediately without blocking.
+             Use asyncio.create_task(store.vacuum()) for non-blocking background execution.
+         """
+         if self._has_cloud_config() and str(self._config.lancedb.uri).startswith(
+             "db://"
+         ):
+             return
+
+         # Skip if already running (non-blocking)
+         if self._vacuum_lock.locked():
+             return
+
+         async with self._vacuum_lock:
+             try:
+                 # Evaluate config at runtime to allow dynamic changes
+                 if retention_seconds is None:
+                     retention_seconds = self._config.storage.vacuum_retention_seconds
+                 # Perform maintenance per table using optimize() with configurable retention
+                 retention = timedelta(seconds=retention_seconds)
+                 for table in [
+                     self.documents_table,
+                     self.chunks_table,
+                     self.settings_table,
+                 ]:
+                     table.optimize(cleanup_older_than=retention)
+             except (RuntimeError, OSError) as e:
+                 # Handle resource errors gracefully
+                 logger.debug(f"Vacuum skipped due to resource constraints: {e}")
+
+     def _connect_to_lancedb(self, db_path: Path):
+         """Establish connection to LanceDB (local, cloud, or object storage)."""
+         # Check if we have cloud configuration
+         if self._has_cloud_config():
+             return lancedb.connect(
+                 uri=self._config.lancedb.uri,
+                 api_key=self._config.lancedb.api_key,
+                 region=self._config.lancedb.region,
+             )
+         else:
+             # Local file system connection
+             return lancedb.connect(db_path)
+
+     def _has_cloud_config(self) -> bool:
+         """Check if cloud configuration is complete."""
+         return bool(
+             self._config.lancedb.uri
+             and self._config.lancedb.api_key
+             and self._config.lancedb.region
+         )
+
+     def _validate_configuration(self) -> None:
+         """Validate that the configuration is compatible with the database."""
+         from haiku.rag.store.repositories.settings import SettingsRepository
+
+         settings_repo = SettingsRepository(self)
+         settings_repo.validate_config_compatibility()
+
+     def create_or_update_db(self):
+         """Create the database tables."""
+
+         # Get list of existing tables
+         existing_tables = self.db.table_names()
+
+         # Create or get documents table
+         if "documents" in existing_tables:
+             self.documents_table = self.db.open_table("documents")
+         else:
+             self.documents_table = self.db.create_table(
+                 "documents", schema=DocumentRecord
+             )
+
+         # Create or get chunks table
+         if "chunks" in existing_tables:
+             self.chunks_table = self.db.open_table("chunks")
+         else:
+             self.chunks_table = self.db.create_table("chunks", schema=self.ChunkRecord)
+             # Create FTS index on the new table with phrase query support
+             self.chunks_table.create_fts_index(
+                 "content", replace=True, with_position=True, remove_stop_words=False
+             )
+
+         # Create or get settings table
+         if "settings" in existing_tables:
+             self.settings_table = self.db.open_table("settings")
+         else:
+             self.settings_table = self.db.create_table(
+                 "settings", schema=SettingsRecord
+             )
+             # Save current settings to the new database
+             settings_data = self._config.model_dump(mode="json")
+             self.settings_table.add(
+                 [SettingsRecord(id="settings", settings=json.dumps(settings_data))]
+             )
+
+         # Run pending upgrades based on stored version and package version
+         try:
+             from haiku.rag.store.upgrades import run_pending_upgrades
+
+             current_version = metadata.version("haiku.rag-slim")
+             db_version = self.get_haiku_version()
+
+             if db_version != "0.0.0":
+                 run_pending_upgrades(self, db_version, current_version)
+
+             # After upgrades complete (or if none), set stored version
+             # to the greater of the installed package version and the
+             # highest available upgrade step version in code.
+             try:
+                 from packaging.version import parse as _v
+
+                 from haiku.rag.store.upgrades import upgrades as _steps
+
+                 highest_step = max((_v(u.version) for u in _steps), default=None)
+                 effective_version = (
+                     str(max(_v(current_version), highest_step))
+                     if highest_step is not None
+                     else current_version
+                 )
+             except Exception:
+                 effective_version = current_version
+
+             self.set_haiku_version(effective_version)
+         except Exception as e:
+             # Avoid hard failure on initial connection; log and continue so CLI remains usable.
+             logger.warning(
+                 "Skipping upgrade due to error (db=%s -> pkg=%s): %s",
+                 self.get_haiku_version(),
+                 metadata.version("haiku.rag-slim"),
+                 e,
+             )
+
+     def get_haiku_version(self) -> str:
+         """Returns the user version stored in settings."""
+         settings_records = list(
+             self.settings_table.search().limit(1).to_pydantic(SettingsRecord)
+         )
+         if settings_records:
+             settings = (
+                 json.loads(settings_records[0].settings)
+                 if settings_records[0].settings
+                 else {}
+             )
+             return settings.get("version", "0.0.0")
+         return "0.0.0"
+
+     def set_haiku_version(self, version: str) -> None:
+         """Updates the user version in settings."""
+         settings_records = list(
+             self.settings_table.search().limit(1).to_pydantic(SettingsRecord)
+         )
+         if settings_records:
+             # Only write if version actually changes to avoid creating new table versions
+             current = (
+                 json.loads(settings_records[0].settings)
+                 if settings_records[0].settings
+                 else {}
+             )
+             if current.get("version") != version:
+                 current["version"] = version
+                 self.settings_table.update(
+                     where="id = 'settings'",
+                     values={"settings": json.dumps(current)},
+                 )
+         else:
+             # Create new settings record
+             settings_data = Config.model_dump(mode="json")
+             settings_data["version"] = version
+             self.settings_table.add(
+                 [SettingsRecord(id="settings", settings=json.dumps(settings_data))]
+             )
+
+     def recreate_embeddings_table(self) -> None:
+         """Recreate the chunks table with current vector dimensions."""
+         # Drop and recreate chunks table
+         try:
+             self.db.drop_table("chunks")
+         except Exception:
+             pass
+
+         # Update the ChunkRecord model with new vector dimension
+         self.ChunkRecord = create_chunk_model(self.embedder._vector_dim)
+         self.chunks_table = self.db.create_table("chunks", schema=self.ChunkRecord)
+
+         # Create FTS index on the new table with phrase query support
+         self.chunks_table.create_fts_index(
+             "content", replace=True, with_position=True, remove_stop_words=False
+         )
+
+     def close(self):
+         """Close the database connection."""
+         # LanceDB connections are automatically managed
+         pass
+
+     def current_table_versions(self) -> dict[str, int]:
+         """Capture current versions of key tables for rollback using LanceDB's API."""
+         return {
+             "documents": int(self.documents_table.version),
+             "chunks": int(self.chunks_table.version),
+             "settings": int(self.settings_table.version),
+         }
+
+     def restore_table_versions(self, versions: dict[str, int]) -> bool:
+         """Restore tables to the provided versions using LanceDB's API."""
+         self.documents_table.restore(int(versions["documents"]))
+         self.chunks_table.restore(int(versions["chunks"]))
+         self.settings_table.restore(int(versions["settings"]))
+         return True
+
+     @property
+     def _connection(self):
+         """Compatibility property for repositories expecting _connection."""
+         return self
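
A short sketch of opening and maintaining a local store with the engine above; the database path is a placeholder, and it assumes the embedding provider selected in Config is configured, since get_embedder() runs in __init__ and the stored settings are validated against the active config:

import asyncio
from pathlib import Path

from haiku.rag.store import Store


async def main() -> None:
    # Creates ./example.lancedb on first use because allow_create defaults to True.
    store = Store(Path("example.lancedb"))
    print("stored haiku.rag version:", store.get_haiku_version())
    print("table versions:", store.current_table_versions())
    # Reclaim space from old table versions; long-running apps can instead use
    # asyncio.create_task(store.vacuum()) as the docstring suggests.
    await store.vacuum()
    store.close()


asyncio.run(main())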
haiku/rag/store/models/__init__.py
@@ -0,0 +1,4 @@
+ from .chunk import Chunk
+ from .document import Document
+
+ __all__ = ["Chunk", "Document"]
haiku/rag/store/models/chunk.py
@@ -0,0 +1,17 @@
+ from pydantic import BaseModel
+
+
+ class Chunk(BaseModel):
+     """
+     Represents a chunk with content, metadata, and optional document information.
+     """
+
+     id: str | None = None
+     document_id: str | None = None
+     content: str
+     metadata: dict = {}
+     order: int = 0
+     document_uri: str | None = None
+     document_title: str | None = None
+     document_meta: dict = {}
+     embedding: list[float] | None = None
haiku/rag/store/models/document.py
@@ -0,0 +1,17 @@
+ from datetime import datetime
+
+ from pydantic import BaseModel, Field
+
+
+ class Document(BaseModel):
+     """
+     Represents a document with an ID, content, and metadata.
+     """
+
+     id: str | None = None
+     content: str
+     uri: str | None = None
+     title: str | None = None
+     metadata: dict = {}
+     created_at: datetime = Field(default_factory=datetime.now)
+     updated_at: datetime = Field(default_factory=datetime.now)
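
Chunk and Document are plain Pydantic models, so they can be constructed and serialized directly; a minimal illustration with made-up values:

from haiku.rag.store.models.chunk import Chunk
from haiku.rag.store.models.document import Document

doc = Document(content="Full text of the document.", uri="file:///tmp/example.md")
chunk = Chunk(
    content="Full text of the document.",
    document_id=doc.id,  # None until the document is persisted and assigned an id
    metadata={"source": "example"},
)
print(doc.model_dump_json(indent=2))
print(chunk.model_dump())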
haiku/rag/store/repositories/__init__.py
@@ -0,0 +1,9 @@
+ from haiku.rag.store.repositories.chunk import ChunkRepository
+ from haiku.rag.store.repositories.document import DocumentRepository
+ from haiku.rag.store.repositories.settings import SettingsRepository
+
+ __all__ = [
+     "ChunkRepository",
+     "DocumentRepository",
+     "SettingsRepository",
+ ]
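
The repositories are constructed around a Store instance, mirroring how engine.py instantiates SettingsRepository(self) for config validation. A minimal sketch of that pattern; whether ChunkRepository and DocumentRepository accept the store the same way is an assumption, since their code is not part of this excerpt:

from pathlib import Path

from haiku.rag.store import Store
from haiku.rag.store.repositories import SettingsRepository

store = Store(Path("example.lancedb"), skip_validation=True)
# The same check that _validate_configuration() performs internally in engine.py.
SettingsRepository(store).validate_config_compatibility()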