haiku.rag 0.9.2__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. README.md +205 -0
  2. haiku_rag-0.14.0.dist-info/METADATA +227 -0
  3. haiku_rag-0.14.0.dist-info/RECORD +6 -0
  4. haiku/rag/__init__.py +0 -0
  5. haiku/rag/app.py +0 -267
  6. haiku/rag/chunker.py +0 -51
  7. haiku/rag/cli.py +0 -359
  8. haiku/rag/client.py +0 -565
  9. haiku/rag/config.py +0 -77
  10. haiku/rag/embeddings/__init__.py +0 -35
  11. haiku/rag/embeddings/base.py +0 -15
  12. haiku/rag/embeddings/ollama.py +0 -17
  13. haiku/rag/embeddings/openai.py +0 -16
  14. haiku/rag/embeddings/vllm.py +0 -19
  15. haiku/rag/embeddings/voyageai.py +0 -17
  16. haiku/rag/logging.py +0 -56
  17. haiku/rag/mcp.py +0 -144
  18. haiku/rag/migration.py +0 -316
  19. haiku/rag/monitor.py +0 -73
  20. haiku/rag/qa/__init__.py +0 -15
  21. haiku/rag/qa/agent.py +0 -89
  22. haiku/rag/qa/prompts.py +0 -60
  23. haiku/rag/reader.py +0 -115
  24. haiku/rag/reranking/__init__.py +0 -34
  25. haiku/rag/reranking/base.py +0 -13
  26. haiku/rag/reranking/cohere.py +0 -34
  27. haiku/rag/reranking/mxbai.py +0 -28
  28. haiku/rag/reranking/vllm.py +0 -44
  29. haiku/rag/research/__init__.py +0 -37
  30. haiku/rag/research/base.py +0 -130
  31. haiku/rag/research/dependencies.py +0 -45
  32. haiku/rag/research/evaluation_agent.py +0 -42
  33. haiku/rag/research/orchestrator.py +0 -300
  34. haiku/rag/research/presearch_agent.py +0 -34
  35. haiku/rag/research/prompts.py +0 -129
  36. haiku/rag/research/search_agent.py +0 -65
  37. haiku/rag/research/synthesis_agent.py +0 -40
  38. haiku/rag/store/__init__.py +0 -4
  39. haiku/rag/store/engine.py +0 -230
  40. haiku/rag/store/models/__init__.py +0 -4
  41. haiku/rag/store/models/chunk.py +0 -15
  42. haiku/rag/store/models/document.py +0 -16
  43. haiku/rag/store/repositories/__init__.py +0 -9
  44. haiku/rag/store/repositories/chunk.py +0 -399
  45. haiku/rag/store/repositories/document.py +0 -234
  46. haiku/rag/store/repositories/settings.py +0 -148
  47. haiku/rag/store/upgrades/__init__.py +0 -1
  48. haiku/rag/utils.py +0 -162
  49. haiku_rag-0.9.2.dist-info/METADATA +0 -131
  50. haiku_rag-0.9.2.dist-info/RECORD +0 -50
  51. {haiku_rag-0.9.2.dist-info → haiku_rag-0.14.0.dist-info}/WHEEL +0 -0
  52. {haiku_rag-0.9.2.dist-info → haiku_rag-0.14.0.dist-info}/entry_points.txt +0 -0
  53. {haiku_rag-0.9.2.dist-info → haiku_rag-0.14.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,17 +0,0 @@
1
- from openai import AsyncOpenAI
2
-
3
- from haiku.rag.config import Config
4
- from haiku.rag.embeddings.base import EmbedderBase
5
-
6
-
7
- class Embedder(EmbedderBase):
8
- async def embed(self, text: str | list[str]) -> list[float] | list[list[float]]:
9
- client = AsyncOpenAI(base_url=f"{Config.OLLAMA_BASE_URL}/v1", api_key="dummy")
10
- response = await client.embeddings.create(
11
- model=self._model,
12
- input=text,
13
- )
14
- if isinstance(text, str):
15
- return response.data[0].embedding
16
- else:
17
- return [item.embedding for item in response.data]
@@ -1,16 +0,0 @@
1
- from openai import AsyncOpenAI
2
-
3
- from haiku.rag.embeddings.base import EmbedderBase
4
-
5
-
6
- class Embedder(EmbedderBase):
7
- async def embed(self, text: str | list[str]) -> list[float] | list[list[float]]:
8
- client = AsyncOpenAI()
9
- response = await client.embeddings.create(
10
- model=self._model,
11
- input=text,
12
- )
13
- if isinstance(text, str):
14
- return response.data[0].embedding
15
- else:
16
- return [item.embedding for item in response.data]
@@ -1,19 +0,0 @@
1
- from openai import AsyncOpenAI
2
-
3
- from haiku.rag.config import Config
4
- from haiku.rag.embeddings.base import EmbedderBase
5
-
6
-
7
- class Embedder(EmbedderBase):
8
- async def embed(self, text: str | list[str]) -> list[float] | list[list[float]]:
9
- client = AsyncOpenAI(
10
- base_url=f"{Config.VLLM_EMBEDDINGS_BASE_URL}/v1", api_key="dummy"
11
- )
12
- response = await client.embeddings.create(
13
- model=self._model,
14
- input=text,
15
- )
16
- if isinstance(text, str):
17
- return response.data[0].embedding
18
- else:
19
- return [item.embedding for item in response.data]
@@ -1,17 +0,0 @@
1
- try:
2
- from voyageai.client import Client # type: ignore
3
-
4
- from haiku.rag.embeddings.base import EmbedderBase
5
-
6
- class Embedder(EmbedderBase):
7
- async def embed(self, text: str | list[str]) -> list[float] | list[list[float]]:
8
- client = Client()
9
- if isinstance(text, str):
10
- res = client.embed([text], model=self._model, output_dtype="float")
11
- return res.embeddings[0] # type: ignore[return-value]
12
- else:
13
- res = client.embed(text, model=self._model, output_dtype="float")
14
- return res.embeddings # type: ignore[return-value]
15
-
16
- except ImportError:
17
- pass
haiku/rag/logging.py DELETED
@@ -1,56 +0,0 @@
1
- import logging
2
- import warnings
3
-
4
- from rich.console import Console
5
- from rich.logging import RichHandler
6
-
7
-
8
- def get_logger() -> logging.Logger:
9
- """Return the library logger configured with a Rich handler."""
10
- logger = logging.getLogger("haiku.rag")
11
-
12
- handler = RichHandler(
13
- console=Console(stderr=True),
14
- rich_tracebacks=True,
15
- )
16
- formatter = logging.Formatter("%(message)s")
17
- handler.setFormatter(formatter)
18
-
19
- logger.setLevel(logging.INFO)
20
-
21
- # Remove any existing handlers to avoid duplicates on reconfiguration
22
- for hdlr in logger.handlers[:]:
23
- logger.removeHandler(hdlr)
24
-
25
- logger.addHandler(handler)
26
- # Do not let messages propagate to the root logger
27
- logger.propagate = False
28
- return logger
29
-
30
-
31
- def configure_cli_logging(level: int = logging.INFO) -> logging.Logger:
32
- """Configure logging for CLI runs.
33
-
34
- - Silence ALL non-haiku.rag loggers by detaching root handlers and setting
35
- their level to ERROR.
36
- - Attach a Rich handler only to the "haiku.rag" logger.
37
- - Prevent propagation so only our logger prints in the CLI.
38
- """
39
- # Silence root logger completely
40
- root = logging.getLogger()
41
- for hdlr in root.handlers[:]:
42
- root.removeHandler(hdlr)
43
- root.setLevel(logging.ERROR)
44
-
45
- # Optionally silence some commonly noisy libraries explicitly as a safeguard
46
- for noisy in ("httpx", "httpcore", "docling", "urllib3", "asyncio"):
47
- logging.getLogger(noisy).setLevel(logging.ERROR)
48
- logging.getLogger(noisy).propagate = False
49
-
50
- # Configure and return our app logger
51
- logger = get_logger()
52
- logger.setLevel(level)
53
- logger.propagate = False
54
-
55
- warnings.filterwarnings("ignore")
56
- return logger
haiku/rag/mcp.py DELETED
@@ -1,144 +0,0 @@
1
- from pathlib import Path
2
- from typing import Any
3
-
4
- from fastmcp import FastMCP
5
- from pydantic import BaseModel
6
-
7
- from haiku.rag.client import HaikuRAG
8
-
9
-
10
- class SearchResult(BaseModel):
11
- document_id: str
12
- content: str
13
- score: float
14
-
15
-
16
- class DocumentResult(BaseModel):
17
- id: str | None
18
- content: str
19
- uri: str | None = None
20
- metadata: dict[str, Any] = {}
21
- created_at: str
22
- updated_at: str
23
-
24
-
25
- def create_mcp_server(db_path: Path) -> FastMCP:
26
- """Create an MCP server with the specified database path."""
27
- mcp = FastMCP("haiku-rag")
28
-
29
- @mcp.tool()
30
- async def add_document_from_file(
31
- file_path: str, metadata: dict[str, Any] | None = None
32
- ) -> str | None:
33
- """Add a document to the RAG system from a file path."""
34
- try:
35
- async with HaikuRAG(db_path) as rag:
36
- document = await rag.create_document_from_source(
37
- Path(file_path), metadata or {}
38
- )
39
- return document.id
40
- except Exception:
41
- return None
42
-
43
- @mcp.tool()
44
- async def add_document_from_url(
45
- url: str, metadata: dict[str, Any] | None = None
46
- ) -> str | None:
47
- """Add a document to the RAG system from a URL."""
48
- try:
49
- async with HaikuRAG(db_path) as rag:
50
- document = await rag.create_document_from_source(url, metadata or {})
51
- return document.id
52
- except Exception:
53
- return None
54
-
55
- @mcp.tool()
56
- async def add_document_from_text(
57
- content: str, uri: str | None = None, metadata: dict[str, Any] | None = None
58
- ) -> str | None:
59
- """Add a document to the RAG system from text content."""
60
- try:
61
- async with HaikuRAG(db_path) as rag:
62
- document = await rag.create_document(content, uri, metadata or {})
63
- return document.id
64
- except Exception:
65
- return None
66
-
67
- @mcp.tool()
68
- async def search_documents(query: str, limit: int = 5) -> list[SearchResult]:
69
- """Search the RAG system for documents using hybrid search (vector similarity + full-text search)."""
70
- try:
71
- async with HaikuRAG(db_path) as rag:
72
- results = await rag.search(query, limit)
73
-
74
- search_results = []
75
- for chunk, score in results:
76
- assert chunk.document_id is not None, (
77
- "Chunk document_id should not be None in search results"
78
- )
79
- search_results.append(
80
- SearchResult(
81
- document_id=chunk.document_id,
82
- content=chunk.content,
83
- score=score,
84
- )
85
- )
86
-
87
- return search_results
88
- except Exception:
89
- return []
90
-
91
- @mcp.tool()
92
- async def get_document(document_id: str) -> DocumentResult | None:
93
- """Get a document by its ID."""
94
- try:
95
- async with HaikuRAG(db_path) as rag:
96
- document = await rag.get_document_by_id(document_id)
97
-
98
- if document is None:
99
- return None
100
-
101
- return DocumentResult(
102
- id=document.id,
103
- content=document.content,
104
- uri=document.uri,
105
- metadata=document.metadata,
106
- created_at=str(document.created_at),
107
- updated_at=str(document.updated_at),
108
- )
109
- except Exception:
110
- return None
111
-
112
- @mcp.tool()
113
- async def list_documents(
114
- limit: int | None = None, offset: int | None = None
115
- ) -> list[DocumentResult]:
116
- """List all documents with optional pagination."""
117
- try:
118
- async with HaikuRAG(db_path) as rag:
119
- documents = await rag.list_documents(limit, offset)
120
-
121
- return [
122
- DocumentResult(
123
- id=doc.id,
124
- content=doc.content,
125
- uri=doc.uri,
126
- metadata=doc.metadata,
127
- created_at=str(doc.created_at),
128
- updated_at=str(doc.updated_at),
129
- )
130
- for doc in documents
131
- ]
132
- except Exception:
133
- return []
134
-
135
- @mcp.tool()
136
- async def delete_document(document_id: str) -> bool:
137
- """Delete a document by its ID."""
138
- try:
139
- async with HaikuRAG(db_path) as rag:
140
- return await rag.delete_document(document_id)
141
- except Exception:
142
- return False
143
-
144
- return mcp
haiku/rag/migration.py DELETED
@@ -1,316 +0,0 @@
1
- import json
2
- import sqlite3
3
- import struct
4
- from pathlib import Path
5
- from uuid import uuid4
6
-
7
- from rich.console import Console
8
- from rich.progress import Progress, TaskID
9
-
10
- from haiku.rag.store.engine import Store
11
-
12
-
13
- def deserialize_sqlite_embedding(data: bytes) -> list[float]:
14
- """Deserialize sqlite-vec embedding from bytes."""
15
- if not data:
16
- return []
17
- # sqlite-vec stores embeddings as float32 arrays
18
- num_floats = len(data) // 4
19
- return list(struct.unpack(f"{num_floats}f", data))
20
-
21
-
22
- class SQLiteToLanceDBMigrator:
23
- """Migrates data from SQLite to LanceDB."""
24
-
25
- def __init__(self, sqlite_path: Path, lancedb_path: Path):
26
- self.sqlite_path = sqlite_path
27
- self.lancedb_path = lancedb_path
28
- self.console = Console()
29
-
30
- def migrate(self) -> bool:
31
- """Perform the migration."""
32
- try:
33
- self.console.print(
34
- f"[blue]Starting migration from {self.sqlite_path} to {self.lancedb_path}[/blue]"
35
- )
36
-
37
- # Check if SQLite database exists
38
- if not self.sqlite_path.exists():
39
- self.console.print(
40
- f"[red]SQLite database not found: {self.sqlite_path}[/red]"
41
- )
42
- return False
43
-
44
- # Connect to SQLite database
45
- sqlite_conn = sqlite3.connect(self.sqlite_path)
46
- sqlite_conn.row_factory = sqlite3.Row
47
-
48
- # Load the sqlite-vec extension
49
- try:
50
- import sqlite_vec # type: ignore
51
-
52
- sqlite_conn.enable_load_extension(True)
53
- sqlite_vec.load(sqlite_conn)
54
- self.console.print("[blue]Loaded sqlite-vec extension[/blue]")
55
- except Exception as e:
56
- self.console.print(
57
- f"[yellow]Warning: Could not load sqlite-vec extension: {e}[/yellow]"
58
- )
59
- self.console.print(
60
- "[yellow]Install sqlite-vec with[/yellow]\n[green]uv pip install sqlite-vec [/green]"
61
- )
62
- exit(1)
63
-
64
- # Create LanceDB store
65
- lance_store = Store(self.lancedb_path, skip_validation=True)
66
-
67
- with Progress() as progress:
68
- # Migrate documents
69
- doc_task = progress.add_task(
70
- "[green]Migrating documents...", total=None
71
- )
72
- document_id_mapping = self._migrate_documents(
73
- sqlite_conn, lance_store, progress, doc_task
74
- )
75
-
76
- # Migrate chunks and embeddings
77
- chunk_task = progress.add_task(
78
- "[yellow]Migrating chunks and embeddings...", total=None
79
- )
80
- self._migrate_chunks(
81
- sqlite_conn, lance_store, progress, chunk_task, document_id_mapping
82
- )
83
-
84
- # Migrate settings
85
- settings_task = progress.add_task(
86
- "[blue]Migrating settings...", total=None
87
- )
88
- self._migrate_settings(
89
- sqlite_conn, lance_store, progress, settings_task
90
- )
91
-
92
- sqlite_conn.close()
93
-
94
- # Optimize and cleanup using centralized vacuum
95
- self.console.print("[blue]Optimizing LanceDB...[/blue]")
96
- try:
97
- lance_store.vacuum()
98
- self.console.print("[green]✅ Optimization completed[/green]")
99
- except Exception as e:
100
- self.console.print(
101
- f"[yellow]Warning: Optimization failed: {e}[/yellow]"
102
- )
103
-
104
- lance_store.close()
105
-
106
- self.console.print("[green]✅ Migration completed successfully![/green]")
107
- self.console.print(
108
- f"[green]✅ Migrated {len(document_id_mapping)} documents[/green]"
109
- )
110
- return True
111
-
112
- except Exception as e:
113
- self.console.print(f"[red]❌ Migration failed: {e}[/red]")
114
- import traceback
115
-
116
- self.console.print(f"[red]{traceback.format_exc()}[/red]")
117
- return False
118
-
119
- def _migrate_documents(
120
- self,
121
- sqlite_conn: sqlite3.Connection,
122
- lance_store: Store,
123
- progress: Progress,
124
- task: TaskID,
125
- ) -> dict[int, str]:
126
- """Migrate documents from SQLite to LanceDB and return ID mapping."""
127
- cursor = sqlite_conn.cursor()
128
- cursor.execute(
129
- "SELECT id, content, uri, metadata, created_at, updated_at FROM documents ORDER BY id"
130
- )
131
-
132
- documents = []
133
- id_mapping = {} # Maps old integer ID to new UUID
134
-
135
- for row in cursor.fetchall():
136
- new_uuid = str(uuid4())
137
- id_mapping[row["id"]] = new_uuid
138
-
139
- doc_data = {
140
- "id": new_uuid,
141
- "content": row["content"],
142
- "uri": row["uri"],
143
- "metadata": json.loads(row["metadata"]) if row["metadata"] else {},
144
- "created_at": row["created_at"],
145
- "updated_at": row["updated_at"],
146
- }
147
- documents.append(doc_data)
148
-
149
- # Batch insert documents to LanceDB
150
- if documents:
151
- from haiku.rag.store.engine import DocumentRecord
152
-
153
- doc_records = [
154
- DocumentRecord(
155
- id=doc["id"],
156
- content=doc["content"],
157
- uri=doc["uri"],
158
- metadata=json.dumps(doc["metadata"]),
159
- created_at=doc["created_at"],
160
- updated_at=doc["updated_at"],
161
- )
162
- for doc in documents
163
- ]
164
- lance_store.documents_table.add(doc_records)
165
-
166
- progress.update(task, completed=len(documents), total=len(documents))
167
- return id_mapping
168
-
169
- def _migrate_chunks(
170
- self,
171
- sqlite_conn: sqlite3.Connection,
172
- lance_store: Store,
173
- progress: Progress,
174
- task: TaskID,
175
- document_id_mapping: dict[int, str],
176
- ):
177
- """Migrate chunks and embeddings from SQLite to LanceDB."""
178
- cursor = sqlite_conn.cursor()
179
-
180
- # Get chunks first
181
- cursor.execute("""
182
- SELECT id, document_id, content, metadata
183
- FROM chunks
184
- ORDER BY id
185
- """)
186
-
187
- chunks_data = cursor.fetchall()
188
-
189
- # Get embeddings using the sqlite-vec virtual table
190
- embeddings_map = {}
191
- try:
192
- # Use the virtual table to get embeddings properly
193
- cursor.execute("""
194
- SELECT chunk_id, embedding
195
- FROM chunk_embeddings
196
- """)
197
-
198
- for row in cursor.fetchall():
199
- chunk_id = row[0]
200
- embedding_blob = row[1]
201
- if embedding_blob and chunk_id not in embeddings_map:
202
- embeddings_map[chunk_id] = embedding_blob
203
-
204
- except sqlite3.OperationalError as e:
205
- self.console.print(
206
- f"[yellow]Warning: Could not extract embeddings from virtual table: {e}[/yellow]"
207
- )
208
-
209
- chunks = []
210
- for row in chunks_data:
211
- # Generate new UUID for chunk
212
- chunk_uuid = str(uuid4())
213
-
214
- # Map the old document_id to new UUID
215
- document_uuid = document_id_mapping.get(row["document_id"])
216
- if not document_uuid:
217
- self.console.print(
218
- f"[yellow]Warning: Document ID {row['document_id']} not found in mapping for chunk {row['id']}[/yellow]"
219
- )
220
- continue
221
-
222
- # Get embedding for this chunk
223
- embedding = []
224
- embedding_blob = embeddings_map.get(row["id"])
225
- if embedding_blob:
226
- try:
227
- embedding = deserialize_sqlite_embedding(embedding_blob)
228
- except Exception as e:
229
- self.console.print(
230
- f"[yellow]Warning: Failed to deserialize embedding for chunk {row['id']}: {e}[/yellow]"
231
- )
232
- # Generate a zero vector of the expected dimension
233
- embedding = [0.0] * lance_store.embedder._vector_dim
234
- else:
235
- # No embedding found, generate zero vector
236
- embedding = [0.0] * lance_store.embedder._vector_dim
237
-
238
- chunk_data = {
239
- "id": chunk_uuid,
240
- "document_id": document_uuid,
241
- "content": row["content"],
242
- "metadata": json.loads(row["metadata"]) if row["metadata"] else {},
243
- "vector": embedding,
244
- }
245
- chunks.append(chunk_data)
246
-
247
- # Batch insert chunks to LanceDB
248
- if chunks:
249
- chunk_records = [
250
- lance_store.ChunkRecord(
251
- id=chunk["id"],
252
- document_id=chunk["document_id"],
253
- content=chunk["content"],
254
- metadata=json.dumps(chunk["metadata"]),
255
- vector=chunk["vector"],
256
- )
257
- for chunk in chunks
258
- ]
259
- lance_store.chunks_table.add(chunk_records)
260
-
261
- progress.update(task, completed=len(chunks), total=len(chunks))
262
-
263
- def _migrate_settings(
264
- self,
265
- sqlite_conn: sqlite3.Connection,
266
- lance_store: Store,
267
- progress: Progress,
268
- task: TaskID,
269
- ):
270
- """Migrate settings from SQLite to LanceDB."""
271
- cursor = sqlite_conn.cursor()
272
-
273
- try:
274
- cursor.execute("SELECT id, settings FROM settings WHERE id = 1")
275
- row = cursor.fetchone()
276
-
277
- if row:
278
- settings_data = json.loads(row["settings"]) if row["settings"] else {}
279
-
280
- # Update the existing settings in LanceDB (use string ID)
281
- lance_store.settings_table.update(
282
- where="id = 'settings'",
283
- values={"settings": json.dumps(settings_data)},
284
- )
285
-
286
- progress.update(task, completed=1, total=1)
287
- else:
288
- progress.update(task, completed=0, total=0)
289
-
290
- except sqlite3.OperationalError:
291
- # Settings table doesn't exist in old SQLite database
292
- self.console.print(
293
- "[yellow]No settings table found in SQLite database[/yellow]"
294
- )
295
- progress.update(task, completed=0, total=0)
296
-
297
-
298
- async def migrate_sqlite_to_lancedb(
299
- sqlite_path: Path, lancedb_path: Path | None = None
300
- ) -> bool:
301
- """
302
- Migrate an existing SQLite database to LanceDB.
303
-
304
- Args:
305
- sqlite_path: Path to the existing SQLite database
306
- lancedb_path: Path for the new LanceDB database (optional, will auto-generate if not provided)
307
-
308
- Returns:
309
- True if migration was successful, False otherwise
310
- """
311
- if lancedb_path is None:
312
- # Auto-generate LanceDB path
313
- lancedb_path = sqlite_path.parent / (sqlite_path.stem + ".lancedb")
314
-
315
- migrator = SQLiteToLanceDBMigrator(sqlite_path, lancedb_path)
316
- return migrator.migrate()
haiku/rag/monitor.py DELETED
@@ -1,73 +0,0 @@
1
- from pathlib import Path
2
-
3
- from watchfiles import Change, DefaultFilter, awatch
4
-
5
- from haiku.rag.client import HaikuRAG
6
- from haiku.rag.logging import get_logger
7
- from haiku.rag.reader import FileReader
8
- from haiku.rag.store.models.document import Document
9
-
10
- logger = get_logger()
11
-
12
-
13
- class FileFilter(DefaultFilter):
14
- def __init__(self, *, ignore_paths: list[Path] | None = None) -> None:
15
- self.extensions = tuple(FileReader.extensions)
16
- super().__init__(ignore_paths=ignore_paths)
17
-
18
- def __call__(self, change: "Change", path: str) -> bool:
19
- return path.endswith(self.extensions) and super().__call__(change, path)
20
-
21
-
22
- class FileWatcher:
23
- def __init__(self, paths: list[Path], client: HaikuRAG):
24
- self.paths = paths
25
- self.client = client
26
-
27
- async def observe(self):
28
- logger.info(f"Watching files in {self.paths}")
29
- filter = FileFilter()
30
- await self.refresh()
31
-
32
- async for changes in awatch(*self.paths, watch_filter=filter):
33
- await self.handler(changes)
34
-
35
- async def handler(self, changes: set[tuple[Change, str]]):
36
- for change, path in changes:
37
- if change == Change.added or change == Change.modified:
38
- await self._upsert_document(Path(path))
39
- elif change == Change.deleted:
40
- await self._delete_document(Path(path))
41
-
42
- async def refresh(self):
43
- for path in self.paths:
44
- for f in Path(path).rglob("**/*"):
45
- if f.is_file() and f.suffix in FileReader.extensions:
46
- await self._upsert_document(f)
47
-
48
- async def _upsert_document(self, file: Path) -> Document | None:
49
- try:
50
- uri = file.as_uri()
51
- existing_doc = await self.client.get_document_by_uri(uri)
52
- if existing_doc:
53
- doc = await self.client.create_document_from_source(str(file))
54
- logger.info(f"Updated document {existing_doc.id} from {file}")
55
- return doc
56
- else:
57
- doc = await self.client.create_document_from_source(str(file))
58
- logger.info(f"Created new document {doc.id} from {file}")
59
- return doc
60
- except Exception as e:
61
- logger.error(f"Failed to upsert document from {file}: {e}")
62
- return None
63
-
64
- async def _delete_document(self, file: Path):
65
- try:
66
- uri = file.as_uri()
67
- existing_doc = await self.client.get_document_by_uri(uri)
68
-
69
- if existing_doc and existing_doc.id:
70
- await self.client.delete_document(existing_doc.id)
71
- logger.info(f"Deleted document {existing_doc.id} for {file}")
72
- except Exception as e:
73
- logger.error(f"Failed to delete document for {file}: {e}")
haiku/rag/qa/__init__.py DELETED
@@ -1,15 +0,0 @@
1
- from haiku.rag.client import HaikuRAG
2
- from haiku.rag.config import Config
3
- from haiku.rag.qa.agent import QuestionAnswerAgent
4
-
5
-
6
- def get_qa_agent(client: HaikuRAG, use_citations: bool = False) -> QuestionAnswerAgent:
7
- provider = Config.QA_PROVIDER
8
- model_name = Config.QA_MODEL
9
-
10
- return QuestionAnswerAgent(
11
- client=client,
12
- provider=provider,
13
- model=model_name,
14
- use_citations=use_citations,
15
- )