haiku.rag 0.11.3__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of haiku.rag might be problematic. Click here for more details.
- haiku/rag/a2a/__init__.py +176 -0
- haiku/rag/a2a/client.py +271 -0
- haiku/rag/a2a/context.py +68 -0
- haiku/rag/a2a/models.py +21 -0
- haiku/rag/a2a/prompts.py +59 -0
- haiku/rag/a2a/skills.py +75 -0
- haiku/rag/a2a/storage.py +71 -0
- haiku/rag/a2a/worker.py +320 -0
- haiku/rag/app.py +75 -14
- haiku/rag/cli.py +79 -69
- haiku/rag/client.py +10 -4
- haiku/rag/config.py +9 -0
- haiku/rag/mcp.py +99 -0
- haiku/rag/migration.py +3 -3
- haiku/rag/qa/__init__.py +6 -1
- haiku/rag/qa/agent.py +6 -6
- haiku/rag/store/engine.py +33 -5
- haiku/rag/store/repositories/chunk.py +0 -28
- haiku/rag/store/repositories/document.py +7 -0
- {haiku_rag-0.11.3.dist-info → haiku_rag-0.12.0.dist-info}/METADATA +31 -10
- {haiku_rag-0.11.3.dist-info → haiku_rag-0.12.0.dist-info}/RECORD +24 -16
- {haiku_rag-0.11.3.dist-info → haiku_rag-0.12.0.dist-info}/WHEEL +0 -0
- {haiku_rag-0.11.3.dist-info → haiku_rag-0.12.0.dist-info}/entry_points.txt +0 -0
- {haiku_rag-0.11.3.dist-info → haiku_rag-0.12.0.dist-info}/licenses/LICENSE +0 -0
haiku/rag/cli.py
CHANGED
|
@@ -16,65 +16,6 @@ cli = typer.Typer(
|
|
|
16
16
|
)
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
def complete_document_ids(ctx: typer.Context, incomplete: str):
|
|
20
|
-
"""Autocomplete document IDs from the selected DB."""
|
|
21
|
-
db_path = ctx.params.get("db") or (Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb")
|
|
22
|
-
|
|
23
|
-
try:
|
|
24
|
-
from haiku.rag.client import HaikuRAG
|
|
25
|
-
|
|
26
|
-
async def _list_ids():
|
|
27
|
-
async with HaikuRAG(db_path) as client:
|
|
28
|
-
docs = await client.list_documents()
|
|
29
|
-
return [d.id for d in docs if d.id]
|
|
30
|
-
|
|
31
|
-
ids = asyncio.run(_list_ids())
|
|
32
|
-
except Exception:
|
|
33
|
-
return []
|
|
34
|
-
|
|
35
|
-
return [i for i in ids if i and i.startswith(incomplete)]
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def complete_local_paths(ctx: typer.Context, incomplete: str) -> list[str]:
|
|
39
|
-
"""Autocomplete local filesystem paths.
|
|
40
|
-
|
|
41
|
-
Provides directory/file suggestions based on the current incomplete input.
|
|
42
|
-
Does not validate or restrict to specific extensions to keep it flexible
|
|
43
|
-
(URLs are still allowed to be typed manually).
|
|
44
|
-
"""
|
|
45
|
-
try:
|
|
46
|
-
text = incomplete or ""
|
|
47
|
-
|
|
48
|
-
# Expand user home
|
|
49
|
-
from os.path import expanduser
|
|
50
|
-
|
|
51
|
-
expanded = expanduser(text)
|
|
52
|
-
p = Path(expanded)
|
|
53
|
-
|
|
54
|
-
# Choose directory to list and prefix to filter
|
|
55
|
-
if text == "" or text.endswith(("/", "\\")):
|
|
56
|
-
directory = p
|
|
57
|
-
prefix = ""
|
|
58
|
-
else:
|
|
59
|
-
directory = p.parent
|
|
60
|
-
prefix = p.name
|
|
61
|
-
|
|
62
|
-
if not directory.exists():
|
|
63
|
-
return []
|
|
64
|
-
|
|
65
|
-
suggestions: list[str] = []
|
|
66
|
-
for entry in directory.iterdir():
|
|
67
|
-
name = entry.name
|
|
68
|
-
if not prefix or name.startswith(prefix):
|
|
69
|
-
suggestion = str(directory / name)
|
|
70
|
-
if entry.is_dir():
|
|
71
|
-
suggestion += "/"
|
|
72
|
-
suggestions.append(suggestion)
|
|
73
|
-
return suggestions
|
|
74
|
-
except Exception:
|
|
75
|
-
return []
|
|
76
|
-
|
|
77
|
-
|
|
78
19
|
async def check_version():
|
|
79
20
|
"""Check if haiku.rag is up to date and show warning if not."""
|
|
80
21
|
up_to_date, current_version, latest_version = await is_up_to_date()
|
|
@@ -191,7 +132,6 @@ def add_document_text(
|
|
|
191
132
|
def add_document_src(
|
|
192
133
|
source: str = typer.Argument(
|
|
193
134
|
help="The file path or URL of the document to add",
|
|
194
|
-
autocompletion=complete_local_paths,
|
|
195
135
|
),
|
|
196
136
|
title: str | None = typer.Option(
|
|
197
137
|
None,
|
|
@@ -225,7 +165,6 @@ def add_document_src(
|
|
|
225
165
|
def get_document(
|
|
226
166
|
doc_id: str = typer.Argument(
|
|
227
167
|
help="The ID of the document to get",
|
|
228
|
-
autocompletion=complete_document_ids,
|
|
229
168
|
),
|
|
230
169
|
db: Path = typer.Option(
|
|
231
170
|
Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
|
|
@@ -243,7 +182,6 @@ def get_document(
|
|
|
243
182
|
def delete_document(
|
|
244
183
|
doc_id: str = typer.Argument(
|
|
245
184
|
help="The ID of the document to delete",
|
|
246
|
-
autocompletion=complete_document_ids,
|
|
247
185
|
),
|
|
248
186
|
db: Path = typer.Option(
|
|
249
187
|
Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
|
|
@@ -428,7 +366,8 @@ def download_models_cmd():
|
|
|
428
366
|
|
|
429
367
|
|
|
430
368
|
@cli.command(
|
|
431
|
-
"serve",
|
|
369
|
+
"serve",
|
|
370
|
+
help="Start haiku.rag server. Use --monitor, --mcp, and/or --a2a to enable services.",
|
|
432
371
|
)
|
|
433
372
|
def serve(
|
|
434
373
|
db: Path = typer.Option(
|
|
@@ -436,22 +375,71 @@ def serve(
|
|
|
436
375
|
"--db",
|
|
437
376
|
help="Path to the LanceDB database file",
|
|
438
377
|
),
|
|
378
|
+
monitor: bool = typer.Option(
|
|
379
|
+
False,
|
|
380
|
+
"--monitor",
|
|
381
|
+
help="Enable file monitoring",
|
|
382
|
+
),
|
|
383
|
+
mcp: bool = typer.Option(
|
|
384
|
+
False,
|
|
385
|
+
"--mcp",
|
|
386
|
+
help="Enable MCP server",
|
|
387
|
+
),
|
|
439
388
|
stdio: bool = typer.Option(
|
|
440
389
|
False,
|
|
441
390
|
"--stdio",
|
|
442
|
-
help="Run MCP server on stdio Transport",
|
|
391
|
+
help="Run MCP server on stdio Transport (requires --mcp)",
|
|
392
|
+
),
|
|
393
|
+
mcp_port: int = typer.Option(
|
|
394
|
+
8001,
|
|
395
|
+
"--mcp-port",
|
|
396
|
+
help="Port to bind MCP server to (ignored with --stdio)",
|
|
397
|
+
),
|
|
398
|
+
a2a: bool = typer.Option(
|
|
399
|
+
False,
|
|
400
|
+
"--a2a",
|
|
401
|
+
help="Enable A2A (Agent-to-Agent) server",
|
|
402
|
+
),
|
|
403
|
+
a2a_host: str = typer.Option(
|
|
404
|
+
"127.0.0.1",
|
|
405
|
+
"--a2a-host",
|
|
406
|
+
help="Host to bind A2A server to",
|
|
407
|
+
),
|
|
408
|
+
a2a_port: int = typer.Option(
|
|
409
|
+
8000,
|
|
410
|
+
"--a2a-port",
|
|
411
|
+
help="Port to bind A2A server to",
|
|
443
412
|
),
|
|
444
413
|
) -> None:
|
|
445
|
-
"""Start the
|
|
414
|
+
"""Start the server with selected services."""
|
|
415
|
+
# Require at least one service flag
|
|
416
|
+
if not (monitor or mcp or a2a):
|
|
417
|
+
typer.echo(
|
|
418
|
+
"Error: At least one service flag (--monitor, --mcp, or --a2a) must be specified"
|
|
419
|
+
)
|
|
420
|
+
raise typer.Exit(1)
|
|
421
|
+
|
|
422
|
+
if stdio and not mcp:
|
|
423
|
+
typer.echo("Error: --stdio requires --mcp")
|
|
424
|
+
raise typer.Exit(1)
|
|
425
|
+
|
|
446
426
|
from haiku.rag.app import HaikuRAGApp
|
|
447
427
|
|
|
448
428
|
app = HaikuRAGApp(db_path=db)
|
|
449
429
|
|
|
450
|
-
transport = None
|
|
451
|
-
if stdio:
|
|
452
|
-
transport = "stdio"
|
|
430
|
+
transport = "stdio" if stdio else None
|
|
453
431
|
|
|
454
|
-
asyncio.run(
|
|
432
|
+
asyncio.run(
|
|
433
|
+
app.serve(
|
|
434
|
+
enable_monitor=monitor,
|
|
435
|
+
enable_mcp=mcp,
|
|
436
|
+
mcp_transport=transport,
|
|
437
|
+
mcp_port=mcp_port,
|
|
438
|
+
enable_a2a=a2a,
|
|
439
|
+
a2a_host=a2a_host,
|
|
440
|
+
a2a_port=a2a_port,
|
|
441
|
+
)
|
|
442
|
+
)
|
|
455
443
|
|
|
456
444
|
|
|
457
445
|
@cli.command("migrate", help="Migrate an SQLite database to LanceDB")
|
|
@@ -472,5 +460,27 @@ def migrate(
|
|
|
472
460
|
raise typer.Exit(1)
|
|
473
461
|
|
|
474
462
|
|
|
463
|
+
@cli.command(
|
|
464
|
+
"a2aclient", help="Run interactive client to chat with haiku.rag's A2A server"
|
|
465
|
+
)
|
|
466
|
+
def a2aclient(
|
|
467
|
+
url: str = typer.Option(
|
|
468
|
+
"http://localhost:8000",
|
|
469
|
+
"--url",
|
|
470
|
+
help="Base URL of the A2A server",
|
|
471
|
+
),
|
|
472
|
+
):
|
|
473
|
+
try:
|
|
474
|
+
from haiku.rag.a2a.client import run_interactive_client
|
|
475
|
+
except ImportError:
|
|
476
|
+
typer.echo(
|
|
477
|
+
"Error: A2A support requires the 'a2a' extra. "
|
|
478
|
+
"Install with: uv pip install 'haiku.rag[a2a]'"
|
|
479
|
+
)
|
|
480
|
+
raise typer.Exit(1)
|
|
481
|
+
|
|
482
|
+
asyncio.run(run_interactive_client(url=url))
|
|
483
|
+
|
|
484
|
+
|
|
475
485
|
if __name__ == "__main__":
|
|
476
486
|
cli()
|
haiku/rag/client.py
CHANGED
|
@@ -46,6 +46,9 @@ class HaikuRAG:
|
|
|
46
46
|
|
|
47
47
|
async def __aexit__(self, exc_type, exc_val, exc_tb): # noqa: ARG002
|
|
48
48
|
"""Async context manager exit."""
|
|
49
|
+
# Wait for any pending vacuum to complete before closing
|
|
50
|
+
async with self.store._vacuum_lock:
|
|
51
|
+
pass
|
|
49
52
|
self.close()
|
|
50
53
|
return False
|
|
51
54
|
|
|
@@ -522,19 +525,22 @@ class HaikuRAG:
|
|
|
522
525
|
merged.append(current)
|
|
523
526
|
return merged
|
|
524
527
|
|
|
525
|
-
async def ask(
|
|
528
|
+
async def ask(
|
|
529
|
+
self, question: str, cite: bool = False, system_prompt: str | None = None
|
|
530
|
+
) -> str:
|
|
526
531
|
"""Ask a question using the configured QA agent.
|
|
527
532
|
|
|
528
533
|
Args:
|
|
529
534
|
question: The question to ask.
|
|
530
535
|
cite: Whether to include citations in the response.
|
|
536
|
+
system_prompt: Optional custom system prompt for the QA agent.
|
|
531
537
|
|
|
532
538
|
Returns:
|
|
533
539
|
The generated answer as a string.
|
|
534
540
|
"""
|
|
535
541
|
from haiku.rag.qa import get_qa_agent
|
|
536
542
|
|
|
537
|
-
qa_agent = get_qa_agent(self, use_citations=cite)
|
|
543
|
+
qa_agent = get_qa_agent(self, use_citations=cite, system_prompt=system_prompt)
|
|
538
544
|
return await qa_agent.answer(question)
|
|
539
545
|
|
|
540
546
|
async def rebuild_database(self) -> AsyncGenerator[str, None]:
|
|
@@ -617,13 +623,13 @@ class HaikuRAG:
|
|
|
617
623
|
|
|
618
624
|
# Final maintenance: centralized vacuum to curb disk usage
|
|
619
625
|
try:
|
|
620
|
-
self.store.vacuum()
|
|
626
|
+
await self.store.vacuum()
|
|
621
627
|
except Exception:
|
|
622
628
|
pass
|
|
623
629
|
|
|
624
630
|
async def vacuum(self) -> None:
|
|
625
631
|
"""Optimize and clean up old versions across all tables."""
|
|
626
|
-
self.store.vacuum()
|
|
632
|
+
await self.store.vacuum()
|
|
627
633
|
|
|
628
634
|
def close(self):
|
|
629
635
|
"""Close the underlying store connection."""
|
haiku/rag/config.py
CHANGED
|
@@ -57,6 +57,15 @@ class AppConfig(BaseModel):
|
|
|
57
57
|
# and error out when the database does not already exist.
|
|
58
58
|
DISABLE_DB_AUTOCREATE: bool = False
|
|
59
59
|
|
|
60
|
+
# Vacuum retention threshold in seconds. Only versions older than this
|
|
61
|
+
# threshold will be removed during vacuum operations. Default is 60 seconds
|
|
62
|
+
# to allow concurrent connections to safely use recent versions.
|
|
63
|
+
VACUUM_RETENTION_SECONDS: int = 60
|
|
64
|
+
|
|
65
|
+
# Maximum number of A2A contexts to keep in memory. When exceeded, least
|
|
66
|
+
# recently used contexts will be evicted. Default is 1000.
|
|
67
|
+
A2A_MAX_CONTEXTS: int = 1000
|
|
68
|
+
|
|
60
69
|
@field_validator("MONITOR_DIRECTORIES", mode="before")
|
|
61
70
|
@classmethod
|
|
62
71
|
def parse_monitor_directories(cls, v):
|
haiku/rag/mcp.py
CHANGED
|
@@ -5,6 +5,8 @@ from fastmcp import FastMCP
|
|
|
5
5
|
from pydantic import BaseModel
|
|
6
6
|
|
|
7
7
|
from haiku.rag.client import HaikuRAG
|
|
8
|
+
from haiku.rag.config import Config
|
|
9
|
+
from haiku.rag.research.models import ResearchReport
|
|
8
10
|
|
|
9
11
|
|
|
10
12
|
class SearchResult(BaseModel):
|
|
@@ -153,4 +155,101 @@ def create_mcp_server(db_path: Path) -> FastMCP:
|
|
|
153
155
|
except Exception:
|
|
154
156
|
return False
|
|
155
157
|
|
|
158
|
+
@mcp.tool()
|
|
159
|
+
async def ask_question(
|
|
160
|
+
question: str,
|
|
161
|
+
cite: bool = False,
|
|
162
|
+
deep: bool = False,
|
|
163
|
+
) -> str:
|
|
164
|
+
"""Ask a question using the QA agent.
|
|
165
|
+
|
|
166
|
+
Args:
|
|
167
|
+
question: The question to ask.
|
|
168
|
+
cite: Whether to include citations in the response.
|
|
169
|
+
deep: Use deep multi-agent QA for complex questions that require decomposition.
|
|
170
|
+
|
|
171
|
+
Returns:
|
|
172
|
+
The answer as a string.
|
|
173
|
+
"""
|
|
174
|
+
try:
|
|
175
|
+
async with HaikuRAG(db_path) as rag:
|
|
176
|
+
if deep:
|
|
177
|
+
from haiku.rag.config import Config
|
|
178
|
+
from haiku.rag.qa.deep.dependencies import DeepQAContext
|
|
179
|
+
from haiku.rag.qa.deep.graph import build_deep_qa_graph
|
|
180
|
+
from haiku.rag.qa.deep.nodes import DeepQAPlanNode
|
|
181
|
+
from haiku.rag.qa.deep.state import DeepQADeps, DeepQAState
|
|
182
|
+
|
|
183
|
+
graph = build_deep_qa_graph()
|
|
184
|
+
context = DeepQAContext(
|
|
185
|
+
original_question=question, use_citations=cite
|
|
186
|
+
)
|
|
187
|
+
state = DeepQAState(context=context)
|
|
188
|
+
deps = DeepQADeps(client=rag)
|
|
189
|
+
|
|
190
|
+
start_node = DeepQAPlanNode(
|
|
191
|
+
provider=Config.QA_PROVIDER,
|
|
192
|
+
model=Config.QA_MODEL,
|
|
193
|
+
)
|
|
194
|
+
|
|
195
|
+
result = await graph.run(
|
|
196
|
+
start_node=start_node, state=state, deps=deps
|
|
197
|
+
)
|
|
198
|
+
answer = result.output.answer
|
|
199
|
+
else:
|
|
200
|
+
answer = await rag.ask(question, cite=cite)
|
|
201
|
+
return answer
|
|
202
|
+
except Exception as e:
|
|
203
|
+
return f"Error answering question: {e!s}"
|
|
204
|
+
|
|
205
|
+
@mcp.tool()
|
|
206
|
+
async def research_question(
|
|
207
|
+
question: str,
|
|
208
|
+
max_iterations: int = 3,
|
|
209
|
+
confidence_threshold: float = 0.8,
|
|
210
|
+
max_concurrency: int = 1,
|
|
211
|
+
) -> ResearchReport | None:
|
|
212
|
+
"""Run multi-agent research to investigate a complex question.
|
|
213
|
+
|
|
214
|
+
The research process uses multiple agents to plan, search, evaluate, and synthesize
|
|
215
|
+
information iteratively until confidence threshold is met or max iterations reached.
|
|
216
|
+
|
|
217
|
+
Args:
|
|
218
|
+
question: The research question to investigate.
|
|
219
|
+
max_iterations: Maximum search/analyze iterations (default: 3).
|
|
220
|
+
confidence_threshold: Minimum confidence score (0-1) to stop early (default: 0.8).
|
|
221
|
+
max_concurrency: Maximum concurrent searches per iteration (default: 1).
|
|
222
|
+
|
|
223
|
+
Returns:
|
|
224
|
+
A research report with findings, or None if an error occurred.
|
|
225
|
+
"""
|
|
226
|
+
try:
|
|
227
|
+
from haiku.rag.graph.nodes.plan import PlanNode
|
|
228
|
+
from haiku.rag.research.dependencies import ResearchContext
|
|
229
|
+
from haiku.rag.research.graph import build_research_graph
|
|
230
|
+
from haiku.rag.research.state import ResearchDeps, ResearchState
|
|
231
|
+
|
|
232
|
+
async with HaikuRAG(db_path) as rag:
|
|
233
|
+
graph = build_research_graph()
|
|
234
|
+
state = ResearchState(
|
|
235
|
+
context=ResearchContext(original_question=question),
|
|
236
|
+
max_iterations=max_iterations,
|
|
237
|
+
confidence_threshold=confidence_threshold,
|
|
238
|
+
max_concurrency=max_concurrency,
|
|
239
|
+
)
|
|
240
|
+
deps = ResearchDeps(client=rag)
|
|
241
|
+
|
|
242
|
+
result = await graph.run(
|
|
243
|
+
PlanNode(
|
|
244
|
+
provider=Config.RESEARCH_PROVIDER or Config.QA_PROVIDER,
|
|
245
|
+
model=Config.RESEARCH_MODEL or Config.QA_MODEL,
|
|
246
|
+
),
|
|
247
|
+
state=state,
|
|
248
|
+
deps=deps,
|
|
249
|
+
)
|
|
250
|
+
|
|
251
|
+
return result.output
|
|
252
|
+
except Exception:
|
|
253
|
+
return None
|
|
254
|
+
|
|
156
255
|
return mcp
|
haiku/rag/migration.py
CHANGED
|
@@ -27,7 +27,7 @@ class SQLiteToLanceDBMigrator:
|
|
|
27
27
|
self.lancedb_path = lancedb_path
|
|
28
28
|
self.console = Console()
|
|
29
29
|
|
|
30
|
-
def migrate(self) -> bool:
|
|
30
|
+
async def migrate(self) -> bool:
|
|
31
31
|
"""Perform the migration."""
|
|
32
32
|
try:
|
|
33
33
|
self.console.print(
|
|
@@ -94,7 +94,7 @@ class SQLiteToLanceDBMigrator:
|
|
|
94
94
|
# Optimize and cleanup using centralized vacuum
|
|
95
95
|
self.console.print("[cyan]Optimizing LanceDB...[/cyan]")
|
|
96
96
|
try:
|
|
97
|
-
lance_store.vacuum()
|
|
97
|
+
await lance_store.vacuum()
|
|
98
98
|
self.console.print("[green]✅ Optimization completed[/green]")
|
|
99
99
|
except Exception as e:
|
|
100
100
|
self.console.print(
|
|
@@ -313,4 +313,4 @@ async def migrate_sqlite_to_lancedb(
|
|
|
313
313
|
lancedb_path = sqlite_path.parent / (sqlite_path.stem + ".lancedb")
|
|
314
314
|
|
|
315
315
|
migrator = SQLiteToLanceDBMigrator(sqlite_path, lancedb_path)
|
|
316
|
-
return migrator.migrate()
|
|
316
|
+
return await migrator.migrate()
|
haiku/rag/qa/__init__.py
CHANGED
|
@@ -3,7 +3,11 @@ from haiku.rag.config import Config
|
|
|
3
3
|
from haiku.rag.qa.agent import QuestionAnswerAgent
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
def get_qa_agent(client: HaikuRAG, use_citations: bool = False) -> QuestionAnswerAgent:
|
|
6
|
+
def get_qa_agent(
|
|
7
|
+
client: HaikuRAG,
|
|
8
|
+
use_citations: bool = False,
|
|
9
|
+
system_prompt: str | None = None,
|
|
10
|
+
) -> QuestionAnswerAgent:
|
|
7
11
|
provider = Config.QA_PROVIDER
|
|
8
12
|
model_name = Config.QA_MODEL
|
|
9
13
|
|
|
@@ -12,4 +16,5 @@ def get_qa_agent(client: HaikuRAG, use_citations: bool = False) -> QuestionAnswe
|
|
|
12
16
|
provider=provider,
|
|
13
17
|
model=model_name,
|
|
14
18
|
use_citations=use_citations,
|
|
19
|
+
system_prompt=system_prompt,
|
|
15
20
|
)
|
haiku/rag/qa/agent.py
CHANGED
|
@@ -30,18 +30,21 @@ class QuestionAnswerAgent:
|
|
|
30
30
|
model: str,
|
|
31
31
|
use_citations: bool = False,
|
|
32
32
|
q: float = 0.0,
|
|
33
|
+
system_prompt: str | None = None,
|
|
33
34
|
):
|
|
34
35
|
self._client = client
|
|
35
36
|
|
|
36
|
-
system_prompt
|
|
37
|
-
|
|
38
|
-
|
|
37
|
+
if system_prompt is None:
|
|
38
|
+
system_prompt = (
|
|
39
|
+
QA_SYSTEM_PROMPT_WITH_CITATIONS if use_citations else QA_SYSTEM_PROMPT
|
|
40
|
+
)
|
|
39
41
|
model_obj = self._get_model(provider, model)
|
|
40
42
|
|
|
41
43
|
self._agent = Agent(
|
|
42
44
|
model=model_obj,
|
|
43
45
|
deps_type=Dependencies,
|
|
44
46
|
system_prompt=system_prompt,
|
|
47
|
+
retries=3,
|
|
45
48
|
)
|
|
46
49
|
|
|
47
50
|
@self._agent.tool
|
|
@@ -51,9 +54,6 @@ class QuestionAnswerAgent:
|
|
|
51
54
|
limit: int = 3,
|
|
52
55
|
) -> list[SearchResult]:
|
|
53
56
|
"""Search the knowledge base for relevant documents."""
|
|
54
|
-
|
|
55
|
-
# Remove quotes from queries as this requires positional indexing in lancedb
|
|
56
|
-
query = query.replace('"', "")
|
|
57
57
|
search_results = await ctx.deps.client.search(query, limit=limit)
|
|
58
58
|
expanded_results = await ctx.deps.client.expand_context(search_results)
|
|
59
59
|
|
haiku/rag/store/engine.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
import json
|
|
2
3
|
import logging
|
|
3
4
|
from datetime import timedelta
|
|
@@ -51,6 +52,7 @@ class Store:
|
|
|
51
52
|
def __init__(self, db_path: Path, skip_validation: bool = False):
|
|
52
53
|
self.db_path: Path = db_path
|
|
53
54
|
self.embedder = get_embedder()
|
|
55
|
+
self._vacuum_lock = asyncio.Lock()
|
|
54
56
|
|
|
55
57
|
# Create the ChunkRecord model with the correct vector dimension
|
|
56
58
|
self.ChunkRecord = create_chunk_model(self.embedder._vector_dim)
|
|
@@ -78,14 +80,40 @@ class Store:
|
|
|
78
80
|
if not skip_validation:
|
|
79
81
|
self._validate_configuration()
|
|
80
82
|
|
|
81
|
-
def vacuum(self) -> None:
|
|
82
|
-
"""Optimize and clean up old versions across all tables to reduce disk usage.
|
|
83
|
+
async def vacuum(self, retention_seconds: int | None = None) -> None:
|
|
84
|
+
"""Optimize and clean up old versions across all tables to reduce disk usage.
|
|
85
|
+
|
|
86
|
+
Args:
|
|
87
|
+
retention_seconds: Retention threshold in seconds. Only versions older
|
|
88
|
+
than this will be removed. If None, uses Config.VACUUM_RETENTION_SECONDS.
|
|
89
|
+
|
|
90
|
+
Note:
|
|
91
|
+
If vacuum is already running, this method returns immediately without blocking.
|
|
92
|
+
Use asyncio.create_task(store.vacuum()) for non-blocking background execution.
|
|
93
|
+
"""
|
|
83
94
|
if self._has_cloud_config() and str(Config.LANCEDB_URI).startswith("db://"):
|
|
84
95
|
return
|
|
85
96
|
|
|
86
|
-
#
|
|
87
|
-
|
|
88
|
-
|
|
97
|
+
# Skip if already running (non-blocking)
|
|
98
|
+
if self._vacuum_lock.locked():
|
|
99
|
+
return
|
|
100
|
+
|
|
101
|
+
async with self._vacuum_lock:
|
|
102
|
+
try:
|
|
103
|
+
# Evaluate config at runtime to allow dynamic changes
|
|
104
|
+
if retention_seconds is None:
|
|
105
|
+
retention_seconds = Config.VACUUM_RETENTION_SECONDS
|
|
106
|
+
# Perform maintenance per table using optimize() with configurable retention
|
|
107
|
+
retention = timedelta(seconds=retention_seconds)
|
|
108
|
+
for table in [
|
|
109
|
+
self.documents_table,
|
|
110
|
+
self.chunks_table,
|
|
111
|
+
self.settings_table,
|
|
112
|
+
]:
|
|
113
|
+
table.optimize(cleanup_older_than=retention)
|
|
114
|
+
except (RuntimeError, OSError) as e:
|
|
115
|
+
# Handle resource errors gracefully
|
|
116
|
+
logger.debug(f"Vacuum skipped due to resource constraints: {e}")
|
|
89
117
|
|
|
90
118
|
def _connect_to_lancedb(self, db_path: Path):
|
|
91
119
|
"""Establish connection to LanceDB (local, cloud, or object storage)."""
|
|
haiku/rag/store/repositories/chunk.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import asyncio
|
|
2
1
|
import inspect
|
|
3
2
|
import json
|
|
4
3
|
import logging
|
|
@@ -23,7 +22,6 @@ class ChunkRepository:
|
|
|
23
22
|
def __init__(self, store: Store) -> None:
|
|
24
23
|
self.store = store
|
|
25
24
|
self.embedder = get_embedder()
|
|
26
|
-
self._optimize_lock = asyncio.Lock()
|
|
27
25
|
|
|
28
26
|
def _ensure_fts_index(self) -> None:
|
|
29
27
|
"""Ensure FTS index exists on the content column."""
|
|
@@ -35,21 +33,6 @@ class ChunkRepository:
|
|
|
35
33
|
# Log the error but don't fail - FTS might already exist
|
|
36
34
|
logger.debug(f"FTS index creation skipped: {e}")
|
|
37
35
|
|
|
38
|
-
async def _optimize(self) -> None:
|
|
39
|
-
"""Optimize the chunks table to refresh indexes."""
|
|
40
|
-
# Skip optimization for LanceDB Cloud as it handles this automatically
|
|
41
|
-
if Config.LANCEDB_URI and Config.LANCEDB_URI.startswith("db://"):
|
|
42
|
-
return
|
|
43
|
-
|
|
44
|
-
async with self._optimize_lock:
|
|
45
|
-
try:
|
|
46
|
-
self.store.chunks_table.optimize()
|
|
47
|
-
except (RuntimeError, OSError) as e:
|
|
48
|
-
# Handle "too many open files" and other resource errors gracefully
|
|
49
|
-
logger.debug(
|
|
50
|
-
f"Table optimization skipped due to resource constraints: {e}"
|
|
51
|
-
)
|
|
52
|
-
|
|
53
36
|
async def create(self, entity: Chunk) -> Chunk:
|
|
54
37
|
"""Create a chunk in the database."""
|
|
55
38
|
assert entity.document_id, "Chunk must have a document_id to be created"
|
|
@@ -77,11 +60,6 @@ class ChunkRepository:
|
|
|
77
60
|
self.store.chunks_table.add([chunk_record])
|
|
78
61
|
|
|
79
62
|
entity.id = chunk_id
|
|
80
|
-
|
|
81
|
-
# Try to optimize if not currently locked (non-blocking)
|
|
82
|
-
if not self._optimize_lock.locked():
|
|
83
|
-
asyncio.create_task(self._optimize())
|
|
84
|
-
|
|
85
63
|
return entity
|
|
86
64
|
|
|
87
65
|
async def get_by_id(self, entity_id: str) -> Chunk | None:
|
|
@@ -125,10 +103,6 @@ class ChunkRepository:
|
|
|
125
103
|
"vector": embedding,
|
|
126
104
|
},
|
|
127
105
|
)
|
|
128
|
-
# Try to optimize if not currently locked (non-blocking)
|
|
129
|
-
if not self._optimize_lock.locked():
|
|
130
|
-
asyncio.create_task(self._optimize())
|
|
131
|
-
|
|
132
106
|
return entity
|
|
133
107
|
|
|
134
108
|
async def delete(self, entity_id: str) -> bool:
|
|
@@ -227,8 +201,6 @@ class ChunkRepository:
|
|
|
227
201
|
if chunk_records:
|
|
228
202
|
self.store.chunks_table.add(chunk_records)
|
|
229
203
|
|
|
230
|
-
# Force optimization once at the end for bulk operations
|
|
231
|
-
await self._optimize()
|
|
232
204
|
return created_chunks
|
|
233
205
|
|
|
234
206
|
async def delete_all(self) -> None:
|
|
haiku/rag/store/repositories/document.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
import json
|
|
2
3
|
from datetime import datetime
|
|
3
4
|
from typing import TYPE_CHECKING
|
|
@@ -200,6 +201,9 @@ class DocumentRepository:
|
|
|
200
201
|
chunk.order = order
|
|
201
202
|
await self.chunk_repository.create(chunk)
|
|
202
203
|
|
|
204
|
+
# Vacuum old versions in background (non-blocking)
|
|
205
|
+
asyncio.create_task(self.store.vacuum())
|
|
206
|
+
|
|
203
207
|
return created_doc
|
|
204
208
|
except Exception:
|
|
205
209
|
# Roll back to the captured versions and re-raise
|
|
@@ -230,6 +234,9 @@ class DocumentRepository:
|
|
|
230
234
|
updated_doc.id, docling_document
|
|
231
235
|
)
|
|
232
236
|
|
|
237
|
+
# Vacuum old versions in background (non-blocking)
|
|
238
|
+
asyncio.create_task(self.store.vacuum())
|
|
239
|
+
|
|
233
240
|
return updated_doc
|
|
234
241
|
except Exception:
|
|
235
242
|
# Roll back to the captured versions and re-raise
|
|
{haiku_rag-0.11.3.dist-info → haiku_rag-0.12.0.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: haiku.rag
|
|
3
|
-
Version: 0.11.3
|
|
3
|
+
Version: 0.12.0
|
|
4
4
|
Summary: Agentic Retrieval Augmented Generation (RAG) with LanceDB
|
|
5
5
|
Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -18,18 +18,20 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
18
18
|
Classifier: Programming Language :: Python :: 3.12
|
|
19
19
|
Classifier: Typing :: Typed
|
|
20
20
|
Requires-Python: >=3.12
|
|
21
|
-
Requires-Dist: docling>=2.
|
|
22
|
-
Requires-Dist: fastmcp>=2.12.
|
|
21
|
+
Requires-Dist: docling>=2.56.1
|
|
22
|
+
Requires-Dist: fastmcp>=2.12.4
|
|
23
23
|
Requires-Dist: httpx>=0.28.1
|
|
24
|
-
Requires-Dist: lancedb>=0.25.
|
|
25
|
-
Requires-Dist: pydantic-ai>=1.0.
|
|
26
|
-
Requires-Dist: pydantic-graph>=1.0.
|
|
27
|
-
Requires-Dist: pydantic>=2.
|
|
24
|
+
Requires-Dist: lancedb>=0.25.2
|
|
25
|
+
Requires-Dist: pydantic-ai>=1.0.18
|
|
26
|
+
Requires-Dist: pydantic-graph>=1.0.18
|
|
27
|
+
Requires-Dist: pydantic>=2.12.1
|
|
28
28
|
Requires-Dist: python-dotenv>=1.1.1
|
|
29
|
-
Requires-Dist: rich>=14.
|
|
30
|
-
Requires-Dist: tiktoken>=0.
|
|
31
|
-
Requires-Dist: typer>=0.
|
|
29
|
+
Requires-Dist: rich>=14.2.0
|
|
30
|
+
Requires-Dist: tiktoken>=0.12.0
|
|
31
|
+
Requires-Dist: typer>=0.19.2
|
|
32
32
|
Requires-Dist: watchfiles>=1.1.0
|
|
33
|
+
Provides-Extra: a2a
|
|
34
|
+
Requires-Dist: fasta2a>=0.1.0; extra == 'a2a'
|
|
33
35
|
Provides-Extra: mxbai
|
|
34
36
|
Requires-Dist: mxbai-rerank>=0.1.6; extra == 'mxbai'
|
|
35
37
|
Provides-Extra: voyageai
|
|
@@ -56,6 +58,7 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
|
|
|
56
58
|
- **File monitoring**: Auto-index files when run as server
|
|
57
59
|
- **40+ file formats**: PDF, DOCX, HTML, Markdown, code files, URLs
|
|
58
60
|
- **MCP server**: Expose as tools for AI assistants
|
|
61
|
+
- **A2A agent**: Conversational agent with context and multi-turn dialogue
|
|
59
62
|
- **CLI & Python API**: Use from command line or Python
|
|
60
63
|
|
|
61
64
|
## Quick Start
|
|
@@ -181,6 +184,24 @@ haiku-rag serve --stdio
|
|
|
181
184
|
|
|
182
185
|
Provides tools for document management and search directly in your AI assistant.
|
|
183
186
|
|
|
187
|
+
## A2A Agent
|
|
188
|
+
|
|
189
|
+
Run as a conversational agent with the Agent-to-Agent protocol:
|
|
190
|
+
|
|
191
|
+
```bash
|
|
192
|
+
# Start the A2A server
|
|
193
|
+
haiku-rag serve --a2a
|
|
194
|
+
|
|
195
|
+
# Connect with the interactive client (in another terminal)
|
|
196
|
+
haiku-rag a2aclient
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
The A2A agent provides:
|
|
200
|
+
- Multi-turn dialogue with context
|
|
201
|
+
- Intelligent multi-search for complex questions
|
|
202
|
+
- Source citations with titles and URIs
|
|
203
|
+
- Full document retrieval on request
|
|
204
|
+
|
|
184
205
|
## Documentation
|
|
185
206
|
|
|
186
207
|
Full documentation at: https://ggozad.github.io/haiku.rag/
|