haiku.rag 0.11.3__tar.gz → 0.11.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of haiku.rag might be problematic. Click here for more details.
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/PKG-INFO +1 -1
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/pyproject.toml +1 -1
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/evaluations/benchmark.py +27 -8
- haiku_rag-0.11.4/src/evaluations/prompts.py +22 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/client.py +10 -4
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/config.py +5 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/migration.py +3 -3
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/qa/__init__.py +6 -1
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/qa/agent.py +6 -3
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/engine.py +33 -5
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/repositories/chunk.py +0 -28
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/repositories/document.py +7 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/uv.lock +18 -18
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/.gitignore +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/.pre-commit-config.yaml +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/.python-version +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/LICENSE +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/README.md +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/mkdocs.yml +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/evaluations/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/evaluations/config.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/evaluations/datasets/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/evaluations/datasets/repliqa.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/evaluations/datasets/wix.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/evaluations/llm_judge.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/app.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/chunker.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/cli.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/embeddings/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/embeddings/base.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/embeddings/ollama.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/embeddings/openai.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/embeddings/vllm.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/embeddings/voyageai.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/graph/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/graph/base.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/graph/common.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/graph/models.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/graph/nodes/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/graph/nodes/analysis.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/graph/nodes/plan.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/graph/nodes/search.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/graph/nodes/synthesize.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/graph/prompts.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/logging.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/mcp.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/monitor.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/qa/deep/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/qa/deep/dependencies.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/qa/deep/graph.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/qa/deep/models.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/qa/deep/nodes.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/qa/deep/prompts.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/qa/deep/state.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/qa/prompts.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/reader.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/reranking/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/reranking/base.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/reranking/cohere.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/reranking/mxbai.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/reranking/vllm.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/research/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/research/common.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/research/dependencies.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/research/graph.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/research/models.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/research/prompts.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/research/state.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/research/stream.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/models/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/models/chunk.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/models/document.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/repositories/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/repositories/settings.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/upgrades/__init__.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/upgrades/v0_10_1.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/upgrades/v0_9_3.py +0 -0
- {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/utils.py +0 -0
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
name = "haiku.rag"
|
|
4
4
|
description = "Agentic Retrieval Augmented Generation (RAG) with LanceDB"
|
|
5
|
-
version = "0.11.3"
|
|
5
|
+
version = "0.11.4"
|
|
6
6
|
authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
|
|
7
7
|
license = { text = "MIT" }
|
|
8
8
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
@@ -15,6 +15,7 @@ from rich.progress import Progress
|
|
|
15
15
|
from evaluations.config import DatasetSpec, RetrievalSample
|
|
16
16
|
from evaluations.datasets import DATASETS
|
|
17
17
|
from evaluations.llm_judge import ANSWER_EQUIVALENCE_RUBRIC
|
|
18
|
+
from evaluations.prompts import WIX_SUPPORT_PROMPT
|
|
18
19
|
from haiku.rag import logging # noqa: F401
|
|
19
20
|
from haiku.rag.client import HaikuRAG
|
|
20
21
|
from haiku.rag.config import Config
|
|
@@ -61,7 +62,6 @@ async def populate_db(spec: DatasetSpec) -> None:
|
|
|
61
62
|
metadata=payload.metadata,
|
|
62
63
|
)
|
|
63
64
|
progress.advance(task)
|
|
64
|
-
rag.store.vacuum()
|
|
65
65
|
|
|
66
66
|
|
|
67
67
|
def _is_relevant_match(retrieved_uri: str | None, sample: RetrievalSample) -> bool:
|
|
@@ -80,6 +80,11 @@ async def run_retrieval_benchmark(spec: DatasetSpec) -> dict[str, float] | None:
|
|
|
80
80
|
3: 0.0,
|
|
81
81
|
5: 0.0,
|
|
82
82
|
}
|
|
83
|
+
success_totals = {
|
|
84
|
+
1: 0.0,
|
|
85
|
+
3: 0.0,
|
|
86
|
+
5: 0.0,
|
|
87
|
+
}
|
|
83
88
|
total_queries = 0
|
|
84
89
|
|
|
85
90
|
with Progress() as progress:
|
|
@@ -109,15 +114,16 @@ async def run_retrieval_benchmark(spec: DatasetSpec) -> dict[str, float] | None:
|
|
|
109
114
|
if retrieved_doc and retrieved_doc.uri:
|
|
110
115
|
retrieved_uris.append(retrieved_doc.uri)
|
|
111
116
|
|
|
112
|
-
# Compute
|
|
113
|
-
# documents are retrieved within the first K results and
|
|
114
|
-
# averaging these fractions across all queries.
|
|
117
|
+
# Compute metrics for each cutoff
|
|
115
118
|
for cutoff in (1, 3, 5):
|
|
116
119
|
top_k = set(retrieved_uris[:cutoff])
|
|
117
120
|
relevant = set(sample.expected_uris)
|
|
118
121
|
if relevant:
|
|
119
122
|
matched = len(top_k & relevant)
|
|
123
|
+
# Recall: fraction of relevant docs retrieved
|
|
120
124
|
recall_totals[cutoff] += matched / len(relevant)
|
|
125
|
+
# Success: binary - did we get at least one relevant doc?
|
|
126
|
+
success_totals[cutoff] += 1.0 if matched > 0 else 0.0
|
|
121
127
|
|
|
122
128
|
progress.advance(task)
|
|
123
129
|
|
|
@@ -129,16 +135,28 @@ async def run_retrieval_benchmark(spec: DatasetSpec) -> dict[str, float] | None:
|
|
|
129
135
|
recall_at_3 = recall_totals[3] / total_queries
|
|
130
136
|
recall_at_5 = recall_totals[5] / total_queries
|
|
131
137
|
|
|
138
|
+
success_at_1 = success_totals[1] / total_queries
|
|
139
|
+
success_at_3 = success_totals[3] / total_queries
|
|
140
|
+
success_at_5 = success_totals[5] / total_queries
|
|
141
|
+
|
|
132
142
|
console.print("\n=== Retrieval Benchmark Results ===", style="bold cyan")
|
|
133
143
|
console.print(f"Total queries: {total_queries}")
|
|
134
|
-
console.print(
|
|
135
|
-
console.print(f"Recall@
|
|
136
|
-
console.print(f"Recall@
|
|
144
|
+
console.print("\nRecall@K (fraction of relevant docs retrieved):")
|
|
145
|
+
console.print(f" Recall@1: {recall_at_1:.4f}")
|
|
146
|
+
console.print(f" Recall@3: {recall_at_3:.4f}")
|
|
147
|
+
console.print(f" Recall@5: {recall_at_5:.4f}")
|
|
148
|
+
console.print("\nSuccess@K (queries with at least one relevant doc):")
|
|
149
|
+
console.print(f" Success@1: {success_at_1:.4f} ({success_at_1 * 100:.1f}%)")
|
|
150
|
+
console.print(f" Success@3: {success_at_3:.4f} ({success_at_3 * 100:.1f}%)")
|
|
151
|
+
console.print(f" Success@5: {success_at_5:.4f} ({success_at_5 * 100:.1f}%)")
|
|
137
152
|
|
|
138
153
|
return {
|
|
139
154
|
"recall@1": recall_at_1,
|
|
140
155
|
"recall@3": recall_at_3,
|
|
141
156
|
"recall@5": recall_at_5,
|
|
157
|
+
"success@1": success_at_1,
|
|
158
|
+
"success@3": success_at_3,
|
|
159
|
+
"success@5": success_at_5,
|
|
142
160
|
}
|
|
143
161
|
|
|
144
162
|
|
|
@@ -187,7 +205,8 @@ async def run_qa_benchmark(
|
|
|
187
205
|
)
|
|
188
206
|
|
|
189
207
|
async with HaikuRAG(spec.db_path) as rag:
|
|
190
|
-
|
|
208
|
+
system_prompt = WIX_SUPPORT_PROMPT if spec.key == "wix" else None
|
|
209
|
+
qa = get_qa_agent(rag, system_prompt=system_prompt)
|
|
191
210
|
|
|
192
211
|
async def answer_question(question: str) -> str:
|
|
193
212
|
return await qa.answer(question)
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
WIX_SUPPORT_PROMPT = """
|
|
2
|
+
You are a WIX technical support expert helping users with questions about the WIX platform.
|
|
3
|
+
|
|
4
|
+
Your process:
|
|
5
|
+
1. When a user asks a question, use the search_documents tool to find relevant information
|
|
6
|
+
2. Search with specific keywords and phrases from the user's question
|
|
7
|
+
3. Review the search results and their relevance scores
|
|
8
|
+
4. If you need additional context, perform follow-up searches with different keywords
|
|
9
|
+
5. Provide a short and to the point comprehensive answer based only on the retrieved documents
|
|
10
|
+
|
|
11
|
+
Guidelines:
|
|
12
|
+
- Base your answers strictly on the provided document content
|
|
13
|
+
- Quote or reference specific information when possible
|
|
14
|
+
- If multiple documents contain relevant information, synthesize them coherently
|
|
15
|
+
- Indicate when information is incomplete or when you need to search for additional context
|
|
16
|
+
- If the retrieved documents don't contain sufficient information, clearly state: "I cannot find enough information in the knowledge base to answer this question."
|
|
17
|
+
- For complex questions, consider breaking them down and performing multiple searches
|
|
18
|
+
- Stick to the answer, do not ellaborate or provide context unless explicitly asked for it.
|
|
19
|
+
|
|
20
|
+
Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
|
|
21
|
+
/no_think
|
|
22
|
+
"""
|
|
@@ -46,6 +46,9 @@ class HaikuRAG:
|
|
|
46
46
|
|
|
47
47
|
async def __aexit__(self, exc_type, exc_val, exc_tb): # noqa: ARG002
|
|
48
48
|
"""Async context manager exit."""
|
|
49
|
+
# Wait for any pending vacuum to complete before closing
|
|
50
|
+
async with self.store._vacuum_lock:
|
|
51
|
+
pass
|
|
49
52
|
self.close()
|
|
50
53
|
return False
|
|
51
54
|
|
|
@@ -522,19 +525,22 @@ class HaikuRAG:
|
|
|
522
525
|
merged.append(current)
|
|
523
526
|
return merged
|
|
524
527
|
|
|
525
|
-
async def ask(self, question: str, cite: bool = False) -> str:
|
|
528
|
+
async def ask(
|
|
529
|
+
self, question: str, cite: bool = False, system_prompt: str | None = None
|
|
530
|
+
) -> str:
|
|
526
531
|
"""Ask a question using the configured QA agent.
|
|
527
532
|
|
|
528
533
|
Args:
|
|
529
534
|
question: The question to ask.
|
|
530
535
|
cite: Whether to include citations in the response.
|
|
536
|
+
system_prompt: Optional custom system prompt for the QA agent.
|
|
531
537
|
|
|
532
538
|
Returns:
|
|
533
539
|
The generated answer as a string.
|
|
534
540
|
"""
|
|
535
541
|
from haiku.rag.qa import get_qa_agent
|
|
536
542
|
|
|
537
|
-
qa_agent = get_qa_agent(self, use_citations=cite)
|
|
543
|
+
qa_agent = get_qa_agent(self, use_citations=cite, system_prompt=system_prompt)
|
|
538
544
|
return await qa_agent.answer(question)
|
|
539
545
|
|
|
540
546
|
async def rebuild_database(self) -> AsyncGenerator[str, None]:
|
|
@@ -617,13 +623,13 @@ class HaikuRAG:
|
|
|
617
623
|
|
|
618
624
|
# Final maintenance: centralized vacuum to curb disk usage
|
|
619
625
|
try:
|
|
620
|
-
self.store.vacuum()
|
|
626
|
+
await self.store.vacuum()
|
|
621
627
|
except Exception:
|
|
622
628
|
pass
|
|
623
629
|
|
|
624
630
|
async def vacuum(self) -> None:
|
|
625
631
|
"""Optimize and clean up old versions across all tables."""
|
|
626
|
-
self.store.vacuum()
|
|
632
|
+
await self.store.vacuum()
|
|
627
633
|
|
|
628
634
|
def close(self):
|
|
629
635
|
"""Close the underlying store connection."""
|
|
@@ -57,6 +57,11 @@ class AppConfig(BaseModel):
|
|
|
57
57
|
# and error out when the database does not already exist.
|
|
58
58
|
DISABLE_DB_AUTOCREATE: bool = False
|
|
59
59
|
|
|
60
|
+
# Vacuum retention threshold in seconds. Only versions older than this
|
|
61
|
+
# threshold will be removed during vacuum operations. Default is 60 seconds
|
|
62
|
+
# to allow concurrent connections to safely use recent versions.
|
|
63
|
+
VACUUM_RETENTION_SECONDS: int = 60
|
|
64
|
+
|
|
60
65
|
@field_validator("MONITOR_DIRECTORIES", mode="before")
|
|
61
66
|
@classmethod
|
|
62
67
|
def parse_monitor_directories(cls, v):
|
|
@@ -27,7 +27,7 @@ class SQLiteToLanceDBMigrator:
|
|
|
27
27
|
self.lancedb_path = lancedb_path
|
|
28
28
|
self.console = Console()
|
|
29
29
|
|
|
30
|
-
def migrate(self) -> bool:
|
|
30
|
+
async def migrate(self) -> bool:
|
|
31
31
|
"""Perform the migration."""
|
|
32
32
|
try:
|
|
33
33
|
self.console.print(
|
|
@@ -94,7 +94,7 @@ class SQLiteToLanceDBMigrator:
|
|
|
94
94
|
# Optimize and cleanup using centralized vacuum
|
|
95
95
|
self.console.print("[cyan]Optimizing LanceDB...[/cyan]")
|
|
96
96
|
try:
|
|
97
|
-
lance_store.vacuum()
|
|
97
|
+
await lance_store.vacuum()
|
|
98
98
|
self.console.print("[green]✅ Optimization completed[/green]")
|
|
99
99
|
except Exception as e:
|
|
100
100
|
self.console.print(
|
|
@@ -313,4 +313,4 @@ async def migrate_sqlite_to_lancedb(
|
|
|
313
313
|
lancedb_path = sqlite_path.parent / (sqlite_path.stem + ".lancedb")
|
|
314
314
|
|
|
315
315
|
migrator = SQLiteToLanceDBMigrator(sqlite_path, lancedb_path)
|
|
316
|
-
return migrator.migrate()
|
|
316
|
+
return await migrator.migrate()
|
|
@@ -3,7 +3,11 @@ from haiku.rag.config import Config
|
|
|
3
3
|
from haiku.rag.qa.agent import QuestionAnswerAgent
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
def get_qa_agent(client: HaikuRAG, use_citations: bool = False) -> QuestionAnswerAgent:
|
|
6
|
+
def get_qa_agent(
|
|
7
|
+
client: HaikuRAG,
|
|
8
|
+
use_citations: bool = False,
|
|
9
|
+
system_prompt: str | None = None,
|
|
10
|
+
) -> QuestionAnswerAgent:
|
|
7
11
|
provider = Config.QA_PROVIDER
|
|
8
12
|
model_name = Config.QA_MODEL
|
|
9
13
|
|
|
@@ -12,4 +16,5 @@ def get_qa_agent(client: HaikuRAG, use_citations: bool = False) -> QuestionAnswe
|
|
|
12
16
|
provider=provider,
|
|
13
17
|
model=model_name,
|
|
14
18
|
use_citations=use_citations,
|
|
19
|
+
system_prompt=system_prompt,
|
|
15
20
|
)
|
|
@@ -30,18 +30,21 @@ class QuestionAnswerAgent:
|
|
|
30
30
|
model: str,
|
|
31
31
|
use_citations: bool = False,
|
|
32
32
|
q: float = 0.0,
|
|
33
|
+
system_prompt: str | None = None,
|
|
33
34
|
):
|
|
34
35
|
self._client = client
|
|
35
36
|
|
|
36
|
-
system_prompt
|
|
37
|
-
|
|
38
|
-
|
|
37
|
+
if system_prompt is None:
|
|
38
|
+
system_prompt = (
|
|
39
|
+
QA_SYSTEM_PROMPT_WITH_CITATIONS if use_citations else QA_SYSTEM_PROMPT
|
|
40
|
+
)
|
|
39
41
|
model_obj = self._get_model(provider, model)
|
|
40
42
|
|
|
41
43
|
self._agent = Agent(
|
|
42
44
|
model=model_obj,
|
|
43
45
|
deps_type=Dependencies,
|
|
44
46
|
system_prompt=system_prompt,
|
|
47
|
+
retries=3,
|
|
45
48
|
)
|
|
46
49
|
|
|
47
50
|
@self._agent.tool
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
import json
|
|
2
3
|
import logging
|
|
3
4
|
from datetime import timedelta
|
|
@@ -51,6 +52,7 @@ class Store:
|
|
|
51
52
|
def __init__(self, db_path: Path, skip_validation: bool = False):
|
|
52
53
|
self.db_path: Path = db_path
|
|
53
54
|
self.embedder = get_embedder()
|
|
55
|
+
self._vacuum_lock = asyncio.Lock()
|
|
54
56
|
|
|
55
57
|
# Create the ChunkRecord model with the correct vector dimension
|
|
56
58
|
self.ChunkRecord = create_chunk_model(self.embedder._vector_dim)
|
|
@@ -78,14 +80,40 @@ class Store:
|
|
|
78
80
|
if not skip_validation:
|
|
79
81
|
self._validate_configuration()
|
|
80
82
|
|
|
81
|
-
def vacuum(self) -> None:
|
|
82
|
-
"""Optimize and clean up old versions across all tables to reduce disk usage.
|
|
83
|
+
async def vacuum(self, retention_seconds: int | None = None) -> None:
|
|
84
|
+
"""Optimize and clean up old versions across all tables to reduce disk usage.
|
|
85
|
+
|
|
86
|
+
Args:
|
|
87
|
+
retention_seconds: Retention threshold in seconds. Only versions older
|
|
88
|
+
than this will be removed. If None, uses Config.VACUUM_RETENTION_SECONDS.
|
|
89
|
+
|
|
90
|
+
Note:
|
|
91
|
+
If vacuum is already running, this method returns immediately without blocking.
|
|
92
|
+
Use asyncio.create_task(store.vacuum()) for non-blocking background execution.
|
|
93
|
+
"""
|
|
83
94
|
if self._has_cloud_config() and str(Config.LANCEDB_URI).startswith("db://"):
|
|
84
95
|
return
|
|
85
96
|
|
|
86
|
-
#
|
|
87
|
-
|
|
88
|
-
|
|
97
|
+
# Skip if already running (non-blocking)
|
|
98
|
+
if self._vacuum_lock.locked():
|
|
99
|
+
return
|
|
100
|
+
|
|
101
|
+
async with self._vacuum_lock:
|
|
102
|
+
try:
|
|
103
|
+
# Evaluate config at runtime to allow dynamic changes
|
|
104
|
+
if retention_seconds is None:
|
|
105
|
+
retention_seconds = Config.VACUUM_RETENTION_SECONDS
|
|
106
|
+
# Perform maintenance per table using optimize() with configurable retention
|
|
107
|
+
retention = timedelta(seconds=retention_seconds)
|
|
108
|
+
for table in [
|
|
109
|
+
self.documents_table,
|
|
110
|
+
self.chunks_table,
|
|
111
|
+
self.settings_table,
|
|
112
|
+
]:
|
|
113
|
+
table.optimize(cleanup_older_than=retention)
|
|
114
|
+
except (RuntimeError, OSError) as e:
|
|
115
|
+
# Handle resource errors gracefully
|
|
116
|
+
logger.debug(f"Vacuum skipped due to resource constraints: {e}")
|
|
89
117
|
|
|
90
118
|
def _connect_to_lancedb(self, db_path: Path):
|
|
91
119
|
"""Establish connection to LanceDB (local, cloud, or object storage)."""
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import asyncio
|
|
2
1
|
import inspect
|
|
3
2
|
import json
|
|
4
3
|
import logging
|
|
@@ -23,7 +22,6 @@ class ChunkRepository:
|
|
|
23
22
|
def __init__(self, store: Store) -> None:
|
|
24
23
|
self.store = store
|
|
25
24
|
self.embedder = get_embedder()
|
|
26
|
-
self._optimize_lock = asyncio.Lock()
|
|
27
25
|
|
|
28
26
|
def _ensure_fts_index(self) -> None:
|
|
29
27
|
"""Ensure FTS index exists on the content column."""
|
|
@@ -35,21 +33,6 @@ class ChunkRepository:
|
|
|
35
33
|
# Log the error but don't fail - FTS might already exist
|
|
36
34
|
logger.debug(f"FTS index creation skipped: {e}")
|
|
37
35
|
|
|
38
|
-
async def _optimize(self) -> None:
|
|
39
|
-
"""Optimize the chunks table to refresh indexes."""
|
|
40
|
-
# Skip optimization for LanceDB Cloud as it handles this automatically
|
|
41
|
-
if Config.LANCEDB_URI and Config.LANCEDB_URI.startswith("db://"):
|
|
42
|
-
return
|
|
43
|
-
|
|
44
|
-
async with self._optimize_lock:
|
|
45
|
-
try:
|
|
46
|
-
self.store.chunks_table.optimize()
|
|
47
|
-
except (RuntimeError, OSError) as e:
|
|
48
|
-
# Handle "too many open files" and other resource errors gracefully
|
|
49
|
-
logger.debug(
|
|
50
|
-
f"Table optimization skipped due to resource constraints: {e}"
|
|
51
|
-
)
|
|
52
|
-
|
|
53
36
|
async def create(self, entity: Chunk) -> Chunk:
|
|
54
37
|
"""Create a chunk in the database."""
|
|
55
38
|
assert entity.document_id, "Chunk must have a document_id to be created"
|
|
@@ -77,11 +60,6 @@ class ChunkRepository:
|
|
|
77
60
|
self.store.chunks_table.add([chunk_record])
|
|
78
61
|
|
|
79
62
|
entity.id = chunk_id
|
|
80
|
-
|
|
81
|
-
# Try to optimize if not currently locked (non-blocking)
|
|
82
|
-
if not self._optimize_lock.locked():
|
|
83
|
-
asyncio.create_task(self._optimize())
|
|
84
|
-
|
|
85
63
|
return entity
|
|
86
64
|
|
|
87
65
|
async def get_by_id(self, entity_id: str) -> Chunk | None:
|
|
@@ -125,10 +103,6 @@ class ChunkRepository:
|
|
|
125
103
|
"vector": embedding,
|
|
126
104
|
},
|
|
127
105
|
)
|
|
128
|
-
# Try to optimize if not currently locked (non-blocking)
|
|
129
|
-
if not self._optimize_lock.locked():
|
|
130
|
-
asyncio.create_task(self._optimize())
|
|
131
|
-
|
|
132
106
|
return entity
|
|
133
107
|
|
|
134
108
|
async def delete(self, entity_id: str) -> bool:
|
|
@@ -227,8 +201,6 @@ class ChunkRepository:
|
|
|
227
201
|
if chunk_records:
|
|
228
202
|
self.store.chunks_table.add(chunk_records)
|
|
229
203
|
|
|
230
|
-
# Force optimization once at the end for bulk operations
|
|
231
|
-
await self._optimize()
|
|
232
204
|
return created_chunks
|
|
233
205
|
|
|
234
206
|
async def delete_all(self) -> None:
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
import json
|
|
2
3
|
from datetime import datetime
|
|
3
4
|
from typing import TYPE_CHECKING
|
|
@@ -200,6 +201,9 @@ class DocumentRepository:
|
|
|
200
201
|
chunk.order = order
|
|
201
202
|
await self.chunk_repository.create(chunk)
|
|
202
203
|
|
|
204
|
+
# Vacuum old versions in background (non-blocking)
|
|
205
|
+
asyncio.create_task(self.store.vacuum())
|
|
206
|
+
|
|
203
207
|
return created_doc
|
|
204
208
|
except Exception:
|
|
205
209
|
# Roll back to the captured versions and re-raise
|
|
@@ -230,6 +234,9 @@ class DocumentRepository:
|
|
|
230
234
|
updated_doc.id, docling_document
|
|
231
235
|
)
|
|
232
236
|
|
|
237
|
+
# Vacuum old versions in background (non-blocking)
|
|
238
|
+
asyncio.create_task(self.store.vacuum())
|
|
239
|
+
|
|
233
240
|
return updated_doc
|
|
234
241
|
except Exception:
|
|
235
242
|
# Roll back to the captured versions and re-raise
|
|
@@ -645,7 +645,7 @@ wheels = [
|
|
|
645
645
|
|
|
646
646
|
[[package]]
|
|
647
647
|
name = "docling"
|
|
648
|
-
version = "2.
|
|
648
|
+
version = "2.55.1"
|
|
649
649
|
source = { registry = "https://pypi.org/simple" }
|
|
650
650
|
dependencies = [
|
|
651
651
|
{ name = "accelerate" },
|
|
@@ -676,14 +676,14 @@ dependencies = [
|
|
|
676
676
|
{ name = "tqdm" },
|
|
677
677
|
{ name = "typer" },
|
|
678
678
|
]
|
|
679
|
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
|
679
|
+
sdist = { url = "https://files.pythonhosted.org/packages/81/8c/baa24f0d64a36a87c66eef91dcf169ac346776739c4fb8065e59c31b1291/docling-2.55.1.tar.gz", hash = "sha256:e60a5612b2b993efd8a0b5464aff1b9868e3cab5c2e239c863709e6b780f3c57", size = 212483, upload-time = "2025-10-03T10:27:46.907Z" }
|
|
680
680
|
wheels = [
|
|
681
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
681
|
+
{ url = "https://files.pythonhosted.org/packages/e2/a3/2a2801cb909981b57326da2a9736cd11514d0393dc37771e200615b8b44f/docling-2.55.1-py3-none-any.whl", hash = "sha256:895aba282c6cca9ca1f6b9ff57c2002e4f581f722c608aa671d68382d4d61e07", size = 239394, upload-time = "2025-10-03T10:27:45.157Z" },
|
|
682
682
|
]
|
|
683
683
|
|
|
684
684
|
[[package]]
|
|
685
685
|
name = "docling-core"
|
|
686
|
-
version = "2.48.
|
|
686
|
+
version = "2.48.4"
|
|
687
687
|
source = { registry = "https://pypi.org/simple" }
|
|
688
688
|
dependencies = [
|
|
689
689
|
{ name = "jsonref" },
|
|
@@ -697,9 +697,9 @@ dependencies = [
|
|
|
697
697
|
{ name = "typer" },
|
|
698
698
|
{ name = "typing-extensions" },
|
|
699
699
|
]
|
|
700
|
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
|
700
|
+
sdist = { url = "https://files.pythonhosted.org/packages/38/d8/f0c8034f87d6151eb955e56975b9f2374a54d57af2b56b1682d7c8ff5c71/docling_core-2.48.4.tar.gz", hash = "sha256:d87ce3021cdae3d073ce7572a2396b69be3cde82ebf9a74d4bad1e1cdfdfd524", size = 161377, upload-time = "2025-10-01T09:10:08.614Z" }
|
|
701
701
|
wheels = [
|
|
702
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
702
|
+
{ url = "https://files.pythonhosted.org/packages/c8/2a/06e5f9d3083f830de8bef86f91acda994965f88d8b945ce3b257ea83e780/docling_core-2.48.4-py3-none-any.whl", hash = "sha256:367675c1165d0934ae498fa57ca2d27ef0468aad74dc44a5ab061f5d87882ea1", size = 164374, upload-time = "2025-10-01T09:10:06.034Z" },
|
|
703
703
|
]
|
|
704
704
|
|
|
705
705
|
[package.optional-dependencies]
|
|
@@ -1111,7 +1111,7 @@ wheels = [
|
|
|
1111
1111
|
|
|
1112
1112
|
[[package]]
|
|
1113
1113
|
name = "haiku-rag"
|
|
1114
|
-
version = "0.11.3"
|
|
1114
|
+
version = "0.11.4"
|
|
1115
1115
|
source = { editable = "." }
|
|
1116
1116
|
dependencies = [
|
|
1117
1117
|
{ name = "docling" },
|
|
@@ -1516,7 +1516,7 @@ wheels = [
|
|
|
1516
1516
|
|
|
1517
1517
|
[[package]]
|
|
1518
1518
|
name = "lancedb"
|
|
1519
|
-
version = "0.25.
|
|
1519
|
+
version = "0.25.1"
|
|
1520
1520
|
source = { registry = "https://pypi.org/simple" }
|
|
1521
1521
|
dependencies = [
|
|
1522
1522
|
{ name = "deprecation" },
|
|
@@ -1529,13 +1529,13 @@ dependencies = [
|
|
|
1529
1529
|
{ name = "tqdm" },
|
|
1530
1530
|
]
|
|
1531
1531
|
wheels = [
|
|
1532
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
1533
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
1534
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
1535
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
1536
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
1537
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
1538
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
1532
|
+
{ url = "https://files.pythonhosted.org/packages/ad/2b/ed9870288506d8ca61cddf7b1dbb03c68f95b8797feb49467b33ef185477/lancedb-0.25.1-cp39-abi3-macosx_10_15_x86_64.whl", hash = "sha256:ec0a1cab435a5307054b84ffb798a4d828253f23698848788bfe31930e343c6c", size = 34985432, upload-time = "2025-09-23T23:15:56.558Z" },
|
|
1533
|
+
{ url = "https://files.pythonhosted.org/packages/58/75/320f9142918b646b4b6d0277676c2466d2e0ce2a22aca320d0113b3ef035/lancedb-0.25.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:69e1f8343f6a4ff6985ea13f5c5cdf6d07435d04f8279c4fc6e623a34ceadda0", size = 31993179, upload-time = "2025-09-23T22:20:23.039Z" },
|
|
1534
|
+
{ url = "https://files.pythonhosted.org/packages/fd/44/d223cb64c9feb78dfa3857690d743e961f76e065935c8c4304cb64659882/lancedb-0.25.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9432134155474e73907fc5e1f8a4310433b9234a0c5f964c21b4c39aca50dde6", size = 32872519, upload-time = "2025-09-23T22:29:03.5Z" },
|
|
1535
|
+
{ url = "https://files.pythonhosted.org/packages/61/a6/e6d88d8076fa8c40b7b6f96a37f21c75ce3518ccbf64a351d26ae983461a/lancedb-0.25.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955c6e1aa4e249be7456ea7f7c42ba119be5a5c2c51f4d78efeb6c4f3cc2dbdf", size = 36325984, upload-time = "2025-09-23T22:31:46.118Z" },
|
|
1536
|
+
{ url = "https://files.pythonhosted.org/packages/97/84/14d4f0c3a98a324fcb401161e25fb1699c69ba1cd2928983fb283bd8b04f/lancedb-0.25.1-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d584bdfb96372c03a209bb8f010eb7358135e4adddb903ae1385450af39e1187", size = 32883704, upload-time = "2025-09-23T22:27:41.393Z" },
|
|
1537
|
+
{ url = "https://files.pythonhosted.org/packages/68/10/3e8ae8bf9880b2fed10122cef5e535bd67f0df0a874cc3122220d47ca255/lancedb-0.25.1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:c495da53d3dfa105364f202710d0bb2f031fe54a077b9c2ac9d098d02bd20bb2", size = 36369514, upload-time = "2025-09-23T22:30:53.605Z" },
|
|
1538
|
+
{ url = "https://files.pythonhosted.org/packages/0d/fb/dce4757f257cb4e11e13b71ce502dc5d1caf51f1e5cccfdae85bf23960a0/lancedb-0.25.1-cp39-abi3-win_amd64.whl", hash = "sha256:2c6effc10c8263ea84261f49d5ff1957c18814ed7e3eaa5094d71b1aa0573871", size = 38390878, upload-time = "2025-09-23T22:55:24.687Z" },
|
|
1539
1539
|
]
|
|
1540
1540
|
|
|
1541
1541
|
[[package]]
|
|
@@ -3212,15 +3212,15 @@ sdist = { url = "https://files.pythonhosted.org/packages/30/23/2f0a3efc4d6a32f3b
|
|
|
3212
3212
|
|
|
3213
3213
|
[[package]]
|
|
3214
3214
|
name = "pyright"
|
|
3215
|
-
version = "1.1.
|
|
3215
|
+
version = "1.1.406"
|
|
3216
3216
|
source = { registry = "https://pypi.org/simple" }
|
|
3217
3217
|
dependencies = [
|
|
3218
3218
|
{ name = "nodeenv" },
|
|
3219
3219
|
{ name = "typing-extensions" },
|
|
3220
3220
|
]
|
|
3221
|
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
|
3221
|
+
sdist = { url = "https://files.pythonhosted.org/packages/f7/16/6b4fbdd1fef59a0292cbb99f790b44983e390321eccbc5921b4d161da5d1/pyright-1.1.406.tar.gz", hash = "sha256:c4872bc58c9643dac09e8a2e74d472c62036910b3bd37a32813989ef7576ea2c", size = 4113151, upload-time = "2025-10-02T01:04:45.488Z" }
|
|
3222
3222
|
wheels = [
|
|
3223
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
3223
|
+
{ url = "https://files.pythonhosted.org/packages/f6/a2/e309afbb459f50507103793aaef85ca4348b66814c86bc73908bdeb66d12/pyright-1.1.406-py3-none-any.whl", hash = "sha256:1d81fb43c2407bf566e97e57abb01c811973fdb21b2df8df59f870f688bdca71", size = 5980982, upload-time = "2025-10-02T01:04:43.137Z" },
|
|
3224
3224
|
]
|
|
3225
3225
|
|
|
3226
3226
|
[[package]]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|