haiku.rag 0.11.3__tar.gz → 0.11.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


Files changed (80)
  1. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/PKG-INFO +1 -1
  2. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/pyproject.toml +1 -1
  3. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/evaluations/benchmark.py +27 -8
  4. haiku_rag-0.11.4/src/evaluations/prompts.py +22 -0
  5. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/client.py +10 -4
  6. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/config.py +5 -0
  7. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/migration.py +3 -3
  8. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/qa/__init__.py +6 -1
  9. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/qa/agent.py +6 -3
  10. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/engine.py +33 -5
  11. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/repositories/chunk.py +0 -28
  12. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/repositories/document.py +7 -0
  13. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/uv.lock +18 -18
  14. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/.gitignore +0 -0
  15. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/.pre-commit-config.yaml +0 -0
  16. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/.python-version +0 -0
  17. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/LICENSE +0 -0
  18. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/README.md +0 -0
  19. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/mkdocs.yml +0 -0
  20. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/evaluations/__init__.py +0 -0
  21. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/evaluations/config.py +0 -0
  22. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/evaluations/datasets/__init__.py +0 -0
  23. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/evaluations/datasets/repliqa.py +0 -0
  24. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/evaluations/datasets/wix.py +0 -0
  25. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/evaluations/llm_judge.py +0 -0
  26. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/__init__.py +0 -0
  27. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/app.py +0 -0
  28. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/chunker.py +0 -0
  29. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/cli.py +0 -0
  30. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/embeddings/__init__.py +0 -0
  31. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/embeddings/base.py +0 -0
  32. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/embeddings/ollama.py +0 -0
  33. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/embeddings/openai.py +0 -0
  34. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/embeddings/vllm.py +0 -0
  35. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/embeddings/voyageai.py +0 -0
  36. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/graph/__init__.py +0 -0
  37. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/graph/base.py +0 -0
  38. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/graph/common.py +0 -0
  39. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/graph/models.py +0 -0
  40. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/graph/nodes/__init__.py +0 -0
  41. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/graph/nodes/analysis.py +0 -0
  42. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/graph/nodes/plan.py +0 -0
  43. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/graph/nodes/search.py +0 -0
  44. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/graph/nodes/synthesize.py +0 -0
  45. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/graph/prompts.py +0 -0
  46. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/logging.py +0 -0
  47. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/mcp.py +0 -0
  48. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/monitor.py +0 -0
  49. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/qa/deep/__init__.py +0 -0
  50. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/qa/deep/dependencies.py +0 -0
  51. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/qa/deep/graph.py +0 -0
  52. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/qa/deep/models.py +0 -0
  53. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/qa/deep/nodes.py +0 -0
  54. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/qa/deep/prompts.py +0 -0
  55. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/qa/deep/state.py +0 -0
  56. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/qa/prompts.py +0 -0
  57. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/reader.py +0 -0
  58. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/reranking/__init__.py +0 -0
  59. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/reranking/base.py +0 -0
  60. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/reranking/cohere.py +0 -0
  61. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/reranking/mxbai.py +0 -0
  62. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/reranking/vllm.py +0 -0
  63. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/research/__init__.py +0 -0
  64. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/research/common.py +0 -0
  65. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/research/dependencies.py +0 -0
  66. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/research/graph.py +0 -0
  67. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/research/models.py +0 -0
  68. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/research/prompts.py +0 -0
  69. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/research/state.py +0 -0
  70. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/research/stream.py +0 -0
  71. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/__init__.py +0 -0
  72. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/models/__init__.py +0 -0
  73. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/models/chunk.py +0 -0
  74. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/models/document.py +0 -0
  75. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/repositories/__init__.py +0 -0
  76. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/repositories/settings.py +0 -0
  77. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/upgrades/__init__.py +0 -0
  78. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/upgrades/v0_10_1.py +0 -0
  79. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/upgrades/v0_9_3.py +0 -0
  80. {haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/utils.py +0 -0
{haiku_rag-0.11.3 → haiku_rag-0.11.4}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haiku.rag
-Version: 0.11.3
+Version: 0.11.4
 Summary: Agentic Retrieval Augmented Generation (RAG) with LanceDB
 Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
 License: MIT
{haiku_rag-0.11.3 → haiku_rag-0.11.4}/pyproject.toml
@@ -2,7 +2,7 @@
 
 name = "haiku.rag"
 description = "Agentic Retrieval Augmented Generation (RAG) with LanceDB"
-version = "0.11.3"
+version = "0.11.4"
 authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
 license = { text = "MIT" }
 readme = { file = "README.md", content-type = "text/markdown" }
{haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/evaluations/benchmark.py
@@ -15,6 +15,7 @@ from rich.progress import Progress
 from evaluations.config import DatasetSpec, RetrievalSample
 from evaluations.datasets import DATASETS
 from evaluations.llm_judge import ANSWER_EQUIVALENCE_RUBRIC
+from evaluations.prompts import WIX_SUPPORT_PROMPT
 from haiku.rag import logging  # noqa: F401
 from haiku.rag.client import HaikuRAG
 from haiku.rag.config import Config
@@ -61,7 +62,6 @@ async def populate_db(spec: DatasetSpec) -> None:
                     metadata=payload.metadata,
                 )
                 progress.advance(task)
-        rag.store.vacuum()
 
 
 def _is_relevant_match(retrieved_uri: str | None, sample: RetrievalSample) -> bool:
@@ -80,6 +80,11 @@ async def run_retrieval_benchmark(spec: DatasetSpec) -> dict[str, float] | None:
         3: 0.0,
         5: 0.0,
     }
+    success_totals = {
+        1: 0.0,
+        3: 0.0,
+        5: 0.0,
+    }
     total_queries = 0
 
     with Progress() as progress:
@@ -109,15 +114,16 @@ async def run_retrieval_benchmark(spec: DatasetSpec) -> dict[str, float] | None:
                 if retrieved_doc and retrieved_doc.uri:
                     retrieved_uris.append(retrieved_doc.uri)
 
-            # Compute per-query recall@K by counting how many relevant
-            # documents are retrieved within the first K results and
-            # averaging these fractions across all queries.
+            # Compute metrics for each cutoff
            for cutoff in (1, 3, 5):
                top_k = set(retrieved_uris[:cutoff])
                relevant = set(sample.expected_uris)
                if relevant:
                    matched = len(top_k & relevant)
+                    # Recall: fraction of relevant docs retrieved
                    recall_totals[cutoff] += matched / len(relevant)
+                    # Success: binary - did we get at least one relevant doc?
+                    success_totals[cutoff] += 1.0 if matched > 0 else 0.0
 
            progress.advance(task)
 
@@ -129,16 +135,28 @@ async def run_retrieval_benchmark(spec: DatasetSpec) -> dict[str, float] | None:
     recall_at_3 = recall_totals[3] / total_queries
     recall_at_5 = recall_totals[5] / total_queries
 
+    success_at_1 = success_totals[1] / total_queries
+    success_at_3 = success_totals[3] / total_queries
+    success_at_5 = success_totals[5] / total_queries
+
     console.print("\n=== Retrieval Benchmark Results ===", style="bold cyan")
     console.print(f"Total queries: {total_queries}")
-    console.print(f"Recall@1: {recall_at_1:.4f}")
-    console.print(f"Recall@3: {recall_at_3:.4f}")
-    console.print(f"Recall@5: {recall_at_5:.4f}")
+    console.print("\nRecall@K (fraction of relevant docs retrieved):")
+    console.print(f" Recall@1: {recall_at_1:.4f}")
+    console.print(f" Recall@3: {recall_at_3:.4f}")
+    console.print(f" Recall@5: {recall_at_5:.4f}")
+    console.print("\nSuccess@K (queries with at least one relevant doc):")
+    console.print(f" Success@1: {success_at_1:.4f} ({success_at_1 * 100:.1f}%)")
+    console.print(f" Success@3: {success_at_3:.4f} ({success_at_3 * 100:.1f}%)")
+    console.print(f" Success@5: {success_at_5:.4f} ({success_at_5 * 100:.1f}%)")
 
     return {
         "recall@1": recall_at_1,
         "recall@3": recall_at_3,
         "recall@5": recall_at_5,
+        "success@1": success_at_1,
+        "success@3": success_at_3,
+        "success@5": success_at_5,
     }
 
 
@@ -187,7 +205,8 @@ async def run_qa_benchmark(
     )
 
     async with HaikuRAG(spec.db_path) as rag:
-        qa = get_qa_agent(rag)
+        system_prompt = WIX_SUPPORT_PROMPT if spec.key == "wix" else None
+        qa = get_qa_agent(rag, system_prompt=system_prompt)
 
         async def answer_question(question: str) -> str:
            return await qa.answer(question)
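
For readers comparing the two metrics introduced above: Recall@K averages the fraction of expected documents found in the top K results, while Success@K only asks whether at least one expected document appears there. A minimal standalone sketch of how the accumulators diverge for a single query (the URIs below are hypothetical, not from the evaluation datasets):

    # Hypothetical ranking and relevance set, for illustration only.
    retrieved_uris = ["doc-a", "doc-b", "doc-c", "doc-d", "doc-e"]
    expected_uris = {"doc-b", "doc-z"}

    for cutoff in (1, 3, 5):
        matched = len(set(retrieved_uris[:cutoff]) & expected_uris)
        recall = matched / len(expected_uris)    # 0.0, 0.5, 0.5
        success = 1.0 if matched > 0 else 0.0    # 0.0, 1.0, 1.0
        print(f"@{cutoff}: recall={recall}, success={success}")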
haiku_rag-0.11.4/src/evaluations/prompts.py (new file)
@@ -0,0 +1,22 @@
+WIX_SUPPORT_PROMPT = """
+You are a WIX technical support expert helping users with questions about the WIX platform.
+
+Your process:
+1. When a user asks a question, use the search_documents tool to find relevant information
+2. Search with specific keywords and phrases from the user's question
+3. Review the search results and their relevance scores
+4. If you need additional context, perform follow-up searches with different keywords
+5. Provide a short and to the point comprehensive answer based only on the retrieved documents
+
+Guidelines:
+- Base your answers strictly on the provided document content
+- Quote or reference specific information when possible
+- If multiple documents contain relevant information, synthesize them coherently
+- Indicate when information is incomplete or when you need to search for additional context
+- If the retrieved documents don't contain sufficient information, clearly state: "I cannot find enough information in the knowledge base to answer this question."
+- For complex questions, consider breaking them down and performing multiple searches
+- Stick to the answer, do not ellaborate or provide context unless explicitly asked for it.
+
+Be concise, and always maintain accuracy over completeness. Prefer short, direct answers that are well-supported by the documents.
+/no_think
+"""
{haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/client.py
@@ -46,6 +46,9 @@ class HaikuRAG:
 
     async def __aexit__(self, exc_type, exc_val, exc_tb):  # noqa: ARG002
         """Async context manager exit."""
+        # Wait for any pending vacuum to complete before closing
+        async with self.store._vacuum_lock:
+            pass
         self.close()
         return False
 
@@ -522,19 +525,22 @@
             merged.append(current)
         return merged
 
-    async def ask(self, question: str, cite: bool = False) -> str:
+    async def ask(
+        self, question: str, cite: bool = False, system_prompt: str | None = None
+    ) -> str:
         """Ask a question using the configured QA agent.
 
         Args:
             question: The question to ask.
             cite: Whether to include citations in the response.
+            system_prompt: Optional custom system prompt for the QA agent.
 
         Returns:
             The generated answer as a string.
         """
         from haiku.rag.qa import get_qa_agent
 
-        qa_agent = get_qa_agent(self, use_citations=cite)
+        qa_agent = get_qa_agent(self, use_citations=cite, system_prompt=system_prompt)
         return await qa_agent.answer(question)
 
     async def rebuild_database(self) -> AsyncGenerator[str, None]:
@@ -617,13 +623,13 @@
 
         # Final maintenance: centralized vacuum to curb disk usage
         try:
-            self.store.vacuum()
+            await self.store.vacuum()
         except Exception:
             pass
 
     async def vacuum(self) -> None:
         """Optimize and clean up old versions across all tables."""
-        self.store.vacuum()
+        await self.store.vacuum()
 
     def close(self):
         """Close the underlying store connection."""
{haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/config.py
@@ -57,6 +57,11 @@ class AppConfig(BaseModel):
     # and error out when the database does not already exist.
     DISABLE_DB_AUTOCREATE: bool = False
 
+    # Vacuum retention threshold in seconds. Only versions older than this
+    # threshold will be removed during vacuum operations. Default is 60 seconds
+    # to allow concurrent connections to safely use recent versions.
+    VACUUM_RETENTION_SECONDS: int = 60
+
     @field_validator("MONITOR_DIRECTORIES", mode="before")
     @classmethod
     def parse_monitor_directories(cls, v):
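
The new setting only controls how aggressively vacuum prunes old table versions. A minimal sketch, assuming `store` is an already-initialized Store instance; the explicit override uses the retention_seconds parameter added to Store.vacuum later in this release:

    from haiku.rag.config import Config

    async def prune_now(store) -> None:
        # The config default keeps ~60 seconds of old versions for concurrent readers.
        print(Config.VACUUM_RETENTION_SECONDS)
        # Per-call override of the retention threshold; 0 prunes all old versions.
        await store.vacuum(retention_seconds=0)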
{haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/migration.py
@@ -27,7 +27,7 @@ class SQLiteToLanceDBMigrator:
         self.lancedb_path = lancedb_path
         self.console = Console()
 
-    def migrate(self) -> bool:
+    async def migrate(self) -> bool:
         """Perform the migration."""
         try:
             self.console.print(
@@ -94,7 +94,7 @@
         # Optimize and cleanup using centralized vacuum
         self.console.print("[cyan]Optimizing LanceDB...[/cyan]")
         try:
-            lance_store.vacuum()
+            await lance_store.vacuum()
             self.console.print("[green]✅ Optimization completed[/green]")
         except Exception as e:
             self.console.print(
@@ -313,4 +313,4 @@ async def migrate_sqlite_to_lancedb(
     lancedb_path = sqlite_path.parent / (sqlite_path.stem + ".lancedb")
 
     migrator = SQLiteToLanceDBMigrator(sqlite_path, lancedb_path)
-    return migrator.migrate()
+    return await migrator.migrate()
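
Because migrate() is now a coroutine, callers have to await it. A hedged sketch that mirrors the construction shown in the hunk above; the paths are placeholders and the return value is the bool that migrate() reports:

    import asyncio
    from pathlib import Path

    from haiku.rag.migration import SQLiteToLanceDBMigrator

    async def run_migration() -> bool:
        # Placeholder paths; the lancedb path is derived the same way the module does it.
        sqlite_path = Path("./old-store.sqlite")
        lancedb_path = sqlite_path.parent / (sqlite_path.stem + ".lancedb")
        migrator = SQLiteToLanceDBMigrator(sqlite_path, lancedb_path)
        return await migrator.migrate()

    print(asyncio.run(run_migration()))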
{haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/qa/__init__.py
@@ -3,7 +3,11 @@ from haiku.rag.config import Config
 from haiku.rag.qa.agent import QuestionAnswerAgent
 
 
-def get_qa_agent(client: HaikuRAG, use_citations: bool = False) -> QuestionAnswerAgent:
+def get_qa_agent(
+    client: HaikuRAG,
+    use_citations: bool = False,
+    system_prompt: str | None = None,
+) -> QuestionAnswerAgent:
     provider = Config.QA_PROVIDER
     model_name = Config.QA_MODEL
 
@@ -12,4 +16,5 @@ def get_qa_agent(client: HaikuRAG, use_citations: bool = False) -> QuestionAnswe
         provider=provider,
         model=model_name,
         use_citations=use_citations,
+        system_prompt=system_prompt,
     )
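
A sketch of how a caller can now thread a custom prompt through get_qa_agent, mirroring the benchmark change earlier in this diff; `rag` stands in for an open HaikuRAG client and the prompt string is a placeholder. When system_prompt is None, the agent falls back to the default QA prompts.

    from haiku.rag.qa import get_qa_agent

    async def support_answer(rag, question: str) -> str:
        # Custom prompt overrides the default/citation prompts for this agent only.
        qa = get_qa_agent(rag, system_prompt="Answer like a support agent, citing the docs.")
        return await qa.answer(question)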
{haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/qa/agent.py
@@ -30,18 +30,21 @@ class QuestionAnswerAgent:
         model: str,
         use_citations: bool = False,
         q: float = 0.0,
+        system_prompt: str | None = None,
     ):
         self._client = client
 
-        system_prompt = (
-            QA_SYSTEM_PROMPT_WITH_CITATIONS if use_citations else QA_SYSTEM_PROMPT
-        )
+        if system_prompt is None:
+            system_prompt = (
+                QA_SYSTEM_PROMPT_WITH_CITATIONS if use_citations else QA_SYSTEM_PROMPT
+            )
         model_obj = self._get_model(provider, model)
 
         self._agent = Agent(
             model=model_obj,
             deps_type=Dependencies,
             system_prompt=system_prompt,
+            retries=3,
         )
 
         @self._agent.tool
{haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/engine.py
@@ -1,3 +1,4 @@
+import asyncio
 import json
 import logging
 from datetime import timedelta
@@ -51,6 +52,7 @@ class Store:
     def __init__(self, db_path: Path, skip_validation: bool = False):
         self.db_path: Path = db_path
         self.embedder = get_embedder()
+        self._vacuum_lock = asyncio.Lock()
 
         # Create the ChunkRecord model with the correct vector dimension
         self.ChunkRecord = create_chunk_model(self.embedder._vector_dim)
@@ -78,14 +80,40 @@
         if not skip_validation:
             self._validate_configuration()
 
-    def vacuum(self) -> None:
-        """Optimize and clean up old versions across all tables to reduce disk usage."""
+    async def vacuum(self, retention_seconds: int | None = None) -> None:
+        """Optimize and clean up old versions across all tables to reduce disk usage.
+
+        Args:
+            retention_seconds: Retention threshold in seconds. Only versions older
+                than this will be removed. If None, uses Config.VACUUM_RETENTION_SECONDS.
+
+        Note:
+            If vacuum is already running, this method returns immediately without blocking.
+            Use asyncio.create_task(store.vacuum()) for non-blocking background execution.
+        """
         if self._has_cloud_config() and str(Config.LANCEDB_URI).startswith("db://"):
             return
 
-        # Perform maintenance per table using optimize() with cleanup_older_than 0
-        for table in [self.documents_table, self.chunks_table, self.settings_table]:
-            table.optimize(cleanup_older_than=timedelta(0))
+        # Skip if already running (non-blocking)
+        if self._vacuum_lock.locked():
+            return
+
+        async with self._vacuum_lock:
+            try:
+                # Evaluate config at runtime to allow dynamic changes
+                if retention_seconds is None:
+                    retention_seconds = Config.VACUUM_RETENTION_SECONDS
+                # Perform maintenance per table using optimize() with configurable retention
+                retention = timedelta(seconds=retention_seconds)
+                for table in [
+                    self.documents_table,
+                    self.chunks_table,
+                    self.settings_table,
+                ]:
+                    table.optimize(cleanup_older_than=retention)
+            except (RuntimeError, OSError) as e:
+                # Handle resource errors gracefully
+                logger.debug(f"Vacuum skipped due to resource constraints: {e}")
 
     def _connect_to_lancedb(self, db_path: Path):
         """Establish connection to LanceDB (local, cloud, or object storage)."""
{haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/repositories/chunk.py
@@ -1,4 +1,3 @@
-import asyncio
 import inspect
 import json
 import logging
@@ -23,7 +22,6 @@ class ChunkRepository:
     def __init__(self, store: Store) -> None:
         self.store = store
         self.embedder = get_embedder()
-        self._optimize_lock = asyncio.Lock()
 
     def _ensure_fts_index(self) -> None:
         """Ensure FTS index exists on the content column."""
@@ -35,21 +33,6 @@
             # Log the error but don't fail - FTS might already exist
             logger.debug(f"FTS index creation skipped: {e}")
 
-    async def _optimize(self) -> None:
-        """Optimize the chunks table to refresh indexes."""
-        # Skip optimization for LanceDB Cloud as it handles this automatically
-        if Config.LANCEDB_URI and Config.LANCEDB_URI.startswith("db://"):
-            return
-
-        async with self._optimize_lock:
-            try:
-                self.store.chunks_table.optimize()
-            except (RuntimeError, OSError) as e:
-                # Handle "too many open files" and other resource errors gracefully
-                logger.debug(
-                    f"Table optimization skipped due to resource constraints: {e}"
-                )
-
     async def create(self, entity: Chunk) -> Chunk:
         """Create a chunk in the database."""
         assert entity.document_id, "Chunk must have a document_id to be created"
@@ -77,11 +60,6 @@
         self.store.chunks_table.add([chunk_record])
 
         entity.id = chunk_id
-
-        # Try to optimize if not currently locked (non-blocking)
-        if not self._optimize_lock.locked():
-            asyncio.create_task(self._optimize())
-
         return entity
 
     async def get_by_id(self, entity_id: str) -> Chunk | None:
@@ -125,10 +103,6 @@
                 "vector": embedding,
             },
         )
-        # Try to optimize if not currently locked (non-blocking)
-        if not self._optimize_lock.locked():
-            asyncio.create_task(self._optimize())
-
         return entity
 
     async def delete(self, entity_id: str) -> bool:
@@ -227,8 +201,6 @@
         if chunk_records:
             self.store.chunks_table.add(chunk_records)
 
-        # Force optimization once at the end for bulk operations
-        await self._optimize()
         return created_chunks
 
     async def delete_all(self) -> None:
{haiku_rag-0.11.3 → haiku_rag-0.11.4}/src/haiku/rag/store/repositories/document.py
@@ -1,3 +1,4 @@
+import asyncio
 import json
 from datetime import datetime
 from typing import TYPE_CHECKING
@@ -200,6 +201,9 @@ class DocumentRepository:
                 chunk.order = order
                 await self.chunk_repository.create(chunk)
 
+            # Vacuum old versions in background (non-blocking)
+            asyncio.create_task(self.store.vacuum())
+
             return created_doc
         except Exception:
             # Roll back to the captured versions and re-raise
@@ -230,6 +234,9 @@
                 updated_doc.id, docling_document
             )
 
+            # Vacuum old versions in background (non-blocking)
+            asyncio.create_task(self.store.vacuum())
+
             return updated_doc
         except Exception:
             # Roll back to the captured versions and re-raise
{haiku_rag-0.11.3 → haiku_rag-0.11.4}/uv.lock
@@ -645,7 +645,7 @@ wheels = [
 
 [[package]]
 name = "docling"
-version = "2.52.0"
+version = "2.55.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "accelerate" },
@@ -676,14 +676,14 @@ dependencies = [
     { name = "tqdm" },
     { name = "typer" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/c9/40/e6b25533f99eb48dae6f21b226ee7ebe6c9796d42ecf4ecfce525414f5fa/docling-2.52.0.tar.gz", hash = "sha256:e6c6b4db5ed583e899528f76a9de71679c75f25a0585afcc7fc3ccff6c791d41", size = 196658, upload-time = "2025-09-11T16:12:40.951Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/81/8c/baa24f0d64a36a87c66eef91dcf169ac346776739c4fb8065e59c31b1291/docling-2.55.1.tar.gz", hash = "sha256:e60a5612b2b993efd8a0b5464aff1b9868e3cab5c2e239c863709e6b780f3c57", size = 212483, upload-time = "2025-10-03T10:27:46.907Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/28/6a/ff3e65868f409438f2957ad4601852db51d7f6e5eb866825fa306177edc7/docling-2.52.0-py3-none-any.whl", hash = "sha256:85927fde42cd6b597c314c7a73241a7d28c817f8666b3c00c045c2905a769d8b", size = 224855, upload-time = "2025-09-11T16:12:39.319Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/a3/2a2801cb909981b57326da2a9736cd11514d0393dc37771e200615b8b44f/docling-2.55.1-py3-none-any.whl", hash = "sha256:895aba282c6cca9ca1f6b9ff57c2002e4f581f722c608aa671d68382d4d61e07", size = 239394, upload-time = "2025-10-03T10:27:45.157Z" },
 ]
 
 [[package]]
 name = "docling-core"
-version = "2.48.1"
+version = "2.48.4"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "jsonref" },
@@ -697,9 +697,9 @@ dependencies = [
     { name = "typer" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/f9/0c/dce7f80e99e56570d143885fc40536107e8a39ef4de2888959e055b39607/docling_core-2.48.1.tar.gz", hash = "sha256:48cb77575dfd020a51413957e96b165e45f6d1027c641710fddb389dcb9b189c", size = 161311, upload-time = "2025-09-11T12:33:22.46Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/38/d8/f0c8034f87d6151eb955e56975b9f2374a54d57af2b56b1682d7c8ff5c71/docling_core-2.48.4.tar.gz", hash = "sha256:d87ce3021cdae3d073ce7572a2396b69be3cde82ebf9a74d4bad1e1cdfdfd524", size = 161377, upload-time = "2025-10-01T09:10:08.614Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/90/fe/1b96120c9d94c97016716ccf46ad2708a2e76157e52dfcca4101db70fc21/docling_core-2.48.1-py3-none-any.whl", hash = "sha256:a3985999ac2067e15e589ef0f11ccde264deacaea403c0f94049242f10a6189a", size = 164330, upload-time = "2025-09-11T12:33:20.935Z" },
+    { url = "https://files.pythonhosted.org/packages/c8/2a/06e5f9d3083f830de8bef86f91acda994965f88d8b945ce3b257ea83e780/docling_core-2.48.4-py3-none-any.whl", hash = "sha256:367675c1165d0934ae498fa57ca2d27ef0468aad74dc44a5ab061f5d87882ea1", size = 164374, upload-time = "2025-10-01T09:10:06.034Z" },
 ]
 
 [package.optional-dependencies]
@@ -1111,7 +1111,7 @@ wheels = [
 
 [[package]]
 name = "haiku-rag"
-version = "0.11.3"
+version = "0.11.4"
 source = { editable = "." }
 dependencies = [
     { name = "docling" },
@@ -1516,7 +1516,7 @@ wheels = [
 
 [[package]]
 name = "lancedb"
-version = "0.25.0"
+version = "0.25.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "deprecation" },
@@ -1529,13 +1529,13 @@ dependencies = [
     { name = "tqdm" },
 ]
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/a2/e7/10953deea89b06ae5bc568169d5ae888ff6df314decb92b9b3e453f53f0b/lancedb-0.25.0-cp39-abi3-macosx_10_15_x86_64.whl", hash = "sha256:ae2e80b7b3be3fa4d92fc8d500f47549dd1f8d28ca5092f1c898b92d0cfd4393", size = 34171227, upload-time = "2025-09-04T11:05:31.327Z" },
-    { url = "https://files.pythonhosted.org/packages/55/7f/2874a3709f1b8c487e707e171c9004a9240af3af0fd7a247b9187bb6e0f7/lancedb-0.25.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:a9d67ea9edffa596c6f190151fdd535da8e355a4fd1979c1dc19d540a5665916", size = 31552856, upload-time = "2025-09-04T09:46:50.788Z" },
-    { url = "https://files.pythonhosted.org/packages/e3/e9/faab70ad918576ed3bb7cb936474137ac265ac3026d3e16e30cd4d3daac2/lancedb-0.25.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8fe20079ed86b1ab75c65dcfc920a9646c835e9c40ef825cadd148c11b0001e", size = 32487962, upload-time = "2025-09-04T08:51:35.358Z" },
-    { url = "https://files.pythonhosted.org/packages/ce/40/5471bc8115f287040b5afdf9d7a20c4685ec16cddb4a7da79e7c1f63914e/lancedb-0.25.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b37bc402d85c83e454d9f2e79480b31acc5904bb159a4fc715032c7560494157", size = 35726794, upload-time = "2025-09-04T08:57:30.554Z" },
-    { url = "https://files.pythonhosted.org/packages/47/5e/aa3d9d2c7a834a9aa539b2b1c731ab860f7e32e2c87b9086ad233ecb13cd/lancedb-0.25.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:f9bbc20bd1e64be359ca11c90428c00b0062d26b0291bddf32ab5471a3525c76", size = 32492508, upload-time = "2025-09-04T08:53:54.661Z" },
-    { url = "https://files.pythonhosted.org/packages/fa/37/75f4e3ed7fa00a2cd5d321e8bf13441cdb61a83fbbcd0fa0f1a7241affe1/lancedb-0.25.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:1306be9c08e208a5bcb5188275f47f962c2eda96369fad5949a3ddaf592afc6d", size = 35776383, upload-time = "2025-09-04T08:57:18.737Z" },
-    { url = "https://files.pythonhosted.org/packages/b5/af/eb217ea1daab5c28ce4c764d2f672f4e3a5bcd3d4faf7921a8ee28c6cb5b/lancedb-0.25.0-cp39-abi3-win_amd64.whl", hash = "sha256:f66283e5d63c99c2bfbd4eaa134d9a5c5b0145eb26a972648214f8ba87777e24", size = 37826272, upload-time = "2025-09-04T09:15:23.729Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/2b/ed9870288506d8ca61cddf7b1dbb03c68f95b8797feb49467b33ef185477/lancedb-0.25.1-cp39-abi3-macosx_10_15_x86_64.whl", hash = "sha256:ec0a1cab435a5307054b84ffb798a4d828253f23698848788bfe31930e343c6c", size = 34985432, upload-time = "2025-09-23T23:15:56.558Z" },
+    { url = "https://files.pythonhosted.org/packages/58/75/320f9142918b646b4b6d0277676c2466d2e0ce2a22aca320d0113b3ef035/lancedb-0.25.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:69e1f8343f6a4ff6985ea13f5c5cdf6d07435d04f8279c4fc6e623a34ceadda0", size = 31993179, upload-time = "2025-09-23T22:20:23.039Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/44/d223cb64c9feb78dfa3857690d743e961f76e065935c8c4304cb64659882/lancedb-0.25.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9432134155474e73907fc5e1f8a4310433b9234a0c5f964c21b4c39aca50dde6", size = 32872519, upload-time = "2025-09-23T22:29:03.5Z" },
+    { url = "https://files.pythonhosted.org/packages/61/a6/e6d88d8076fa8c40b7b6f96a37f21c75ce3518ccbf64a351d26ae983461a/lancedb-0.25.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955c6e1aa4e249be7456ea7f7c42ba119be5a5c2c51f4d78efeb6c4f3cc2dbdf", size = 36325984, upload-time = "2025-09-23T22:31:46.118Z" },
+    { url = "https://files.pythonhosted.org/packages/97/84/14d4f0c3a98a324fcb401161e25fb1699c69ba1cd2928983fb283bd8b04f/lancedb-0.25.1-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d584bdfb96372c03a209bb8f010eb7358135e4adddb903ae1385450af39e1187", size = 32883704, upload-time = "2025-09-23T22:27:41.393Z" },
+    { url = "https://files.pythonhosted.org/packages/68/10/3e8ae8bf9880b2fed10122cef5e535bd67f0df0a874cc3122220d47ca255/lancedb-0.25.1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:c495da53d3dfa105364f202710d0bb2f031fe54a077b9c2ac9d098d02bd20bb2", size = 36369514, upload-time = "2025-09-23T22:30:53.605Z" },
+    { url = "https://files.pythonhosted.org/packages/0d/fb/dce4757f257cb4e11e13b71ce502dc5d1caf51f1e5cccfdae85bf23960a0/lancedb-0.25.1-cp39-abi3-win_amd64.whl", hash = "sha256:2c6effc10c8263ea84261f49d5ff1957c18814ed7e3eaa5094d71b1aa0573871", size = 38390878, upload-time = "2025-09-23T22:55:24.687Z" },
 ]
 
 [[package]]
@@ -3212,15 +3212,15 @@ sdist = { url = "https://files.pythonhosted.org/packages/30/23/2f0a3efc4d6a32f3b
 
 [[package]]
 name = "pyright"
-version = "1.1.405"
+version = "1.1.406"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "nodeenv" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/fb/6c/ba4bbee22e76af700ea593a1d8701e3225080956753bee9750dcc25e2649/pyright-1.1.405.tar.gz", hash = "sha256:5c2a30e1037af27eb463a1cc0b9f6d65fec48478ccf092c1ac28385a15c55763", size = 4068319, upload-time = "2025-09-04T03:37:06.776Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/f7/16/6b4fbdd1fef59a0292cbb99f790b44983e390321eccbc5921b4d161da5d1/pyright-1.1.406.tar.gz", hash = "sha256:c4872bc58c9643dac09e8a2e74d472c62036910b3bd37a32813989ef7576ea2c", size = 4113151, upload-time = "2025-10-02T01:04:45.488Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/d5/1a/524f832e1ff1962a22a1accc775ca7b143ba2e9f5924bb6749dce566784a/pyright-1.1.405-py3-none-any.whl", hash = "sha256:a2cb13700b5508ce8e5d4546034cb7ea4aedb60215c6c33f56cec7f53996035a", size = 5905038, upload-time = "2025-09-04T03:37:04.913Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/a2/e309afbb459f50507103793aaef85ca4348b66814c86bc73908bdeb66d12/pyright-1.1.406-py3-none-any.whl", hash = "sha256:1d81fb43c2407bf566e97e57abb01c811973fdb21b2df8df59f870f688bdca71", size = 5980982, upload-time = "2025-10-02T01:04:43.137Z" },
 ]
 
 [[package]]