haiku.rag 0.10.1__tar.gz → 0.10.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of haiku.rag might be problematic.

Files changed (95)
  1. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/PKG-INFO +3 -2
  2. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/README.md +2 -1
  3. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/docs/cli.md +39 -0
  4. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/docs/index.md +2 -1
  5. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/docs/installation.md +10 -0
  6. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/pyproject.toml +1 -1
  7. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/app.py +137 -12
  8. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/cli.py +72 -2
  9. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/migration.py +2 -2
  10. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/__init__.py +1 -1
  11. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/models/__init__.py +1 -1
  12. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/utils.py +34 -0
  13. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_app.py +15 -8
  14. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_cli.py +93 -7
  15. haiku_rag-0.10.2/tests/test_info.py +79 -0
  16. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/uv.lock +1 -1
  17. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/.github/FUNDING.yml +0 -0
  18. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/.github/workflows/build-docs.yml +0 -0
  19. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/.github/workflows/build-publish.yml +0 -0
  20. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/.gitignore +0 -0
  21. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/.pre-commit-config.yaml +0 -0
  22. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/.python-version +0 -0
  23. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/LICENSE +0 -0
  24. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/docs/agents.md +0 -0
  25. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/docs/benchmarks.md +0 -0
  26. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/docs/configuration.md +0 -0
  27. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/docs/mcp.md +0 -0
  28. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/docs/python.md +0 -0
  29. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/docs/server.md +0 -0
  30. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/mkdocs.yml +0 -0
  31. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/__init__.py +0 -0
  32. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/chunker.py +0 -0
  33. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/client.py +0 -0
  34. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/config.py +0 -0
  35. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/embeddings/__init__.py +0 -0
  36. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/embeddings/base.py +0 -0
  37. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/embeddings/ollama.py +0 -0
  38. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/embeddings/openai.py +0 -0
  39. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/embeddings/vllm.py +0 -0
  40. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/embeddings/voyageai.py +0 -0
  41. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/logging.py +0 -0
  42. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/mcp.py +0 -0
  43. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/monitor.py +0 -0
  44. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/qa/__init__.py +0 -0
  45. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/qa/agent.py +0 -0
  46. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/qa/prompts.py +0 -0
  47. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/reader.py +0 -0
  48. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/reranking/__init__.py +0 -0
  49. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/reranking/base.py +0 -0
  50. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/reranking/cohere.py +0 -0
  51. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/reranking/mxbai.py +0 -0
  52. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/reranking/vllm.py +0 -0
  53. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/research/__init__.py +0 -0
  54. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/research/common.py +0 -0
  55. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/research/dependencies.py +0 -0
  56. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/research/graph.py +0 -0
  57. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/research/models.py +0 -0
  58. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/research/nodes/evaluate.py +0 -0
  59. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/research/nodes/plan.py +0 -0
  60. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/research/nodes/search.py +0 -0
  61. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/research/nodes/synthesize.py +0 -0
  62. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/research/prompts.py +0 -0
  63. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/research/state.py +0 -0
  64. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/engine.py +0 -0
  65. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/models/chunk.py +0 -0
  66. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/models/document.py +0 -0
  67. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/repositories/__init__.py +0 -0
  68. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/repositories/chunk.py +0 -0
  69. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/repositories/document.py +0 -0
  70. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/repositories/settings.py +0 -0
  71. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/upgrades/__init__.py +0 -0
  72. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/upgrades/v0_10_1.py +0 -0
  73. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/upgrades/v0_9_3.py +0 -0
  74. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/__init__.py +0 -0
  75. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/conftest.py +0 -0
  76. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/generate_benchmark_db.py +0 -0
  77. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/llm_judge.py +0 -0
  78. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_chunk.py +0 -0
  79. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_chunker.py +0 -0
  80. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_client.py +0 -0
  81. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_document.py +0 -0
  82. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_embedder.py +0 -0
  83. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_lancedb_connection.py +0 -0
  84. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_monitor.py +0 -0
  85. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_preprocessor.py +0 -0
  86. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_qa.py +0 -0
  87. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_reader.py +0 -0
  88. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_rebuild.py +0 -0
  89. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_reranker.py +0 -0
  90. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_research_graph.py +0 -0
  91. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_research_graph_integration.py +0 -0
  92. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_search.py +0 -0
  93. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_settings.py +0 -0
  94. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_utils.py +0 -0
  95. {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_versioning.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: haiku.rag
3
- Version: 0.10.1
3
+ Version: 0.10.2
4
4
  Summary: Agentic Retrieval Augmented Generation (RAG) with LanceDB
5
5
  Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
6
6
  License: MIT
@@ -66,7 +66,8 @@ uv pip install haiku.rag
66
66
 
67
67
  # Add documents
68
68
  haiku-rag add "Your content here"
69
- haiku-rag add-src document.pdf
69
+ haiku-rag add "Your content here" --meta author=alice --meta topic=notes
70
+ haiku-rag add-src document.pdf --meta source=manual
70
71
 
71
72
  # Search
72
73
  haiku-rag search "query"
@@ -28,7 +28,8 @@ uv pip install haiku.rag
28
28
 
29
29
  # Add documents
30
30
  haiku-rag add "Your content here"
31
- haiku-rag add-src document.pdf
31
+ haiku-rag add "Your content here" --meta author=alice --meta topic=notes
32
+ haiku-rag add-src document.pdf --meta source=manual
32
33
 
33
34
  # Search
34
35
  haiku-rag search "query"
@@ -27,6 +27,9 @@ haiku-rag list
27
27
  From text:
28
28
  ```bash
29
29
  haiku-rag add "Your document content here"
30
+
31
+ # Attach metadata (repeat --meta for multiple entries)
32
+ haiku-rag add "Your document content here" --meta author=alice --meta topic=notes
30
33
  ```
31
34
 
32
35
  From file or URL:
@@ -36,6 +39,10 @@ haiku-rag add-src https://example.com/article.html
36
39
 
37
40
  # Optionally set a human‑readable title stored in the DB schema
38
41
  haiku-rag add-src /mnt/data/doc1.pdf --title "Q3 Financial Report"
42
+
43
+ # Optionally attach metadata (repeat --meta). Values use JSON parsing if possible:
44
+ # numbers, booleans, null, arrays/objects; otherwise kept as strings.
45
+ haiku-rag add-src /mnt/data/doc1.pdf --meta source=manual --meta page_count=12 --meta published=true
39
46
  ```
40
47
 
41
48
  !!! note
@@ -126,6 +133,26 @@ haiku-rag settings
126
133
 
127
134
  ## Maintenance
128
135
 
136
+ ### Info (Read-only)
137
+
138
+ Display database metadata without upgrading or modifying it:
139
+
140
+ ```bash
141
+ haiku-rag info [--db /path/to/your.lancedb]
142
+ ```
143
+
144
+ Shows:
145
+ - path to the database
146
+ - stored haiku.rag version (from settings)
147
+ - embeddings provider/model and vector dimension
148
+ - number of documents
149
+ - table versions per table (documents, chunks)
150
+
151
+ At the end, a separate “Versions” section lists runtime package versions:
152
+ - haiku.rag
153
+ - lancedb
154
+ - docling
155
+
129
156
  ### Vacuum (Optimize and Cleanup)
130
157
 
131
158
  Reduce disk usage by optimizing and pruning old table versions across all tables:
@@ -143,6 +170,18 @@ when want to switch embeddings provider or model:
143
170
  haiku-rag rebuild
144
171
  ```
145
172
 
173
+ ### Download Models
174
+
175
+ Download required runtime models:
176
+
177
+ ```bash
178
+ haiku-rag download-models
179
+ ```
180
+
181
+ This command:
182
+ - Downloads Docling OCR/conversion models (no-op if already present).
183
+ - Pulls Ollama models referenced in your configuration (embeddings, QA, research, rerank).
184
+
146
185
  ## Migration
147
186
 
148
187
  ### Migrate from SQLite to LanceDB
@@ -43,7 +43,8 @@ async with HaikuRAG("database.lancedb") as client:
43
43
  Or use the CLI:
44
44
  ```bash
45
45
  haiku-rag add "Your document content"
46
- haiku-rag add-src /path/to/document.pdf --title "Q3 Financial Report"
46
+ haiku-rag add "Your document content" --meta author=alice
47
+ haiku-rag add-src /path/to/document.pdf --title "Q3 Financial Report" --meta source=manual
47
48
  haiku-rag search "query"
48
49
  haiku-rag ask "Who is the author of haiku.rag?"
49
50
  haiku-rag migrate old_database.sqlite # Migrate from SQLite
@@ -72,3 +72,13 @@ VLLM_RERANK_BASE_URL="http://localhost:8001"
72
72
  - Python 3.10+
73
73
  - Ollama (for default embeddings)
74
74
  - vLLM server (for vLLM provider)
75
+
76
+ ## Pre-download Models (Optional)
77
+
78
+ You can prefetch all required runtime models before first use:
79
+
80
+ ```bash
81
+ haiku-rag download-models
82
+ ```
83
+
84
+ This will download Docling models and pull any Ollama models referenced by your current configuration.
@@ -2,7 +2,7 @@
2
2
 
3
3
  name = "haiku.rag"
4
4
  description = "Agentic Retrieval Augmented Generation (RAG) with LanceDB"
5
- version = "0.10.1"
5
+ version = "0.10.2"
6
6
  authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
7
7
  license = { text = "MIT" }
8
8
  readme = { file = "README.md", content-type = "text/markdown" }
@@ -1,4 +1,6 @@
1
1
  import asyncio
2
+ import json
3
+ from importlib.metadata import version as pkg_version
2
4
  from pathlib import Path
3
5
 
4
6
  from rich.console import Console
@@ -25,26 +27,141 @@ class HaikuRAGApp:
25
27
  self.db_path = db_path
26
28
  self.console = Console()
27
29
 
30
+ async def info(self):
31
+ """Display read-only information about the database without modifying it."""
32
+
33
+ import lancedb
34
+
35
+ # Basic: show path
36
+ self.console.print("[bold]haiku.rag database info[/bold]")
37
+ self.console.print(
38
+ f" [repr.attrib_name]path[/repr.attrib_name]: {self.db_path}"
39
+ )
40
+
41
+ if not self.db_path.exists():
42
+ self.console.print("[red]Database path does not exist.[/red]")
43
+ return
44
+
45
+ # Connect without going through Store to avoid upgrades/validation writes
46
+ try:
47
+ db = lancedb.connect(self.db_path)
48
+ table_names = set(db.table_names())
49
+ except Exception as e:
50
+ self.console.print(f"[red]Failed to open database: {e}[/red]")
51
+ return
52
+
53
+ try:
54
+ ldb_version = pkg_version("lancedb")
55
+ except Exception:
56
+ ldb_version = "unknown"
57
+ try:
58
+ hr_version = pkg_version("haiku.rag")
59
+ except Exception:
60
+ hr_version = "unknown"
61
+ try:
62
+ docling_version = pkg_version("docling")
63
+ except Exception:
64
+ docling_version = "unknown"
65
+
66
+ # Read settings (if present) to find stored haiku.rag version and embedding config
67
+ stored_version = "unknown"
68
+ embed_provider: str | None = None
69
+ embed_model: str | None = None
70
+ vector_dim: int | None = None
71
+
72
+ if "settings" in table_names:
73
+ settings_tbl = db.open_table("settings")
74
+ arrow = settings_tbl.search().where("id = 'settings'").limit(1).to_arrow()
75
+ rows = arrow.to_pylist() if arrow is not None else []
76
+ if rows:
77
+ raw = rows[0].get("settings") or "{}"
78
+ data = json.loads(raw) if isinstance(raw, str) else (raw or {})
79
+ stored_version = str(data.get("version", stored_version))
80
+ embed_provider = data.get("EMBEDDINGS_PROVIDER")
81
+ embed_model = data.get("EMBEDDINGS_MODEL")
82
+ vector_dim = (
83
+ int(data.get("EMBEDDINGS_VECTOR_DIM")) # pyright: ignore[reportArgumentType]
84
+ if data.get("EMBEDDINGS_VECTOR_DIM") is not None
85
+ else None
86
+ )
87
+
88
+ num_docs = 0
89
+ if "documents" in table_names:
90
+ docs_tbl = db.open_table("documents")
91
+ num_docs = int(docs_tbl.count_rows()) # type: ignore[attr-defined]
92
+
93
+ # Table versions per table (direct API)
94
+ doc_versions = (
95
+ len(list(db.open_table("documents").list_versions()))
96
+ if "documents" in table_names
97
+ else 0
98
+ )
99
+ chunk_versions = (
100
+ len(list(db.open_table("chunks").list_versions()))
101
+ if "chunks" in table_names
102
+ else 0
103
+ )
104
+
105
+ self.console.print(
106
+ f" [repr.attrib_name]haiku.rag version (db)[/repr.attrib_name]: {stored_version}"
107
+ )
108
+ if embed_provider or embed_model or vector_dim:
109
+ provider_part = embed_provider or "unknown"
110
+ model_part = embed_model or "unknown"
111
+ dim_part = f"{vector_dim}" if vector_dim is not None else "unknown"
112
+ self.console.print(
113
+ " [repr.attrib_name]embeddings[/repr.attrib_name]: "
114
+ f"{provider_part}/{model_part} (dim: {dim_part})"
115
+ )
116
+ else:
117
+ self.console.print(
118
+ " [repr.attrib_name]embeddings[/repr.attrib_name]: unknown"
119
+ )
120
+ self.console.print(
121
+ f" [repr.attrib_name]documents[/repr.attrib_name]: {num_docs}"
122
+ )
123
+ self.console.print(
124
+ f" [repr.attrib_name]versions (documents)[/repr.attrib_name]: {doc_versions}"
125
+ )
126
+ self.console.print(
127
+ f" [repr.attrib_name]versions (chunks)[/repr.attrib_name]: {chunk_versions}"
128
+ )
129
+ self.console.rule()
130
+ self.console.print("[bold]Versions[/bold]")
131
+ self.console.print(
132
+ f" [repr.attrib_name]haiku.rag[/repr.attrib_name]: {hr_version}"
133
+ )
134
+ self.console.print(
135
+ f" [repr.attrib_name]lancedb[/repr.attrib_name]: {ldb_version}"
136
+ )
137
+ self.console.print(
138
+ f" [repr.attrib_name]docling[/repr.attrib_name]: {docling_version}"
139
+ )
140
+
28
141
  async def list_documents(self):
29
142
  async with HaikuRAG(db_path=self.db_path) as self.client:
30
143
  documents = await self.client.list_documents()
31
144
  for doc in documents:
32
145
  self._rich_print_document(doc, truncate=True)
33
146
 
34
- async def add_document_from_text(self, text: str):
147
+ async def add_document_from_text(self, text: str, metadata: dict | None = None):
35
148
  async with HaikuRAG(db_path=self.db_path) as self.client:
36
- doc = await self.client.create_document(text)
149
+ doc = await self.client.create_document(text, metadata=metadata)
37
150
  self._rich_print_document(doc, truncate=True)
38
151
  self.console.print(
39
- f"[b]Document with id [cyan]{doc.id}[/cyan] added successfully.[/b]"
152
+ f"[bold green]Document {doc.id} added successfully.[/bold green]"
40
153
  )
41
154
 
42
- async def add_document_from_source(self, source: str, title: str | None = None):
155
+ async def add_document_from_source(
156
+ self, source: str, title: str | None = None, metadata: dict | None = None
157
+ ):
43
158
  async with HaikuRAG(db_path=self.db_path) as self.client:
44
- doc = await self.client.create_document_from_source(source, title=title)
159
+ doc = await self.client.create_document_from_source(
160
+ source, title=title, metadata=metadata
161
+ )
45
162
  self._rich_print_document(doc, truncate=True)
46
163
  self.console.print(
47
- f"[b]Document with id [cyan]{doc.id}[/cyan] added successfully.[/b]"
164
+ f"[bold green]Document {doc.id} added successfully.[/bold green]"
48
165
  )
49
166
 
50
167
  async def get_document(self, doc_id: str):
@@ -59,7 +176,9 @@ class HaikuRAGApp:
59
176
  async with HaikuRAG(db_path=self.db_path) as self.client:
60
177
  deleted = await self.client.delete_document(doc_id)
61
178
  if deleted:
62
- self.console.print(f"[b]Document {doc_id} deleted successfully.[/b]")
179
+ self.console.print(
180
+ f"[bold green]Document {doc_id} deleted successfully.[/bold green]"
181
+ )
63
182
  else:
64
183
  self.console.print(
65
184
  f"[yellow]Document with id {doc_id} not found.[/yellow]"
@@ -69,7 +188,7 @@ class HaikuRAGApp:
69
188
  async with HaikuRAG(db_path=self.db_path) as self.client:
70
189
  results = await self.client.search(query, limit=limit)
71
190
  if not results:
72
- self.console.print("[red]No results found.[/red]")
191
+ self.console.print("[yellow]No results found.[/yellow]")
73
192
  return
74
193
  for chunk, score in results:
75
194
  self._rich_print_search_result(chunk, score)
@@ -202,14 +321,16 @@ class HaikuRAGApp:
202
321
  return
203
322
 
204
323
  self.console.print(
205
- f"[b]Rebuilding database with {total_docs} documents...[/b]"
324
+ f"[bold cyan]Rebuilding database with {total_docs} documents...[/bold cyan]"
206
325
  )
207
326
  with Progress() as progress:
208
327
  task = progress.add_task("Rebuilding...", total=total_docs)
209
328
  async for _ in client.rebuild_database():
210
329
  progress.update(task, advance=1)
211
330
 
212
- self.console.print("[b]Database rebuild completed successfully.[/b]")
331
+ self.console.print(
332
+ "[bold green]Database rebuild completed successfully.[/bold green]"
333
+ )
213
334
  except Exception as e:
214
335
  self.console.print(f"[red]Error rebuilding database: {e}[/red]")
215
336
 
@@ -218,7 +339,9 @@ class HaikuRAGApp:
218
339
  try:
219
340
  async with HaikuRAG(db_path=self.db_path, skip_validation=True) as client:
220
341
  await client.vacuum()
221
- self.console.print("[b]Vacuum completed successfully.[/b]")
342
+ self.console.print(
343
+ "[bold green]Vacuum completed successfully.[/bold green]"
344
+ )
222
345
  except Exception as e:
223
346
  self.console.print(f"[red]Error during vacuum: {e}[/red]")
224
347
 
@@ -240,7 +363,9 @@ class HaikuRAGApp:
240
363
  else:
241
364
  display_value = field_value
242
365
 
243
- self.console.print(f" [cyan]{field_name}[/cyan]: {display_value}")
366
+ self.console.print(
367
+ f" [repr.attrib_name]{field_name}[/repr.attrib_name]: {display_value}"
368
+ )
244
369
 
245
370
  def _rich_print_document(self, doc: Document, truncate: bool = False):
246
371
  """Format a document for display."""
@@ -1,7 +1,9 @@
1
1
  import asyncio
2
+ import json
2
3
  import warnings
3
4
  from importlib.metadata import version
4
5
  from pathlib import Path
6
+ from typing import Any
5
7
 
6
8
  import typer
7
9
 
@@ -137,11 +139,41 @@ def list_documents(
137
139
  asyncio.run(app.list_documents())
138
140
 
139
141
 
142
+ def _parse_meta_options(meta: list[str] | None) -> dict[str, Any]:
143
+ """Parse repeated --meta KEY=VALUE options into a dictionary.
144
+
145
+ Raises a Typer error if any entry is malformed.
146
+ """
147
+ result: dict[str, Any] = {}
148
+ if not meta:
149
+ return result
150
+ for item in meta:
151
+ if "=" not in item:
152
+ raise typer.BadParameter("--meta must be in KEY=VALUE format")
153
+ key, value = item.split("=", 1)
154
+ if not key:
155
+ raise typer.BadParameter("--meta key cannot be empty")
156
+ # Best-effort JSON coercion: numbers, booleans, null, arrays/objects
157
+ try:
158
+ parsed = json.loads(value)
159
+ result[key] = parsed
160
+ except Exception:
161
+ # Leave as string if not valid JSON literal
162
+ result[key] = value
163
+ return result
164
+
165
+
140
166
  @cli.command("add", help="Add a document from text input")
141
167
  def add_document_text(
142
168
  text: str = typer.Argument(
143
169
  help="The text content of the document to add",
144
170
  ),
171
+ meta: list[str] | None = typer.Option(
172
+ None,
173
+ "--meta",
174
+ help="Metadata entries as KEY=VALUE (repeatable)",
175
+ metavar="KEY=VALUE",
176
+ ),
145
177
  db: Path = typer.Option(
146
178
  Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
147
179
  "--db",
@@ -151,7 +183,8 @@ def add_document_text(
151
183
  from haiku.rag.app import HaikuRAGApp
152
184
 
153
185
  app = HaikuRAGApp(db_path=db)
154
- asyncio.run(app.add_document_from_text(text=text))
186
+ metadata = _parse_meta_options(meta)
187
+ asyncio.run(app.add_document_from_text(text=text, metadata=metadata or None))
155
188
 
156
189
 
157
190
  @cli.command("add-src", help="Add a document from a file path or URL")
@@ -165,6 +198,12 @@ def add_document_src(
165
198
  "--title",
166
199
  help="Optional human-readable title to store with the document",
167
200
  ),
201
+ meta: list[str] | None = typer.Option(
202
+ None,
203
+ "--meta",
204
+ help="Metadata entries as KEY=VALUE (repeatable)",
205
+ metavar="KEY=VALUE",
206
+ ),
168
207
  db: Path = typer.Option(
169
208
  Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
170
209
  "--db",
@@ -174,7 +213,12 @@ def add_document_src(
174
213
  from haiku.rag.app import HaikuRAGApp
175
214
 
176
215
  app = HaikuRAGApp(db_path=db)
177
- asyncio.run(app.add_document_from_source(source=source, title=title))
216
+ metadata = _parse_meta_options(meta)
217
+ asyncio.run(
218
+ app.add_document_from_source(
219
+ source=source, title=title, metadata=metadata or None
220
+ )
221
+ )
178
222
 
179
223
 
180
224
  @cli.command("get", help="Get and display a document by its ID")
@@ -347,6 +391,32 @@ def vacuum(
347
391
  asyncio.run(app.vacuum())
348
392
 
349
393
 
394
+ @cli.command("info", help="Show read-only database info (no upgrades or writes)")
395
+ def info(
396
+ db: Path = typer.Option(
397
+ Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
398
+ "--db",
399
+ help="Path to the LanceDB database file",
400
+ ),
401
+ ):
402
+ from haiku.rag.app import HaikuRAGApp
403
+
404
+ app = HaikuRAGApp(db_path=db)
405
+ asyncio.run(app.info())
406
+
407
+
408
+ @cli.command("download-models", help="Download Docling and Ollama models per config")
409
+ def download_models_cmd():
410
+ from haiku.rag.utils import prefetch_models
411
+
412
+ try:
413
+ prefetch_models()
414
+ typer.echo("Models downloaded successfully.")
415
+ except Exception as e:
416
+ typer.echo(f"Error downloading models: {e}")
417
+ raise typer.Exit(1)
418
+
419
+
350
420
  @cli.command(
351
421
  "serve", help="Start the haiku.rag MCP server (by default in streamable HTTP mode)"
352
422
  )
@@ -51,7 +51,7 @@ class SQLiteToLanceDBMigrator:
51
51
 
52
52
  sqlite_conn.enable_load_extension(True)
53
53
  sqlite_vec.load(sqlite_conn)
54
- self.console.print("[blue]Loaded sqlite-vec extension[/blue]")
54
+ self.console.print("[cyan]Loaded sqlite-vec extension[/cyan]")
55
55
  except Exception as e:
56
56
  self.console.print(
57
57
  f"[yellow]Warning: Could not load sqlite-vec extension: {e}[/yellow]"
@@ -92,7 +92,7 @@ class SQLiteToLanceDBMigrator:
92
92
  sqlite_conn.close()
93
93
 
94
94
  # Optimize and cleanup using centralized vacuum
95
- self.console.print("[blue]Optimizing LanceDB...[/blue]")
95
+ self.console.print("[cyan]Optimizing LanceDB...[/cyan]")
96
96
  try:
97
97
  lance_store.vacuum()
98
98
  self.console.print("[green]✅ Optimization completed[/green]")
@@ -1,4 +1,4 @@
1
1
  from .engine import Store
2
2
  from .models import Chunk, Document
3
3
 
4
- __all__ = ["Store", "Chunk", "Document"]
4
+ __all__ = ["Store", "Chunk", "Document"]
@@ -1,4 +1,4 @@
1
1
  from .chunk import Chunk
2
2
  from .document import Document
3
3
 
4
- __all__ = ["Chunk", "Document"]
4
+ __all__ = ["Chunk", "Document"]
@@ -163,3 +163,37 @@ def load_callable(path: str):
163
163
  f"Attribute '{func_name}' in module '{module_part}' is not callable"
164
164
  )
165
165
  return func
166
+
167
+
168
+ def prefetch_models():
169
+ """Prefetch runtime models (Docling + Ollama as configured)."""
170
+ import httpx
171
+ from docling.utils.model_downloader import download_models
172
+
173
+ from haiku.rag.config import Config
174
+
175
+ download_models()
176
+
177
+ # Collect Ollama models from config
178
+ required_models: set[str] = set()
179
+ if Config.EMBEDDINGS_PROVIDER == "ollama":
180
+ required_models.add(Config.EMBEDDINGS_MODEL)
181
+ if Config.QA_PROVIDER == "ollama":
182
+ required_models.add(Config.QA_MODEL)
183
+ if Config.RESEARCH_PROVIDER == "ollama":
184
+ required_models.add(Config.RESEARCH_MODEL)
185
+ if Config.RERANK_PROVIDER == "ollama":
186
+ required_models.add(Config.RERANK_MODEL)
187
+
188
+ if not required_models:
189
+ return
190
+
191
+ base_url = Config.OLLAMA_BASE_URL
192
+
193
+ with httpx.Client(timeout=None) as client:
194
+ for model in sorted(required_models):
195
+ with client.stream(
196
+ "POST", f"{base_url}/api/pull", json={"model": model}
197
+ ) as r:
198
+ for _ in r.iter_lines():
199
+ pass
@@ -54,10 +54,13 @@ async def test_add_document_from_text(app: HaikuRAGApp, monkeypatch):
54
54
  with patch("haiku.rag.app.HaikuRAG", return_value=mock_client):
55
55
  await app.add_document_from_text("test document")
56
56
 
57
- mock_client.create_document.assert_called_once_with("test document")
57
+ mock_client.create_document.assert_called_once()
58
+ args, kwargs = mock_client.create_document.call_args
59
+ assert args[0] == "test document"
60
+ assert kwargs.get("metadata") is None
58
61
  mock_rich_print.assert_called_once_with(mock_doc, truncate=True)
59
62
  mock_print.assert_called_once_with(
60
- "[b]Document with id [cyan]1[/cyan] added successfully.[/b]"
63
+ "[bold green]Document 1 added successfully.[/bold green]"
61
64
  )
62
65
 
63
66
 
@@ -78,12 +81,14 @@ async def test_add_document_from_source(app: HaikuRAGApp, monkeypatch):
78
81
  with patch("haiku.rag.app.HaikuRAG", return_value=mock_client):
79
82
  await app.add_document_from_source(file_path)
80
83
 
81
- mock_client.create_document_from_source.assert_called_once_with(
82
- file_path, title=None
83
- )
84
+ mock_client.create_document_from_source.assert_called_once()
85
+ args, kwargs = mock_client.create_document_from_source.call_args
86
+ assert args[0] == file_path
87
+ assert kwargs.get("title") is None
88
+ assert kwargs.get("metadata") is None
84
89
  mock_rich_print.assert_called_once_with(mock_doc, truncate=True)
85
90
  mock_print.assert_called_once_with(
86
- "[b]Document with id [cyan]1[/cyan] added successfully.[/b]"
91
+ "[bold green]Document 1 added successfully.[/bold green]"
87
92
  )
88
93
 
89
94
 
@@ -135,7 +140,9 @@ async def test_delete_document(app: HaikuRAGApp, monkeypatch):
135
140
  await app.delete_document("1")
136
141
 
137
142
  mock_client.delete_document.assert_called_once_with("1")
138
- mock_print.assert_called_once_with("[b]Document 1 deleted successfully.[/b]")
143
+ mock_print.assert_called_once_with(
144
+ "[bold green]Document 1 deleted successfully.[/bold green]"
145
+ )
139
146
 
140
147
 
141
148
  @pytest.mark.asyncio
@@ -170,7 +177,7 @@ async def test_search_no_results(app: HaikuRAGApp, monkeypatch):
170
177
  await app.search("query")
171
178
 
172
179
  mock_client.search.assert_called_once_with("query", limit=5)
173
- mock_print.assert_called_once_with("[red]No results found.[/red]")
180
+ mock_print.assert_called_once_with("[yellow]No results found.[/yellow]")
174
181
 
175
182
 
176
183
  @pytest.mark.asyncio
@@ -28,9 +28,10 @@ def test_add_document_text():
28
28
  result = runner.invoke(cli, ["add", "test document"])
29
29
 
30
30
  assert result.exit_code == 0
31
- mock_app_instance.add_document_from_text.assert_called_once_with(
32
- text="test document"
33
- )
31
+ mock_app_instance.add_document_from_text.assert_called_once()
32
+ _, kwargs = mock_app_instance.add_document_from_text.call_args
33
+ assert kwargs.get("text") == "test document"
34
+ assert kwargs.get("metadata") is None
34
35
 
35
36
 
36
37
  def test_add_document_src():
@@ -57,8 +58,83 @@ def test_add_document_src_with_title():
57
58
  mock_app_instance.add_document_from_source.assert_called_once()
58
59
  # Verify title is forwarded (inspect call kwargs)
59
60
  _, kwargs = mock_app_instance.add_document_from_source.call_args
60
- assert kwargs.get("title") == "Nice Name"
61
+ assert kwargs.get("title") == "Nice Name"
62
+ assert kwargs.get("source") == "test.txt"
63
+
64
+
65
+ def test_add_document_text_with_meta():
66
+ with patch("haiku.rag.app.HaikuRAGApp") as mock_app:
67
+ mock_app_instance = MagicMock()
68
+ mock_app_instance.add_document_from_text = AsyncMock()
69
+ mock_app.return_value = mock_app_instance
70
+
71
+ result = runner.invoke(
72
+ cli,
73
+ [
74
+ "add",
75
+ "some text",
76
+ "--meta",
77
+ "author=alice",
78
+ "--meta",
79
+ "topic=notes",
80
+ ],
81
+ )
82
+
83
+ assert result.exit_code == 0
84
+ mock_app_instance.add_document_from_text.assert_called_once()
85
+ _, kwargs = mock_app_instance.add_document_from_text.call_args
86
+ assert kwargs.get("text") == "some text"
87
+ assert kwargs.get("metadata") == {"author": "alice", "topic": "notes"}
88
+
89
+
90
+ def test_add_document_src_with_meta():
91
+ with patch("haiku.rag.app.HaikuRAGApp") as mock_app:
92
+ mock_app_instance = MagicMock()
93
+ mock_app_instance.add_document_from_source = AsyncMock()
94
+ mock_app.return_value = mock_app_instance
95
+
96
+ result = runner.invoke(
97
+ cli,
98
+ [
99
+ "add-src",
100
+ "test.txt",
101
+ "--meta",
102
+ "source=manual",
103
+ "--meta",
104
+ "lang=en",
105
+ ],
106
+ )
107
+
108
+ assert result.exit_code == 0
109
+ mock_app_instance.add_document_from_source.assert_called_once()
110
+ _, kwargs = mock_app_instance.add_document_from_source.call_args
61
111
  assert kwargs.get("source") == "test.txt"
112
+ assert kwargs.get("metadata") == {"source": "manual", "lang": "en"}
113
+
114
+
115
+ def test_add_document_text_with_numeric_meta():
116
+ with patch("haiku.rag.app.HaikuRAGApp") as mock_app:
117
+ mock_app_instance = MagicMock()
118
+ mock_app_instance.add_document_from_text = AsyncMock()
119
+ mock_app.return_value = mock_app_instance
120
+
121
+ result = runner.invoke(
122
+ cli,
123
+ [
124
+ "add",
125
+ "some text",
126
+ "--meta",
127
+ "version=3",
128
+ "--meta",
129
+ "published=true",
130
+ ],
131
+ )
132
+
133
+ assert result.exit_code == 0
134
+ mock_app_instance.add_document_from_text.assert_called_once()
135
+ _, kwargs = mock_app_instance.add_document_from_text.call_args
136
+ assert kwargs.get("text") == "some text"
137
+ assert kwargs.get("metadata") == {"version": 3, "published": True}
62
138
 
63
139
 
64
140
  def test_get_document():
@@ -144,6 +220,16 @@ def test_ask_with_cite():
144
220
  result = runner.invoke(cli, ["ask", "What is Python?", "--cite"])
145
221
 
146
222
  assert result.exit_code == 0
147
- mock_app_instance.ask.assert_called_once_with(
148
- question="What is Python?", cite=True
149
- )
223
+ mock_app_instance.ask.assert_called_once_with(question="What is Python?", cite=True)
224
+
225
+
226
+ def test_info():
227
+ with patch("haiku.rag.app.HaikuRAGApp") as mock_app:
228
+ mock_app_instance = MagicMock()
229
+ mock_app_instance.info = AsyncMock()
230
+ mock_app.return_value = mock_app_instance
231
+
232
+ result = runner.invoke(cli, ["info"])
233
+
234
+ assert result.exit_code == 0
235
+ mock_app_instance.info.assert_called_once()
@@ -0,0 +1,79 @@
1
+ import json
2
+
3
+ import pytest
4
+
5
+ from haiku.rag.app import HaikuRAGApp
6
+
7
+
8
+ @pytest.mark.asyncio
9
+ async def test_app_info_outputs_and_read_only(temp_db_path, capsys):
10
+ # Build a minimal LanceDB with settings, documents, and chunks without using Store
11
+ import lancedb
12
+ from lancedb.pydantic import LanceModel, Vector
13
+ from pydantic import Field
14
+
15
+ db = lancedb.connect(temp_db_path)
16
+
17
+ class SettingsRecord(LanceModel):
18
+ id: str = Field(default="settings")
19
+ settings: str = Field(default="{}")
20
+
21
+ class DocumentRecord(LanceModel):
22
+ id: str
23
+ content: str
24
+
25
+ class ChunkRecord(LanceModel):
26
+ id: str
27
+ document_id: str
28
+ content: str
29
+ vector: Vector(3) # type: ignore
30
+
31
+ settings_tbl = db.create_table("settings", schema=SettingsRecord)
32
+ docs_tbl = db.create_table("documents", schema=DocumentRecord)
33
+ chunks_tbl = db.create_table("chunks", schema=ChunkRecord)
34
+
35
+ # Insert one of each
36
+ settings_tbl.add(
37
+ [
38
+ SettingsRecord(
39
+ id="settings",
40
+ settings=json.dumps(
41
+ {
42
+ "version": "1.2.3",
43
+ "EMBEDDINGS_PROVIDER": "openai",
44
+ "EMBEDDINGS_MODEL": "text-embedding-3-small",
45
+ "EMBEDDINGS_VECTOR_DIM": 3,
46
+ }
47
+ ),
48
+ )
49
+ ]
50
+ )
51
+ docs_tbl.add([DocumentRecord(id="doc-1", content="hello")])
52
+ chunks_tbl.add(
53
+ [ChunkRecord(id="c1", document_id="doc-1", content="c", vector=[0.1, 0.2, 0.3])]
54
+ )
55
+
56
+ # Capture versions before
57
+ before_versions = {
58
+ "settings": int(settings_tbl.version),
59
+ "documents": int(docs_tbl.version),
60
+ "chunks": int(chunks_tbl.version),
61
+ }
62
+
63
+ app = HaikuRAGApp(db_path=temp_db_path)
64
+ await app.info()
65
+
66
+ out = capsys.readouterr().out
67
+ # Validate expected content substrings
68
+ assert f"path: \n{temp_db_path}" in out
69
+ assert "haiku.rag version (db): 1.2.3" in out
70
+ assert "embeddings: openai/text-embedding-3-small (dim: 3)" in out
71
+ assert "lancedb:" in out
72
+ assert "documents: 1" in out
73
+
74
+ # Verify no versions changed (read-only)
75
+ # Re-open to ensure fresh view
76
+ db2 = lancedb.connect(temp_db_path)
77
+ assert int(db2.open_table("settings").version) == before_versions["settings"]
78
+ assert int(db2.open_table("documents").version) == before_versions["documents"]
79
+ assert int(db2.open_table("chunks").version) == before_versions["chunks"]
@@ -1111,7 +1111,7 @@ wheels = [
1111
1111
 
1112
1112
  [[package]]
1113
1113
  name = "haiku-rag"
1114
- version = "0.10.1"
1114
+ version = "0.10.2"
1115
1115
  source = { editable = "." }
1116
1116
  dependencies = [
1117
1117
  { name = "docling" },
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes