haiku.rag 0.10.0__tar.gz → 0.10.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of haiku.rag might be problematic. Click here for more details.

Files changed (96) hide show
  1. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/PKG-INFO +3 -2
  2. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/README.md +2 -1
  3. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/docs/agents.md +2 -1
  4. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/docs/cli.md +43 -3
  5. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/docs/configuration.md +10 -0
  6. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/docs/index.md +3 -0
  7. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/docs/installation.md +10 -0
  8. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/docs/mcp.md +1 -4
  9. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/docs/python.md +9 -3
  10. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/docs/server.md +0 -1
  11. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/pyproject.toml +1 -1
  12. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/app.py +149 -15
  13. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/cli.py +126 -31
  14. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/client.py +63 -21
  15. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/config.py +4 -0
  16. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/mcp.py +18 -6
  17. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/migration.py +2 -2
  18. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/qa/agent.py +4 -2
  19. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/qa/prompts.py +2 -2
  20. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/research/models.py +2 -2
  21. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/research/nodes/search.py +3 -1
  22. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/research/prompts.py +4 -3
  23. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/store/__init__.py +1 -1
  24. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/store/engine.py +14 -0
  25. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/store/models/__init__.py +1 -1
  26. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/store/models/chunk.py +1 -0
  27. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/store/models/document.py +1 -0
  28. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/store/repositories/chunk.py +4 -0
  29. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/store/repositories/document.py +3 -0
  30. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/store/upgrades/__init__.py +2 -0
  31. haiku_rag-0.10.2/src/haiku/rag/store/upgrades/v0_10_1.py +64 -0
  32. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/utils.py +42 -5
  33. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/tests/test_app.py +16 -9
  34. haiku_rag-0.10.2/tests/test_cli.py +235 -0
  35. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/tests/test_client.py +49 -8
  36. haiku_rag-0.10.2/tests/test_info.py +79 -0
  37. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/tests/test_search.py +32 -0
  38. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/uv.lock +1 -1
  39. haiku_rag-0.10.0/tests/test_cli.py +0 -157
  40. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/.github/FUNDING.yml +0 -0
  41. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/.github/workflows/build-docs.yml +0 -0
  42. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/.github/workflows/build-publish.yml +0 -0
  43. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/.gitignore +0 -0
  44. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/.pre-commit-config.yaml +0 -0
  45. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/.python-version +0 -0
  46. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/LICENSE +0 -0
  47. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/docs/benchmarks.md +0 -0
  48. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/mkdocs.yml +0 -0
  49. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/__init__.py +0 -0
  50. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/chunker.py +0 -0
  51. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/embeddings/__init__.py +0 -0
  52. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/embeddings/base.py +0 -0
  53. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/embeddings/ollama.py +0 -0
  54. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/embeddings/openai.py +0 -0
  55. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/embeddings/vllm.py +0 -0
  56. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/embeddings/voyageai.py +0 -0
  57. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/logging.py +0 -0
  58. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/monitor.py +0 -0
  59. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/qa/__init__.py +0 -0
  60. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/reader.py +0 -0
  61. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/reranking/__init__.py +0 -0
  62. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/reranking/base.py +0 -0
  63. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/reranking/cohere.py +0 -0
  64. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/reranking/mxbai.py +0 -0
  65. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/reranking/vllm.py +0 -0
  66. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/research/__init__.py +0 -0
  67. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/research/common.py +0 -0
  68. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/research/dependencies.py +0 -0
  69. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/research/graph.py +0 -0
  70. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/research/nodes/evaluate.py +0 -0
  71. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/research/nodes/plan.py +0 -0
  72. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/research/nodes/synthesize.py +0 -0
  73. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/research/state.py +0 -0
  74. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/store/repositories/__init__.py +0 -0
  75. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/store/repositories/settings.py +0 -0
  76. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/src/haiku/rag/store/upgrades/v0_9_3.py +0 -0
  77. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/tests/__init__.py +0 -0
  78. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/tests/conftest.py +0 -0
  79. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/tests/generate_benchmark_db.py +0 -0
  80. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/tests/llm_judge.py +0 -0
  81. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/tests/test_chunk.py +0 -0
  82. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/tests/test_chunker.py +0 -0
  83. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/tests/test_document.py +0 -0
  84. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/tests/test_embedder.py +0 -0
  85. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/tests/test_lancedb_connection.py +0 -0
  86. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/tests/test_monitor.py +0 -0
  87. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/tests/test_preprocessor.py +0 -0
  88. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/tests/test_qa.py +0 -0
  89. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/tests/test_reader.py +0 -0
  90. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/tests/test_rebuild.py +0 -0
  91. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/tests/test_reranker.py +0 -0
  92. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/tests/test_research_graph.py +0 -0
  93. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/tests/test_research_graph_integration.py +0 -0
  94. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/tests/test_settings.py +0 -0
  95. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/tests/test_utils.py +0 -0
  96. {haiku_rag-0.10.0 → haiku_rag-0.10.2}/tests/test_versioning.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: haiku.rag
3
- Version: 0.10.0
3
+ Version: 0.10.2
4
4
  Summary: Agentic Retrieval Augmented Generation (RAG) with LanceDB
5
5
  Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
6
6
  License: MIT
@@ -66,7 +66,8 @@ uv pip install haiku.rag
66
66
 
67
67
  # Add documents
68
68
  haiku-rag add "Your content here"
69
- haiku-rag add-src document.pdf
69
+ haiku-rag add "Your content here" --meta author=alice --meta topic=notes
70
+ haiku-rag add-src document.pdf --meta source=manual
70
71
 
71
72
  # Search
72
73
  haiku-rag search "query"
@@ -28,7 +28,8 @@ uv pip install haiku.rag
28
28
 
29
29
  # Add documents
30
30
  haiku-rag add "Your content here"
31
- haiku-rag add-src document.pdf
31
+ haiku-rag add "Your content here" --meta author=alice --meta topic=notes
32
+ haiku-rag add-src document.pdf --meta source=manual
32
33
 
33
34
  # Search
34
35
  haiku-rag search "query"
@@ -13,7 +13,8 @@ The simple QA agent answers a single question using the knowledge base. It retri
13
13
  Key points:
14
14
 
15
15
  - Uses a single `search_documents` tool to fetch relevant chunks
16
- - Can be run with or without inline citations in the prompt
16
+ - Can be run with or without inline citations in the prompt (citations prefer
17
+ document titles when present, otherwise URIs)
17
18
  - Returns a plain string answer
18
19
 
19
20
  Python usage:
@@ -27,12 +27,22 @@ haiku-rag list
27
27
  From text:
28
28
  ```bash
29
29
  haiku-rag add "Your document content here"
30
+
31
+ # Attach metadata (repeat --meta for multiple entries)
32
+ haiku-rag add "Your document content here" --meta author=alice --meta topic=notes
30
33
  ```
31
34
 
32
35
  From file or URL:
33
36
  ```bash
34
37
  haiku-rag add-src /path/to/document.pdf
35
38
  haiku-rag add-src https://example.com/article.html
39
+
40
+ # Optionally set a human‑readable title stored in the DB schema
41
+ haiku-rag add-src /mnt/data/doc1.pdf --title "Q3 Financial Report"
42
+
43
+ # Optionally attach metadata (repeat --meta). Values are parsed as JSON when possible:
44
+ # numbers, booleans, null, arrays/objects; otherwise kept as strings.
45
+ haiku-rag add-src /mnt/data/doc1.pdf --meta source=manual --meta page_count=12 --meta published=true
36
46
  ```
37
47
 
38
48
  !!! note
@@ -83,6 +93,7 @@ haiku-rag ask "Who is the author of haiku.rag?" --cite
83
93
  ```
84
94
 
85
95
  The QA agent will search your documents for relevant information and provide a comprehensive answer. With `--cite`, responses include citations showing which documents were used.
96
+ When available, citations use the document title; otherwise they fall back to the URI.
86
97
 
87
98
  ## Research
88
99
 
@@ -111,9 +122,6 @@ haiku-rag serve
111
122
 
112
123
  # stdio transport
113
124
  haiku-rag serve --stdio
114
-
115
- # SSE transport
116
- haiku-rag serve --sse
117
125
  ```
118
126
 
119
127
  ## Settings
@@ -125,6 +133,26 @@ haiku-rag settings
125
133
 
126
134
  ## Maintenance
127
135
 
136
+ ### Info (Read-only)
137
+
138
+ Display database metadata without upgrading or modifying it:
139
+
140
+ ```bash
141
+ haiku-rag info [--db /path/to/your.lancedb]
142
+ ```
143
+
144
+ Shows:
145
+ - path to the database
146
+ - stored haiku.rag version (from settings)
147
+ - embeddings provider/model and vector dimension
148
+ - number of documents
149
+ - table versions per table (documents, chunks)
150
+
151
+ At the end, a separate “Versions” section lists runtime package versions:
152
+ - haiku.rag
153
+ - lancedb
154
+ - docling
155
+
128
156
  ### Vacuum (Optimize and Cleanup)
129
157
 
130
158
  Reduce disk usage by optimizing and pruning old table versions across all tables:
@@ -142,6 +170,18 @@ when you want to switch embeddings provider or model:
142
170
  haiku-rag rebuild
143
171
  ```
144
172
 
173
+ ### Download Models
174
+
175
+ Download required runtime models:
176
+
177
+ ```bash
178
+ haiku-rag download-models
179
+ ```
180
+
181
+ This command:
182
+ - Downloads Docling OCR/conversion models (no-op if already present).
183
+ - Pulls Ollama models referenced in your configuration (embeddings, QA, research, rerank).
184
+
145
185
  ## Migration
146
186
 
147
187
  ### Migrate from SQLite to LanceDB
@@ -211,6 +211,16 @@ Authentication is handled through standard cloud provider credentials (AWS CLI,
211
211
 
212
212
  **Note:** Table optimization is automatically handled by LanceDB Cloud (`db://` URIs) and is disabled for better performance. For object storage backends (S3, Azure, GCS), optimization is still performed locally.
213
213
 
214
+ #### Disable database auto-creation
215
+
216
+ By default, haiku.rag creates the local LanceDB directory and required tables on first use. To prevent accidental database creation and fail fast if a database hasn’t been set up yet, set:
217
+
218
+ ```bash
219
+ DISABLE_DB_AUTOCREATE=true
220
+ ```
221
+
222
+ When this is set, haiku.rag raises an error for local paths whose LanceDB directory does not exist, and it does not create parent directories.
223
+
214
224
  ### Document Processing
215
225
 
216
226
  ```bash
@@ -15,6 +15,7 @@
15
15
  - **Extended file format support**: Parse 40+ file formats including PDF, DOCX, HTML, Markdown, code files and more. Or add a URL!
16
16
  - **MCP server**: Exposes functionality as MCP tools
17
17
  - **CLI commands**: Access all functionality from your terminal
18
+ - Add sources from text, files, or URLs, optionally with a human‑readable title
18
19
  - **Python client**: Call `haiku.rag` from your own python applications
19
20
 
20
21
  ## Quick Start
@@ -42,6 +43,8 @@ async with HaikuRAG("database.lancedb") as client:
42
43
  Or use the CLI:
43
44
  ```bash
44
45
  haiku-rag add "Your document content"
46
+ haiku-rag add "Your document content" --meta author=alice
47
+ haiku-rag add-src /path/to/document.pdf --title "Q3 Financial Report" --meta source=manual
45
48
  haiku-rag search "query"
46
49
  haiku-rag ask "Who is the author of haiku.rag?"
47
50
  haiku-rag migrate old_database.sqlite # Migrate from SQLite
@@ -72,3 +72,13 @@ VLLM_RERANK_BASE_URL="http://localhost:8001"
72
72
  - Python 3.10+
73
73
  - Ollama (for default embeddings)
74
74
  - vLLM server (for vLLM provider)
75
+
76
+ ## Pre-download Models (Optional)
77
+
78
+ You can prefetch all required runtime models before first use:
79
+
80
+ ```bash
81
+ haiku-rag download-models
82
+ ```
83
+
84
+ This will download Docling models and pull any Ollama models referenced by your current configuration.
@@ -19,7 +19,7 @@ The MCP server exposes `haiku.rag` as MCP tools for compatible MCP clients.
19
19
 
20
20
  ## Starting MCP Server
21
21
 
22
- The MCP server starts automatically with the serve command and supports Streamable HTTP, stdio and SSE transports:
22
+ The MCP server starts automatically with the serve command and supports Streamable HTTP and stdio transports:
23
23
 
24
24
  ```bash
25
25
  # Default streamable HTTP transport
@@ -27,7 +27,4 @@ haiku-rag serve
27
27
 
28
28
  # stdio transport (for Claude Desktop)
29
29
  haiku-rag serve --stdio
30
-
31
- # SSE transport
32
- haiku-rag serve --sse
33
30
  ```
@@ -23,6 +23,7 @@ From text:
23
23
  doc = await client.create_document(
24
24
  content="Your document content here",
25
25
  uri="doc://example",
26
+ title="My Example Document", # optional human‑readable title
26
27
  metadata={"source": "manual", "topic": "example"}
27
28
  )
28
29
  ```
@@ -54,12 +55,16 @@ doc = await client.create_document(
54
55
 
55
56
  From file:
56
57
  ```python
57
- doc = await client.create_document_from_source("path/to/document.pdf")
58
+ doc = await client.create_document_from_source(
59
+ "path/to/document.pdf", title="Project Brief"
60
+ )
58
61
  ```
59
62
 
60
63
  From URL:
61
64
  ```python
62
- doc = await client.create_document_from_source("https://example.com/article.html")
65
+ doc = await client.create_document_from_source(
66
+ "https://example.com/article.html", title="Example Article"
67
+ )
63
68
  ```
64
69
 
65
70
  ### Retrieving Documents
@@ -159,6 +164,7 @@ for chunk, relevance_score in results:
159
164
  print(f"Content: {chunk.content}")
160
165
  print(f"From document: {chunk.document_id}")
161
166
  print(f"Document URI: {chunk.document_uri}")
167
+ print(f"Document Title: {chunk.document_title}") # when available
162
168
  print(f"Document metadata: {chunk.document_meta}")
163
169
  ```
164
170
 
@@ -201,7 +207,7 @@ answer = await client.ask("Who is the author of haiku.rag?", cite=True)
201
207
  print(answer)
202
208
  ```
203
209
 
204
- The QA agent will search your documents for relevant information and use the configured LLM to generate a comprehensive answer. With `cite=True`, responses include citations showing which documents were used as sources.
210
+ The QA agent will search your documents for relevant information and use the configured LLM to generate a comprehensive answer. With `cite=True`, responses include citations showing which documents were used as sources. Citations prefer the document title when present, otherwise they use the URI.
205
211
 
206
212
  The QA provider and model can be configured via environment variables (see [Configuration](configuration.md)).
207
213
 
@@ -11,7 +11,6 @@ haiku-rag serve
11
11
  Transport options:
12
12
  - Default - Streamable HTTP transport
13
13
  - `--stdio` - Standard input/output transport
14
- - `--sse` - Server-sent events transport
15
14
 
16
15
  ## File Monitoring
17
16
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  name = "haiku.rag"
4
4
  description = "Agentic Retrieval Augmented Generation (RAG) with LanceDB"
5
- version = "0.10.0"
5
+ version = "0.10.2"
6
6
  authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
7
7
  license = { text = "MIT" }
8
8
  readme = { file = "README.md", content-type = "text/markdown" }
@@ -1,4 +1,6 @@
1
1
  import asyncio
2
+ import json
3
+ from importlib.metadata import version as pkg_version
2
4
  from pathlib import Path
3
5
 
4
6
  from rich.console import Console
@@ -25,26 +27,141 @@ class HaikuRAGApp:
25
27
  self.db_path = db_path
26
28
  self.console = Console()
27
29
 
30
+ async def info(self):
31
+ """Display read-only information about the database without modifying it."""
32
+
33
+ import lancedb
34
+
35
+ # Basic: show path
36
+ self.console.print("[bold]haiku.rag database info[/bold]")
37
+ self.console.print(
38
+ f" [repr.attrib_name]path[/repr.attrib_name]: {self.db_path}"
39
+ )
40
+
41
+ if not self.db_path.exists():
42
+ self.console.print("[red]Database path does not exist.[/red]")
43
+ return
44
+
45
+ # Connect without going through Store to avoid upgrades/validation writes
46
+ try:
47
+ db = lancedb.connect(self.db_path)
48
+ table_names = set(db.table_names())
49
+ except Exception as e:
50
+ self.console.print(f"[red]Failed to open database: {e}[/red]")
51
+ return
52
+
53
+ try:
54
+ ldb_version = pkg_version("lancedb")
55
+ except Exception:
56
+ ldb_version = "unknown"
57
+ try:
58
+ hr_version = pkg_version("haiku.rag")
59
+ except Exception:
60
+ hr_version = "unknown"
61
+ try:
62
+ docling_version = pkg_version("docling")
63
+ except Exception:
64
+ docling_version = "unknown"
65
+
66
+ # Read settings (if present) to find stored haiku.rag version and embedding config
67
+ stored_version = "unknown"
68
+ embed_provider: str | None = None
69
+ embed_model: str | None = None
70
+ vector_dim: int | None = None
71
+
72
+ if "settings" in table_names:
73
+ settings_tbl = db.open_table("settings")
74
+ arrow = settings_tbl.search().where("id = 'settings'").limit(1).to_arrow()
75
+ rows = arrow.to_pylist() if arrow is not None else []
76
+ if rows:
77
+ raw = rows[0].get("settings") or "{}"
78
+ data = json.loads(raw) if isinstance(raw, str) else (raw or {})
79
+ stored_version = str(data.get("version", stored_version))
80
+ embed_provider = data.get("EMBEDDINGS_PROVIDER")
81
+ embed_model = data.get("EMBEDDINGS_MODEL")
82
+ vector_dim = (
83
+ int(data.get("EMBEDDINGS_VECTOR_DIM")) # pyright: ignore[reportArgumentType]
84
+ if data.get("EMBEDDINGS_VECTOR_DIM") is not None
85
+ else None
86
+ )
87
+
88
+ num_docs = 0
89
+ if "documents" in table_names:
90
+ docs_tbl = db.open_table("documents")
91
+ num_docs = int(docs_tbl.count_rows()) # type: ignore[attr-defined]
92
+
93
+ # Table versions per table (direct API)
94
+ doc_versions = (
95
+ len(list(db.open_table("documents").list_versions()))
96
+ if "documents" in table_names
97
+ else 0
98
+ )
99
+ chunk_versions = (
100
+ len(list(db.open_table("chunks").list_versions()))
101
+ if "chunks" in table_names
102
+ else 0
103
+ )
104
+
105
+ self.console.print(
106
+ f" [repr.attrib_name]haiku.rag version (db)[/repr.attrib_name]: {stored_version}"
107
+ )
108
+ if embed_provider or embed_model or vector_dim:
109
+ provider_part = embed_provider or "unknown"
110
+ model_part = embed_model or "unknown"
111
+ dim_part = f"{vector_dim}" if vector_dim is not None else "unknown"
112
+ self.console.print(
113
+ " [repr.attrib_name]embeddings[/repr.attrib_name]: "
114
+ f"{provider_part}/{model_part} (dim: {dim_part})"
115
+ )
116
+ else:
117
+ self.console.print(
118
+ " [repr.attrib_name]embeddings[/repr.attrib_name]: unknown"
119
+ )
120
+ self.console.print(
121
+ f" [repr.attrib_name]documents[/repr.attrib_name]: {num_docs}"
122
+ )
123
+ self.console.print(
124
+ f" [repr.attrib_name]versions (documents)[/repr.attrib_name]: {doc_versions}"
125
+ )
126
+ self.console.print(
127
+ f" [repr.attrib_name]versions (chunks)[/repr.attrib_name]: {chunk_versions}"
128
+ )
129
+ self.console.rule()
130
+ self.console.print("[bold]Versions[/bold]")
131
+ self.console.print(
132
+ f" [repr.attrib_name]haiku.rag[/repr.attrib_name]: {hr_version}"
133
+ )
134
+ self.console.print(
135
+ f" [repr.attrib_name]lancedb[/repr.attrib_name]: {ldb_version}"
136
+ )
137
+ self.console.print(
138
+ f" [repr.attrib_name]docling[/repr.attrib_name]: {docling_version}"
139
+ )
140
+
28
141
  async def list_documents(self):
29
142
  async with HaikuRAG(db_path=self.db_path) as self.client:
30
143
  documents = await self.client.list_documents()
31
144
  for doc in documents:
32
145
  self._rich_print_document(doc, truncate=True)
33
146
 
34
- async def add_document_from_text(self, text: str):
147
+ async def add_document_from_text(self, text: str, metadata: dict | None = None):
35
148
  async with HaikuRAG(db_path=self.db_path) as self.client:
36
- doc = await self.client.create_document(text)
149
+ doc = await self.client.create_document(text, metadata=metadata)
37
150
  self._rich_print_document(doc, truncate=True)
38
151
  self.console.print(
39
- f"[b]Document with id [cyan]{doc.id}[/cyan] added successfully.[/b]"
152
+ f"[bold green]Document {doc.id} added successfully.[/bold green]"
40
153
  )
41
154
 
42
- async def add_document_from_source(self, source: str):
155
+ async def add_document_from_source(
156
+ self, source: str, title: str | None = None, metadata: dict | None = None
157
+ ):
43
158
  async with HaikuRAG(db_path=self.db_path) as self.client:
44
- doc = await self.client.create_document_from_source(source)
159
+ doc = await self.client.create_document_from_source(
160
+ source, title=title, metadata=metadata
161
+ )
45
162
  self._rich_print_document(doc, truncate=True)
46
163
  self.console.print(
47
- f"[b]Document with id [cyan]{doc.id}[/cyan] added successfully.[/b]"
164
+ f"[bold green]Document {doc.id} added successfully.[/bold green]"
48
165
  )
49
166
 
50
167
  async def get_document(self, doc_id: str):
@@ -59,7 +176,9 @@ class HaikuRAGApp:
59
176
  async with HaikuRAG(db_path=self.db_path) as self.client:
60
177
  deleted = await self.client.delete_document(doc_id)
61
178
  if deleted:
62
- self.console.print(f"[b]Document {doc_id} deleted successfully.[/b]")
179
+ self.console.print(
180
+ f"[bold green]Document {doc_id} deleted successfully.[/bold green]"
181
+ )
63
182
  else:
64
183
  self.console.print(
65
184
  f"[yellow]Document with id {doc_id} not found.[/yellow]"
@@ -69,7 +188,7 @@ class HaikuRAGApp:
69
188
  async with HaikuRAG(db_path=self.db_path) as self.client:
70
189
  results = await self.client.search(query, limit=limit)
71
190
  if not results:
72
- self.console.print("[red]No results found.[/red]")
191
+ self.console.print("[yellow]No results found.[/yellow]")
73
192
  return
74
193
  for chunk, score in results:
75
194
  self._rich_print_search_result(chunk, score)
@@ -202,14 +321,16 @@ class HaikuRAGApp:
202
321
  return
203
322
 
204
323
  self.console.print(
205
- f"[b]Rebuilding database with {total_docs} documents...[/b]"
324
+ f"[bold cyan]Rebuilding database with {total_docs} documents...[/bold cyan]"
206
325
  )
207
326
  with Progress() as progress:
208
327
  task = progress.add_task("Rebuilding...", total=total_docs)
209
328
  async for _ in client.rebuild_database():
210
329
  progress.update(task, advance=1)
211
330
 
212
- self.console.print("[b]Database rebuild completed successfully.[/b]")
331
+ self.console.print(
332
+ "[bold green]Database rebuild completed successfully.[/bold green]"
333
+ )
213
334
  except Exception as e:
214
335
  self.console.print(f"[red]Error rebuilding database: {e}[/red]")
215
336
 
@@ -218,7 +339,9 @@ class HaikuRAGApp:
218
339
  try:
219
340
  async with HaikuRAG(db_path=self.db_path, skip_validation=True) as client:
220
341
  await client.vacuum()
221
- self.console.print("[b]Vacuum completed successfully.[/b]")
342
+ self.console.print(
343
+ "[bold green]Vacuum completed successfully.[/bold green]"
344
+ )
222
345
  except Exception as e:
223
346
  self.console.print(f"[red]Error during vacuum: {e}[/red]")
224
347
 
@@ -240,7 +363,9 @@ class HaikuRAGApp:
240
363
  else:
241
364
  display_value = field_value
242
365
 
243
- self.console.print(f" [cyan]{field_name}[/cyan]: {display_value}")
366
+ self.console.print(
367
+ f" [repr.attrib_name]{field_name}[/repr.attrib_name]: {display_value}"
368
+ )
244
369
 
245
370
  def _rich_print_document(self, doc: Document, truncate: bool = False):
246
371
  """Format a document for display."""
@@ -252,8 +377,16 @@ class HaikuRAGApp:
252
377
  content = Markdown(content)
253
378
  else:
254
379
  content = Markdown(doc.content)
380
+ title_part = (
381
+ f" [repr.attrib_name]title[/repr.attrib_name]: {doc.title}"
382
+ if doc.title
383
+ else ""
384
+ )
255
385
  self.console.print(
256
- f"[repr.attrib_name]id[/repr.attrib_name]: {doc.id} [repr.attrib_name]uri[/repr.attrib_name]: {doc.uri} [repr.attrib_name]meta[/repr.attrib_name]: {doc.metadata}"
386
+ f"[repr.attrib_name]id[/repr.attrib_name]: {doc.id} "
387
+ f"[repr.attrib_name]uri[/repr.attrib_name]: {doc.uri}"
388
+ + title_part
389
+ + f" [repr.attrib_name]meta[/repr.attrib_name]: {doc.metadata}"
257
390
  )
258
391
  self.console.print(
259
392
  f"[repr.attrib_name]created at[/repr.attrib_name]: {doc.created_at} [repr.attrib_name]updated at[/repr.attrib_name]: {doc.updated_at}"
@@ -272,6 +405,9 @@ class HaikuRAGApp:
272
405
  if chunk.document_uri:
273
406
  self.console.print("[repr.attrib_name]document uri[/repr.attrib_name]:")
274
407
  self.console.print(chunk.document_uri)
408
+ if chunk.document_title:
409
+ self.console.print("[repr.attrib_name]document title[/repr.attrib_name]:")
410
+ self.console.print(chunk.document_title)
275
411
  if chunk.document_meta:
276
412
  self.console.print("[repr.attrib_name]document meta[/repr.attrib_name]:")
277
413
  self.console.print(chunk.document_meta)
@@ -289,8 +425,6 @@ class HaikuRAGApp:
289
425
  try:
290
426
  if transport == "stdio":
291
427
  await server.run_stdio_async()
292
- elif transport == "sse":
293
- await server.run_sse_async()
294
428
  else:
295
429
  await server.run_http_async(transport="streamable-http")
296
430
  except KeyboardInterrupt: