haiku.rag 0.10.1__tar.gz → 0.10.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of haiku.rag might be problematic. Click here for more details.
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/PKG-INFO +3 -2
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/README.md +2 -1
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/docs/cli.md +39 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/docs/index.md +2 -1
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/docs/installation.md +10 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/pyproject.toml +1 -1
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/app.py +137 -12
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/cli.py +72 -2
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/migration.py +2 -2
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/__init__.py +1 -1
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/models/__init__.py +1 -1
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/utils.py +34 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_app.py +15 -8
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_cli.py +93 -7
- haiku_rag-0.10.2/tests/test_info.py +79 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/uv.lock +1 -1
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/.github/FUNDING.yml +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/.github/workflows/build-docs.yml +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/.github/workflows/build-publish.yml +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/.gitignore +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/.pre-commit-config.yaml +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/.python-version +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/LICENSE +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/docs/agents.md +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/docs/benchmarks.md +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/docs/configuration.md +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/docs/mcp.md +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/docs/python.md +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/docs/server.md +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/mkdocs.yml +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/__init__.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/chunker.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/client.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/config.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/embeddings/__init__.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/embeddings/base.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/embeddings/ollama.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/embeddings/openai.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/embeddings/vllm.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/embeddings/voyageai.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/logging.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/mcp.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/monitor.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/qa/__init__.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/qa/agent.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/qa/prompts.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/reader.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/reranking/__init__.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/reranking/base.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/reranking/cohere.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/reranking/mxbai.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/reranking/vllm.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/research/__init__.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/research/common.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/research/dependencies.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/research/graph.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/research/models.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/research/nodes/evaluate.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/research/nodes/plan.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/research/nodes/search.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/research/nodes/synthesize.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/research/prompts.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/research/state.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/engine.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/models/chunk.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/models/document.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/repositories/__init__.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/repositories/chunk.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/repositories/document.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/repositories/settings.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/upgrades/__init__.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/upgrades/v0_10_1.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/src/haiku/rag/store/upgrades/v0_9_3.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/__init__.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/conftest.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/generate_benchmark_db.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/llm_judge.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_chunk.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_chunker.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_client.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_document.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_embedder.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_lancedb_connection.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_monitor.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_preprocessor.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_qa.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_reader.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_rebuild.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_reranker.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_research_graph.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_research_graph_integration.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_search.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_settings.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_utils.py +0 -0
- {haiku_rag-0.10.1 → haiku_rag-0.10.2}/tests/test_versioning.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: haiku.rag
|
|
3
|
-
Version: 0.10.1
|
|
3
|
+
Version: 0.10.2
|
|
4
4
|
Summary: Agentic Retrieval Augmented Generation (RAG) with LanceDB
|
|
5
5
|
Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -66,7 +66,8 @@ uv pip install haiku.rag
|
|
|
66
66
|
|
|
67
67
|
# Add documents
|
|
68
68
|
haiku-rag add "Your content here"
|
|
69
|
-
haiku-rag add
|
|
69
|
+
haiku-rag add "Your content here" --meta author=alice --meta topic=notes
|
|
70
|
+
haiku-rag add-src document.pdf --meta source=manual
|
|
70
71
|
|
|
71
72
|
# Search
|
|
72
73
|
haiku-rag search "query"
|
|
@@ -28,7 +28,8 @@ uv pip install haiku.rag
|
|
|
28
28
|
|
|
29
29
|
# Add documents
|
|
30
30
|
haiku-rag add "Your content here"
|
|
31
|
-
haiku-rag add
|
|
31
|
+
haiku-rag add "Your content here" --meta author=alice --meta topic=notes
|
|
32
|
+
haiku-rag add-src document.pdf --meta source=manual
|
|
32
33
|
|
|
33
34
|
# Search
|
|
34
35
|
haiku-rag search "query"
|
|
@@ -27,6 +27,9 @@ haiku-rag list
|
|
|
27
27
|
From text:
|
|
28
28
|
```bash
|
|
29
29
|
haiku-rag add "Your document content here"
|
|
30
|
+
|
|
31
|
+
# Attach metadata (repeat --meta for multiple entries)
|
|
32
|
+
haiku-rag add "Your document content here" --meta author=alice --meta topic=notes
|
|
30
33
|
```
|
|
31
34
|
|
|
32
35
|
From file or URL:
|
|
@@ -36,6 +39,10 @@ haiku-rag add-src https://example.com/article.html
|
|
|
36
39
|
|
|
37
40
|
# Optionally set a human‑readable title stored in the DB schema
|
|
38
41
|
haiku-rag add-src /mnt/data/doc1.pdf --title "Q3 Financial Report"
|
|
42
|
+
|
|
43
|
+
# Optionally attach metadata (repeat --meta). Values use JSON parsing if possible:
|
|
44
|
+
# numbers, booleans, null, arrays/objects; otherwise kept as strings.
|
|
45
|
+
haiku-rag add-src /mnt/data/doc1.pdf --meta source=manual --meta page_count=12 --meta published=true
|
|
39
46
|
```
|
|
40
47
|
|
|
41
48
|
!!! note
|
|
@@ -126,6 +133,26 @@ haiku-rag settings
|
|
|
126
133
|
|
|
127
134
|
## Maintenance
|
|
128
135
|
|
|
136
|
+
### Info (Read-only)
|
|
137
|
+
|
|
138
|
+
Display database metadata without upgrading or modifying it:
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
haiku-rag info [--db /path/to/your.lancedb]
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
Shows:
|
|
145
|
+
- path to the database
|
|
146
|
+
- stored haiku.rag version (from settings)
|
|
147
|
+
- embeddings provider/model and vector dimension
|
|
148
|
+
- number of documents
|
|
149
|
+
- table versions per table (documents, chunks)
|
|
150
|
+
|
|
151
|
+
At the end, a separate “Versions” section lists runtime package versions:
|
|
152
|
+
- haiku.rag
|
|
153
|
+
- lancedb
|
|
154
|
+
- docling
|
|
155
|
+
|
|
129
156
|
### Vacuum (Optimize and Cleanup)
|
|
130
157
|
|
|
131
158
|
Reduce disk usage by optimizing and pruning old table versions across all tables:
|
|
@@ -143,6 +170,18 @@ when want to switch embeddings provider or model:
|
|
|
143
170
|
haiku-rag rebuild
|
|
144
171
|
```
|
|
145
172
|
|
|
173
|
+
### Download Models
|
|
174
|
+
|
|
175
|
+
Download required runtime models:
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
haiku-rag download-models
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
This command:
|
|
182
|
+
- Downloads Docling OCR/conversion models (no-op if already present).
|
|
183
|
+
- Pulls Ollama models referenced in your configuration (embeddings, QA, research, rerank).
|
|
184
|
+
|
|
146
185
|
## Migration
|
|
147
186
|
|
|
148
187
|
### Migrate from SQLite to LanceDB
|
|
@@ -43,7 +43,8 @@ async with HaikuRAG("database.lancedb") as client:
|
|
|
43
43
|
Or use the CLI:
|
|
44
44
|
```bash
|
|
45
45
|
haiku-rag add "Your document content"
|
|
46
|
-
haiku-rag add
|
|
46
|
+
haiku-rag add "Your document content" --meta author=alice
|
|
47
|
+
haiku-rag add-src /path/to/document.pdf --title "Q3 Financial Report" --meta source=manual
|
|
47
48
|
haiku-rag search "query"
|
|
48
49
|
haiku-rag ask "Who is the author of haiku.rag?"
|
|
49
50
|
haiku-rag migrate old_database.sqlite # Migrate from SQLite
|
|
@@ -72,3 +72,13 @@ VLLM_RERANK_BASE_URL="http://localhost:8001"
|
|
|
72
72
|
- Python 3.10+
|
|
73
73
|
- Ollama (for default embeddings)
|
|
74
74
|
- vLLM server (for vLLM provider)
|
|
75
|
+
|
|
76
|
+
## Pre-download Models (Optional)
|
|
77
|
+
|
|
78
|
+
You can prefetch all required runtime models before first use:
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
haiku-rag download-models
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
This will download Docling models and pull any Ollama models referenced by your current configuration.
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
name = "haiku.rag"
|
|
4
4
|
description = "Agentic Retrieval Augmented Generation (RAG) with LanceDB"
|
|
5
|
-
version = "0.10.1"
|
|
5
|
+
version = "0.10.2"
|
|
6
6
|
authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
|
|
7
7
|
license = { text = "MIT" }
|
|
8
8
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
+
import json
|
|
3
|
+
from importlib.metadata import version as pkg_version
|
|
2
4
|
from pathlib import Path
|
|
3
5
|
|
|
4
6
|
from rich.console import Console
|
|
@@ -25,26 +27,141 @@ class HaikuRAGApp:
|
|
|
25
27
|
self.db_path = db_path
|
|
26
28
|
self.console = Console()
|
|
27
29
|
|
|
30
|
+
async def info(self):
|
|
31
|
+
"""Display read-only information about the database without modifying it."""
|
|
32
|
+
|
|
33
|
+
import lancedb
|
|
34
|
+
|
|
35
|
+
# Basic: show path
|
|
36
|
+
self.console.print("[bold]haiku.rag database info[/bold]")
|
|
37
|
+
self.console.print(
|
|
38
|
+
f" [repr.attrib_name]path[/repr.attrib_name]: {self.db_path}"
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
if not self.db_path.exists():
|
|
42
|
+
self.console.print("[red]Database path does not exist.[/red]")
|
|
43
|
+
return
|
|
44
|
+
|
|
45
|
+
# Connect without going through Store to avoid upgrades/validation writes
|
|
46
|
+
try:
|
|
47
|
+
db = lancedb.connect(self.db_path)
|
|
48
|
+
table_names = set(db.table_names())
|
|
49
|
+
except Exception as e:
|
|
50
|
+
self.console.print(f"[red]Failed to open database: {e}[/red]")
|
|
51
|
+
return
|
|
52
|
+
|
|
53
|
+
try:
|
|
54
|
+
ldb_version = pkg_version("lancedb")
|
|
55
|
+
except Exception:
|
|
56
|
+
ldb_version = "unknown"
|
|
57
|
+
try:
|
|
58
|
+
hr_version = pkg_version("haiku.rag")
|
|
59
|
+
except Exception:
|
|
60
|
+
hr_version = "unknown"
|
|
61
|
+
try:
|
|
62
|
+
docling_version = pkg_version("docling")
|
|
63
|
+
except Exception:
|
|
64
|
+
docling_version = "unknown"
|
|
65
|
+
|
|
66
|
+
# Read settings (if present) to find stored haiku.rag version and embedding config
|
|
67
|
+
stored_version = "unknown"
|
|
68
|
+
embed_provider: str | None = None
|
|
69
|
+
embed_model: str | None = None
|
|
70
|
+
vector_dim: int | None = None
|
|
71
|
+
|
|
72
|
+
if "settings" in table_names:
|
|
73
|
+
settings_tbl = db.open_table("settings")
|
|
74
|
+
arrow = settings_tbl.search().where("id = 'settings'").limit(1).to_arrow()
|
|
75
|
+
rows = arrow.to_pylist() if arrow is not None else []
|
|
76
|
+
if rows:
|
|
77
|
+
raw = rows[0].get("settings") or "{}"
|
|
78
|
+
data = json.loads(raw) if isinstance(raw, str) else (raw or {})
|
|
79
|
+
stored_version = str(data.get("version", stored_version))
|
|
80
|
+
embed_provider = data.get("EMBEDDINGS_PROVIDER")
|
|
81
|
+
embed_model = data.get("EMBEDDINGS_MODEL")
|
|
82
|
+
vector_dim = (
|
|
83
|
+
int(data.get("EMBEDDINGS_VECTOR_DIM")) # pyright: ignore[reportArgumentType]
|
|
84
|
+
if data.get("EMBEDDINGS_VECTOR_DIM") is not None
|
|
85
|
+
else None
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
num_docs = 0
|
|
89
|
+
if "documents" in table_names:
|
|
90
|
+
docs_tbl = db.open_table("documents")
|
|
91
|
+
num_docs = int(docs_tbl.count_rows()) # type: ignore[attr-defined]
|
|
92
|
+
|
|
93
|
+
# Table versions per table (direct API)
|
|
94
|
+
doc_versions = (
|
|
95
|
+
len(list(db.open_table("documents").list_versions()))
|
|
96
|
+
if "documents" in table_names
|
|
97
|
+
else 0
|
|
98
|
+
)
|
|
99
|
+
chunk_versions = (
|
|
100
|
+
len(list(db.open_table("chunks").list_versions()))
|
|
101
|
+
if "chunks" in table_names
|
|
102
|
+
else 0
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
self.console.print(
|
|
106
|
+
f" [repr.attrib_name]haiku.rag version (db)[/repr.attrib_name]: {stored_version}"
|
|
107
|
+
)
|
|
108
|
+
if embed_provider or embed_model or vector_dim:
|
|
109
|
+
provider_part = embed_provider or "unknown"
|
|
110
|
+
model_part = embed_model or "unknown"
|
|
111
|
+
dim_part = f"{vector_dim}" if vector_dim is not None else "unknown"
|
|
112
|
+
self.console.print(
|
|
113
|
+
" [repr.attrib_name]embeddings[/repr.attrib_name]: "
|
|
114
|
+
f"{provider_part}/{model_part} (dim: {dim_part})"
|
|
115
|
+
)
|
|
116
|
+
else:
|
|
117
|
+
self.console.print(
|
|
118
|
+
" [repr.attrib_name]embeddings[/repr.attrib_name]: unknown"
|
|
119
|
+
)
|
|
120
|
+
self.console.print(
|
|
121
|
+
f" [repr.attrib_name]documents[/repr.attrib_name]: {num_docs}"
|
|
122
|
+
)
|
|
123
|
+
self.console.print(
|
|
124
|
+
f" [repr.attrib_name]versions (documents)[/repr.attrib_name]: {doc_versions}"
|
|
125
|
+
)
|
|
126
|
+
self.console.print(
|
|
127
|
+
f" [repr.attrib_name]versions (chunks)[/repr.attrib_name]: {chunk_versions}"
|
|
128
|
+
)
|
|
129
|
+
self.console.rule()
|
|
130
|
+
self.console.print("[bold]Versions[/bold]")
|
|
131
|
+
self.console.print(
|
|
132
|
+
f" [repr.attrib_name]haiku.rag[/repr.attrib_name]: {hr_version}"
|
|
133
|
+
)
|
|
134
|
+
self.console.print(
|
|
135
|
+
f" [repr.attrib_name]lancedb[/repr.attrib_name]: {ldb_version}"
|
|
136
|
+
)
|
|
137
|
+
self.console.print(
|
|
138
|
+
f" [repr.attrib_name]docling[/repr.attrib_name]: {docling_version}"
|
|
139
|
+
)
|
|
140
|
+
|
|
28
141
|
async def list_documents(self):
|
|
29
142
|
async with HaikuRAG(db_path=self.db_path) as self.client:
|
|
30
143
|
documents = await self.client.list_documents()
|
|
31
144
|
for doc in documents:
|
|
32
145
|
self._rich_print_document(doc, truncate=True)
|
|
33
146
|
|
|
34
|
-
async def add_document_from_text(self, text: str):
|
|
147
|
+
async def add_document_from_text(self, text: str, metadata: dict | None = None):
|
|
35
148
|
async with HaikuRAG(db_path=self.db_path) as self.client:
|
|
36
|
-
doc = await self.client.create_document(text)
|
|
149
|
+
doc = await self.client.create_document(text, metadata=metadata)
|
|
37
150
|
self._rich_print_document(doc, truncate=True)
|
|
38
151
|
self.console.print(
|
|
39
|
-
f"[
|
|
152
|
+
f"[bold green]Document {doc.id} added successfully.[/bold green]"
|
|
40
153
|
)
|
|
41
154
|
|
|
42
|
-
async def add_document_from_source(
|
|
155
|
+
async def add_document_from_source(
|
|
156
|
+
self, source: str, title: str | None = None, metadata: dict | None = None
|
|
157
|
+
):
|
|
43
158
|
async with HaikuRAG(db_path=self.db_path) as self.client:
|
|
44
|
-
doc = await self.client.create_document_from_source(
|
|
159
|
+
doc = await self.client.create_document_from_source(
|
|
160
|
+
source, title=title, metadata=metadata
|
|
161
|
+
)
|
|
45
162
|
self._rich_print_document(doc, truncate=True)
|
|
46
163
|
self.console.print(
|
|
47
|
-
f"[
|
|
164
|
+
f"[bold green]Document {doc.id} added successfully.[/bold green]"
|
|
48
165
|
)
|
|
49
166
|
|
|
50
167
|
async def get_document(self, doc_id: str):
|
|
@@ -59,7 +176,9 @@ class HaikuRAGApp:
|
|
|
59
176
|
async with HaikuRAG(db_path=self.db_path) as self.client:
|
|
60
177
|
deleted = await self.client.delete_document(doc_id)
|
|
61
178
|
if deleted:
|
|
62
|
-
self.console.print(
|
|
179
|
+
self.console.print(
|
|
180
|
+
f"[bold green]Document {doc_id} deleted successfully.[/bold green]"
|
|
181
|
+
)
|
|
63
182
|
else:
|
|
64
183
|
self.console.print(
|
|
65
184
|
f"[yellow]Document with id {doc_id} not found.[/yellow]"
|
|
@@ -69,7 +188,7 @@ class HaikuRAGApp:
|
|
|
69
188
|
async with HaikuRAG(db_path=self.db_path) as self.client:
|
|
70
189
|
results = await self.client.search(query, limit=limit)
|
|
71
190
|
if not results:
|
|
72
|
-
self.console.print("[
|
|
191
|
+
self.console.print("[yellow]No results found.[/yellow]")
|
|
73
192
|
return
|
|
74
193
|
for chunk, score in results:
|
|
75
194
|
self._rich_print_search_result(chunk, score)
|
|
@@ -202,14 +321,16 @@ class HaikuRAGApp:
|
|
|
202
321
|
return
|
|
203
322
|
|
|
204
323
|
self.console.print(
|
|
205
|
-
f"[
|
|
324
|
+
f"[bold cyan]Rebuilding database with {total_docs} documents...[/bold cyan]"
|
|
206
325
|
)
|
|
207
326
|
with Progress() as progress:
|
|
208
327
|
task = progress.add_task("Rebuilding...", total=total_docs)
|
|
209
328
|
async for _ in client.rebuild_database():
|
|
210
329
|
progress.update(task, advance=1)
|
|
211
330
|
|
|
212
|
-
self.console.print(
|
|
331
|
+
self.console.print(
|
|
332
|
+
"[bold green]Database rebuild completed successfully.[/bold green]"
|
|
333
|
+
)
|
|
213
334
|
except Exception as e:
|
|
214
335
|
self.console.print(f"[red]Error rebuilding database: {e}[/red]")
|
|
215
336
|
|
|
@@ -218,7 +339,9 @@ class HaikuRAGApp:
|
|
|
218
339
|
try:
|
|
219
340
|
async with HaikuRAG(db_path=self.db_path, skip_validation=True) as client:
|
|
220
341
|
await client.vacuum()
|
|
221
|
-
self.console.print(
|
|
342
|
+
self.console.print(
|
|
343
|
+
"[bold green]Vacuum completed successfully.[/bold green]"
|
|
344
|
+
)
|
|
222
345
|
except Exception as e:
|
|
223
346
|
self.console.print(f"[red]Error during vacuum: {e}[/red]")
|
|
224
347
|
|
|
@@ -240,7 +363,9 @@ class HaikuRAGApp:
|
|
|
240
363
|
else:
|
|
241
364
|
display_value = field_value
|
|
242
365
|
|
|
243
|
-
self.console.print(
|
|
366
|
+
self.console.print(
|
|
367
|
+
f" [repr.attrib_name]{field_name}[/repr.attrib_name]: {display_value}"
|
|
368
|
+
)
|
|
244
369
|
|
|
245
370
|
def _rich_print_document(self, doc: Document, truncate: bool = False):
|
|
246
371
|
"""Format a document for display."""
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
+
import json
|
|
2
3
|
import warnings
|
|
3
4
|
from importlib.metadata import version
|
|
4
5
|
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
5
7
|
|
|
6
8
|
import typer
|
|
7
9
|
|
|
@@ -137,11 +139,41 @@ def list_documents(
|
|
|
137
139
|
asyncio.run(app.list_documents())
|
|
138
140
|
|
|
139
141
|
|
|
142
|
+
def _parse_meta_options(meta: list[str] | None) -> dict[str, Any]:
|
|
143
|
+
"""Parse repeated --meta KEY=VALUE options into a dictionary.
|
|
144
|
+
|
|
145
|
+
Raises a Typer error if any entry is malformed.
|
|
146
|
+
"""
|
|
147
|
+
result: dict[str, Any] = {}
|
|
148
|
+
if not meta:
|
|
149
|
+
return result
|
|
150
|
+
for item in meta:
|
|
151
|
+
if "=" not in item:
|
|
152
|
+
raise typer.BadParameter("--meta must be in KEY=VALUE format")
|
|
153
|
+
key, value = item.split("=", 1)
|
|
154
|
+
if not key:
|
|
155
|
+
raise typer.BadParameter("--meta key cannot be empty")
|
|
156
|
+
# Best-effort JSON coercion: numbers, booleans, null, arrays/objects
|
|
157
|
+
try:
|
|
158
|
+
parsed = json.loads(value)
|
|
159
|
+
result[key] = parsed
|
|
160
|
+
except Exception:
|
|
161
|
+
# Leave as string if not valid JSON literal
|
|
162
|
+
result[key] = value
|
|
163
|
+
return result
|
|
164
|
+
|
|
165
|
+
|
|
140
166
|
@cli.command("add", help="Add a document from text input")
|
|
141
167
|
def add_document_text(
|
|
142
168
|
text: str = typer.Argument(
|
|
143
169
|
help="The text content of the document to add",
|
|
144
170
|
),
|
|
171
|
+
meta: list[str] | None = typer.Option(
|
|
172
|
+
None,
|
|
173
|
+
"--meta",
|
|
174
|
+
help="Metadata entries as KEY=VALUE (repeatable)",
|
|
175
|
+
metavar="KEY=VALUE",
|
|
176
|
+
),
|
|
145
177
|
db: Path = typer.Option(
|
|
146
178
|
Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
|
|
147
179
|
"--db",
|
|
@@ -151,7 +183,8 @@ def add_document_text(
|
|
|
151
183
|
from haiku.rag.app import HaikuRAGApp
|
|
152
184
|
|
|
153
185
|
app = HaikuRAGApp(db_path=db)
|
|
154
|
-
|
|
186
|
+
metadata = _parse_meta_options(meta)
|
|
187
|
+
asyncio.run(app.add_document_from_text(text=text, metadata=metadata or None))
|
|
155
188
|
|
|
156
189
|
|
|
157
190
|
@cli.command("add-src", help="Add a document from a file path or URL")
|
|
@@ -165,6 +198,12 @@ def add_document_src(
|
|
|
165
198
|
"--title",
|
|
166
199
|
help="Optional human-readable title to store with the document",
|
|
167
200
|
),
|
|
201
|
+
meta: list[str] | None = typer.Option(
|
|
202
|
+
None,
|
|
203
|
+
"--meta",
|
|
204
|
+
help="Metadata entries as KEY=VALUE (repeatable)",
|
|
205
|
+
metavar="KEY=VALUE",
|
|
206
|
+
),
|
|
168
207
|
db: Path = typer.Option(
|
|
169
208
|
Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
|
|
170
209
|
"--db",
|
|
@@ -174,7 +213,12 @@ def add_document_src(
|
|
|
174
213
|
from haiku.rag.app import HaikuRAGApp
|
|
175
214
|
|
|
176
215
|
app = HaikuRAGApp(db_path=db)
|
|
177
|
-
|
|
216
|
+
metadata = _parse_meta_options(meta)
|
|
217
|
+
asyncio.run(
|
|
218
|
+
app.add_document_from_source(
|
|
219
|
+
source=source, title=title, metadata=metadata or None
|
|
220
|
+
)
|
|
221
|
+
)
|
|
178
222
|
|
|
179
223
|
|
|
180
224
|
@cli.command("get", help="Get and display a document by its ID")
|
|
@@ -347,6 +391,32 @@ def vacuum(
|
|
|
347
391
|
asyncio.run(app.vacuum())
|
|
348
392
|
|
|
349
393
|
|
|
394
|
+
@cli.command("info", help="Show read-only database info (no upgrades or writes)")
|
|
395
|
+
def info(
|
|
396
|
+
db: Path = typer.Option(
|
|
397
|
+
Config.DEFAULT_DATA_DIR / "haiku.rag.lancedb",
|
|
398
|
+
"--db",
|
|
399
|
+
help="Path to the LanceDB database file",
|
|
400
|
+
),
|
|
401
|
+
):
|
|
402
|
+
from haiku.rag.app import HaikuRAGApp
|
|
403
|
+
|
|
404
|
+
app = HaikuRAGApp(db_path=db)
|
|
405
|
+
asyncio.run(app.info())
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
@cli.command("download-models", help="Download Docling and Ollama models per config")
|
|
409
|
+
def download_models_cmd():
|
|
410
|
+
from haiku.rag.utils import prefetch_models
|
|
411
|
+
|
|
412
|
+
try:
|
|
413
|
+
prefetch_models()
|
|
414
|
+
typer.echo("Models downloaded successfully.")
|
|
415
|
+
except Exception as e:
|
|
416
|
+
typer.echo(f"Error downloading models: {e}")
|
|
417
|
+
raise typer.Exit(1)
|
|
418
|
+
|
|
419
|
+
|
|
350
420
|
@cli.command(
|
|
351
421
|
"serve", help="Start the haiku.rag MCP server (by default in streamable HTTP mode)"
|
|
352
422
|
)
|
|
@@ -51,7 +51,7 @@ class SQLiteToLanceDBMigrator:
|
|
|
51
51
|
|
|
52
52
|
sqlite_conn.enable_load_extension(True)
|
|
53
53
|
sqlite_vec.load(sqlite_conn)
|
|
54
|
-
self.console.print("[
|
|
54
|
+
self.console.print("[cyan]Loaded sqlite-vec extension[/cyan]")
|
|
55
55
|
except Exception as e:
|
|
56
56
|
self.console.print(
|
|
57
57
|
f"[yellow]Warning: Could not load sqlite-vec extension: {e}[/yellow]"
|
|
@@ -92,7 +92,7 @@ class SQLiteToLanceDBMigrator:
|
|
|
92
92
|
sqlite_conn.close()
|
|
93
93
|
|
|
94
94
|
# Optimize and cleanup using centralized vacuum
|
|
95
|
-
self.console.print("[
|
|
95
|
+
self.console.print("[cyan]Optimizing LanceDB...[/cyan]")
|
|
96
96
|
try:
|
|
97
97
|
lance_store.vacuum()
|
|
98
98
|
self.console.print("[green]✅ Optimization completed[/green]")
|
|
@@ -163,3 +163,37 @@ def load_callable(path: str):
|
|
|
163
163
|
f"Attribute '{func_name}' in module '{module_part}' is not callable"
|
|
164
164
|
)
|
|
165
165
|
return func
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def prefetch_models():
|
|
169
|
+
"""Prefetch runtime models (Docling + Ollama as configured)."""
|
|
170
|
+
import httpx
|
|
171
|
+
from docling.utils.model_downloader import download_models
|
|
172
|
+
|
|
173
|
+
from haiku.rag.config import Config
|
|
174
|
+
|
|
175
|
+
download_models()
|
|
176
|
+
|
|
177
|
+
# Collect Ollama models from config
|
|
178
|
+
required_models: set[str] = set()
|
|
179
|
+
if Config.EMBEDDINGS_PROVIDER == "ollama":
|
|
180
|
+
required_models.add(Config.EMBEDDINGS_MODEL)
|
|
181
|
+
if Config.QA_PROVIDER == "ollama":
|
|
182
|
+
required_models.add(Config.QA_MODEL)
|
|
183
|
+
if Config.RESEARCH_PROVIDER == "ollama":
|
|
184
|
+
required_models.add(Config.RESEARCH_MODEL)
|
|
185
|
+
if Config.RERANK_PROVIDER == "ollama":
|
|
186
|
+
required_models.add(Config.RERANK_MODEL)
|
|
187
|
+
|
|
188
|
+
if not required_models:
|
|
189
|
+
return
|
|
190
|
+
|
|
191
|
+
base_url = Config.OLLAMA_BASE_URL
|
|
192
|
+
|
|
193
|
+
with httpx.Client(timeout=None) as client:
|
|
194
|
+
for model in sorted(required_models):
|
|
195
|
+
with client.stream(
|
|
196
|
+
"POST", f"{base_url}/api/pull", json={"model": model}
|
|
197
|
+
) as r:
|
|
198
|
+
for _ in r.iter_lines():
|
|
199
|
+
pass
|
|
@@ -54,10 +54,13 @@ async def test_add_document_from_text(app: HaikuRAGApp, monkeypatch):
|
|
|
54
54
|
with patch("haiku.rag.app.HaikuRAG", return_value=mock_client):
|
|
55
55
|
await app.add_document_from_text("test document")
|
|
56
56
|
|
|
57
|
-
mock_client.create_document.
|
|
57
|
+
mock_client.create_document.assert_called_once()
|
|
58
|
+
args, kwargs = mock_client.create_document.call_args
|
|
59
|
+
assert args[0] == "test document"
|
|
60
|
+
assert kwargs.get("metadata") is None
|
|
58
61
|
mock_rich_print.assert_called_once_with(mock_doc, truncate=True)
|
|
59
62
|
mock_print.assert_called_once_with(
|
|
60
|
-
"[
|
|
63
|
+
"[bold green]Document 1 added successfully.[/bold green]"
|
|
61
64
|
)
|
|
62
65
|
|
|
63
66
|
|
|
@@ -78,12 +81,14 @@ async def test_add_document_from_source(app: HaikuRAGApp, monkeypatch):
|
|
|
78
81
|
with patch("haiku.rag.app.HaikuRAG", return_value=mock_client):
|
|
79
82
|
await app.add_document_from_source(file_path)
|
|
80
83
|
|
|
81
|
-
mock_client.create_document_from_source.
|
|
82
|
-
|
|
83
|
-
|
|
84
|
+
mock_client.create_document_from_source.assert_called_once()
|
|
85
|
+
args, kwargs = mock_client.create_document_from_source.call_args
|
|
86
|
+
assert args[0] == file_path
|
|
87
|
+
assert kwargs.get("title") is None
|
|
88
|
+
assert kwargs.get("metadata") is None
|
|
84
89
|
mock_rich_print.assert_called_once_with(mock_doc, truncate=True)
|
|
85
90
|
mock_print.assert_called_once_with(
|
|
86
|
-
"[
|
|
91
|
+
"[bold green]Document 1 added successfully.[/bold green]"
|
|
87
92
|
)
|
|
88
93
|
|
|
89
94
|
|
|
@@ -135,7 +140,9 @@ async def test_delete_document(app: HaikuRAGApp, monkeypatch):
|
|
|
135
140
|
await app.delete_document("1")
|
|
136
141
|
|
|
137
142
|
mock_client.delete_document.assert_called_once_with("1")
|
|
138
|
-
mock_print.assert_called_once_with(
|
|
143
|
+
mock_print.assert_called_once_with(
|
|
144
|
+
"[bold green]Document 1 deleted successfully.[/bold green]"
|
|
145
|
+
)
|
|
139
146
|
|
|
140
147
|
|
|
141
148
|
@pytest.mark.asyncio
|
|
@@ -170,7 +177,7 @@ async def test_search_no_results(app: HaikuRAGApp, monkeypatch):
|
|
|
170
177
|
await app.search("query")
|
|
171
178
|
|
|
172
179
|
mock_client.search.assert_called_once_with("query", limit=5)
|
|
173
|
-
mock_print.assert_called_once_with("[
|
|
180
|
+
mock_print.assert_called_once_with("[yellow]No results found.[/yellow]")
|
|
174
181
|
|
|
175
182
|
|
|
176
183
|
@pytest.mark.asyncio
|
|
@@ -28,9 +28,10 @@ def test_add_document_text():
|
|
|
28
28
|
result = runner.invoke(cli, ["add", "test document"])
|
|
29
29
|
|
|
30
30
|
assert result.exit_code == 0
|
|
31
|
-
mock_app_instance.add_document_from_text.
|
|
32
|
-
|
|
33
|
-
)
|
|
31
|
+
mock_app_instance.add_document_from_text.assert_called_once()
|
|
32
|
+
_, kwargs = mock_app_instance.add_document_from_text.call_args
|
|
33
|
+
assert kwargs.get("text") == "test document"
|
|
34
|
+
assert kwargs.get("metadata") is None
|
|
34
35
|
|
|
35
36
|
|
|
36
37
|
def test_add_document_src():
|
|
@@ -57,8 +58,83 @@ def test_add_document_src_with_title():
|
|
|
57
58
|
mock_app_instance.add_document_from_source.assert_called_once()
|
|
58
59
|
# Verify title is forwarded (inspect call kwargs)
|
|
59
60
|
_, kwargs = mock_app_instance.add_document_from_source.call_args
|
|
60
|
-
|
|
61
|
+
assert kwargs.get("title") == "Nice Name"
|
|
62
|
+
assert kwargs.get("source") == "test.txt"
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def test_add_document_text_with_meta():
|
|
66
|
+
with patch("haiku.rag.app.HaikuRAGApp") as mock_app:
|
|
67
|
+
mock_app_instance = MagicMock()
|
|
68
|
+
mock_app_instance.add_document_from_text = AsyncMock()
|
|
69
|
+
mock_app.return_value = mock_app_instance
|
|
70
|
+
|
|
71
|
+
result = runner.invoke(
|
|
72
|
+
cli,
|
|
73
|
+
[
|
|
74
|
+
"add",
|
|
75
|
+
"some text",
|
|
76
|
+
"--meta",
|
|
77
|
+
"author=alice",
|
|
78
|
+
"--meta",
|
|
79
|
+
"topic=notes",
|
|
80
|
+
],
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
assert result.exit_code == 0
|
|
84
|
+
mock_app_instance.add_document_from_text.assert_called_once()
|
|
85
|
+
_, kwargs = mock_app_instance.add_document_from_text.call_args
|
|
86
|
+
assert kwargs.get("text") == "some text"
|
|
87
|
+
assert kwargs.get("metadata") == {"author": "alice", "topic": "notes"}
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def test_add_document_src_with_meta():
|
|
91
|
+
with patch("haiku.rag.app.HaikuRAGApp") as mock_app:
|
|
92
|
+
mock_app_instance = MagicMock()
|
|
93
|
+
mock_app_instance.add_document_from_source = AsyncMock()
|
|
94
|
+
mock_app.return_value = mock_app_instance
|
|
95
|
+
|
|
96
|
+
result = runner.invoke(
|
|
97
|
+
cli,
|
|
98
|
+
[
|
|
99
|
+
"add-src",
|
|
100
|
+
"test.txt",
|
|
101
|
+
"--meta",
|
|
102
|
+
"source=manual",
|
|
103
|
+
"--meta",
|
|
104
|
+
"lang=en",
|
|
105
|
+
],
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
assert result.exit_code == 0
|
|
109
|
+
mock_app_instance.add_document_from_source.assert_called_once()
|
|
110
|
+
_, kwargs = mock_app_instance.add_document_from_source.call_args
|
|
61
111
|
assert kwargs.get("source") == "test.txt"
|
|
112
|
+
assert kwargs.get("metadata") == {"source": "manual", "lang": "en"}
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def test_add_document_text_with_numeric_meta():
|
|
116
|
+
with patch("haiku.rag.app.HaikuRAGApp") as mock_app:
|
|
117
|
+
mock_app_instance = MagicMock()
|
|
118
|
+
mock_app_instance.add_document_from_text = AsyncMock()
|
|
119
|
+
mock_app.return_value = mock_app_instance
|
|
120
|
+
|
|
121
|
+
result = runner.invoke(
|
|
122
|
+
cli,
|
|
123
|
+
[
|
|
124
|
+
"add",
|
|
125
|
+
"some text",
|
|
126
|
+
"--meta",
|
|
127
|
+
"version=3",
|
|
128
|
+
"--meta",
|
|
129
|
+
"published=true",
|
|
130
|
+
],
|
|
131
|
+
)
|
|
132
|
+
|
|
133
|
+
assert result.exit_code == 0
|
|
134
|
+
mock_app_instance.add_document_from_text.assert_called_once()
|
|
135
|
+
_, kwargs = mock_app_instance.add_document_from_text.call_args
|
|
136
|
+
assert kwargs.get("text") == "some text"
|
|
137
|
+
assert kwargs.get("metadata") == {"version": 3, "published": True}
|
|
62
138
|
|
|
63
139
|
|
|
64
140
|
def test_get_document():
|
|
@@ -144,6 +220,16 @@ def test_ask_with_cite():
|
|
|
144
220
|
result = runner.invoke(cli, ["ask", "What is Python?", "--cite"])
|
|
145
221
|
|
|
146
222
|
assert result.exit_code == 0
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
223
|
+
mock_app_instance.ask.assert_called_once_with(question="What is Python?", cite=True)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def test_info():
|
|
227
|
+
with patch("haiku.rag.app.HaikuRAGApp") as mock_app:
|
|
228
|
+
mock_app_instance = MagicMock()
|
|
229
|
+
mock_app_instance.info = AsyncMock()
|
|
230
|
+
mock_app.return_value = mock_app_instance
|
|
231
|
+
|
|
232
|
+
result = runner.invoke(cli, ["info"])
|
|
233
|
+
|
|
234
|
+
assert result.exit_code == 0
|
|
235
|
+
mock_app_instance.info.assert_called_once()
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from haiku.rag.app import HaikuRAGApp
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@pytest.mark.asyncio
|
|
9
|
+
async def test_app_info_outputs_and_read_only(temp_db_path, capsys):
|
|
10
|
+
# Build a minimal LanceDB with settings, documents, and chunks without using Store
|
|
11
|
+
import lancedb
|
|
12
|
+
from lancedb.pydantic import LanceModel, Vector
|
|
13
|
+
from pydantic import Field
|
|
14
|
+
|
|
15
|
+
db = lancedb.connect(temp_db_path)
|
|
16
|
+
|
|
17
|
+
class SettingsRecord(LanceModel):
|
|
18
|
+
id: str = Field(default="settings")
|
|
19
|
+
settings: str = Field(default="{}")
|
|
20
|
+
|
|
21
|
+
class DocumentRecord(LanceModel):
|
|
22
|
+
id: str
|
|
23
|
+
content: str
|
|
24
|
+
|
|
25
|
+
class ChunkRecord(LanceModel):
|
|
26
|
+
id: str
|
|
27
|
+
document_id: str
|
|
28
|
+
content: str
|
|
29
|
+
vector: Vector(3) # type: ignore
|
|
30
|
+
|
|
31
|
+
settings_tbl = db.create_table("settings", schema=SettingsRecord)
|
|
32
|
+
docs_tbl = db.create_table("documents", schema=DocumentRecord)
|
|
33
|
+
chunks_tbl = db.create_table("chunks", schema=ChunkRecord)
|
|
34
|
+
|
|
35
|
+
# Insert one of each
|
|
36
|
+
settings_tbl.add(
|
|
37
|
+
[
|
|
38
|
+
SettingsRecord(
|
|
39
|
+
id="settings",
|
|
40
|
+
settings=json.dumps(
|
|
41
|
+
{
|
|
42
|
+
"version": "1.2.3",
|
|
43
|
+
"EMBEDDINGS_PROVIDER": "openai",
|
|
44
|
+
"EMBEDDINGS_MODEL": "text-embedding-3-small",
|
|
45
|
+
"EMBEDDINGS_VECTOR_DIM": 3,
|
|
46
|
+
}
|
|
47
|
+
),
|
|
48
|
+
)
|
|
49
|
+
]
|
|
50
|
+
)
|
|
51
|
+
docs_tbl.add([DocumentRecord(id="doc-1", content="hello")])
|
|
52
|
+
chunks_tbl.add(
|
|
53
|
+
[ChunkRecord(id="c1", document_id="doc-1", content="c", vector=[0.1, 0.2, 0.3])]
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
# Capture versions before
|
|
57
|
+
before_versions = {
|
|
58
|
+
"settings": int(settings_tbl.version),
|
|
59
|
+
"documents": int(docs_tbl.version),
|
|
60
|
+
"chunks": int(chunks_tbl.version),
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
app = HaikuRAGApp(db_path=temp_db_path)
|
|
64
|
+
await app.info()
|
|
65
|
+
|
|
66
|
+
out = capsys.readouterr().out
|
|
67
|
+
# Validate expected content substrings
|
|
68
|
+
assert f"path: \n{temp_db_path}" in out
|
|
69
|
+
assert "haiku.rag version (db): 1.2.3" in out
|
|
70
|
+
assert "embeddings: openai/text-embedding-3-small (dim: 3)" in out
|
|
71
|
+
assert "lancedb:" in out
|
|
72
|
+
assert "documents: 1" in out
|
|
73
|
+
|
|
74
|
+
# Verify no versions changed (read-only)
|
|
75
|
+
# Re-open to ensure fresh view
|
|
76
|
+
db2 = lancedb.connect(temp_db_path)
|
|
77
|
+
assert int(db2.open_table("settings").version) == before_versions["settings"]
|
|
78
|
+
assert int(db2.open_table("documents").version) == before_versions["documents"]
|
|
79
|
+
assert int(db2.open_table("chunks").version) == before_versions["chunks"]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|