haiku.rag 0.6.0__tar.gz → 0.7.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of haiku.rag might be problematic. Click here for more details.

Files changed (86) hide show
  1. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/.pre-commit-config.yaml +9 -9
  2. haiku_rag-0.7.1/.python-version +1 -0
  3. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/PKG-INFO +21 -16
  4. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/README.md +11 -6
  5. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/docs/benchmarks.md +13 -10
  6. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/docs/cli.md +18 -1
  7. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/docs/configuration.md +32 -20
  8. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/docs/index.md +8 -5
  9. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/docs/installation.md +0 -1
  10. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/docs/mcp.md +2 -2
  11. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/docs/python.md +21 -7
  12. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/docs/server.md +1 -1
  13. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/mkdocs.yml +1 -1
  14. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/pyproject.toml +11 -11
  15. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/app.py +4 -4
  16. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/cli.py +38 -27
  17. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/client.py +19 -23
  18. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/config.py +6 -2
  19. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/logging.py +5 -0
  20. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/mcp.py +12 -9
  21. haiku_rag-0.7.1/src/haiku/rag/migration.py +316 -0
  22. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/qa/agent.py +2 -2
  23. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/reranking/__init__.py +0 -6
  24. haiku_rag-0.7.1/src/haiku/rag/store/engine.py +203 -0
  25. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/store/models/chunk.py +2 -2
  26. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/store/models/document.py +1 -1
  27. haiku_rag-0.7.1/src/haiku/rag/store/repositories/__init__.py +9 -0
  28. haiku_rag-0.7.1/src/haiku/rag/store/repositories/chunk.py +381 -0
  29. haiku_rag-0.7.1/src/haiku/rag/store/repositories/document.py +214 -0
  30. haiku_rag-0.7.1/src/haiku/rag/store/repositories/settings.py +143 -0
  31. haiku_rag-0.7.1/src/haiku/rag/store/upgrades/__init__.py +1 -0
  32. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/utils.py +39 -31
  33. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/conftest.py +8 -0
  34. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/generate_benchmark_db.py +31 -16
  35. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_app.py +15 -16
  36. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_chunk.py +24 -41
  37. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_cli.py +3 -3
  38. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_client.py +48 -46
  39. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_document.py +24 -43
  40. haiku_rag-0.7.1/tests/test_lancedb_connection.py +86 -0
  41. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_monitor.py +8 -6
  42. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_qa.py +6 -6
  43. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_rebuild.py +2 -2
  44. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_reranker.py +8 -17
  45. haiku_rag-0.7.1/tests/test_search.py +176 -0
  46. haiku_rag-0.7.1/tests/test_settings.py +84 -0
  47. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_utils.py +1 -19
  48. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/uv.lock +166 -541
  49. haiku_rag-0.6.0/.python-version +0 -1
  50. haiku_rag-0.6.0/src/haiku/rag/reranking/ollama.py +0 -81
  51. haiku_rag-0.6.0/src/haiku/rag/store/engine.py +0 -171
  52. haiku_rag-0.6.0/src/haiku/rag/store/repositories/__init__.py +0 -5
  53. haiku_rag-0.6.0/src/haiku/rag/store/repositories/base.py +0 -40
  54. haiku_rag-0.6.0/src/haiku/rag/store/repositories/chunk.py +0 -516
  55. haiku_rag-0.6.0/src/haiku/rag/store/repositories/document.py +0 -248
  56. haiku_rag-0.6.0/src/haiku/rag/store/repositories/settings.py +0 -77
  57. haiku_rag-0.6.0/src/haiku/rag/store/upgrades/__init__.py +0 -3
  58. haiku_rag-0.6.0/src/haiku/rag/store/upgrades/v0_3_4.py +0 -26
  59. haiku_rag-0.6.0/tests/test_search.py +0 -92
  60. haiku_rag-0.6.0/tests/test_settings.py +0 -80
  61. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/.github/FUNDING.yml +0 -0
  62. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/.github/workflows/build-docs.yml +0 -0
  63. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/.github/workflows/build-publish.yml +0 -0
  64. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/.gitignore +0 -0
  65. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/LICENSE +0 -0
  66. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/__init__.py +0 -0
  67. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/chunker.py +0 -0
  68. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/embeddings/__init__.py +0 -0
  69. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/embeddings/base.py +0 -0
  70. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/embeddings/ollama.py +0 -0
  71. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/embeddings/openai.py +0 -0
  72. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/embeddings/voyageai.py +0 -0
  73. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/monitor.py +0 -0
  74. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/qa/__init__.py +0 -0
  75. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/qa/prompts.py +0 -0
  76. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/reader.py +0 -0
  77. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/reranking/base.py +0 -0
  78. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/reranking/cohere.py +0 -0
  79. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/reranking/mxbai.py +0 -0
  80. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/store/__init__.py +0 -0
  81. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/src/haiku/rag/store/models/__init__.py +0 -0
  82. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/__init__.py +0 -0
  83. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/llm_judge.py +0 -0
  84. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_chunker.py +0 -0
  85. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_embedder.py +0 -0
  86. {haiku_rag-0.6.0 → haiku_rag-0.7.1}/tests/test_reader.py +0 -0
@@ -21,12 +21,12 @@ repos:
21
21
  hooks:
22
22
  - id: pyright
23
23
 
24
- - repo: https://github.com/RodrigoGonzalez/check-mkdocs
25
- rev: v1.2.0
26
- hooks:
27
- - id: check-mkdocs
28
- name: check-mkdocs
29
- args: ["--config", "mkdocs.yml"] # Optional, mkdocs.yml is the default
30
- # If you have additional plugins or libraries that are not included in
31
- # check-mkdocs, add them here
32
- additional_dependencies: ["mkdocs-material"]
24
+ # - repo: https://github.com/RodrigoGonzalez/check-mkdocs
25
+ # rev: v1.2.0
26
+ # hooks:
27
+ # - id: check-mkdocs
28
+ # name: check-mkdocs
29
+ # args: ["--config", "mkdocs.yml"] # Optional, mkdocs.yml is the default
30
+ # # If you have additional plugins or libraries that are not included in
31
+ # # check-mkdocs, add them here
32
+ # additional_dependencies: ["mkdocs-material"]
@@ -0,0 +1 @@
1
+ 3.12
@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: haiku.rag
3
- Version: 0.6.0
4
- Summary: Retrieval Augmented Generation (RAG) with SQLite
3
+ Version: 0.7.1
4
+ Summary: Retrieval Augmented Generation (RAG) with LanceDB
5
5
  Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
6
6
  License: MIT
7
7
  License-File: LICENSE
8
- Keywords: RAG,mcp,ml,sqlite,sqlite-vec
8
+ Keywords: RAG,lancedb,mcp,ml,vector-database
9
9
  Classifier: Development Status :: 4 - Beta
10
10
  Classifier: Environment :: Console
11
11
  Classifier: Intended Audience :: Developers
@@ -17,18 +17,18 @@ Classifier: Programming Language :: Python :: 3.10
17
17
  Classifier: Programming Language :: Python :: 3.11
18
18
  Classifier: Programming Language :: Python :: 3.12
19
19
  Classifier: Typing :: Typed
20
- Requires-Python: >=3.11
21
- Requires-Dist: docling>=2.15.0
20
+ Requires-Python: >=3.12
21
+ Requires-Dist: docling>=2.49.0
22
22
  Requires-Dist: fastmcp>=2.8.1
23
23
  Requires-Dist: httpx>=0.28.1
24
+ Requires-Dist: lancedb>=0.24.3
24
25
  Requires-Dist: ollama>=0.5.3
25
- Requires-Dist: pydantic-ai>=0.7.2
26
+ Requires-Dist: pydantic-ai>=0.8.1
26
27
  Requires-Dist: pydantic>=2.11.7
27
28
  Requires-Dist: python-dotenv>=1.1.0
28
- Requires-Dist: rich>=14.0.0
29
- Requires-Dist: sqlite-vec>=0.1.6
30
- Requires-Dist: tiktoken>=0.9.0
31
- Requires-Dist: typer>=0.16.0
29
+ Requires-Dist: rich>=14.1.0
30
+ Requires-Dist: tiktoken>=0.11.0
31
+ Requires-Dist: typer>=0.16.1
32
32
  Requires-Dist: watchfiles>=1.1.0
33
33
  Provides-Extra: mxbai
34
34
  Requires-Dist: mxbai-rerank>=0.1.6; extra == 'mxbai'
@@ -36,18 +36,20 @@ Provides-Extra: voyageai
36
36
  Requires-Dist: voyageai>=0.3.2; extra == 'voyageai'
37
37
  Description-Content-Type: text/markdown
38
38
 
39
- # Haiku SQLite RAG
39
+ # Haiku RAG
40
40
 
41
- Retrieval-Augmented Generation (RAG) library on SQLite.
41
+ Retrieval-Augmented Generation (RAG) library built on LanceDB.
42
42
 
43
- `haiku.rag` is a Retrieval-Augmented Generation (RAG) library built to work on SQLite alone without the need for external vector databases. It uses [sqlite-vec](https://github.com/asg017/sqlite-vec) for storing the embeddings and performs semantic (vector) search as well as full-text search combined through Reciprocal Rank Fusion. Both open-source (Ollama) as well as commercial (OpenAI, VoyageAI) embedding providers are supported.
43
+ `haiku.rag` is a Retrieval-Augmented Generation (RAG) library built to work with LanceDB as a local vector database. It uses LanceDB for storing embeddings and performs semantic (vector) search as well as full-text search combined through native hybrid search with Reciprocal Rank Fusion. Both open-source (Ollama) as well as commercial (OpenAI, VoyageAI) embedding providers are supported.
44
+
45
+ > **Note**: Starting with version 0.7.0, haiku.rag uses LanceDB instead of SQLite. If you have an existing SQLite database, use `haiku-rag migrate old_database.sqlite` to migrate your data safely.
44
46
 
45
47
  ## Features
46
48
 
47
- - **Local SQLite**: No external servers required
49
+ - **Local LanceDB**: No external servers required; also supports LanceDB Cloud, S3, Google Cloud Storage & Azure
48
50
  - **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
49
51
  - **Multiple QA providers**: Any provider/model supported by Pydantic AI
50
- - **Hybrid search**: Vector + full-text search with Reciprocal Rank Fusion
52
+ - **Native hybrid search**: Vector + full-text search with native LanceDB RRF reranking
51
53
  - **Reranking**: Default search result reranking with MixedBread AI or Cohere
52
54
  - **Question answering**: Built-in QA agents on your documents
53
55
  - **File monitoring**: Auto-index files when run as server
@@ -77,6 +79,9 @@ haiku-rag ask "Who is the author of haiku.rag?" --cite
77
79
  # Rebuild database (re-chunk and re-embed all documents)
78
80
  haiku-rag rebuild
79
81
 
82
+ # Migrate from SQLite to LanceDB
83
+ haiku-rag migrate old_database.sqlite
84
+
80
85
  # Start server with file monitoring
81
86
  export MONITOR_DIRECTORIES="/path/to/docs"
82
87
  haiku-rag serve
@@ -87,7 +92,7 @@ haiku-rag serve
87
92
  ```python
88
93
  from haiku.rag.client import HaikuRAG
89
94
 
90
- async with HaikuRAG("database.db") as client:
95
+ async with HaikuRAG("database.lancedb") as client:
91
96
  # Add document
92
97
  doc = await client.create_document("Your content")
93
98
 
@@ -1,15 +1,17 @@
1
- # Haiku SQLite RAG
1
+ # Haiku RAG
2
2
 
3
- Retrieval-Augmented Generation (RAG) library on SQLite.
3
+ Retrieval-Augmented Generation (RAG) library built on LanceDB.
4
4
 
5
- `haiku.rag` is a Retrieval-Augmented Generation (RAG) library built to work on SQLite alone without the need for external vector databases. It uses [sqlite-vec](https://github.com/asg017/sqlite-vec) for storing the embeddings and performs semantic (vector) search as well as full-text search combined through Reciprocal Rank Fusion. Both open-source (Ollama) as well as commercial (OpenAI, VoyageAI) embedding providers are supported.
5
+ `haiku.rag` is a Retrieval-Augmented Generation (RAG) library built to work with LanceDB as a local vector database. It uses LanceDB for storing embeddings and performs semantic (vector) search as well as full-text search combined through native hybrid search with Reciprocal Rank Fusion. Both open-source (Ollama) as well as commercial (OpenAI, VoyageAI) embedding providers are supported.
6
+
7
+ > **Note**: Starting with version 0.7.0, haiku.rag uses LanceDB instead of SQLite. If you have an existing SQLite database, use `haiku-rag migrate old_database.sqlite` to migrate your data safely.
6
8
 
7
9
  ## Features
8
10
 
9
- - **Local SQLite**: No external servers required
11
+ - **Local LanceDB**: No external servers required; also supports LanceDB Cloud, S3, Google Cloud Storage & Azure
10
12
  - **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
11
13
  - **Multiple QA providers**: Any provider/model supported by Pydantic AI
12
- - **Hybrid search**: Vector + full-text search with Reciprocal Rank Fusion
14
+ - **Native hybrid search**: Vector + full-text search with native LanceDB RRF reranking
13
15
  - **Reranking**: Default search result reranking with MixedBread AI or Cohere
14
16
  - **Question answering**: Built-in QA agents on your documents
15
17
  - **File monitoring**: Auto-index files when run as server
@@ -39,6 +41,9 @@ haiku-rag ask "Who is the author of haiku.rag?" --cite
39
41
  # Rebuild database (re-chunk and re-embed all documents)
40
42
  haiku-rag rebuild
41
43
 
44
+ # Migrate from SQLite to LanceDB
45
+ haiku-rag migrate old_database.sqlite
46
+
42
47
  # Start server with file monitoring
43
48
  export MONITOR_DIRECTORIES="/path/to/docs"
44
49
  haiku-rag serve
@@ -49,7 +54,7 @@ haiku-rag serve
49
54
  ```python
50
55
  from haiku.rag.client import HaikuRAG
51
56
 
52
- async with HaikuRAG("database.db") as client:
57
+ async with HaikuRAG("database.lancedb") as client:
53
58
  # Add document
54
59
  doc = await client.create_document("Your content")
55
60
 
@@ -7,19 +7,19 @@ You can perform your own evaluations using as example the script found at
7
7
 
8
8
  ## Recall
9
9
 
10
- In order to calculate recall, we load the `News Stories` from `repliqa_3` which is 1035 documents and index them in a sqlite db. Subsequently, we run a search over the `question` field for each row of the dataset and check whether we match the document that answers the question.
10
+ In order to calculate recall, we load the `News Stories` from `repliqa_3` (1035 documents) and index them. Subsequently, we run a search over the `question` field for each row of the dataset and check whether we match the document that answers the question. Questions for which the answer cannot be found in the documents are ignored.
11
11
 
12
12
 
13
- The recall obtained is ~0.73 for matching in the top result, raising to ~0.75 for the top 3 results.
13
+ The recall obtained is ~0.79 for matching in the top result, raising to ~0.91 for the top 3 results with the "bare" default settings (Ollama `qwen3`, `mxbai-embed-large` embeddings, no reranking).
14
14
 
15
15
  | Embedding Model | Document in top 1 | Document in top 3 | Reranker |
16
16
  |---------------------------------------|-------------------|-------------------|------------------------|
17
- | Ollama / `mxbai-embed-large` | 0.77 | 0.89 | None |
18
- | Ollama / `mxbai-embed-large` | 0.81 | 0.91 | `mxbai-rerank-base-v2` |
19
- | Ollama / `nomic-embed-text` | 0.74 | 0.88 | None |
17
+ | Ollama / `mxbai-embed-large` | 0.79 | 0.91 | None |
18
+ | Ollama / `mxbai-embed-large` | 0.90 | 0.95 | `mxbai-rerank-base-v2` |
19
+ <!-- | Ollama / `nomic-embed-text` | 0.74 | 0.88 | None |
20
20
  | OpenAI / `text-embeddings-3-small` | 0.75 | 0.88 | None |
21
21
  | OpenAI / `text-embeddings-3-small` | 0.75 | 0.88 | None |
22
- | OpenAI / `text-embeddings-3-small` | 0.83 | 0.90 | Cohere / `rerank-v3.5` |
22
+ | OpenAI / `text-embeddings-3-small` | 0.83 | 0.90 | Cohere / `rerank-v3.5` | -->
23
23
 
24
24
  ## Question/Answer evaluation
25
25
 
@@ -27,7 +27,10 @@ Again using the same dataset, we use a QA agent to answer the question. In addit
27
27
 
28
28
  | Embedding Model | QA Model | Accuracy | Reranker |
29
29
  |------------------------------------|-----------------------------------|-----------|------------------------|
30
- | Ollama / `mxbai-embed-large` | Ollama / `qwen3` | 0.64 | None |
31
- | Ollama / `mxbai-embed-large` | Ollama / `qwen3` | 0.72 | `mxbai-rerank-base-v2` |
32
- | Ollama / `mxbai-embed-large` | Anthropic / `Claude Sonnet 3.7` | 0.79 | None |
33
- | OpenAI / `text-embeddings-3-small` | OpenAI / `gpt-4-turbo` | 0.62 | None |
30
+ | Ollama / `mxbai-embed-large` | Ollama / `qwen3` | 0.85 | None |
31
+ | Ollama / `mxbai-embed-large` | Ollama / `qwen3` | 0.87 | `mxbai-rerank-base-v2` |
32
+ | Ollama / `mxbai-embed-large` | Ollama / `qwen3:0.6b` | 0.28 | None |
33
+
34
+ Note the significant degradation when very small models are used such as `qwen3:0.6b`.
35
+ <!-- | Ollama / `mxbai-embed-large` | Anthropic / `Claude Sonnet 3.7` | 0.79 | None |
36
+ | OpenAI / `text-embeddings-3-small` | OpenAI / `gpt-4-turbo` | 0.62 | None | -->
@@ -45,6 +45,23 @@ haiku-rag rebuild
45
45
 
46
46
  Use this when you want to change things like the embedding model or chunk size for example.
47
47
 
48
+ ## Migration
49
+
50
+ ### Migrate from SQLite to LanceDB
51
+
52
+ Migrate an existing SQLite database to LanceDB:
53
+
54
+ ```bash
55
+ haiku-rag migrate /path/to/old_database.sqlite
56
+ ```
57
+
58
+ This will:
59
+ - Read all documents, chunks, embeddings, and settings from the SQLite database
60
+ - Create a new LanceDB database with the same data in the same directory
61
+ - Optimize the new database for best performance
62
+
63
+ The original SQLite database remains unchanged, so you can safely migrate without risk of data loss.
64
+
48
65
  ## Search
49
66
 
50
67
  Basic search:
@@ -54,7 +71,7 @@ haiku-rag search "machine learning"
54
71
 
55
72
  With options:
56
73
  ```bash
57
- haiku-rag search "python programming" --limit 10 --k 100
74
+ haiku-rag search "python programming" --limit 10
58
75
  ```
59
76
 
60
77
  ## Question Answering
@@ -109,25 +109,7 @@ See the [Pydantic AI documentation](https://ai.pydantic.dev/models/) for the com
109
109
 
110
110
  Reranking improves search quality by re-ordering the initial search results using specialized models. When enabled, the system retrieves more candidates (3x the requested limit) and then reranks them to return the most relevant results.
111
111
 
112
- Reranking is **automatically enabled** by default using Ollama, or if you install the appropriate reranking provider package.
113
-
114
- ### Disabling Reranking
115
-
116
- To disable reranking completely for faster searches:
117
-
118
- ```bash
119
- RERANK_PROVIDER=""
120
- ```
121
-
122
- ### Ollama (Default)
123
-
124
- Ollama reranking uses LLMs with structured output to rank documents by relevance:
125
-
126
- ```bash
127
- RERANK_PROVIDER="ollama"
128
- RERANK_MODEL="qwen3:1.7b" # or any model that supports structured output
129
- OLLAMA_BASE_URL="http://localhost:11434"
130
- ```
112
+ Reranking is **disabled by default** (`RERANK_PROVIDER=""`) for faster searches. You can enable it by configuring one of the providers below.
131
113
 
132
114
  ### MixedBread AI
133
115
 
@@ -158,11 +140,41 @@ COHERE_API_KEY="your-api-key"
158
140
 
159
141
  ### Database and Storage
160
142
 
143
+ By default, `haiku.rag` uses a local LanceDB database:
144
+
161
145
  ```bash
162
- # Default data directory (where SQLite database is stored)
146
+ # Default data directory (where local LanceDB is stored)
163
147
  DEFAULT_DATA_DIR="/path/to/data"
164
148
  ```
165
149
 
150
+ For remote storage, use the `LANCEDB_URI` setting with various backends:
151
+
152
+ ```bash
153
+ # LanceDB Cloud
154
+ LANCEDB_URI="db://your-database-name"
155
+ LANCEDB_API_KEY="your-api-key"
156
+ LANCEDB_REGION="us-west-2" # optional
157
+
158
+ # Amazon S3
159
+ LANCEDB_URI="s3://my-bucket/my-table"
160
+ # Use AWS credentials or IAM roles
161
+
162
+ # Azure Blob Storage
163
+ LANCEDB_URI="az://my-container/my-table"
164
+ # Use Azure credentials
165
+
166
+ # Google Cloud Storage
167
+ LANCEDB_URI="gs://my-bucket/my-table"
168
+ # Use GCP credentials
169
+
170
+ # HDFS
171
+ LANCEDB_URI="hdfs://namenode:port/path/to/table"
172
+ ```
173
+
174
+ Authentication is handled through standard cloud provider credentials (AWS CLI, Azure CLI, gcloud, etc.) or by setting `LANCEDB_API_KEY` for LanceDB Cloud.
175
+
176
+ **Note:** For LanceDB Cloud (`db://` URIs), table optimization is handled automatically by the service, so local optimization is skipped. For object storage backends (S3, Azure, GCS), optimization is still performed locally.
177
+
166
178
  ### Document Processing
167
179
 
168
180
  ```bash
@@ -1,12 +1,14 @@
1
1
  # haiku.rag
2
2
 
3
- `haiku.rag` is a Retrieval-Augmented Generation (RAG) library built to work on SQLite alone without the need for external vector databases. It uses [sqlite-vec](https://github.com/asg017/sqlite-vec) for storing the embeddings and performs semantic (vector) search as well as full-text search combined through Reciprocal Rank Fusion. Both open-source (Ollama, MixedBread AI) as well as commercial (OpenAI, VoyageAI) embedding providers are supported.
3
+ `haiku.rag` is a Retrieval-Augmented Generation (RAG) library built to work with LanceDB as a local vector database. It uses LanceDB for storing embeddings and performs semantic (vector) search as well as full-text search combined through native hybrid search with Reciprocal Rank Fusion. Both open-source (Ollama, MixedBread AI) as well as commercial (OpenAI, VoyageAI) embedding providers are supported.
4
+
5
+ > **Note**: Starting with version 0.7.0, haiku.rag uses LanceDB instead of SQLite. If you have an existing SQLite database, use `haiku-rag migrate old_database.sqlite` to migrate your data safely.
4
6
 
5
7
  ## Features
6
8
 
7
- - **Local SQLite**: No need to run additional servers
9
+ - **Local LanceDB**: No need to run additional servers
8
10
  - **Support for various embedding providers**: Ollama, VoyageAI, OpenAI or add your own
9
- - **Hybrid Search**: Vector search using `sqlite-vec` combined with full-text search `FTS5`, using Reciprocal Rank Fusion
11
+ - **Native Hybrid Search**: Vector search combined with full-text search using native LanceDB RRF reranking
10
12
  - **Reranking**: Optional result reranking with MixedBread AI or Cohere
11
13
  - **Question Answering**: Built-in QA agents using Ollama, OpenAI, or Anthropic.
12
14
  - **File monitoring**: Automatically index files when run as a server
@@ -26,7 +28,7 @@ Use from Python:
26
28
  ```python
27
29
  from haiku.rag.client import HaikuRAG
28
30
 
29
- async with HaikuRAG("database.db") as client:
31
+ async with HaikuRAG("database.lancedb") as client:
30
32
  # Add a document
31
33
  doc = await client.create_document("Your content here")
32
34
 
@@ -34,7 +36,7 @@ async with HaikuRAG("database.db") as client:
34
36
  results = await client.search("query")
35
37
 
36
38
  # Ask questions
37
- answer = await client.ask("Who is the author of haiku.rag?", rerank=False)
39
+ answer = await client.ask("Who is the author of haiku.rag?")
38
40
  ```
39
41
 
40
42
  Or use the CLI:
@@ -42,6 +44,7 @@ Or use the CLI:
42
44
  haiku-rag add "Your document content"
43
45
  haiku-rag search "query"
44
46
  haiku-rag ask "Who is the author of haiku.rag?"
47
+ haiku-rag migrate old_database.sqlite # Migrate from SQLite
45
48
  ```
46
49
 
47
50
  ## Documentation
@@ -31,5 +31,4 @@ uv pip install haiku.rag[mxbai]
31
31
  ## Requirements
32
32
 
33
33
  - Python 3.10+
34
- - SQLite 3.38+
35
34
  - Ollama (for default embeddings)
@@ -19,10 +19,10 @@ The MCP server exposes `haiku.rag` as MCP tools for compatible MCP clients.
19
19
 
20
20
  ## Starting MCP Server
21
21
 
22
- The MCP server starts automatically with the serve command and supports `Streamable HTTP`, `stdio` and `SSE` transports:
22
+ The MCP server starts automatically with the serve command and supports Streamable HTTP, stdio and SSE transports:
23
23
 
24
24
  ```bash
25
- # Default HTTP transport
25
+ # Default streamable HTTP transport
26
26
  haiku-rag serve
27
27
 
28
28
  # stdio transport (for Claude Desktop)
@@ -9,7 +9,7 @@ from pathlib import Path
9
9
  from haiku.rag.client import HaikuRAG
10
10
 
11
11
  # Use as async context manager (recommended)
12
- async with HaikuRAG("path/to/database.db") as client:
12
+ async with HaikuRAG("path/to/database.lancedb") as client:
13
13
  # Your code here
14
14
  pass
15
15
  ```
@@ -101,9 +101,9 @@ async for doc_id in client.rebuild_database():
101
101
 
102
102
  ## Searching Documents
103
103
 
104
- The search method performs hybrid search (vector + full-text) with **reranking enabled by default** for improved relevance:
104
+ The search method performs native hybrid search (vector + full-text) using LanceDB with optional reranking for improved relevance:
105
105
 
106
- Basic search (with reranking):
106
+ Basic hybrid search (default):
107
107
  ```python
108
108
  results = await client.search("machine learning algorithms", limit=5)
109
109
  for chunk, score in results:
@@ -112,13 +112,27 @@ for chunk, score in results:
112
112
  print(f"Document ID: {chunk.document_id}")
113
113
  ```
114
114
 
115
- With options:
115
+ Search with different search types:
116
116
  ```python
117
+ # Vector search only
117
118
  results = await client.search(
118
119
  query="machine learning",
119
- limit=5, # Maximum results to return
120
- k=60, # RRF parameter for reciprocal rank fusion
121
- rerank=False # Disable reranking for faster search
120
+ limit=5,
121
+ search_type="vector"
122
+ )
123
+
124
+ # Full-text search only
125
+ results = await client.search(
126
+ query="machine learning",
127
+ limit=5,
128
+ search_type="fts"
129
+ )
130
+
131
+ # Hybrid search (default - combines vector + fts with native LanceDB RRF)
132
+ results = await client.search(
133
+ query="machine learning",
134
+ limit=5,
135
+ search_type="hybrid"
122
136
  )
123
137
 
124
138
  # Process results
@@ -9,7 +9,7 @@ haiku-rag serve
9
9
  ```
10
10
 
11
11
  Transport options:
12
- - `--http` (default) - Streamable HTTP transport
12
+ - Default - Streamable HTTP transport
13
13
  - `--stdio` - Standard input/output transport
14
14
  - `--sse` - Server-sent events transport
15
15
 
@@ -1,5 +1,5 @@
1
1
  site_name: haiku.rag
2
- site_description: Retrieval-Augmented Generation (RAG) library on SQLite.
2
+ site_description: Retrieval-Augmented Generation (RAG) library on LanceDB.
3
3
  site_url: https://ggozad.github.io/haiku.rag/
4
4
  theme:
5
5
  name: material
@@ -1,12 +1,12 @@
1
1
  [project]
2
2
  name = "haiku.rag"
3
- version = "0.6.0"
4
- description = "Retrieval Augmented Generation (RAG) with SQLite"
3
+ version = "0.7.1"
4
+ description = "Retrieval Augmented Generation (RAG) with LanceDB"
5
5
  authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
6
6
  license = { text = "MIT" }
7
7
  readme = { file = "README.md", content-type = "text/markdown" }
8
- requires-python = ">=3.11"
9
- keywords = ["RAG", "sqlite", "sqlite-vec", "ml", "mcp"]
8
+ requires-python = ">=3.12"
9
+ keywords = ["RAG", "lancedb", "vector-database", "ml", "mcp"]
10
10
  classifiers = [
11
11
  "Development Status :: 4 - Beta",
12
12
  "Environment :: Console",
@@ -22,17 +22,17 @@ classifiers = [
22
22
  ]
23
23
 
24
24
  dependencies = [
25
- "docling>=2.15.0",
25
+ "docling>=2.49.0",
26
26
  "fastmcp>=2.8.1",
27
27
  "httpx>=0.28.1",
28
+ "lancedb>=0.24.3",
28
29
  "ollama>=0.5.3",
29
30
  "pydantic>=2.11.7",
30
- "pydantic-ai>=0.7.2",
31
+ "pydantic-ai>=0.8.1",
31
32
  "python-dotenv>=1.1.0",
32
- "rich>=14.0.0",
33
- "sqlite-vec>=0.1.6",
34
- "tiktoken>=0.9.0",
35
- "typer>=0.16.0",
33
+ "rich>=14.1.0",
34
+ "tiktoken>=0.11.0",
35
+ "typer>=0.16.1",
36
36
  "watchfiles>=1.1.0",
37
37
  ]
38
38
 
@@ -56,7 +56,7 @@ dev = [
56
56
  "mkdocs>=1.6.1",
57
57
  "mkdocs-material>=9.6.14",
58
58
  "pre-commit>=4.2.0",
59
- "pyright>=1.1.403",
59
+ "pyright>=1.1.404",
60
60
  "pytest>=8.4.0",
61
61
  "pytest-asyncio>=1.0.0",
62
62
  "pytest-cov>=6.2.1",
@@ -40,7 +40,7 @@ class HaikuRAGApp:
40
40
  f"[b]Document with id [cyan]{doc.id}[/cyan] added successfully.[/b]"
41
41
  )
42
42
 
43
- async def get_document(self, doc_id: int):
43
+ async def get_document(self, doc_id: str):
44
44
  async with HaikuRAG(db_path=self.db_path) as self.client:
45
45
  doc = await self.client.get_document_by_id(doc_id)
46
46
  if doc is None:
@@ -48,14 +48,14 @@ class HaikuRAGApp:
48
48
  return
49
49
  self._rich_print_document(doc, truncate=False)
50
50
 
51
- async def delete_document(self, doc_id: int):
51
+ async def delete_document(self, doc_id: str):
52
52
  async with HaikuRAG(db_path=self.db_path) as self.client:
53
53
  await self.client.delete_document(doc_id)
54
54
  self.console.print(f"[b]Document {doc_id} deleted successfully.[/b]")
55
55
 
56
- async def search(self, query: str, limit: int = 5, k: int = 60):
56
+ async def search(self, query: str, limit: int = 5):
57
57
  async with HaikuRAG(db_path=self.db_path) as self.client:
58
- results = await self.client.search(query, limit=limit, k=k)
58
+ results = await self.client.search(query, limit=limit)
59
59
  if not results:
60
60
  self.console.print("[red]No results found.[/red]")
61
61
  return