haiku.rag 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of haiku.rag might be problematic. Click here for more details.

Files changed (42) hide show
  1. haiku_rag-0.1.0/.claude/settings.local.json +15 -0
  2. haiku_rag-0.1.0/.github/FUNDING.yml +3 -0
  3. haiku_rag-0.1.0/.github/workflows/build-docs.yml +28 -0
  4. haiku_rag-0.1.0/.github/workflows/build-publish.yml +18 -0
  5. haiku_rag-0.1.0/.gitignore +19 -0
  6. haiku_rag-0.1.0/.pre-commit-config.yaml +22 -0
  7. haiku_rag-0.1.0/.python-version +1 -0
  8. haiku_rag-0.1.0/CLAUDE.md +35 -0
  9. haiku_rag-0.1.0/LICENSE +7 -0
  10. haiku_rag-0.1.0/PKG-INFO +195 -0
  11. haiku_rag-0.1.0/README.md +161 -0
  12. haiku_rag-0.1.0/pyproject.toml +93 -0
  13. haiku_rag-0.1.0/src/haiku/rag/__init__.py +0 -0
  14. haiku_rag-0.1.0/src/haiku/rag/app.py +107 -0
  15. haiku_rag-0.1.0/src/haiku/rag/chunker.py +76 -0
  16. haiku_rag-0.1.0/src/haiku/rag/cli.py +153 -0
  17. haiku_rag-0.1.0/src/haiku/rag/client.py +261 -0
  18. haiku_rag-0.1.0/src/haiku/rag/config.py +28 -0
  19. haiku_rag-0.1.0/src/haiku/rag/embeddings/__init__.py +24 -0
  20. haiku_rag-0.1.0/src/haiku/rag/embeddings/base.py +12 -0
  21. haiku_rag-0.1.0/src/haiku/rag/embeddings/ollama.py +14 -0
  22. haiku_rag-0.1.0/src/haiku/rag/embeddings/voyageai.py +17 -0
  23. haiku_rag-0.1.0/src/haiku/rag/mcp.py +141 -0
  24. haiku_rag-0.1.0/src/haiku/rag/reader.py +52 -0
  25. haiku_rag-0.1.0/src/haiku/rag/store/__init__.py +4 -0
  26. haiku_rag-0.1.0/src/haiku/rag/store/engine.py +80 -0
  27. haiku_rag-0.1.0/src/haiku/rag/store/models/__init__.py +4 -0
  28. haiku_rag-0.1.0/src/haiku/rag/store/models/chunk.py +12 -0
  29. haiku_rag-0.1.0/src/haiku/rag/store/models/document.py +16 -0
  30. haiku_rag-0.1.0/src/haiku/rag/store/repositories/__init__.py +5 -0
  31. haiku_rag-0.1.0/src/haiku/rag/store/repositories/base.py +40 -0
  32. haiku_rag-0.1.0/src/haiku/rag/store/repositories/chunk.py +424 -0
  33. haiku_rag-0.1.0/src/haiku/rag/store/repositories/document.py +210 -0
  34. haiku_rag-0.1.0/src/haiku/rag/utils.py +25 -0
  35. haiku_rag-0.1.0/tests/conftest.py +18 -0
  36. haiku_rag-0.1.0/tests/test_chunk.py +157 -0
  37. haiku_rag-0.1.0/tests/test_chunker.py +44 -0
  38. haiku_rag-0.1.0/tests/test_client.py +499 -0
  39. haiku_rag-0.1.0/tests/test_document.py +127 -0
  40. haiku_rag-0.1.0/tests/test_embedder.py +48 -0
  41. haiku_rag-0.1.0/tests/test_search.py +58 -0
  42. haiku_rag-0.1.0/uv.lock +2853 -0
@@ -0,0 +1,15 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(pytest:*)",
5
+ "Bash(uv add:*)",
6
+ "Bash(source:*)",
7
+ "Bash(pyright:*)",
8
+ "Bash(ruff check:*)",
9
+ "Bash(find:*)",
10
+ "Bash(. .venv/bin/activate)",
11
+ "Bash(mv:*)"
12
+ ],
13
+ "deny": []
14
+ }
15
+ }
@@ -0,0 +1,3 @@
1
+ # These are supported funding model platforms
2
+
3
+ github: ggozad
@@ -0,0 +1,28 @@
1
+ name: build-docs
2
+ on:
3
+ push:
4
+ branches:
5
+ - main
6
+ permissions:
7
+ contents: write
8
+ jobs:
9
+ deploy:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+ - name: Configure Git Credentials
14
+ run: |
15
+ git config user.name github-actions[bot]
16
+ git config user.email 41898282+github-actions[bot]@users.noreply.github.com
17
+ - uses: actions/setup-python@v5
18
+ with:
19
+ python-version: 3.x
20
+ - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV
21
+ - uses: actions/cache@v4
22
+ with:
23
+ key: mkdocs-material-${{ env.cache_id }}
24
+ path: .cache
25
+ restore-keys: |
26
+ mkdocs-material-
27
+ - run: pip install mkdocs-material
28
+ - run: mkdocs gh-deploy --force
@@ -0,0 +1,18 @@
1
+ name: Build & publish to pypi
2
+ on:
3
+ release:
4
+ types: [published]
5
+
6
+ jobs:
7
+ build:
8
+ runs-on: ubuntu-latest
9
+ steps:
10
+ - uses: actions/checkout@v4
11
+ - name: Set up uv
12
+ run: curl -LsSf https://astral.sh/uv/0.3.0/install.sh | sh
13
+ - name: Set up Python 3.10
14
+ run: uv python install 3.10
15
+ - name: Build package
16
+ run: uvx --from build pyproject-build --installer uv
17
+ - name: Publish package
18
+ run: uvx twine upload -u __token__ -p ${{ secrets.PYPI_API_TOKEN }} dist/* --non-interactive
@@ -0,0 +1,19 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+
12
+ # tests
13
+ .coverage*
14
+ tests/data/
15
+ .pytest_cache/
16
+ .ruff_cache/
17
+
18
+ # environment variables
19
+ .env
@@ -0,0 +1,22 @@
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v5.0.0
4
+ hooks:
5
+ - id: trailing-whitespace
6
+ - id: end-of-file-fixer
7
+ - id: check-merge-conflict
8
+ - id: check-toml
9
+ - id: debug-statements
10
+ - repo: https://github.com/astral-sh/ruff-pre-commit
11
+ # Ruff version.
12
+ rev: v0.11.4
13
+ hooks:
14
+ # Run the linter.
15
+ - id: ruff
16
+ # Run the formatter.
17
+ - id: ruff-format
18
+
19
+ - repo: https://github.com/RobertCraigie/pyright-python
20
+ rev: v1.1.399
21
+ hooks:
22
+ - id: pyright
@@ -0,0 +1 @@
1
+ 3.10
@@ -0,0 +1,35 @@
1
+ # Claude AI Assistant Configuration
2
+
3
+ This file contains project-specific information and preferences for Claude AI assistant interactions.
4
+
5
+ ## Project Overview
6
+
7
+ This is a SQLite-based RAG (Retrieval-Augmented Generation) system built with Haiku.
8
+
9
+ ## Development Commands
10
+
11
+ - Install dependencies: `uv sync`
12
+ - Run tests: `pytest`
13
+ - Run specific test: `pytest path/to/test_file.py`
14
+ - Run with coverage: `pytest --cov`
15
+ - Type checking: `pyright`
16
+ - Run MCP server: `python -m haiku.rag.mcp`
17
+
18
+ ## Project Structure
19
+
20
+ - `src/` - Source code
21
+ - `tests/` - Test files
22
+ - `README.md` - Documentation
23
+
24
+ ## Notes
25
+
26
+ - This is a Python project using uv for dependency management
27
+ - Use pytest for testing
28
+ - Prefer editing existing files over creating new ones
29
+ - Follow existing code patterns and conventions
30
+ - Remember to activate the .venv when you start working
31
+ - Never use relative imports
32
+ - Always run ruff as well as pyright after you are done
33
+ - Do not be verbose with comments!
34
+ - When you change something check if the README needs an update too.
35
+
@@ -0,0 +1,7 @@
1
+ Copyright 2025 Yiorgis Gozadinos
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,195 @@
1
+ Metadata-Version: 2.4
2
+ Name: haiku.rag
3
+ Version: 0.1.0
4
+ Summary: Retrieval Augmented Generation (RAG) with SQLite
5
+ Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
6
+ License: MIT
7
+ License-File: LICENSE
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Environment :: Console
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Operating System :: MacOS
12
+ Classifier: Operating System :: Microsoft :: Windows :: Windows 10
13
+ Classifier: Operating System :: Microsoft :: Windows :: Windows 11
14
+ Classifier: Operating System :: POSIX :: Linux
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Typing :: Typed
19
+ Requires-Python: >=3.10
20
+ Requires-Dist: fastmcp>=2.8.1
21
+ Requires-Dist: httpx>=0.28.1
22
+ Requires-Dist: markitdown[audio-transcription,docx,pdf,pptx,xlsx]>=0.1.2
23
+ Requires-Dist: ollama>=0.5.1
24
+ Requires-Dist: pydantic>=2.11.7
25
+ Requires-Dist: python-dotenv>=1.1.0
26
+ Requires-Dist: rich>=14.0.0
27
+ Requires-Dist: sqlite-vec>=0.1.6
28
+ Requires-Dist: tiktoken>=0.9.0
29
+ Requires-Dist: typer>=0.16.0
30
+ Requires-Dist: watchfiles>=1.1.0
31
+ Provides-Extra: voyageai
32
+ Requires-Dist: voyageai>=0.3.2; extra == 'voyageai'
33
+ Description-Content-Type: text/markdown
34
+
35
+ # Haiku SQLite RAG
36
+
37
+ A SQLite-based Retrieval-Augmented Generation (RAG) system built for efficient document storage, chunking, and hybrid search capabilities.
38
+
39
+ ## Features
40
+ - **Local SQLite**: No need to run additional servers
41
+ - **Support for various embedding providers**: You can use Ollama, VoyageAI or add your own
42
+ - **Hybrid Search**: Vector search using `sqlite-vec` combined with full-text search `FTS5`, using Reciprocal Rank Fusion
43
+ - **Multi-format Support**: Parse 40+ file formats including PDF, DOCX, HTML, Markdown, audio and more. Or add a URL!
44
+
45
+ ## Installation
46
+
47
+ ```bash
48
+ uv pip install haiku.rag
49
+ ```
50
+
51
+ By default Ollama (with the `mxbai-embed-large` model) is used for the embeddings.
52
+ For other providers use:
53
+
54
+ - **VoyageAI**: `uv pip install "haiku.rag[voyageai]"`
55
+
56
+ ## Configuration
57
+
58
+ If you want to use an alternative embeddings provider (Ollama being the default) you will need to set the provider details through environment variables:
59
+
60
+ By default:
61
+
62
+ ```bash
63
+ EMBEDDING_PROVIDER="ollama"
64
+ EMBEDDING_MODEL="mxbai-embed-large" # or any other model
65
+ EMBEDDING_VECTOR_DIM=1024
66
+ ```
67
+
68
+ For VoyageAI:
69
+ ```bash
70
+ EMBEDDING_PROVIDER="voyageai"
71
+ EMBEDDING_MODEL="voyage-3.5" # or any other model
72
+ EMBEDDING_VECTOR_DIM=1024
73
+ ```
74
+
75
+ ## Command Line Interface
76
+
77
+ `haiku.rag` includes a CLI application for managing documents and performing searches from the command line:
78
+
79
+ ### Available Commands
80
+
81
+ ```bash
82
+ # List all documents
83
+ haiku-rag list
84
+
85
+ # Add document from text
86
+ haiku-rag add "Your document content here"
87
+
88
+ # Add document from file or URL
89
+ haiku-rag add-src /path/to/document.pdf
90
+ haiku-rag add-src https://example.com/article.html
91
+
92
+ # Get and display a specific document
93
+ haiku-rag get 1
94
+
95
+ # Delete a document by ID
96
+ haiku-rag delete 1
97
+
98
+ # Search documents
99
+ haiku-rag search "machine learning"
100
+
101
+ # Search with custom options
102
+ haiku-rag search "python programming" --limit 10 --k 100
103
+
104
+ # Start MCP server (default HTTP transport)
105
+ haiku-rag serve # --stdio for stdio transport or --sse for SSE transport
106
+ ```
107
+
108
+ All commands support the `--db` option to specify a custom database path. Run
109
+ ```bash
110
+ haiku-rag command -h
111
+ ```
112
+ to see additional parameters for a command.
113
+
114
+ ## MCP Server
115
+
116
+ `haiku.rag` includes a Model Context Protocol (MCP) server that exposes RAG functionality as tools for AI assistants like Claude Desktop. The MCP server provides the following tools:
117
+
118
+ - `add_document_from_file` - Add documents from local file paths
119
+ - `add_document_from_url` - Add documents from URLs
120
+ - `add_document_from_text` - Add documents from raw text content
121
+ - `search_documents` - Search documents using hybrid search
122
+ - `get_document` - Retrieve specific documents by ID
123
+ - `list_documents` - List all documents with pagination
124
+ - `delete_document` - Delete documents by ID
125
+
126
+ You can start the server (using Streamable HTTP, stdio or SSE transports) with:
127
+
128
+ ```bash
129
+ # Start with default HTTP transport
130
+ haiku-rag serve # --stdio for stdio transport or --sse for SSE transport
131
+ ```
132
+
133
+ ## Using `haiku.rag` from python
134
+
135
+ ### Managing documents
136
+
137
+ ```python
138
+ from pathlib import Path
139
+ from haiku.rag.client import HaikuRAG
140
+
141
+ # Use as async context manager (recommended)
142
+ async with HaikuRAG("path/to/database.db") as client:
143
+ # Create document from text
144
+ doc = await client.create_document(
145
+ content="Your document content here",
146
+ uri="doc://example",
147
+ metadata={"source": "manual", "topic": "example"}
148
+ )
149
+
150
+ # Create document from file (auto-parses content)
151
+ doc = await client.create_document_from_source("path/to/document.pdf")
152
+
153
+ # Create document from URL
154
+ doc = await client.create_document_from_source("https://example.com/article.html")
155
+
156
+ # Retrieve documents
157
+ doc = await client.get_document_by_id(1)
158
+ doc = await client.get_document_by_uri("file:///path/to/document.pdf")
159
+
160
+ # List all documents with pagination
161
+ docs = await client.list_documents(limit=10, offset=0)
162
+
163
+ # Update document content
164
+ doc.content = "Updated content"
165
+ await client.update_document(doc)
166
+
167
+ # Delete document
168
+ await client.delete_document(doc.id)
169
+
170
+ # Search documents using hybrid search (vector + full-text)
171
+ results = await client.search("machine learning algorithms", limit=5)
172
+ for chunk, score in results:
173
+ print(f"Score: {score:.3f}")
174
+ print(f"Content: {chunk.content}")
175
+ print(f"Document ID: {chunk.document_id}")
176
+ print("---")
177
+ ```
178
+
179
+ ## Searching documents
180
+
181
+ ```python
182
+ async with HaikuRAG("database.db") as client:
183
+
184
+ results = await client.search(
185
+ query="machine learning",
186
+ limit=5, # Maximum results to return, defaults to 5
187
+ k=60 # RRF parameter for reciprocal rank fusion, defaults to 60
188
+ )
189
+
190
+ # Process results
191
+ for chunk, relevance_score in results:
192
+ print(f"Relevance: {relevance_score:.3f}")
193
+ print(f"Content: {chunk.content}")
194
+ print(f"From document: {chunk.document_id}")
195
+ ```
@@ -0,0 +1,161 @@
1
+ # Haiku SQLite RAG
2
+
3
+ A SQLite-based Retrieval-Augmented Generation (RAG) system built for efficient document storage, chunking, and hybrid search capabilities.
4
+
5
+ ## Features
6
+ - **Local SQLite**: No need to run additional servers
7
+ - **Support for various embedding providers**: You can use Ollama, VoyageAI or add your own
8
+ - **Hybrid Search**: Vector search using `sqlite-vec` combined with full-text search `FTS5`, using Reciprocal Rank Fusion
9
+ - **Multi-format Support**: Parse 40+ file formats including PDF, DOCX, HTML, Markdown, audio and more. Or add a URL!
10
+
11
+ ## Installation
12
+
13
+ ```bash
14
+ uv pip install haiku.rag
15
+ ```
16
+
17
+ By default Ollama (with the `mxbai-embed-large` model) is used for the embeddings.
18
+ For other providers use:
19
+
20
+ - **VoyageAI**: `uv pip install "haiku.rag[voyageai]"`
21
+
22
+ ## Configuration
23
+
24
+ If you want to use an alternative embeddings provider (Ollama being the default) you will need to set the provider details through environment variables:
25
+
26
+ By default:
27
+
28
+ ```bash
29
+ EMBEDDING_PROVIDER="ollama"
30
+ EMBEDDING_MODEL="mxbai-embed-large" # or any other model
31
+ EMBEDDING_VECTOR_DIM=1024
32
+ ```
33
+
34
+ For VoyageAI:
35
+ ```bash
36
+ EMBEDDING_PROVIDER="voyageai"
37
+ EMBEDDING_MODEL="voyage-3.5" # or any other model
38
+ EMBEDDING_VECTOR_DIM=1024
39
+ ```
40
+
41
+ ## Command Line Interface
42
+
43
+ `haiku.rag` includes a CLI application for managing documents and performing searches from the command line:
44
+
45
+ ### Available Commands
46
+
47
+ ```bash
48
+ # List all documents
49
+ haiku-rag list
50
+
51
+ # Add document from text
52
+ haiku-rag add "Your document content here"
53
+
54
+ # Add document from file or URL
55
+ haiku-rag add-src /path/to/document.pdf
56
+ haiku-rag add-src https://example.com/article.html
57
+
58
+ # Get and display a specific document
59
+ haiku-rag get 1
60
+
61
+ # Delete a document by ID
62
+ haiku-rag delete 1
63
+
64
+ # Search documents
65
+ haiku-rag search "machine learning"
66
+
67
+ # Search with custom options
68
+ haiku-rag search "python programming" --limit 10 --k 100
69
+
70
+ # Start MCP server (default HTTP transport)
71
+ haiku-rag serve # --stdio for stdio transport or --sse for SSE transport
72
+ ```
73
+
74
+ All commands support the `--db` option to specify a custom database path. Run
75
+ ```bash
76
+ haiku-rag command -h
77
+ ```
78
+ to see additional parameters for a command.
79
+
80
+ ## MCP Server
81
+
82
+ `haiku.rag` includes a Model Context Protocol (MCP) server that exposes RAG functionality as tools for AI assistants like Claude Desktop. The MCP server provides the following tools:
83
+
84
+ - `add_document_from_file` - Add documents from local file paths
85
+ - `add_document_from_url` - Add documents from URLs
86
+ - `add_document_from_text` - Add documents from raw text content
87
+ - `search_documents` - Search documents using hybrid search
88
+ - `get_document` - Retrieve specific documents by ID
89
+ - `list_documents` - List all documents with pagination
90
+ - `delete_document` - Delete documents by ID
91
+
92
+ You can start the server (using Streamble HTTP, stdio or SSE transports) with:
93
+
94
+ ```bash
95
+ # Start with default HTTP transport
96
+ haiku-rag serve # --stdio for stdio transport or --sse for SSE transport
97
+ ```
98
+
99
+ ## Using `haiku.rag` from python
100
+
101
+ ### Managing documents
102
+
103
+ ```python
104
+ from pathlib import Path
105
+ from haiku.rag.client import HaikuRAG
106
+
107
+ # Use as async context manager (recommended)
108
+ async with HaikuRAG("path/to/database.db") as client:
109
+ # Create document from text
110
+ doc = await client.create_document(
111
+ content="Your document content here",
112
+ uri="doc://example",
113
+ metadata={"source": "manual", "topic": "example"}
114
+ )
115
+
116
+ # Create document from file (auto-parses content)
117
+ doc = await client.create_document_from_source("path/to/document.pdf")
118
+
119
+ # Create document from URL
120
+ doc = await client.create_document_from_source("https://example.com/article.html")
121
+
122
+ # Retrieve documents
123
+ doc = await client.get_document_by_id(1)
124
+ doc = await client.get_document_by_uri("file:///path/to/document.pdf")
125
+
126
+ # List all documents with pagination
127
+ docs = await client.list_documents(limit=10, offset=0)
128
+
129
+ # Update document content
130
+ doc.content = "Updated content"
131
+ await client.update_document(doc)
132
+
133
+ # Delete document
134
+ await client.delete_document(doc.id)
135
+
136
+ # Search documents using hybrid search (vector + full-text)
137
+ results = await client.search("machine learning algorithms", limit=5)
138
+ for chunk, score in results:
139
+ print(f"Score: {score:.3f}")
140
+ print(f"Content: {chunk.content}")
141
+ print(f"Document ID: {chunk.document_id}")
142
+ print("---")
143
+ ```
144
+
145
+ ## Searching documents
146
+
147
+ ```python
148
+ async with HaikuRAG("database.db") as client:
149
+
150
+ results = await client.search(
151
+ query="machine learning",
152
+ limit=5, # Maximum results to return, defaults to 5
153
+ k=60 # RRF parameter for reciprocal rank fusion, defaults to 60
154
+ )
155
+
156
+ # Process results
157
+ for chunk, relevance_score in results:
158
+ print(f"Relevance: {relevance_score:.3f}")
159
+ print(f"Content: {chunk.content}")
160
+ print(f"From document: {chunk.document_id}")
161
+ ```
@@ -0,0 +1,93 @@
1
+ [project]
2
+ name = "haiku.rag"
3
+ version = "0.1.0"
4
+ description = "Retrieval Augmented Generation (RAG) with SQLite"
5
+ authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
6
+ license = { text = "MIT" }
7
+ readme = { file = "README.md", content-type = "text/markdown" }
8
+ requires-python = ">=3.10"
9
+ classifiers = [
10
+ "Development Status :: 4 - Beta",
11
+ "Environment :: Console",
12
+ "Intended Audience :: Developers",
13
+ "Operating System :: Microsoft :: Windows :: Windows 10",
14
+ "Operating System :: Microsoft :: Windows :: Windows 11",
15
+ "Operating System :: MacOS",
16
+ "Operating System :: POSIX :: Linux",
17
+ "Programming Language :: Python :: 3.10",
18
+ "Programming Language :: Python :: 3.11",
19
+ "Programming Language :: Python :: 3.12",
20
+ "Typing :: Typed",
21
+ ]
22
+
23
+ dependencies = [
24
+ "fastmcp>=2.8.1",
25
+ "httpx>=0.28.1",
26
+ "markitdown[audio-transcription,docx,pdf,pptx,xlsx]>=0.1.2",
27
+ "ollama>=0.5.1",
28
+ "pydantic>=2.11.7",
29
+ "python-dotenv>=1.1.0",
30
+ "rich>=14.0.0",
31
+ "sqlite-vec>=0.1.6",
32
+ "tiktoken>=0.9.0",
33
+ "typer>=0.16.0",
34
+ "watchfiles>=1.1.0",
35
+ ]
36
+
37
+ [project.optional-dependencies]
38
+ voyageai = ["voyageai>=0.3.2"]
39
+
40
+ [project.scripts]
41
+ haiku-rag = "haiku.rag.cli:cli"
42
+
43
+ [build-system]
44
+ requires = ["hatchling"]
45
+ build-backend = "hatchling.build"
46
+
47
+ [tool.hatch.build.targets.wheel]
48
+ packages = ["src/haiku"]
49
+
50
+ [dependency-groups]
51
+ dev = [
52
+ "datasets>=3.6.0",
53
+ "pre-commit>=4.2.0",
54
+ "pyright>=1.1.402",
55
+ "pytest>=8.4.0",
56
+ "pytest-asyncio>=1.0.0",
57
+ "pytest-cov>=6.2.1",
58
+ "ruff>=0.11.13",
59
+ ]
60
+
61
+ [tool.ruff]
62
+ line-length = 88
63
+ # Enable Flake's "E" and "F" codes by default and "I" for sorting imports.
64
+ # Exclude a variety of commonly ignored directories.
65
+
66
+ [tool.ruff.lint]
67
+ select = [
68
+ "E",
69
+ "F",
70
+ "UP",
71
+ "I",
72
+ ] # Enable Flake's "E" and "F" codes by default and "I" for sorting imports
73
+ ignore = ["E501"]
74
+ per-file-ignores = { "__init__.py" = ["F401", "F403"] }
75
+ # Allow unused variables when underscore-prefixed.
76
+ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
77
+
78
+ [tool.ruff.format]
79
+ quote-style = "double"
80
+ indent-style = "space"
81
+ skip-magic-trailing-comma = false
82
+ line-ending = "auto"
83
+
84
+ [tool.pyright]
85
+ venvPath = "."
86
+ venv = ".venv"
87
+
88
+ [tool.pytest.ini_options]
89
+ asyncio_default_fixture_loop_scope = "session"
90
+ asyncio_mode = "auto"
91
+
92
+ # pyproject.toml
93
+ filterwarnings = ["error", "ignore::UserWarning", "ignore::DeprecationWarning"]
File without changes