haiku.rag 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of haiku.rag might be problematic. Click here for more details.
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/PKG-INFO +52 -17
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/README.md +48 -16
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/pyproject.toml +3 -1
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/app.py +24 -15
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/cli.py +1 -1
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/client.py +1 -1
- haiku_rag-0.2.0/src/haiku/rag/config.py +40 -0
- haiku_rag-0.2.0/src/haiku/rag/embeddings/__init__.py +36 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/embeddings/ollama.py +1 -1
- haiku_rag-0.2.0/src/haiku/rag/embeddings/openai.py +20 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/embeddings/voyageai.py +1 -1
- haiku_rag-0.2.0/src/haiku/rag/logging.py +24 -0
- haiku_rag-0.2.0/src/haiku/rag/monitor.py +74 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/tests/test_client.py +2 -2
- haiku_rag-0.2.0/tests/test_embedder.py +128 -0
- haiku_rag-0.2.0/tests/test_monitor.py +99 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/uv.lock +106 -2
- haiku_rag-0.1.0/.claude/settings.local.json +0 -15
- haiku_rag-0.1.0/.github/workflows/build-docs.yml +0 -28
- haiku_rag-0.1.0/CLAUDE.md +0 -35
- haiku_rag-0.1.0/src/haiku/rag/config.py +0 -28
- haiku_rag-0.1.0/src/haiku/rag/embeddings/__init__.py +0 -24
- haiku_rag-0.1.0/tests/test_embedder.py +0 -48
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/.github/FUNDING.yml +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/.github/workflows/build-publish.yml +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/.gitignore +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/.pre-commit-config.yaml +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/.python-version +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/LICENSE +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/__init__.py +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/chunker.py +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/embeddings/base.py +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/mcp.py +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/reader.py +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/store/__init__.py +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/store/engine.py +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/store/models/__init__.py +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/store/models/chunk.py +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/store/models/document.py +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/store/repositories/__init__.py +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/store/repositories/base.py +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/store/repositories/chunk.py +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/store/repositories/document.py +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/utils.py +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/tests/conftest.py +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/tests/test_chunk.py +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/tests/test_chunker.py +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/tests/test_document.py +0 -0
- {haiku_rag-0.1.0 → haiku_rag-0.2.0}/tests/test_search.py +0 -0
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: haiku.rag
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Retrieval Augmented Generation (RAG) with SQLite
|
|
5
5
|
Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
|
|
6
6
|
License: MIT
|
|
7
7
|
License-File: LICENSE
|
|
8
|
+
Keywords: RAG,mcp,ml,sqlite,sqlite-vec
|
|
8
9
|
Classifier: Development Status :: 4 - Beta
|
|
9
10
|
Classifier: Environment :: Console
|
|
10
11
|
Classifier: Intended Audience :: Developers
|
|
@@ -28,6 +29,8 @@ Requires-Dist: sqlite-vec>=0.1.6
|
|
|
28
29
|
Requires-Dist: tiktoken>=0.9.0
|
|
29
30
|
Requires-Dist: typer>=0.16.0
|
|
30
31
|
Requires-Dist: watchfiles>=1.1.0
|
|
32
|
+
Provides-Extra: openai
|
|
33
|
+
Requires-Dist: openai>=1.0.0; extra == 'openai'
|
|
31
34
|
Provides-Extra: voyageai
|
|
32
35
|
Requires-Dist: voyageai>=0.3.2; extra == 'voyageai'
|
|
33
36
|
Description-Content-Type: text/markdown
|
|
@@ -38,9 +41,12 @@ A SQLite-based Retrieval-Augmented Generation (RAG) system built for efficient d
|
|
|
38
41
|
|
|
39
42
|
## Features
|
|
40
43
|
- **Local SQLite**: No need to run additional servers
|
|
41
|
-
- **Support for various embedding providers**: You can use Ollama, VoyageAI or add your own
|
|
44
|
+
- **Support for various embedding providers**: You can use Ollama, VoyageAI, OpenAI or add your own
|
|
42
45
|
- **Hybrid Search**: Vector search using `sqlite-vec` combined with full-text search `FTS5`, using Reciprocal Rank Fusion
|
|
43
46
|
- **Multi-format Support**: Parse 40+ file formats including PDF, DOCX, HTML, Markdown, audio and more. Or add a url!
|
|
47
|
+
- **File monitoring**: When run as a server, automatically indexes your files
|
|
48
|
+
- **MCP server**: Exposes functionality as MCP tools.
|
|
49
|
+
- **Python client**: Call `haiku.rag` from your own Python applications.
|
|
44
50
|
|
|
45
51
|
## Installation
|
|
46
52
|
|
|
@@ -52,24 +58,41 @@ By default Ollama (with the `mxbai-embed-large` model) is used for the embedding
|
|
|
52
58
|
For other providers use:
|
|
53
59
|
|
|
54
60
|
- **VoyageAI**: `uv pip install haiku.rag --extra voyageai`
|
|
61
|
+
- **OpenAI**: `uv pip install haiku.rag --extra openai`
|
|
55
62
|
|
|
56
63
|
## Configuration
|
|
57
64
|
|
|
65
|
+
You can set the directories to monitor using the `MONITOR_DIRECTORIES` environment variable (as comma-separated values):
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
# Monitor one or more directories (comma-separated)
|
|
69
|
+
export MONITOR_DIRECTORIES="/path/to/documents,/another_path/to/documents"
|
|
70
|
+
```
|
|
71
|
+
|
|
58
72
|
If you want to use an alternative embeddings provider (Ollama being the default) you will need to set the provider details through environment variables:
|
|
59
73
|
|
|
60
74
|
By default:
|
|
61
75
|
|
|
62
76
|
```bash
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
77
|
+
EMBEDDINGS_PROVIDER="ollama"
|
|
78
|
+
EMBEDDINGS_MODEL="mxbai-embed-large" # or any other model
|
|
79
|
+
EMBEDDINGS_VECTOR_DIM=1024
|
|
66
80
|
```
|
|
67
81
|
|
|
68
82
|
For VoyageAI:
|
|
69
83
|
```bash
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
84
|
+
EMBEDDINGS_PROVIDER="voyageai"
|
|
85
|
+
EMBEDDINGS_MODEL="voyage-3.5" # or any other model
|
|
86
|
+
EMBEDDINGS_VECTOR_DIM=1024
|
|
87
|
+
VOYAGE_API_KEY="your-api-key"
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
For OpenAI:
|
|
91
|
+
```bash
|
|
92
|
+
EMBEDDINGS_PROVIDER="openai"
|
|
93
|
+
EMBEDDINGS_MODEL="text-embedding-3-small" # or text-embedding-3-large
|
|
94
|
+
EMBEDDINGS_VECTOR_DIM=1536
|
|
95
|
+
OPENAI_API_KEY="your-api-key"
|
|
73
96
|
```
|
|
74
97
|
|
|
75
98
|
## Command Line Interface
|
|
@@ -101,7 +124,7 @@ haiku-rag search "machine learning"
|
|
|
101
124
|
# Search with custom options
|
|
102
125
|
haiku-rag search "python programming" --limit 10 --k 100
|
|
103
126
|
|
|
104
|
-
# Start MCP server (default HTTP transport)
|
|
127
|
+
# Start file monitoring & MCP server (default HTTP transport)
|
|
105
128
|
haiku-rag serve # --stdio for stdio transport or --sse for SSE transport
|
|
106
129
|
```
|
|
107
130
|
|
|
@@ -111,7 +134,26 @@ haiku-rag command -h
|
|
|
111
134
|
```
|
|
112
135
|
to see additional parameters for a command.
|
|
113
136
|
|
|
114
|
-
## MCP
|
|
137
|
+
## File Monitoring & MCP server
|
|
138
|
+
|
|
139
|
+
You can start the server (using Streamable HTTP, stdio or SSE transports) with:
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
# Start with default HTTP transport
|
|
143
|
+
haiku-rag serve # --stdio for stdio transport or --sse for SSE transport
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
You need to have set the `MONITOR_DIRECTORIES` environment variable for monitoring to take place.
|
|
147
|
+
|
|
148
|
+
### File monitoring
|
|
149
|
+
|
|
150
|
+
`haiku.rag` can watch directories for changes and automatically update the document store:
|
|
151
|
+
|
|
152
|
+
- **Startup**: Scan all monitored directories and add any new files
|
|
153
|
+
- **File Added/Modified**: Automatically parse and add/update the document in the database
|
|
154
|
+
- **File Deleted**: Remove the corresponding document from the database
|
|
155
|
+
|
|
156
|
+
### MCP Server
|
|
115
157
|
|
|
116
158
|
`haiku.rag` includes a Model Context Protocol (MCP) server that exposes RAG functionality as tools for AI assistants like Claude Desktop. The MCP server provides the following tools:
|
|
117
159
|
|
|
@@ -123,13 +165,6 @@ to see additional parameters for a command.
|
|
|
123
165
|
- `list_documents` - List all documents with pagination
|
|
124
166
|
- `delete_document` - Delete documents by ID
|
|
125
167
|
|
|
126
|
-
You can start the server (using Streamable HTTP, stdio or SSE transports) with:
|
|
127
|
-
|
|
128
|
-
```bash
|
|
129
|
-
# Start with default HTTP transport
|
|
130
|
-
haiku-rag serve # --stdio for stdio transport or --sse for SSE transport
|
|
131
|
-
```
|
|
132
|
-
|
|
133
168
|
## Using `haiku.rag` from python
|
|
134
169
|
|
|
135
170
|
### Managing documents
|
|
@@ -4,9 +4,12 @@ A SQLite-based Retrieval-Augmented Generation (RAG) system built for efficient d
|
|
|
4
4
|
|
|
5
5
|
## Features
|
|
6
6
|
- **Local SQLite**: No need to run additional servers
|
|
7
|
-
- **Support for various embedding providers**: You can use Ollama, VoyageAI or add your own
|
|
7
|
+
- **Support for various embedding providers**: You can use Ollama, VoyageAI, OpenAI or add your own
|
|
8
8
|
- **Hybrid Search**: Vector search using `sqlite-vec` combined with full-text search `FTS5`, using Reciprocal Rank Fusion
|
|
9
9
|
- **Multi-format Support**: Parse 40+ file formats including PDF, DOCX, HTML, Markdown, audio and more. Or add a url!
|
|
10
|
+
- **File monitoring**: When run as a server, automatically indexes your files
|
|
11
|
+
- **MCP server**: Exposes functionality as MCP tools.
|
|
12
|
+
- **Python client**: Call `haiku.rag` from your own Python applications.
|
|
10
13
|
|
|
11
14
|
## Installation
|
|
12
15
|
|
|
@@ -18,24 +21,41 @@ By default Ollama (with the `mxbai-embed-large` model) is used for the embedding
|
|
|
18
21
|
For other providers use:
|
|
19
22
|
|
|
20
23
|
- **VoyageAI**: `uv pip install haiku.rag --extra voyageai`
|
|
24
|
+
- **OpenAI**: `uv pip install haiku.rag --extra openai`
|
|
21
25
|
|
|
22
26
|
## Configuration
|
|
23
27
|
|
|
28
|
+
You can set the directories to monitor using the `MONITOR_DIRECTORIES` environment variable (as comma-separated values):
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
# Monitor one or more directories (comma-separated)
|
|
32
|
+
export MONITOR_DIRECTORIES="/path/to/documents,/another_path/to/documents"
|
|
33
|
+
```
|
|
34
|
+
|
|
24
35
|
If you want to use an alternative embeddings provider (Ollama being the default) you will need to set the provider details through environment variables:
|
|
25
36
|
|
|
26
37
|
By default:
|
|
27
38
|
|
|
28
39
|
```bash
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
40
|
+
EMBEDDINGS_PROVIDER="ollama"
|
|
41
|
+
EMBEDDINGS_MODEL="mxbai-embed-large" # or any other model
|
|
42
|
+
EMBEDDINGS_VECTOR_DIM=1024
|
|
32
43
|
```
|
|
33
44
|
|
|
34
45
|
For VoyageAI:
|
|
35
46
|
```bash
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
47
|
+
EMBEDDINGS_PROVIDER="voyageai"
|
|
48
|
+
EMBEDDINGS_MODEL="voyage-3.5" # or any other model
|
|
49
|
+
EMBEDDINGS_VECTOR_DIM=1024
|
|
50
|
+
VOYAGE_API_KEY="your-api-key"
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
For OpenAI:
|
|
54
|
+
```bash
|
|
55
|
+
EMBEDDINGS_PROVIDER="openai"
|
|
56
|
+
EMBEDDINGS_MODEL="text-embedding-3-small" # or text-embedding-3-large
|
|
57
|
+
EMBEDDINGS_VECTOR_DIM=1536
|
|
58
|
+
OPENAI_API_KEY="your-api-key"
|
|
39
59
|
```
|
|
40
60
|
|
|
41
61
|
## Command Line Interface
|
|
@@ -67,7 +87,7 @@ haiku-rag search "machine learning"
|
|
|
67
87
|
# Search with custom options
|
|
68
88
|
haiku-rag search "python programming" --limit 10 --k 100
|
|
69
89
|
|
|
70
|
-
# Start MCP server (default HTTP transport)
|
|
90
|
+
# Start file monitoring & MCP server (default HTTP transport)
|
|
71
91
|
haiku-rag serve # --stdio for stdio transport or --sse for SSE transport
|
|
72
92
|
```
|
|
73
93
|
|
|
@@ -77,7 +97,26 @@ haiku-rag command -h
|
|
|
77
97
|
```
|
|
78
98
|
to see additional parameters for a command.
|
|
79
99
|
|
|
80
|
-
## MCP
|
|
100
|
+
## File Monitoring & MCP server
|
|
101
|
+
|
|
102
|
+
You can start the server (using Streamable HTTP, stdio or SSE transports) with:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
# Start with default HTTP transport
|
|
106
|
+
haiku-rag serve # --stdio for stdio transport or --sse for SSE transport
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
You need to have set the `MONITOR_DIRECTORIES` environment variable for monitoring to take place.
|
|
110
|
+
|
|
111
|
+
### File monitoring
|
|
112
|
+
|
|
113
|
+
`haiku.rag` can watch directories for changes and automatically update the document store:
|
|
114
|
+
|
|
115
|
+
- **Startup**: Scan all monitored directories and add any new files
|
|
116
|
+
- **File Added/Modified**: Automatically parse and add/update the document in the database
|
|
117
|
+
- **File Deleted**: Remove the corresponding document from the database
|
|
118
|
+
|
|
119
|
+
### MCP Server
|
|
81
120
|
|
|
82
121
|
`haiku.rag` includes a Model Context Protocol (MCP) server that exposes RAG functionality as tools for AI assistants like Claude Desktop. The MCP server provides the following tools:
|
|
83
122
|
|
|
@@ -89,13 +128,6 @@ to see additional parameters for a command.
|
|
|
89
128
|
- `list_documents` - List all documents with pagination
|
|
90
129
|
- `delete_document` - Delete documents by ID
|
|
91
130
|
|
|
92
|
-
You can start the server (using Streamable HTTP, stdio or SSE transports) with:
|
|
93
|
-
|
|
94
|
-
```bash
|
|
95
|
-
# Start with default HTTP transport
|
|
96
|
-
haiku-rag serve # --stdio for stdio transport or --sse for SSE transport
|
|
97
|
-
```
|
|
98
|
-
|
|
99
131
|
## Using `haiku.rag` from python
|
|
100
132
|
|
|
101
133
|
### Managing documents
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "haiku.rag"
|
|
3
|
-
version = "0.1.0"
|
|
3
|
+
version = "0.2.0"
|
|
4
4
|
description = "Retrieval Augmented Generation (RAG) with SQLite"
|
|
5
5
|
authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
|
|
6
6
|
license = { text = "MIT" }
|
|
7
7
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
8
8
|
requires-python = ">=3.10"
|
|
9
|
+
keywords = ["RAG", "sqlite", "sqlite-vec", "ml", "mcp"]
|
|
9
10
|
classifiers = [
|
|
10
11
|
"Development Status :: 4 - Beta",
|
|
11
12
|
"Environment :: Console",
|
|
@@ -36,6 +37,7 @@ dependencies = [
|
|
|
36
37
|
|
|
37
38
|
[project.optional-dependencies]
|
|
38
39
|
voyageai = ["voyageai>=0.3.2"]
|
|
40
|
+
openai = ["openai>=1.0.0"]
|
|
39
41
|
|
|
40
42
|
[project.scripts]
|
|
41
43
|
haiku-rag = "haiku.rag.cli:cli"
|
|
@@ -1,9 +1,13 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
from pathlib import Path
|
|
2
3
|
|
|
3
4
|
from rich.console import Console
|
|
4
5
|
from rich.markdown import Markdown
|
|
5
6
|
|
|
6
7
|
from haiku.rag.client import HaikuRAG
|
|
8
|
+
from haiku.rag.config import Config
|
|
9
|
+
from haiku.rag.mcp import create_mcp_server
|
|
10
|
+
from haiku.rag.monitor import FileWatcher
|
|
7
11
|
from haiku.rag.store.models.chunk import Chunk
|
|
8
12
|
from haiku.rag.store.models.document import Document
|
|
9
13
|
|
|
@@ -88,20 +92,25 @@ class HaikuRAGApp:
|
|
|
88
92
|
self.console.print(content)
|
|
89
93
|
self.console.rule()
|
|
90
94
|
|
|
91
|
-
def serve(self, transport: str | None = None):
|
|
95
|
+
async def serve(self, transport: str | None = None):
|
|
92
96
|
"""Start the MCP server."""
|
|
93
|
-
|
|
97
|
+
async with HaikuRAG(self.db_path) as client:
|
|
98
|
+
monitor = FileWatcher(paths=Config.MONITOR_DIRECTORIES, client=client)
|
|
99
|
+
monitor_task = asyncio.create_task(monitor.observe())
|
|
100
|
+
server = create_mcp_server(self.db_path)
|
|
94
101
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
102
|
+
try:
|
|
103
|
+
if transport == "stdio":
|
|
104
|
+
await server.run_stdio_async()
|
|
105
|
+
elif transport == "sse":
|
|
106
|
+
await server.run_sse_async("sse")
|
|
107
|
+
else:
|
|
108
|
+
await server.run_http_async("streamable-http")
|
|
109
|
+
except KeyboardInterrupt:
|
|
110
|
+
pass
|
|
111
|
+
finally:
|
|
112
|
+
monitor_task.cancel()
|
|
113
|
+
try:
|
|
114
|
+
await monitor_task
|
|
115
|
+
except asyncio.CancelledError:
|
|
116
|
+
pass
|
|
@@ -88,7 +88,7 @@ class HaikuRAG:
|
|
|
88
88
|
if not source_path.exists():
|
|
89
89
|
raise ValueError(f"File does not exist: {source_path}")
|
|
90
90
|
|
|
91
|
-
uri =
|
|
91
|
+
uri = source_path.as_uri()
|
|
92
92
|
md5_hash = hashlib.md5(source_path.read_bytes()).hexdigest()
|
|
93
93
|
|
|
94
94
|
# Check if document already exists
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from dotenv import load_dotenv
|
|
5
|
+
from pydantic import BaseModel, field_validator
|
|
6
|
+
|
|
7
|
+
from haiku.rag.utils import get_default_data_dir
|
|
8
|
+
|
|
9
|
+
load_dotenv()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class AppConfig(BaseModel):
|
|
13
|
+
ENV: str = "development"
|
|
14
|
+
|
|
15
|
+
DEFAULT_DATA_DIR: Path = get_default_data_dir()
|
|
16
|
+
MONITOR_DIRECTORIES: list[Path] = []
|
|
17
|
+
|
|
18
|
+
EMBEDDINGS_PROVIDER: str = "ollama"
|
|
19
|
+
EMBEDDINGS_MODEL: str = "mxbai-embed-large"
|
|
20
|
+
EMBEDDINGS_VECTOR_DIM: int = 1024
|
|
21
|
+
|
|
22
|
+
CHUNK_SIZE: int = 256
|
|
23
|
+
CHUNK_OVERLAP: int = 32
|
|
24
|
+
|
|
25
|
+
OLLAMA_BASE_URL: str = "http://localhost:11434"
|
|
26
|
+
|
|
27
|
+
@field_validator("MONITOR_DIRECTORIES", mode="before")
|
|
28
|
+
@classmethod
|
|
29
|
+
def parse_monitor_directories(cls, v):
|
|
30
|
+
if isinstance(v, str):
|
|
31
|
+
if not v.strip():
|
|
32
|
+
return []
|
|
33
|
+
return [
|
|
34
|
+
Path(path.strip()).absolute() for path in v.split(",") if path.strip()
|
|
35
|
+
]
|
|
36
|
+
return v
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Expose Config object for app to import
|
|
40
|
+
Config = AppConfig.model_validate(os.environ)
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from haiku.rag.config import Config
|
|
2
|
+
from haiku.rag.embeddings.base import EmbedderBase
|
|
3
|
+
from haiku.rag.embeddings.ollama import Embedder as OllamaEmbedder
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def get_embedder() -> EmbedderBase:
|
|
7
|
+
"""
|
|
8
|
+
Factory function to get the appropriate embedder based on the configuration.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
if Config.EMBEDDINGS_PROVIDER == "ollama":
|
|
12
|
+
return OllamaEmbedder(Config.EMBEDDINGS_MODEL, Config.EMBEDDINGS_VECTOR_DIM)
|
|
13
|
+
|
|
14
|
+
if Config.EMBEDDINGS_PROVIDER == "voyageai":
|
|
15
|
+
try:
|
|
16
|
+
from haiku.rag.embeddings.voyageai import Embedder as VoyageAIEmbedder
|
|
17
|
+
except ImportError:
|
|
18
|
+
raise ImportError(
|
|
19
|
+
"VoyageAI embedder requires the 'voyageai' package. "
|
|
20
|
+
"Please install haiku.rag with the 'voyageai' extra:"
|
|
21
|
+
"uv pip install haiku.rag --extra voyageai"
|
|
22
|
+
)
|
|
23
|
+
return VoyageAIEmbedder(Config.EMBEDDINGS_MODEL, Config.EMBEDDINGS_VECTOR_DIM)
|
|
24
|
+
|
|
25
|
+
if Config.EMBEDDINGS_PROVIDER == "openai":
|
|
26
|
+
try:
|
|
27
|
+
from haiku.rag.embeddings.openai import Embedder as OpenAIEmbedder
|
|
28
|
+
except ImportError:
|
|
29
|
+
raise ImportError(
|
|
30
|
+
"OpenAI embedder requires the 'openai' package. "
|
|
31
|
+
"Please install haiku.rag with the 'openai' extra:"
|
|
32
|
+
"uv pip install haiku.rag --extra openai"
|
|
33
|
+
)
|
|
34
|
+
return OpenAIEmbedder(Config.EMBEDDINGS_MODEL, Config.EMBEDDINGS_VECTOR_DIM)
|
|
35
|
+
|
|
36
|
+
raise ValueError(f"Unsupported embedding provider: {Config.EMBEDDINGS_PROVIDER}")
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
try:
|
|
2
|
+
from openai import AsyncOpenAI
|
|
3
|
+
|
|
4
|
+
from haiku.rag.config import Config
|
|
5
|
+
from haiku.rag.embeddings.base import EmbedderBase
|
|
6
|
+
|
|
7
|
+
class Embedder(EmbedderBase):
|
|
8
|
+
_model: str = Config.EMBEDDINGS_MODEL
|
|
9
|
+
_vector_dim: int = 1536
|
|
10
|
+
|
|
11
|
+
async def embed(self, text: str) -> list[float]:
|
|
12
|
+
client = AsyncOpenAI()
|
|
13
|
+
response = await client.embeddings.create(
|
|
14
|
+
model=self._model,
|
|
15
|
+
input=text,
|
|
16
|
+
)
|
|
17
|
+
return response.data[0].embedding
|
|
18
|
+
|
|
19
|
+
except ImportError:
|
|
20
|
+
pass
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from rich.console import Console
|
|
4
|
+
from rich.logging import RichHandler
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def get_logger() -> logging.Logger:
|
|
8
|
+
logger = logging.getLogger("haiku.rag")
|
|
9
|
+
|
|
10
|
+
handler = RichHandler(
|
|
11
|
+
console=Console(stderr=True),
|
|
12
|
+
rich_tracebacks=True,
|
|
13
|
+
)
|
|
14
|
+
formatter = logging.Formatter("%(message)s")
|
|
15
|
+
handler.setFormatter(formatter)
|
|
16
|
+
|
|
17
|
+
logger.setLevel("INFO")
|
|
18
|
+
|
|
19
|
+
# Remove any existing handlers to avoid duplicates on reconfiguration
|
|
20
|
+
for hdlr in logger.handlers[:]:
|
|
21
|
+
logger.removeHandler(hdlr)
|
|
22
|
+
|
|
23
|
+
logger.addHandler(handler)
|
|
24
|
+
return logger
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from watchfiles import Change, DefaultFilter, awatch
|
|
4
|
+
|
|
5
|
+
from haiku.rag.client import HaikuRAG
|
|
6
|
+
from haiku.rag.logging import get_logger
|
|
7
|
+
from haiku.rag.reader import FileReader
|
|
8
|
+
from haiku.rag.store.models.document import Document
|
|
9
|
+
|
|
10
|
+
logger = get_logger()
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class FileFilter(DefaultFilter):
|
|
14
|
+
def __init__(self, *, ignore_paths: list[Path] | None = None) -> None:
|
|
15
|
+
self.extensions = tuple(FileReader.extensions)
|
|
16
|
+
super().__init__(ignore_paths=ignore_paths)
|
|
17
|
+
|
|
18
|
+
def __call__(self, change: "Change", path: str) -> bool:
|
|
19
|
+
return path.endswith(self.extensions) and super().__call__(change, path)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class FileWatcher:
|
|
23
|
+
def __init__(self, paths: list[Path], client: HaikuRAG):
|
|
24
|
+
self.paths = paths
|
|
25
|
+
self.client = client
|
|
26
|
+
|
|
27
|
+
async def observe(self):
|
|
28
|
+
logger.info(f"Watching files in {self.paths}")
|
|
29
|
+
filter = FileFilter()
|
|
30
|
+
await self.refresh()
|
|
31
|
+
|
|
32
|
+
async for changes in awatch(*self.paths, watch_filter=filter):
|
|
33
|
+
await self.handler(changes)
|
|
34
|
+
|
|
35
|
+
async def handler(self, changes: set[tuple[Change, str]]):
|
|
36
|
+
for change, path in changes:
|
|
37
|
+
if change == Change.added or change == Change.modified:
|
|
38
|
+
await self._upsert_document(Path(path))
|
|
39
|
+
elif change == Change.deleted:
|
|
40
|
+
await self._delete_document(Path(path))
|
|
41
|
+
|
|
42
|
+
async def refresh(self):
|
|
43
|
+
for path in self.paths:
|
|
44
|
+
for f in Path(path).rglob("**/*"):
|
|
45
|
+
if f.is_file() and f.suffix in FileReader.extensions:
|
|
46
|
+
await self._upsert_document(f)
|
|
47
|
+
|
|
48
|
+
async def _upsert_document(self, file: Path) -> Document | None:
|
|
49
|
+
try:
|
|
50
|
+
uri = file.as_uri()
|
|
51
|
+
existing_doc = await self.client.get_document_by_uri(uri)
|
|
52
|
+
print(uri)
|
|
53
|
+
if existing_doc:
|
|
54
|
+
doc = await self.client.create_document_from_source(str(file))
|
|
55
|
+
logger.info(f"Updated document {existing_doc.id} from {file}")
|
|
56
|
+
return doc
|
|
57
|
+
else:
|
|
58
|
+
doc = await self.client.create_document_from_source(str(file))
|
|
59
|
+
logger.info(f"Created new document {doc.id} from {file}")
|
|
60
|
+
return doc
|
|
61
|
+
except Exception as e:
|
|
62
|
+
logger.error(f"Failed to upsert document from {file}: {e}")
|
|
63
|
+
return None
|
|
64
|
+
|
|
65
|
+
async def _delete_document(self, file: Path):
|
|
66
|
+
try:
|
|
67
|
+
uri = file.as_uri()
|
|
68
|
+
existing_doc = await self.client.get_document_by_uri(uri)
|
|
69
|
+
|
|
70
|
+
if existing_doc and existing_doc.id:
|
|
71
|
+
await self.client.delete_document(existing_doc.id)
|
|
72
|
+
logger.info(f"Deleted document {existing_doc.id} for {file}")
|
|
73
|
+
except Exception as e:
|
|
74
|
+
logger.error(f"Failed to delete document for {file}: {e}")
|
|
@@ -98,7 +98,7 @@ async def test_client_create_document_from_source():
|
|
|
98
98
|
|
|
99
99
|
assert doc.id is not None
|
|
100
100
|
assert doc.content == test_content
|
|
101
|
-
assert doc.uri ==
|
|
101
|
+
assert doc.uri == temp_path.as_uri()
|
|
102
102
|
assert doc.metadata["source_type"] == "file"
|
|
103
103
|
assert "contentType" in doc.metadata
|
|
104
104
|
assert "md5" in doc.metadata
|
|
@@ -109,7 +109,7 @@ async def test_client_create_document_from_source():
|
|
|
109
109
|
|
|
110
110
|
assert doc2.id is not None
|
|
111
111
|
assert doc2.content == test_content
|
|
112
|
-
assert doc2.uri ==
|
|
112
|
+
assert doc2.uri == temp_path.as_uri()
|
|
113
113
|
assert "contentType" in doc2.metadata
|
|
114
114
|
assert "md5" in doc2.metadata
|
|
115
115
|
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pytest
|
|
3
|
+
|
|
4
|
+
from haiku.rag.embeddings import get_embedder
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@pytest.mark.asyncio
|
|
8
|
+
async def test_embedder():
|
|
9
|
+
embedder = get_embedder()
|
|
10
|
+
embedding = await embedder.embed("hello world")
|
|
11
|
+
assert len(embedding) == embedder._vector_dim
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@pytest.mark.asyncio
|
|
15
|
+
async def test_similarity():
|
|
16
|
+
embedder = get_embedder()
|
|
17
|
+
phrases = [
|
|
18
|
+
"I enjoy eating great food.",
|
|
19
|
+
"Python is my favorite programming language.",
|
|
20
|
+
"I love to travel and see new places.",
|
|
21
|
+
]
|
|
22
|
+
embeddings = [np.array(await embedder.embed(phrase)) for phrase in phrases]
|
|
23
|
+
|
|
24
|
+
# Calculate cosine similarity
|
|
25
|
+
def similarities(embeddings, test_embedding):
|
|
26
|
+
return [
|
|
27
|
+
np.dot(embedding, test_embedding)
|
|
28
|
+
/ (np.linalg.norm(embedding) * np.linalg.norm(test_embedding))
|
|
29
|
+
for embedding in embeddings
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
test_phrase = "I am going for a camping trip."
|
|
33
|
+
test_embedding = await embedder.embed(test_phrase)
|
|
34
|
+
|
|
35
|
+
sims = similarities(embeddings, test_embedding)
|
|
36
|
+
assert max(sims) == sims[2]
|
|
37
|
+
|
|
38
|
+
test_phrase = "When is dinner ready?"
|
|
39
|
+
test_embedding = await embedder.embed(test_phrase)
|
|
40
|
+
|
|
41
|
+
sims = similarities(embeddings, test_embedding)
|
|
42
|
+
assert max(sims) == sims[0]
|
|
43
|
+
|
|
44
|
+
test_phrase = "I work as a software developer."
|
|
45
|
+
test_embedding = await embedder.embed(test_phrase)
|
|
46
|
+
|
|
47
|
+
sims = similarities(embeddings, test_embedding)
|
|
48
|
+
assert max(sims) == sims[1]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@pytest.mark.asyncio
|
|
52
|
+
async def test_openai_embedder(monkeypatch):
|
|
53
|
+
monkeypatch.setenv("EMBEDDINGS_PROVIDER", "openai")
|
|
54
|
+
monkeypatch.setenv("EMBEDDINGS_MODEL", "text-embedding-3-small")
|
|
55
|
+
|
|
56
|
+
try:
|
|
57
|
+
from haiku.rag.embeddings.openai import Embedder as OpenAIEmbedder
|
|
58
|
+
|
|
59
|
+
embedder = OpenAIEmbedder("text-embedding-3-small", 1536)
|
|
60
|
+
|
|
61
|
+
# Mock the OpenAI client
|
|
62
|
+
class MockEmbeddingData:
|
|
63
|
+
def __init__(self, embedding):
|
|
64
|
+
self.embedding = embedding
|
|
65
|
+
|
|
66
|
+
class MockResponse:
|
|
67
|
+
def __init__(self, embedding):
|
|
68
|
+
self.data = [MockEmbeddingData(embedding)]
|
|
69
|
+
|
|
70
|
+
class MockAsyncOpenAI:
|
|
71
|
+
class MockEmbeddings:
|
|
72
|
+
async def create(self, model, input):
|
|
73
|
+
return MockResponse([0.1] * 1536)
|
|
74
|
+
|
|
75
|
+
def __init__(self):
|
|
76
|
+
self.embeddings = self.MockEmbeddings()
|
|
77
|
+
|
|
78
|
+
# Patch the AsyncOpenAI import
|
|
79
|
+
import haiku.rag.embeddings.openai
|
|
80
|
+
|
|
81
|
+
original_client = haiku.rag.embeddings.openai.AsyncOpenAI
|
|
82
|
+
haiku.rag.embeddings.openai.AsyncOpenAI = MockAsyncOpenAI
|
|
83
|
+
|
|
84
|
+
try:
|
|
85
|
+
embedding = await embedder.embed("test text")
|
|
86
|
+
assert len(embedding) == 1536
|
|
87
|
+
assert all(isinstance(x, float) for x in embedding)
|
|
88
|
+
finally:
|
|
89
|
+
haiku.rag.embeddings.openai.AsyncOpenAI = original_client
|
|
90
|
+
|
|
91
|
+
except ImportError:
|
|
92
|
+
pytest.skip("OpenAI package not installed")
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@pytest.mark.asyncio
|
|
96
|
+
async def test_voyageai_embedder(monkeypatch):
|
|
97
|
+
monkeypatch.setenv("EMBEDDINGS_PROVIDER", "voyageai")
|
|
98
|
+
monkeypatch.setenv("EMBEDDINGS_MODEL", "voyage-3.5")
|
|
99
|
+
|
|
100
|
+
try:
|
|
101
|
+
from haiku.rag.embeddings.voyageai import Embedder as VoyageAIEmbedder
|
|
102
|
+
|
|
103
|
+
embedder = VoyageAIEmbedder("voyage-3.5", 1024)
|
|
104
|
+
|
|
105
|
+
# Mock the VoyageAI client
|
|
106
|
+
class MockEmbeddings:
|
|
107
|
+
def __init__(self, embeddings):
|
|
108
|
+
self.embeddings = embeddings
|
|
109
|
+
|
|
110
|
+
class MockClient:
|
|
111
|
+
def embed(self, texts, model, output_dtype):
|
|
112
|
+
return MockEmbeddings([[0.1] * 1024])
|
|
113
|
+
|
|
114
|
+
# Patch the Client import
|
|
115
|
+
import haiku.rag.embeddings.voyageai
|
|
116
|
+
|
|
117
|
+
original_client = haiku.rag.embeddings.voyageai.Client
|
|
118
|
+
haiku.rag.embeddings.voyageai.Client = MockClient
|
|
119
|
+
|
|
120
|
+
try:
|
|
121
|
+
embedding = await embedder.embed("test text")
|
|
122
|
+
assert len(embedding) == 1024
|
|
123
|
+
assert all(isinstance(x, float) for x in embedding)
|
|
124
|
+
finally:
|
|
125
|
+
haiku.rag.embeddings.voyageai.Client = original_client
|
|
126
|
+
|
|
127
|
+
except ImportError:
|
|
128
|
+
pytest.skip("VoyageAI package not installed")
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import tempfile
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from unittest.mock import AsyncMock
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from haiku.rag.client import HaikuRAG
|
|
8
|
+
from haiku.rag.monitor import FileWatcher
|
|
9
|
+
from haiku.rag.store.models.document import Document
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@pytest.mark.asyncio
|
|
13
|
+
async def test_file_watcher_upsert_document():
|
|
14
|
+
"""Test FileWatcher._upsert_document method."""
|
|
15
|
+
|
|
16
|
+
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
|
|
17
|
+
f.write("Test content for file watcher")
|
|
18
|
+
temp_path = Path(f.name)
|
|
19
|
+
|
|
20
|
+
try:
|
|
21
|
+
mock_client = AsyncMock(spec=HaikuRAG)
|
|
22
|
+
mock_doc = Document(id=1, content="Test content", uri=temp_path.as_uri())
|
|
23
|
+
mock_client.create_document_from_source.return_value = mock_doc
|
|
24
|
+
mock_client.get_document_by_uri.return_value = None # No existing document
|
|
25
|
+
|
|
26
|
+
watcher = FileWatcher(paths=[temp_path.parent], client=mock_client)
|
|
27
|
+
|
|
28
|
+
result = await watcher._upsert_document(temp_path)
|
|
29
|
+
|
|
30
|
+
assert result is not None
|
|
31
|
+
assert result.id == 1
|
|
32
|
+
mock_client.get_document_by_uri.assert_called_once_with(temp_path.as_uri())
|
|
33
|
+
mock_client.create_document_from_source.assert_called_once_with(str(temp_path))
|
|
34
|
+
|
|
35
|
+
finally:
|
|
36
|
+
temp_path.unlink(missing_ok=True)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@pytest.mark.asyncio
|
|
40
|
+
async def test_file_watcher_upsert_existing_document():
|
|
41
|
+
"""Test FileWatcher._upsert_document with existing document."""
|
|
42
|
+
|
|
43
|
+
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
|
|
44
|
+
f.write("Test content for file watcher")
|
|
45
|
+
temp_path = Path(f.name)
|
|
46
|
+
|
|
47
|
+
try:
|
|
48
|
+
mock_client = AsyncMock(spec=HaikuRAG)
|
|
49
|
+
existing_doc = Document(id=1, content="Old content", uri=temp_path.as_uri())
|
|
50
|
+
updated_doc = Document(id=1, content="Updated content", uri=temp_path.as_uri())
|
|
51
|
+
|
|
52
|
+
mock_client.get_document_by_uri.return_value = existing_doc
|
|
53
|
+
mock_client.create_document_from_source.return_value = updated_doc
|
|
54
|
+
|
|
55
|
+
watcher = FileWatcher(paths=[temp_path.parent], client=mock_client)
|
|
56
|
+
|
|
57
|
+
result = await watcher._upsert_document(temp_path)
|
|
58
|
+
|
|
59
|
+
assert result is not None
|
|
60
|
+
assert result.content == "Updated content"
|
|
61
|
+
mock_client.get_document_by_uri.assert_called_once_with(temp_path.as_uri())
|
|
62
|
+
mock_client.create_document_from_source.assert_called_once_with(str(temp_path))
|
|
63
|
+
|
|
64
|
+
finally:
|
|
65
|
+
temp_path.unlink(missing_ok=True)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@pytest.mark.asyncio
|
|
69
|
+
async def test_file_watcher_delete_document():
|
|
70
|
+
"""Test FileWatcher._delete_document method."""
|
|
71
|
+
temp_path = Path("/tmp/test_file.txt")
|
|
72
|
+
|
|
73
|
+
mock_client = AsyncMock(spec=HaikuRAG)
|
|
74
|
+
existing_doc = Document(id=1, content="Content to delete", uri=temp_path.as_uri())
|
|
75
|
+
mock_client.get_document_by_uri.return_value = existing_doc
|
|
76
|
+
mock_client.delete_document.return_value = True
|
|
77
|
+
|
|
78
|
+
watcher = FileWatcher(paths=[temp_path.parent], client=mock_client)
|
|
79
|
+
|
|
80
|
+
await watcher._delete_document(temp_path)
|
|
81
|
+
|
|
82
|
+
mock_client.get_document_by_uri.assert_called_once_with(temp_path.as_uri())
|
|
83
|
+
mock_client.delete_document.assert_called_once_with(1)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@pytest.mark.asyncio
|
|
87
|
+
async def test_file_watcher_delete_nonexistent_document():
|
|
88
|
+
"""Test FileWatcher._delete_document with non-existent document."""
|
|
89
|
+
temp_path = Path("/tmp/nonexistent_file.txt")
|
|
90
|
+
|
|
91
|
+
mock_client = AsyncMock(spec=HaikuRAG)
|
|
92
|
+
mock_client.get_document_by_uri.return_value = None
|
|
93
|
+
|
|
94
|
+
watcher = FileWatcher(paths=[temp_path.parent], client=mock_client)
|
|
95
|
+
|
|
96
|
+
await watcher._delete_document(temp_path)
|
|
97
|
+
|
|
98
|
+
mock_client.get_document_by_uri.assert_called_once_with(temp_path.as_uri())
|
|
99
|
+
mock_client.delete_document.assert_not_called()
|
|
@@ -577,6 +577,15 @@ wheels = [
|
|
|
577
577
|
{ url = "https://files.pythonhosted.org/packages/91/a1/cf2472db20f7ce4a6be1253a81cfdf85ad9c7885ffbed7047fb72c24cf87/distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87", size = 468973, upload-time = "2024-10-09T18:35:44.272Z" },
|
|
578
578
|
]
|
|
579
579
|
|
|
580
|
+
[[package]]
|
|
581
|
+
name = "distro"
|
|
582
|
+
version = "1.9.0"
|
|
583
|
+
source = { registry = "https://pypi.org/simple" }
|
|
584
|
+
sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722, upload-time = "2023-12-24T09:54:32.31Z" }
|
|
585
|
+
wheels = [
|
|
586
|
+
{ url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" },
|
|
587
|
+
]
|
|
588
|
+
|
|
580
589
|
[[package]]
|
|
581
590
|
name = "et-xmlfile"
|
|
582
591
|
version = "2.0.0"
|
|
@@ -754,7 +763,7 @@ wheels = [
|
|
|
754
763
|
|
|
755
764
|
[[package]]
|
|
756
765
|
name = "haiku-rag"
|
|
757
|
-
version = "0.1.0"
|
|
766
|
+
version = "0.2.0"
|
|
758
767
|
source = { editable = "." }
|
|
759
768
|
dependencies = [
|
|
760
769
|
{ name = "fastmcp" },
|
|
@@ -771,6 +780,9 @@ dependencies = [
|
|
|
771
780
|
]
|
|
772
781
|
|
|
773
782
|
[package.optional-dependencies]
|
|
783
|
+
openai = [
|
|
784
|
+
{ name = "openai" },
|
|
785
|
+
]
|
|
774
786
|
voyageai = [
|
|
775
787
|
{ name = "voyageai" },
|
|
776
788
|
]
|
|
@@ -792,6 +804,7 @@ requires-dist = [
|
|
|
792
804
|
{ name = "httpx", specifier = ">=0.28.1" },
|
|
793
805
|
{ name = "markitdown", extras = ["audio-transcription", "docx", "pdf", "pptx", "xlsx"], specifier = ">=0.1.2" },
|
|
794
806
|
{ name = "ollama", specifier = ">=0.5.1" },
|
|
807
|
+
{ name = "openai", marker = "extra == 'openai'", specifier = ">=1.0.0" },
|
|
795
808
|
{ name = "pydantic", specifier = ">=2.11.7" },
|
|
796
809
|
{ name = "python-dotenv", specifier = ">=1.1.0" },
|
|
797
810
|
{ name = "rich", specifier = ">=14.0.0" },
|
|
@@ -801,7 +814,7 @@ requires-dist = [
|
|
|
801
814
|
{ name = "voyageai", marker = "extra == 'voyageai'", specifier = ">=0.3.2" },
|
|
802
815
|
{ name = "watchfiles", specifier = ">=1.1.0" },
|
|
803
816
|
]
|
|
804
|
-
provides-extras = ["voyageai"]
|
|
817
|
+
provides-extras = ["voyageai", "openai"]
|
|
805
818
|
|
|
806
819
|
[package.metadata.requires-dev]
|
|
807
820
|
dev = [
|
|
@@ -924,6 +937,78 @@ wheels = [
|
|
|
924
937
|
{ url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" },
|
|
925
938
|
]
|
|
926
939
|
|
|
940
|
+
[[package]]
|
|
941
|
+
name = "jiter"
|
|
942
|
+
version = "0.10.0"
|
|
943
|
+
source = { registry = "https://pypi.org/simple" }
|
|
944
|
+
sdist = { url = "https://files.pythonhosted.org/packages/ee/9d/ae7ddb4b8ab3fb1b51faf4deb36cb48a4fbbd7cb36bad6a5fca4741306f7/jiter-0.10.0.tar.gz", hash = "sha256:07a7142c38aacc85194391108dc91b5b57093c978a9932bd86a36862759d9500", size = 162759, upload-time = "2025-05-18T19:04:59.73Z" }
|
|
945
|
+
wheels = [
|
|
946
|
+
{ url = "https://files.pythonhosted.org/packages/be/7e/4011b5c77bec97cb2b572f566220364e3e21b51c48c5bd9c4a9c26b41b67/jiter-0.10.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:cd2fb72b02478f06a900a5782de2ef47e0396b3e1f7d5aba30daeb1fce66f303", size = 317215, upload-time = "2025-05-18T19:03:04.303Z" },
|
|
947
|
+
{ url = "https://files.pythonhosted.org/packages/8a/4f/144c1b57c39692efc7ea7d8e247acf28e47d0912800b34d0ad815f6b2824/jiter-0.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:32bb468e3af278f095d3fa5b90314728a6916d89ba3d0ffb726dd9bf7367285e", size = 322814, upload-time = "2025-05-18T19:03:06.433Z" },
|
|
948
|
+
{ url = "https://files.pythonhosted.org/packages/63/1f/db977336d332a9406c0b1f0b82be6f71f72526a806cbb2281baf201d38e3/jiter-0.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa8b3e0068c26ddedc7abc6fac37da2d0af16b921e288a5a613f4b86f050354f", size = 345237, upload-time = "2025-05-18T19:03:07.833Z" },
|
|
949
|
+
{ url = "https://files.pythonhosted.org/packages/d7/1c/aa30a4a775e8a672ad7f21532bdbfb269f0706b39c6ff14e1f86bdd9e5ff/jiter-0.10.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:286299b74cc49e25cd42eea19b72aa82c515d2f2ee12d11392c56d8701f52224", size = 370999, upload-time = "2025-05-18T19:03:09.338Z" },
|
|
950
|
+
{ url = "https://files.pythonhosted.org/packages/35/df/f8257abc4207830cb18880781b5f5b716bad5b2a22fb4330cfd357407c5b/jiter-0.10.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6ed5649ceeaeffc28d87fb012d25a4cd356dcd53eff5acff1f0466b831dda2a7", size = 491109, upload-time = "2025-05-18T19:03:11.13Z" },
|
|
951
|
+
{ url = "https://files.pythonhosted.org/packages/06/76/9e1516fd7b4278aa13a2cc7f159e56befbea9aa65c71586305e7afa8b0b3/jiter-0.10.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2ab0051160cb758a70716448908ef14ad476c3774bd03ddce075f3c1f90a3d6", size = 388608, upload-time = "2025-05-18T19:03:12.911Z" },
|
|
952
|
+
{ url = "https://files.pythonhosted.org/packages/6d/64/67750672b4354ca20ca18d3d1ccf2c62a072e8a2d452ac3cf8ced73571ef/jiter-0.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03997d2f37f6b67d2f5c475da4412be584e1cec273c1cfc03d642c46db43f8cf", size = 352454, upload-time = "2025-05-18T19:03:14.741Z" },
|
|
953
|
+
{ url = "https://files.pythonhosted.org/packages/96/4d/5c4e36d48f169a54b53a305114be3efa2bbffd33b648cd1478a688f639c1/jiter-0.10.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c404a99352d839fed80d6afd6c1d66071f3bacaaa5c4268983fc10f769112e90", size = 391833, upload-time = "2025-05-18T19:03:16.426Z" },
|
|
954
|
+
{ url = "https://files.pythonhosted.org/packages/0b/de/ce4a6166a78810bd83763d2fa13f85f73cbd3743a325469a4a9289af6dae/jiter-0.10.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:66e989410b6666d3ddb27a74c7e50d0829704ede652fd4c858e91f8d64b403d0", size = 523646, upload-time = "2025-05-18T19:03:17.704Z" },
|
|
955
|
+
{ url = "https://files.pythonhosted.org/packages/a2/a6/3bc9acce53466972964cf4ad85efecb94f9244539ab6da1107f7aed82934/jiter-0.10.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b532d3af9ef4f6374609a3bcb5e05a1951d3bf6190dc6b176fdb277c9bbf15ee", size = 514735, upload-time = "2025-05-18T19:03:19.44Z" },
|
|
956
|
+
{ url = "https://files.pythonhosted.org/packages/b4/d8/243c2ab8426a2a4dea85ba2a2ba43df379ccece2145320dfd4799b9633c5/jiter-0.10.0-cp310-cp310-win32.whl", hash = "sha256:da9be20b333970e28b72edc4dff63d4fec3398e05770fb3205f7fb460eb48dd4", size = 210747, upload-time = "2025-05-18T19:03:21.184Z" },
|
|
957
|
+
{ url = "https://files.pythonhosted.org/packages/37/7a/8021bd615ef7788b98fc76ff533eaac846322c170e93cbffa01979197a45/jiter-0.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:f59e533afed0c5b0ac3eba20d2548c4a550336d8282ee69eb07b37ea526ee4e5", size = 207484, upload-time = "2025-05-18T19:03:23.046Z" },
|
|
958
|
+
{ url = "https://files.pythonhosted.org/packages/1b/dd/6cefc6bd68b1c3c979cecfa7029ab582b57690a31cd2f346c4d0ce7951b6/jiter-0.10.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:3bebe0c558e19902c96e99217e0b8e8b17d570906e72ed8a87170bc290b1e978", size = 317473, upload-time = "2025-05-18T19:03:25.942Z" },
|
|
959
|
+
{ url = "https://files.pythonhosted.org/packages/be/cf/fc33f5159ce132be1d8dd57251a1ec7a631c7df4bd11e1cd198308c6ae32/jiter-0.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:558cc7e44fd8e507a236bee6a02fa17199ba752874400a0ca6cd6e2196cdb7dc", size = 321971, upload-time = "2025-05-18T19:03:27.255Z" },
|
|
960
|
+
{ url = "https://files.pythonhosted.org/packages/68/a4/da3f150cf1d51f6c472616fb7650429c7ce053e0c962b41b68557fdf6379/jiter-0.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d613e4b379a07d7c8453c5712ce7014e86c6ac93d990a0b8e7377e18505e98d", size = 345574, upload-time = "2025-05-18T19:03:28.63Z" },
|
|
961
|
+
{ url = "https://files.pythonhosted.org/packages/84/34/6e8d412e60ff06b186040e77da5f83bc158e9735759fcae65b37d681f28b/jiter-0.10.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f62cf8ba0618eda841b9bf61797f21c5ebd15a7a1e19daab76e4e4b498d515b2", size = 371028, upload-time = "2025-05-18T19:03:30.292Z" },
|
|
962
|
+
{ url = "https://files.pythonhosted.org/packages/fb/d9/9ee86173aae4576c35a2f50ae930d2ccb4c4c236f6cb9353267aa1d626b7/jiter-0.10.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:919d139cdfa8ae8945112398511cb7fca58a77382617d279556b344867a37e61", size = 491083, upload-time = "2025-05-18T19:03:31.654Z" },
|
|
963
|
+
{ url = "https://files.pythonhosted.org/packages/d9/2c/f955de55e74771493ac9e188b0f731524c6a995dffdcb8c255b89c6fb74b/jiter-0.10.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:13ddbc6ae311175a3b03bd8994881bc4635c923754932918e18da841632349db", size = 388821, upload-time = "2025-05-18T19:03:33.184Z" },
|
|
964
|
+
{ url = "https://files.pythonhosted.org/packages/81/5a/0e73541b6edd3f4aada586c24e50626c7815c561a7ba337d6a7eb0a915b4/jiter-0.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c440ea003ad10927a30521a9062ce10b5479592e8a70da27f21eeb457b4a9c5", size = 352174, upload-time = "2025-05-18T19:03:34.965Z" },
|
|
965
|
+
{ url = "https://files.pythonhosted.org/packages/1c/c0/61eeec33b8c75b31cae42be14d44f9e6fe3ac15a4e58010256ac3abf3638/jiter-0.10.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dc347c87944983481e138dea467c0551080c86b9d21de6ea9306efb12ca8f606", size = 391869, upload-time = "2025-05-18T19:03:36.436Z" },
|
|
966
|
+
{ url = "https://files.pythonhosted.org/packages/41/22/5beb5ee4ad4ef7d86f5ea5b4509f680a20706c4a7659e74344777efb7739/jiter-0.10.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:13252b58c1f4d8c5b63ab103c03d909e8e1e7842d302473f482915d95fefd605", size = 523741, upload-time = "2025-05-18T19:03:38.168Z" },
|
|
967
|
+
{ url = "https://files.pythonhosted.org/packages/ea/10/768e8818538e5817c637b0df52e54366ec4cebc3346108a4457ea7a98f32/jiter-0.10.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7d1bbf3c465de4a24ab12fb7766a0003f6f9bce48b8b6a886158c4d569452dc5", size = 514527, upload-time = "2025-05-18T19:03:39.577Z" },
|
|
968
|
+
{ url = "https://files.pythonhosted.org/packages/73/6d/29b7c2dc76ce93cbedabfd842fc9096d01a0550c52692dfc33d3cc889815/jiter-0.10.0-cp311-cp311-win32.whl", hash = "sha256:db16e4848b7e826edca4ccdd5b145939758dadf0dc06e7007ad0e9cfb5928ae7", size = 210765, upload-time = "2025-05-18T19:03:41.271Z" },
|
|
969
|
+
{ url = "https://files.pythonhosted.org/packages/c2/c9/d394706deb4c660137caf13e33d05a031d734eb99c051142e039d8ceb794/jiter-0.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:9c9c1d5f10e18909e993f9641f12fe1c77b3e9b533ee94ffa970acc14ded3812", size = 209234, upload-time = "2025-05-18T19:03:42.918Z" },
|
|
970
|
+
{ url = "https://files.pythonhosted.org/packages/6d/b5/348b3313c58f5fbfb2194eb4d07e46a35748ba6e5b3b3046143f3040bafa/jiter-0.10.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:1e274728e4a5345a6dde2d343c8da018b9d4bd4350f5a472fa91f66fda44911b", size = 312262, upload-time = "2025-05-18T19:03:44.637Z" },
|
|
971
|
+
{ url = "https://files.pythonhosted.org/packages/9c/4a/6a2397096162b21645162825f058d1709a02965606e537e3304b02742e9b/jiter-0.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7202ae396446c988cb2a5feb33a543ab2165b786ac97f53b59aafb803fef0744", size = 320124, upload-time = "2025-05-18T19:03:46.341Z" },
|
|
972
|
+
{ url = "https://files.pythonhosted.org/packages/2a/85/1ce02cade7516b726dd88f59a4ee46914bf79d1676d1228ef2002ed2f1c9/jiter-0.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23ba7722d6748b6920ed02a8f1726fb4b33e0fd2f3f621816a8b486c66410ab2", size = 345330, upload-time = "2025-05-18T19:03:47.596Z" },
|
|
973
|
+
{ url = "https://files.pythonhosted.org/packages/75/d0/bb6b4f209a77190ce10ea8d7e50bf3725fc16d3372d0a9f11985a2b23eff/jiter-0.10.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:371eab43c0a288537d30e1f0b193bc4eca90439fc08a022dd83e5e07500ed026", size = 369670, upload-time = "2025-05-18T19:03:49.334Z" },
|
|
974
|
+
{ url = "https://files.pythonhosted.org/packages/a0/f5/a61787da9b8847a601e6827fbc42ecb12be2c925ced3252c8ffcb56afcaf/jiter-0.10.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c675736059020365cebc845a820214765162728b51ab1e03a1b7b3abb70f74c", size = 489057, upload-time = "2025-05-18T19:03:50.66Z" },
|
|
975
|
+
{ url = "https://files.pythonhosted.org/packages/12/e4/6f906272810a7b21406c760a53aadbe52e99ee070fc5c0cb191e316de30b/jiter-0.10.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0c5867d40ab716e4684858e4887489685968a47e3ba222e44cde6e4a2154f959", size = 389372, upload-time = "2025-05-18T19:03:51.98Z" },
|
|
976
|
+
{ url = "https://files.pythonhosted.org/packages/e2/ba/77013b0b8ba904bf3762f11e0129b8928bff7f978a81838dfcc958ad5728/jiter-0.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:395bb9a26111b60141757d874d27fdea01b17e8fac958b91c20128ba8f4acc8a", size = 352038, upload-time = "2025-05-18T19:03:53.703Z" },
|
|
977
|
+
{ url = "https://files.pythonhosted.org/packages/67/27/c62568e3ccb03368dbcc44a1ef3a423cb86778a4389e995125d3d1aaa0a4/jiter-0.10.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6842184aed5cdb07e0c7e20e5bdcfafe33515ee1741a6835353bb45fe5d1bd95", size = 391538, upload-time = "2025-05-18T19:03:55.046Z" },
|
|
978
|
+
{ url = "https://files.pythonhosted.org/packages/c0/72/0d6b7e31fc17a8fdce76164884edef0698ba556b8eb0af9546ae1a06b91d/jiter-0.10.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:62755d1bcea9876770d4df713d82606c8c1a3dca88ff39046b85a048566d56ea", size = 523557, upload-time = "2025-05-18T19:03:56.386Z" },
|
|
979
|
+
{ url = "https://files.pythonhosted.org/packages/2f/09/bc1661fbbcbeb6244bd2904ff3a06f340aa77a2b94e5a7373fd165960ea3/jiter-0.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:533efbce2cacec78d5ba73a41756beff8431dfa1694b6346ce7af3a12c42202b", size = 514202, upload-time = "2025-05-18T19:03:57.675Z" },
|
|
980
|
+
{ url = "https://files.pythonhosted.org/packages/1b/84/5a5d5400e9d4d54b8004c9673bbe4403928a00d28529ff35b19e9d176b19/jiter-0.10.0-cp312-cp312-win32.whl", hash = "sha256:8be921f0cadd245e981b964dfbcd6fd4bc4e254cdc069490416dd7a2632ecc01", size = 211781, upload-time = "2025-05-18T19:03:59.025Z" },
|
|
981
|
+
{ url = "https://files.pythonhosted.org/packages/9b/52/7ec47455e26f2d6e5f2ea4951a0652c06e5b995c291f723973ae9e724a65/jiter-0.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:a7c7d785ae9dda68c2678532a5a1581347e9c15362ae9f6e68f3fdbfb64f2e49", size = 206176, upload-time = "2025-05-18T19:04:00.305Z" },
|
|
982
|
+
{ url = "https://files.pythonhosted.org/packages/2e/b0/279597e7a270e8d22623fea6c5d4eeac328e7d95c236ed51a2b884c54f70/jiter-0.10.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:e0588107ec8e11b6f5ef0e0d656fb2803ac6cf94a96b2b9fc675c0e3ab5e8644", size = 311617, upload-time = "2025-05-18T19:04:02.078Z" },
|
|
983
|
+
{ url = "https://files.pythonhosted.org/packages/91/e3/0916334936f356d605f54cc164af4060e3e7094364add445a3bc79335d46/jiter-0.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cafc4628b616dc32530c20ee53d71589816cf385dd9449633e910d596b1f5c8a", size = 318947, upload-time = "2025-05-18T19:04:03.347Z" },
|
|
984
|
+
{ url = "https://files.pythonhosted.org/packages/6a/8e/fd94e8c02d0e94539b7d669a7ebbd2776e51f329bb2c84d4385e8063a2ad/jiter-0.10.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:520ef6d981172693786a49ff5b09eda72a42e539f14788124a07530f785c3ad6", size = 344618, upload-time = "2025-05-18T19:04:04.709Z" },
|
|
985
|
+
{ url = "https://files.pythonhosted.org/packages/6f/b0/f9f0a2ec42c6e9c2e61c327824687f1e2415b767e1089c1d9135f43816bd/jiter-0.10.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:554dedfd05937f8fc45d17ebdf298fe7e0c77458232bcb73d9fbbf4c6455f5b3", size = 368829, upload-time = "2025-05-18T19:04:06.912Z" },
|
|
986
|
+
{ url = "https://files.pythonhosted.org/packages/e8/57/5bbcd5331910595ad53b9fd0c610392ac68692176f05ae48d6ce5c852967/jiter-0.10.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5bc299da7789deacf95f64052d97f75c16d4fc8c4c214a22bf8d859a4288a1c2", size = 491034, upload-time = "2025-05-18T19:04:08.222Z" },
|
|
987
|
+
{ url = "https://files.pythonhosted.org/packages/9b/be/c393df00e6e6e9e623a73551774449f2f23b6ec6a502a3297aeeece2c65a/jiter-0.10.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5161e201172de298a8a1baad95eb85db4fb90e902353b1f6a41d64ea64644e25", size = 388529, upload-time = "2025-05-18T19:04:09.566Z" },
|
|
988
|
+
{ url = "https://files.pythonhosted.org/packages/42/3e/df2235c54d365434c7f150b986a6e35f41ebdc2f95acea3036d99613025d/jiter-0.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e2227db6ba93cb3e2bf67c87e594adde0609f146344e8207e8730364db27041", size = 350671, upload-time = "2025-05-18T19:04:10.98Z" },
|
|
989
|
+
{ url = "https://files.pythonhosted.org/packages/c6/77/71b0b24cbcc28f55ab4dbfe029f9a5b73aeadaba677843fc6dc9ed2b1d0a/jiter-0.10.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:15acb267ea5e2c64515574b06a8bf393fbfee6a50eb1673614aa45f4613c0cca", size = 390864, upload-time = "2025-05-18T19:04:12.722Z" },
|
|
990
|
+
{ url = "https://files.pythonhosted.org/packages/6a/d3/ef774b6969b9b6178e1d1e7a89a3bd37d241f3d3ec5f8deb37bbd203714a/jiter-0.10.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:901b92f2e2947dc6dfcb52fd624453862e16665ea909a08398dde19c0731b7f4", size = 522989, upload-time = "2025-05-18T19:04:14.261Z" },
|
|
991
|
+
{ url = "https://files.pythonhosted.org/packages/0c/41/9becdb1d8dd5d854142f45a9d71949ed7e87a8e312b0bede2de849388cb9/jiter-0.10.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d0cb9a125d5a3ec971a094a845eadde2db0de85b33c9f13eb94a0c63d463879e", size = 513495, upload-time = "2025-05-18T19:04:15.603Z" },
|
|
992
|
+
{ url = "https://files.pythonhosted.org/packages/9c/36/3468e5a18238bdedae7c4d19461265b5e9b8e288d3f86cd89d00cbb48686/jiter-0.10.0-cp313-cp313-win32.whl", hash = "sha256:48a403277ad1ee208fb930bdf91745e4d2d6e47253eedc96e2559d1e6527006d", size = 211289, upload-time = "2025-05-18T19:04:17.541Z" },
|
|
993
|
+
{ url = "https://files.pythonhosted.org/packages/7e/07/1c96b623128bcb913706e294adb5f768fb7baf8db5e1338ce7b4ee8c78ef/jiter-0.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:75f9eb72ecb640619c29bf714e78c9c46c9c4eaafd644bf78577ede459f330d4", size = 205074, upload-time = "2025-05-18T19:04:19.21Z" },
|
|
994
|
+
{ url = "https://files.pythonhosted.org/packages/54/46/caa2c1342655f57d8f0f2519774c6d67132205909c65e9aa8255e1d7b4f4/jiter-0.10.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:28ed2a4c05a1f32ef0e1d24c2611330219fed727dae01789f4a335617634b1ca", size = 318225, upload-time = "2025-05-18T19:04:20.583Z" },
|
|
995
|
+
{ url = "https://files.pythonhosted.org/packages/43/84/c7d44c75767e18946219ba2d703a5a32ab37b0bc21886a97bc6062e4da42/jiter-0.10.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14a4c418b1ec86a195f1ca69da8b23e8926c752b685af665ce30777233dfe070", size = 350235, upload-time = "2025-05-18T19:04:22.363Z" },
|
|
996
|
+
{ url = "https://files.pythonhosted.org/packages/01/16/f5a0135ccd968b480daad0e6ab34b0c7c5ba3bc447e5088152696140dcb3/jiter-0.10.0-cp313-cp313t-win_amd64.whl", hash = "sha256:d7bfed2fe1fe0e4dda6ef682cee888ba444b21e7a6553e03252e4feb6cf0adca", size = 207278, upload-time = "2025-05-18T19:04:23.627Z" },
|
|
997
|
+
{ url = "https://files.pythonhosted.org/packages/1c/9b/1d646da42c3de6c2188fdaa15bce8ecb22b635904fc68be025e21249ba44/jiter-0.10.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:5e9251a5e83fab8d87799d3e1a46cb4b7f2919b895c6f4483629ed2446f66522", size = 310866, upload-time = "2025-05-18T19:04:24.891Z" },
|
|
998
|
+
{ url = "https://files.pythonhosted.org/packages/ad/0e/26538b158e8a7c7987e94e7aeb2999e2e82b1f9d2e1f6e9874ddf71ebda0/jiter-0.10.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:023aa0204126fe5b87ccbcd75c8a0d0261b9abdbbf46d55e7ae9f8e22424eeb8", size = 318772, upload-time = "2025-05-18T19:04:26.161Z" },
|
|
999
|
+
{ url = "https://files.pythonhosted.org/packages/7b/fb/d302893151caa1c2636d6574d213e4b34e31fd077af6050a9c5cbb42f6fb/jiter-0.10.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c189c4f1779c05f75fc17c0c1267594ed918996a231593a21a5ca5438445216", size = 344534, upload-time = "2025-05-18T19:04:27.495Z" },
|
|
1000
|
+
{ url = "https://files.pythonhosted.org/packages/01/d8/5780b64a149d74e347c5128d82176eb1e3241b1391ac07935693466d6219/jiter-0.10.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:15720084d90d1098ca0229352607cd68256c76991f6b374af96f36920eae13c4", size = 369087, upload-time = "2025-05-18T19:04:28.896Z" },
|
|
1001
|
+
{ url = "https://files.pythonhosted.org/packages/e8/5b/f235a1437445160e777544f3ade57544daf96ba7e96c1a5b24a6f7ac7004/jiter-0.10.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e4f2fb68e5f1cfee30e2b2a09549a00683e0fde4c6a2ab88c94072fc33cb7426", size = 490694, upload-time = "2025-05-18T19:04:30.183Z" },
|
|
1002
|
+
{ url = "https://files.pythonhosted.org/packages/85/a9/9c3d4617caa2ff89cf61b41e83820c27ebb3f7b5fae8a72901e8cd6ff9be/jiter-0.10.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ce541693355fc6da424c08b7edf39a2895f58d6ea17d92cc2b168d20907dee12", size = 388992, upload-time = "2025-05-18T19:04:32.028Z" },
|
|
1003
|
+
{ url = "https://files.pythonhosted.org/packages/68/b1/344fd14049ba5c94526540af7eb661871f9c54d5f5601ff41a959b9a0bbd/jiter-0.10.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31c50c40272e189d50006ad5c73883caabb73d4e9748a688b216e85a9a9ca3b9", size = 351723, upload-time = "2025-05-18T19:04:33.467Z" },
|
|
1004
|
+
{ url = "https://files.pythonhosted.org/packages/41/89/4c0e345041186f82a31aee7b9d4219a910df672b9fef26f129f0cda07a29/jiter-0.10.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fa3402a2ff9815960e0372a47b75c76979d74402448509ccd49a275fa983ef8a", size = 392215, upload-time = "2025-05-18T19:04:34.827Z" },
|
|
1005
|
+
{ url = "https://files.pythonhosted.org/packages/55/58/ee607863e18d3f895feb802154a2177d7e823a7103f000df182e0f718b38/jiter-0.10.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:1956f934dca32d7bb647ea21d06d93ca40868b505c228556d3373cbd255ce853", size = 522762, upload-time = "2025-05-18T19:04:36.19Z" },
|
|
1006
|
+
{ url = "https://files.pythonhosted.org/packages/15/d0/9123fb41825490d16929e73c212de9a42913d68324a8ce3c8476cae7ac9d/jiter-0.10.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:fcedb049bdfc555e261d6f65a6abe1d5ad68825b7202ccb9692636c70fcced86", size = 513427, upload-time = "2025-05-18T19:04:37.544Z" },
|
|
1007
|
+
{ url = "https://files.pythonhosted.org/packages/d8/b3/2bd02071c5a2430d0b70403a34411fc519c2f227da7b03da9ba6a956f931/jiter-0.10.0-cp314-cp314-win32.whl", hash = "sha256:ac509f7eccca54b2a29daeb516fb95b6f0bd0d0d8084efaf8ed5dfc7b9f0b357", size = 210127, upload-time = "2025-05-18T19:04:38.837Z" },
|
|
1008
|
+
{ url = "https://files.pythonhosted.org/packages/03/0c/5fe86614ea050c3ecd728ab4035534387cd41e7c1855ef6c031f1ca93e3f/jiter-0.10.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5ed975b83a2b8639356151cef5c0d597c68376fc4922b45d0eb384ac058cfa00", size = 318527, upload-time = "2025-05-18T19:04:40.612Z" },
|
|
1009
|
+
{ url = "https://files.pythonhosted.org/packages/b3/4a/4175a563579e884192ba6e81725fc0448b042024419be8d83aa8a80a3f44/jiter-0.10.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3aa96f2abba33dc77f79b4cf791840230375f9534e5fac927ccceb58c5e604a5", size = 354213, upload-time = "2025-05-18T19:04:41.894Z" },
|
|
1010
|
+
]
|
|
1011
|
+
|
|
927
1012
|
[[package]]
|
|
928
1013
|
name = "lxml"
|
|
929
1014
|
version = "5.4.0"
|
|
@@ -1436,6 +1521,25 @@ wheels = [
|
|
|
1436
1521
|
{ url = "https://files.pythonhosted.org/packages/c3/16/873b955beda7bada5b0d798d3a601b2ff210e44ad5169f6d405b93892103/onnxruntime-1.22.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:64845709f9e8a2809e8e009bc4c8f73b788cee9c6619b7d9930344eae4c9cd36", size = 16427482, upload-time = "2025-05-09T20:26:20.376Z" },
|
|
1437
1522
|
]
|
|
1438
1523
|
|
|
1524
|
+
[[package]]
|
|
1525
|
+
name = "openai"
|
|
1526
|
+
version = "1.88.0"
|
|
1527
|
+
source = { registry = "https://pypi.org/simple" }
|
|
1528
|
+
dependencies = [
|
|
1529
|
+
{ name = "anyio" },
|
|
1530
|
+
{ name = "distro" },
|
|
1531
|
+
{ name = "httpx" },
|
|
1532
|
+
{ name = "jiter" },
|
|
1533
|
+
{ name = "pydantic" },
|
|
1534
|
+
{ name = "sniffio" },
|
|
1535
|
+
{ name = "tqdm" },
|
|
1536
|
+
{ name = "typing-extensions" },
|
|
1537
|
+
]
|
|
1538
|
+
sdist = { url = "https://files.pythonhosted.org/packages/5a/ea/bbeef604d1fe0f7e9111745bb8a81362973a95713b28855beb9a9832ab12/openai-1.88.0.tar.gz", hash = "sha256:122d35e42998255cf1fc84560f6ee49a844e65c054cd05d3e42fda506b832bb1", size = 470963, upload-time = "2025-06-17T05:04:45.856Z" }
|
|
1539
|
+
wheels = [
|
|
1540
|
+
{ url = "https://files.pythonhosted.org/packages/f4/03/ef68d77a38dd383cbed7fc898857d394d5a8b0520a35f054e7fe05dc3ac1/openai-1.88.0-py3-none-any.whl", hash = "sha256:7edd7826b3b83f5846562a6f310f040c79576278bf8e3687b30ba05bb5dff978", size = 734293, upload-time = "2025-06-17T05:04:43.858Z" },
|
|
1541
|
+
]
|
|
1542
|
+
|
|
1439
1543
|
[[package]]
|
|
1440
1544
|
name = "openapi-pydantic"
|
|
1441
1545
|
version = "0.5.1"
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
name: build-docs
|
|
2
|
-
on:
|
|
3
|
-
push:
|
|
4
|
-
branches:
|
|
5
|
-
- main
|
|
6
|
-
permissions:
|
|
7
|
-
contents: write
|
|
8
|
-
jobs:
|
|
9
|
-
deploy:
|
|
10
|
-
runs-on: ubuntu-latest
|
|
11
|
-
steps:
|
|
12
|
-
- uses: actions/checkout@v4
|
|
13
|
-
- name: Configure Git Credentials
|
|
14
|
-
run: |
|
|
15
|
-
git config user.name github-actions[bot]
|
|
16
|
-
git config user.email 41898282+github-actions[bot]@users.noreply.github.com
|
|
17
|
-
- uses: actions/setup-python@v5
|
|
18
|
-
with:
|
|
19
|
-
python-version: 3.x
|
|
20
|
-
- run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV
|
|
21
|
-
- uses: actions/cache@v4
|
|
22
|
-
with:
|
|
23
|
-
key: mkdocs-material-${{ env.cache_id }}
|
|
24
|
-
path: .cache
|
|
25
|
-
restore-keys: |
|
|
26
|
-
mkdocs-material-
|
|
27
|
-
- run: pip install mkdocs-material
|
|
28
|
-
- run: mkdocs gh-deploy --force
|
haiku_rag-0.1.0/CLAUDE.md
DELETED
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
# Claude AI Assistant Configuration
|
|
2
|
-
|
|
3
|
-
This file contains project-specific information and preferences for Claude AI assistant interactions.
|
|
4
|
-
|
|
5
|
-
## Project Overview
|
|
6
|
-
|
|
7
|
-
This is a SQLite-based RAG (Retrieval-Augmented Generation) system built with Haiku.
|
|
8
|
-
|
|
9
|
-
## Development Commands
|
|
10
|
-
|
|
11
|
-
- Install dependencies: `uv sync`
|
|
12
|
-
- Run tests: `pytest`
|
|
13
|
-
- Run specific test: `pytest path/to/test_file.py`
|
|
14
|
-
- Run with coverage: `pytest --cov`
|
|
15
|
-
- Type checking: `pyright`
|
|
16
|
-
- Run MCP server: `python -m haiku.rag.mcp`
|
|
17
|
-
|
|
18
|
-
## Project Structure
|
|
19
|
-
|
|
20
|
-
- `src/` - Source code
|
|
21
|
-
- `tests/` - Test files
|
|
22
|
-
- `README.md` - Documentation
|
|
23
|
-
|
|
24
|
-
## Notes
|
|
25
|
-
|
|
26
|
-
- This is a Python project using uv for dependency management
|
|
27
|
-
- Use pytest for testing
|
|
28
|
-
- Prefer editing existing files over creating new ones
|
|
29
|
-
- Follow existing code patterns and conventions
|
|
30
|
-
- Remember to activate the .venv when you start working
|
|
31
|
-
- Never use relative imports
|
|
32
|
-
- Always run ruff as well as pyright after you are done
|
|
33
|
-
- Do not be verbose with comments!
|
|
34
|
-
- When you change something check if the README needs an update too.
|
|
35
|
-
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
from dotenv import load_dotenv
|
|
5
|
-
from pydantic import BaseModel
|
|
6
|
-
|
|
7
|
-
from haiku.rag.utils import get_default_data_dir
|
|
8
|
-
|
|
9
|
-
load_dotenv()
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class AppConfig(BaseModel):
|
|
13
|
-
ENV: str = "development"
|
|
14
|
-
|
|
15
|
-
DEFAULT_DATA_DIR: Path = get_default_data_dir()
|
|
16
|
-
|
|
17
|
-
EMBEDDING_PROVIDER: str = "ollama"
|
|
18
|
-
EMBEDDING_MODEL: str = "mxbai-embed-large"
|
|
19
|
-
EMBEDDING_VECTOR_DIM: int = 1024
|
|
20
|
-
|
|
21
|
-
CHUNK_SIZE: int = 256
|
|
22
|
-
CHUNK_OVERLAP: int = 32
|
|
23
|
-
|
|
24
|
-
OLLAMA_BASE_URL: str = "http://localhost:11434"
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
# Expose Config object for app to import
|
|
28
|
-
Config = AppConfig.model_validate(os.environ)
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
from haiku.rag.config import Config
|
|
2
|
-
from haiku.rag.embeddings.base import EmbedderBase
|
|
3
|
-
from haiku.rag.embeddings.ollama import Embedder as OllamaEmbedder
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def get_embedder() -> EmbedderBase:
|
|
7
|
-
"""
|
|
8
|
-
Factory function to get the appropriate embedder based on the configuration.
|
|
9
|
-
"""
|
|
10
|
-
|
|
11
|
-
if Config.EMBEDDING_PROVIDER == "ollama":
|
|
12
|
-
return OllamaEmbedder(Config.EMBEDDING_MODEL, Config.EMBEDDING_VECTOR_DIM)
|
|
13
|
-
|
|
14
|
-
if Config.EMBEDDING_PROVIDER == "voyageai":
|
|
15
|
-
try:
|
|
16
|
-
from haiku.rag.embeddings.voyageai import Embedder as VoyageAIEmbedder
|
|
17
|
-
except ImportError:
|
|
18
|
-
raise ImportError(
|
|
19
|
-
"VoyageAI embedder requires the 'voyageai' package. "
|
|
20
|
-
"Please install haiku.rag with the 'voyageai' extra:"
|
|
21
|
-
"uv pip install haiku.rag --extra voyageai"
|
|
22
|
-
)
|
|
23
|
-
return VoyageAIEmbedder(Config.EMBEDDING_MODEL, Config.EMBEDDING_VECTOR_DIM)
|
|
24
|
-
raise ValueError(f"Unsupported embedding provider: {Config.EMBEDDING_PROVIDER}")
|
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
import pytest
|
|
3
|
-
|
|
4
|
-
from haiku.rag.embeddings import get_embedder
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
@pytest.mark.asyncio
|
|
8
|
-
async def test_embedder():
|
|
9
|
-
embedder = get_embedder()
|
|
10
|
-
embedding = await embedder.embed("hello world")
|
|
11
|
-
assert len(embedding) == embedder._vector_dim
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@pytest.mark.asyncio
|
|
15
|
-
async def test_similarity():
|
|
16
|
-
embedder = get_embedder()
|
|
17
|
-
phrases = [
|
|
18
|
-
"I enjoy eating great food.",
|
|
19
|
-
"Python is my favorite programming language.",
|
|
20
|
-
"I love to travel and see new places.",
|
|
21
|
-
]
|
|
22
|
-
embeddings = [np.array(await embedder.embed(phrase)) for phrase in phrases]
|
|
23
|
-
|
|
24
|
-
# Calculate cosine similarity
|
|
25
|
-
def similarities(embeddings, test_embedding):
|
|
26
|
-
return [
|
|
27
|
-
np.dot(embedding, test_embedding)
|
|
28
|
-
/ (np.linalg.norm(embedding) * np.linalg.norm(test_embedding))
|
|
29
|
-
for embedding in embeddings
|
|
30
|
-
]
|
|
31
|
-
|
|
32
|
-
test_phrase = "I am going for a camping trip."
|
|
33
|
-
test_embedding = await embedder.embed(test_phrase)
|
|
34
|
-
|
|
35
|
-
sims = similarities(embeddings, test_embedding)
|
|
36
|
-
assert max(sims) == sims[2]
|
|
37
|
-
|
|
38
|
-
test_phrase = "When is dinner ready?"
|
|
39
|
-
test_embedding = await embedder.embed(test_phrase)
|
|
40
|
-
|
|
41
|
-
sims = similarities(embeddings, test_embedding)
|
|
42
|
-
assert max(sims) == sims[0]
|
|
43
|
-
|
|
44
|
-
test_phrase = "I work as a software developer."
|
|
45
|
-
test_embedding = await embedder.embed(test_phrase)
|
|
46
|
-
|
|
47
|
-
sims = similarities(embeddings, test_embedding)
|
|
48
|
-
assert max(sims) == sims[1]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|