haiku.rag 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of haiku.rag might be problematic. Click here for more details.

Files changed (49) hide show
  1. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/PKG-INFO +52 -17
  2. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/README.md +48 -16
  3. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/pyproject.toml +3 -1
  4. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/app.py +24 -15
  5. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/cli.py +1 -1
  6. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/client.py +1 -1
  7. haiku_rag-0.2.0/src/haiku/rag/config.py +40 -0
  8. haiku_rag-0.2.0/src/haiku/rag/embeddings/__init__.py +36 -0
  9. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/embeddings/ollama.py +1 -1
  10. haiku_rag-0.2.0/src/haiku/rag/embeddings/openai.py +20 -0
  11. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/embeddings/voyageai.py +1 -1
  12. haiku_rag-0.2.0/src/haiku/rag/logging.py +24 -0
  13. haiku_rag-0.2.0/src/haiku/rag/monitor.py +74 -0
  14. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/tests/test_client.py +2 -2
  15. haiku_rag-0.2.0/tests/test_embedder.py +128 -0
  16. haiku_rag-0.2.0/tests/test_monitor.py +99 -0
  17. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/uv.lock +106 -2
  18. haiku_rag-0.1.0/.claude/settings.local.json +0 -15
  19. haiku_rag-0.1.0/.github/workflows/build-docs.yml +0 -28
  20. haiku_rag-0.1.0/CLAUDE.md +0 -35
  21. haiku_rag-0.1.0/src/haiku/rag/config.py +0 -28
  22. haiku_rag-0.1.0/src/haiku/rag/embeddings/__init__.py +0 -24
  23. haiku_rag-0.1.0/tests/test_embedder.py +0 -48
  24. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/.github/FUNDING.yml +0 -0
  25. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/.github/workflows/build-publish.yml +0 -0
  26. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/.gitignore +0 -0
  27. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/.pre-commit-config.yaml +0 -0
  28. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/.python-version +0 -0
  29. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/LICENSE +0 -0
  30. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/__init__.py +0 -0
  31. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/chunker.py +0 -0
  32. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/embeddings/base.py +0 -0
  33. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/mcp.py +0 -0
  34. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/reader.py +0 -0
  35. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/store/__init__.py +0 -0
  36. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/store/engine.py +0 -0
  37. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/store/models/__init__.py +0 -0
  38. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/store/models/chunk.py +0 -0
  39. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/store/models/document.py +0 -0
  40. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/store/repositories/__init__.py +0 -0
  41. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/store/repositories/base.py +0 -0
  42. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/store/repositories/chunk.py +0 -0
  43. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/store/repositories/document.py +0 -0
  44. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/src/haiku/rag/utils.py +0 -0
  45. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/tests/conftest.py +0 -0
  46. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/tests/test_chunk.py +0 -0
  47. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/tests/test_chunker.py +0 -0
  48. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/tests/test_document.py +0 -0
  49. {haiku_rag-0.1.0 → haiku_rag-0.2.0}/tests/test_search.py +0 -0
@@ -1,10 +1,11 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: haiku.rag
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: Retrieval Augmented Generation (RAG) with SQLite
5
5
  Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
6
6
  License: MIT
7
7
  License-File: LICENSE
8
+ Keywords: RAG,mcp,ml,sqlite,sqlite-vec
8
9
  Classifier: Development Status :: 4 - Beta
9
10
  Classifier: Environment :: Console
10
11
  Classifier: Intended Audience :: Developers
@@ -28,6 +29,8 @@ Requires-Dist: sqlite-vec>=0.1.6
28
29
  Requires-Dist: tiktoken>=0.9.0
29
30
  Requires-Dist: typer>=0.16.0
30
31
  Requires-Dist: watchfiles>=1.1.0
32
+ Provides-Extra: openai
33
+ Requires-Dist: openai>=1.0.0; extra == 'openai'
31
34
  Provides-Extra: voyageai
32
35
  Requires-Dist: voyageai>=0.3.2; extra == 'voyageai'
33
36
  Description-Content-Type: text/markdown
@@ -38,9 +41,12 @@ A SQLite-based Retrieval-Augmented Generation (RAG) system built for efficient d
38
41
 
39
42
  ## Features
40
43
  - **Local SQLite**: No need to run additional servers
41
- - **Support for various embedding providers**: You can use Ollama, VoyageAI or add your own
44
+ - **Support for various embedding providers**: You can use Ollama, VoyageAI, OpenAI or add your own
42
45
  - **Hybrid Search**: Vector search using `sqlite-vec` combined with full-text search `FTS5`, using Reciprocal Rank Fusion
43
46
  - **Multi-format Support**: Parse 40+ file formats including PDF, DOCX, HTML, Markdown, audio and more. Or add a url!
47
+ - **File monitoring**: Automatically indexes your files when run as a server
48
+ - **MCP server**: Exposes functionality as MCP tools.
49
+ - **Python client**: Call `haiku.rag` from your own Python applications.
44
50
 
45
51
  ## Installation
46
52
 
@@ -52,24 +58,41 @@ By default Ollama (with the `mxbai-embed-large` model) is used for the embedding
52
58
  For other providers use:
53
59
 
54
60
  - **VoyageAI**: `uv pip install haiku.rag --extra voyageai`
61
+ - **OpenAI**: `uv pip install haiku.rag --extra openai`
55
62
 
56
63
  ## Configuration
57
64
 
65
+ You can set the directories to monitor using the `MONITOR_DIRECTORIES` environment variable (as comma-separated values):
66
+
67
+ ```bash
68
+ # Monitor one or more directories (comma-separated)
69
+ export MONITOR_DIRECTORIES="/path/to/documents,/another_path/to/documents"
70
+ ```
71
+
58
72
  If you want to use an alternative embeddings provider (Ollama being the default) you will need to set the provider details through environment variables:
59
73
 
60
74
  By default:
61
75
 
62
76
  ```bash
63
- EMBEDDING_PROVIDER="ollama"
64
- EMBEDDING_MODEL="mxbai-embed-large" # or any other model
65
- EMBEDDING_VECTOR_DIM=1024
77
+ EMBEDDINGS_PROVIDER="ollama"
78
+ EMBEDDINGS_MODEL="mxbai-embed-large" # or any other model
79
+ EMBEDDINGS_VECTOR_DIM=1024
66
80
  ```
67
81
 
68
82
  For VoyageAI:
69
83
  ```bash
70
- EMBEDDING_PROVIDER="voyageai"
71
- EMBEDDING_MODEL="voyage-3.5" # or any other model
72
- EMBEDDING_VECTOR_DIM=1024
84
+ EMBEDDINGS_PROVIDER="voyageai"
85
+ EMBEDDINGS_MODEL="voyage-3.5" # or any other model
86
+ EMBEDDINGS_VECTOR_DIM=1024
87
+ VOYAGE_API_KEY="your-api-key"
88
+ ```
89
+
90
+ For OpenAI:
91
+ ```bash
92
+ EMBEDDINGS_PROVIDER="openai"
93
+ EMBEDDINGS_MODEL="text-embedding-3-small" # or text-embedding-3-large
94
+ EMBEDDINGS_VECTOR_DIM=1536
95
+ OPENAI_API_KEY="your-api-key"
73
96
  ```
74
97
 
75
98
  ## Command Line Interface
@@ -101,7 +124,7 @@ haiku-rag search "machine learning"
101
124
  # Search with custom options
102
125
  haiku-rag search "python programming" --limit 10 --k 100
103
126
 
104
- # Start MCP server (default HTTP transport)
127
+ # Start file monitoring & MCP server (default HTTP transport)
105
128
  haiku-rag serve # --stdio for stdio transport or --sse for SSE transport
106
129
  ```
107
130
 
@@ -111,7 +134,26 @@ haiku-rag command -h
111
134
  ```
112
135
  to see additional parameters for a command.
113
136
 
114
- ## MCP Server
137
+ ## File Monitoring & MCP server
138
+
139
+ You can start the server (using Streamable HTTP, stdio or SSE transports) with:
140
+
141
+ ```bash
142
+ # Start with default HTTP transport
143
+ haiku-rag serve # --stdio for stdio transport or --sse for SSE transport
144
+ ```
145
+
146
+ You need to have set the `MONITOR_DIRECTORIES` environment variable for monitoring to take place.
147
+
148
+ ### File monitoring
149
+
150
+ `haiku.rag` can watch directories for changes and automatically update the document store:
151
+
152
+ - **Startup**: Scan all monitored directories and add any new files
153
+ - **File Added/Modified**: Automatically parse and add/update the document in the database
154
+ - **File Deleted**: Remove the corresponding document from the database
155
+
156
+ ### MCP Server
115
157
 
116
158
  `haiku.rag` includes a Model Context Protocol (MCP) server that exposes RAG functionality as tools for AI assistants like Claude Desktop. The MCP server provides the following tools:
117
159
 
@@ -123,13 +165,6 @@ to see additional parameters for a command.
123
165
  - `list_documents` - List all documents with pagination
124
166
  - `delete_document` - Delete documents by ID
125
167
 
126
- You can start the server (using Streamble HTTP, stdio or SSE transports) with:
127
-
128
- ```bash
129
- # Start with default HTTP transport
130
- haiku-rag serve # --stdio for stdio transport or --sse for SSE transport
131
- ```
132
-
133
168
  ## Using `haiku.rag` from python
134
169
 
135
170
  ### Managing documents
@@ -4,9 +4,12 @@ A SQLite-based Retrieval-Augmented Generation (RAG) system built for efficient d
4
4
 
5
5
  ## Features
6
6
  - **Local SQLite**: No need to run additional servers
7
- - **Support for various embedding providers**: You can use Ollama, VoyageAI or add your own
7
+ - **Support for various embedding providers**: You can use Ollama, VoyageAI, OpenAI or add your own
8
8
  - **Hybrid Search**: Vector search using `sqlite-vec` combined with full-text search `FTS5`, using Reciprocal Rank Fusion
9
9
  - **Multi-format Support**: Parse 40+ file formats including PDF, DOCX, HTML, Markdown, audio and more. Or add a url!
10
+ - **File monitoring**: Automatically indexes your files when run as a server
11
+ - **MCP server**: Exposes functionality as MCP tools.
12
+ - **Python client**: Call `haiku.rag` from your own Python applications.
10
13
 
11
14
  ## Installation
12
15
 
@@ -18,24 +21,41 @@ By default Ollama (with the `mxbai-embed-large` model) is used for the embedding
18
21
  For other providers use:
19
22
 
20
23
  - **VoyageAI**: `uv pip install haiku.rag --extra voyageai`
24
+ - **OpenAI**: `uv pip install haiku.rag --extra openai`
21
25
 
22
26
  ## Configuration
23
27
 
28
+ You can set the directories to monitor using the `MONITOR_DIRECTORIES` environment variable (as comma-separated values):
29
+
30
+ ```bash
31
+ # Monitor one or more directories (comma-separated)
32
+ export MONITOR_DIRECTORIES="/path/to/documents,/another_path/to/documents"
33
+ ```
34
+
24
35
  If you want to use an alternative embeddings provider (Ollama being the default) you will need to set the provider details through environment variables:
25
36
 
26
37
  By default:
27
38
 
28
39
  ```bash
29
- EMBEDDING_PROVIDER="ollama"
30
- EMBEDDING_MODEL="mxbai-embed-large" # or any other model
31
- EMBEDDING_VECTOR_DIM=1024
40
+ EMBEDDINGS_PROVIDER="ollama"
41
+ EMBEDDINGS_MODEL="mxbai-embed-large" # or any other model
42
+ EMBEDDINGS_VECTOR_DIM=1024
32
43
  ```
33
44
 
34
45
  For VoyageAI:
35
46
  ```bash
36
- EMBEDDING_PROVIDER="voyageai"
37
- EMBEDDING_MODEL="voyage-3.5" # or any other model
38
- EMBEDDING_VECTOR_DIM=1024
47
+ EMBEDDINGS_PROVIDER="voyageai"
48
+ EMBEDDINGS_MODEL="voyage-3.5" # or any other model
49
+ EMBEDDINGS_VECTOR_DIM=1024
50
+ VOYAGE_API_KEY="your-api-key"
51
+ ```
52
+
53
+ For OpenAI:
54
+ ```bash
55
+ EMBEDDINGS_PROVIDER="openai"
56
+ EMBEDDINGS_MODEL="text-embedding-3-small" # or text-embedding-3-large
57
+ EMBEDDINGS_VECTOR_DIM=1536
58
+ OPENAI_API_KEY="your-api-key"
39
59
  ```
40
60
 
41
61
  ## Command Line Interface
@@ -67,7 +87,7 @@ haiku-rag search "machine learning"
67
87
  # Search with custom options
68
88
  haiku-rag search "python programming" --limit 10 --k 100
69
89
 
70
- # Start MCP server (default HTTP transport)
90
+ # Start file monitoring & MCP server (default HTTP transport)
71
91
  haiku-rag serve # --stdio for stdio transport or --sse for SSE transport
72
92
  ```
73
93
 
@@ -77,7 +97,26 @@ haiku-rag command -h
77
97
  ```
78
98
  to see additional parameters for a command.
79
99
 
80
- ## MCP Server
100
+ ## File Monitoring & MCP server
101
+
102
+ You can start the server (using Streamable HTTP, stdio or SSE transports) with:
103
+
104
+ ```bash
105
+ # Start with default HTTP transport
106
+ haiku-rag serve # --stdio for stdio transport or --sse for SSE transport
107
+ ```
108
+
109
+ You need to have set the `MONITOR_DIRECTORIES` environment variable for monitoring to take place.
110
+
111
+ ### File monitoring
112
+
113
+ `haiku.rag` can watch directories for changes and automatically update the document store:
114
+
115
+ - **Startup**: Scan all monitored directories and add any new files
116
+ - **File Added/Modified**: Automatically parse and add/update the document in the database
117
+ - **File Deleted**: Remove the corresponding document from the database
118
+
119
+ ### MCP Server
81
120
 
82
121
  `haiku.rag` includes a Model Context Protocol (MCP) server that exposes RAG functionality as tools for AI assistants like Claude Desktop. The MCP server provides the following tools:
83
122
 
@@ -89,13 +128,6 @@ to see additional parameters for a command.
89
128
  - `list_documents` - List all documents with pagination
90
129
  - `delete_document` - Delete documents by ID
91
130
 
92
- You can start the server (using Streamble HTTP, stdio or SSE transports) with:
93
-
94
- ```bash
95
- # Start with default HTTP transport
96
- haiku-rag serve # --stdio for stdio transport or --sse for SSE transport
97
- ```
98
-
99
131
  ## Using `haiku.rag` from python
100
132
 
101
133
  ### Managing documents
@@ -1,11 +1,12 @@
1
1
  [project]
2
2
  name = "haiku.rag"
3
- version = "0.1.0"
3
+ version = "0.2.0"
4
4
  description = "Retrieval Augmented Generation (RAG) with SQLite"
5
5
  authors = [{ name = "Yiorgis Gozadinos", email = "ggozadinos@gmail.com" }]
6
6
  license = { text = "MIT" }
7
7
  readme = { file = "README.md", content-type = "text/markdown" }
8
8
  requires-python = ">=3.10"
9
+ keywords = ["RAG", "sqlite", "sqlite-vec", "ml", "mcp"]
9
10
  classifiers = [
10
11
  "Development Status :: 4 - Beta",
11
12
  "Environment :: Console",
@@ -36,6 +37,7 @@ dependencies = [
36
37
 
37
38
  [project.optional-dependencies]
38
39
  voyageai = ["voyageai>=0.3.2"]
40
+ openai = ["openai>=1.0.0"]
39
41
 
40
42
  [project.scripts]
41
43
  haiku-rag = "haiku.rag.cli:cli"
@@ -1,9 +1,13 @@
1
+ import asyncio
1
2
  from pathlib import Path
2
3
 
3
4
  from rich.console import Console
4
5
  from rich.markdown import Markdown
5
6
 
6
7
  from haiku.rag.client import HaikuRAG
8
+ from haiku.rag.config import Config
9
+ from haiku.rag.mcp import create_mcp_server
10
+ from haiku.rag.monitor import FileWatcher
7
11
  from haiku.rag.store.models.chunk import Chunk
8
12
  from haiku.rag.store.models.document import Document
9
13
 
@@ -88,20 +92,25 @@ class HaikuRAGApp:
88
92
  self.console.print(content)
89
93
  self.console.rule()
90
94
 
91
- def serve(self, transport: str | None = None):
95
+ async def serve(self, transport: str | None = None):
92
96
  """Start the MCP server."""
93
- from haiku.rag.mcp import create_mcp_server
97
+ async with HaikuRAG(self.db_path) as client:
98
+ monitor = FileWatcher(paths=Config.MONITOR_DIRECTORIES, client=client)
99
+ monitor_task = asyncio.create_task(monitor.observe())
100
+ server = create_mcp_server(self.db_path)
94
101
 
95
- server = create_mcp_server(self.db_path)
96
-
97
- if transport == "stdio":
98
- self.console.print("[green]Starting MCP server on stdio...[/green]")
99
- server.run("stdio")
100
- elif transport == "sse":
101
- self.console.print(
102
- "[green]Starting MCP server with streamable HTTP...[/green]"
103
- )
104
- server.run("sse")
105
- else:
106
- self.console.print("[green]Starting MCP server with HTTP...[/green]")
107
- server.run("streamable-http")
102
+ try:
103
+ if transport == "stdio":
104
+ await server.run_stdio_async()
105
+ elif transport == "sse":
106
+ await server.run_sse_async("sse")
107
+ else:
108
+ await server.run_http_async("streamable-http")
109
+ except KeyboardInterrupt:
110
+ pass
111
+ finally:
112
+ monitor_task.cancel()
113
+ try:
114
+ await monitor_task
115
+ except asyncio.CancelledError:
116
+ pass
@@ -146,7 +146,7 @@ def serve(
146
146
  elif sse:
147
147
  transport = "sse"
148
148
 
149
- app.serve(transport=transport)
149
+ event_loop.run_until_complete(app.serve(transport=transport))
150
150
 
151
151
 
152
152
  if __name__ == "__main__":
@@ -88,7 +88,7 @@ class HaikuRAG:
88
88
  if not source_path.exists():
89
89
  raise ValueError(f"File does not exist: {source_path}")
90
90
 
91
- uri = str(source_path.resolve())
91
+ uri = source_path.as_uri()
92
92
  md5_hash = hashlib.md5(source_path.read_bytes()).hexdigest()
93
93
 
94
94
  # Check if document already exists
@@ -0,0 +1,40 @@
1
+ import os
2
+ from pathlib import Path
3
+
4
+ from dotenv import load_dotenv
5
+ from pydantic import BaseModel, field_validator
6
+
7
+ from haiku.rag.utils import get_default_data_dir
8
+
9
+ load_dotenv()
10
+
11
+
12
+ class AppConfig(BaseModel):
13
+ ENV: str = "development"
14
+
15
+ DEFAULT_DATA_DIR: Path = get_default_data_dir()
16
+ MONITOR_DIRECTORIES: list[Path] = []
17
+
18
+ EMBEDDINGS_PROVIDER: str = "ollama"
19
+ EMBEDDINGS_MODEL: str = "mxbai-embed-large"
20
+ EMBEDDINGS_VECTOR_DIM: int = 1024
21
+
22
+ CHUNK_SIZE: int = 256
23
+ CHUNK_OVERLAP: int = 32
24
+
25
+ OLLAMA_BASE_URL: str = "http://localhost:11434"
26
+
27
+ @field_validator("MONITOR_DIRECTORIES", mode="before")
28
+ @classmethod
29
+ def parse_monitor_directories(cls, v):
30
+ if isinstance(v, str):
31
+ if not v.strip():
32
+ return []
33
+ return [
34
+ Path(path.strip()).absolute() for path in v.split(",") if path.strip()
35
+ ]
36
+ return v
37
+
38
+
39
+ # Expose Config object for app to import
40
+ Config = AppConfig.model_validate(os.environ)
@@ -0,0 +1,36 @@
1
+ from haiku.rag.config import Config
2
+ from haiku.rag.embeddings.base import EmbedderBase
3
+ from haiku.rag.embeddings.ollama import Embedder as OllamaEmbedder
4
+
5
+
6
+ def get_embedder() -> EmbedderBase:
7
+ """
8
+ Factory function to get the appropriate embedder based on the configuration.
9
+ """
10
+
11
+ if Config.EMBEDDINGS_PROVIDER == "ollama":
12
+ return OllamaEmbedder(Config.EMBEDDINGS_MODEL, Config.EMBEDDINGS_VECTOR_DIM)
13
+
14
+ if Config.EMBEDDINGS_PROVIDER == "voyageai":
15
+ try:
16
+ from haiku.rag.embeddings.voyageai import Embedder as VoyageAIEmbedder
17
+ except ImportError:
18
+ raise ImportError(
19
+ "VoyageAI embedder requires the 'voyageai' package. "
20
+ "Please install haiku.rag with the 'voyageai' extra:"
21
+ "uv pip install haiku.rag --extra voyageai"
22
+ )
23
+ return VoyageAIEmbedder(Config.EMBEDDINGS_MODEL, Config.EMBEDDINGS_VECTOR_DIM)
24
+
25
+ if Config.EMBEDDINGS_PROVIDER == "openai":
26
+ try:
27
+ from haiku.rag.embeddings.openai import Embedder as OpenAIEmbedder
28
+ except ImportError:
29
+ raise ImportError(
30
+ "OpenAI embedder requires the 'openai' package. "
31
+ "Please install haiku.rag with the 'openai' extra:"
32
+ "uv pip install haiku.rag --extra openai"
33
+ )
34
+ return OpenAIEmbedder(Config.EMBEDDINGS_MODEL, Config.EMBEDDINGS_VECTOR_DIM)
35
+
36
+ raise ValueError(f"Unsupported embedding provider: {Config.EMBEDDINGS_PROVIDER}")
@@ -5,7 +5,7 @@ from haiku.rag.embeddings.base import EmbedderBase
5
5
 
6
6
 
7
7
  class Embedder(EmbedderBase):
8
- _model: str = Config.EMBEDDING_MODEL
8
+ _model: str = Config.EMBEDDINGS_MODEL
9
9
  _vector_dim: int = 1024
10
10
 
11
11
  async def embed(self, text: str) -> list[float]:
@@ -0,0 +1,20 @@
1
+ try:
2
+ from openai import AsyncOpenAI
3
+
4
+ from haiku.rag.config import Config
5
+ from haiku.rag.embeddings.base import EmbedderBase
6
+
7
+ class Embedder(EmbedderBase):
8
+ _model: str = Config.EMBEDDINGS_MODEL
9
+ _vector_dim: int = 1536
10
+
11
+ async def embed(self, text: str) -> list[float]:
12
+ client = AsyncOpenAI()
13
+ response = await client.embeddings.create(
14
+ model=self._model,
15
+ input=text,
16
+ )
17
+ return response.data[0].embedding
18
+
19
+ except ImportError:
20
+ pass
@@ -5,7 +5,7 @@ try:
5
5
  from haiku.rag.embeddings.base import EmbedderBase
6
6
 
7
7
  class Embedder(EmbedderBase):
8
- _model: str = Config.EMBEDDING_MODEL
8
+ _model: str = Config.EMBEDDINGS_MODEL
9
9
  _vector_dim: int = 1024
10
10
 
11
11
  async def embed(self, text: str) -> list[float]:
@@ -0,0 +1,24 @@
1
+ import logging
2
+
3
+ from rich.console import Console
4
+ from rich.logging import RichHandler
5
+
6
+
7
+ def get_logger() -> logging.Logger:
8
+ logger = logging.getLogger("haiku.rag")
9
+
10
+ handler = RichHandler(
11
+ console=Console(stderr=True),
12
+ rich_tracebacks=True,
13
+ )
14
+ formatter = logging.Formatter("%(message)s")
15
+ handler.setFormatter(formatter)
16
+
17
+ logger.setLevel("INFO")
18
+
19
+ # Remove any existing handlers to avoid duplicates on reconfiguration
20
+ for hdlr in logger.handlers[:]:
21
+ logger.removeHandler(hdlr)
22
+
23
+ logger.addHandler(handler)
24
+ return logger
@@ -0,0 +1,74 @@
1
+ from pathlib import Path
2
+
3
+ from watchfiles import Change, DefaultFilter, awatch
4
+
5
+ from haiku.rag.client import HaikuRAG
6
+ from haiku.rag.logging import get_logger
7
+ from haiku.rag.reader import FileReader
8
+ from haiku.rag.store.models.document import Document
9
+
10
+ logger = get_logger()
11
+
12
+
13
+ class FileFilter(DefaultFilter):
14
+ def __init__(self, *, ignore_paths: list[Path] | None = None) -> None:
15
+ self.extensions = tuple(FileReader.extensions)
16
+ super().__init__(ignore_paths=ignore_paths)
17
+
18
+ def __call__(self, change: "Change", path: str) -> bool:
19
+ return path.endswith(self.extensions) and super().__call__(change, path)
20
+
21
+
22
+ class FileWatcher:
23
+ def __init__(self, paths: list[Path], client: HaikuRAG):
24
+ self.paths = paths
25
+ self.client = client
26
+
27
+ async def observe(self):
28
+ logger.info(f"Watching files in {self.paths}")
29
+ filter = FileFilter()
30
+ await self.refresh()
31
+
32
+ async for changes in awatch(*self.paths, watch_filter=filter):
33
+ await self.handler(changes)
34
+
35
+ async def handler(self, changes: set[tuple[Change, str]]):
36
+ for change, path in changes:
37
+ if change == Change.added or change == Change.modified:
38
+ await self._upsert_document(Path(path))
39
+ elif change == Change.deleted:
40
+ await self._delete_document(Path(path))
41
+
42
+ async def refresh(self):
43
+ for path in self.paths:
44
+ for f in Path(path).rglob("**/*"):
45
+ if f.is_file() and f.suffix in FileReader.extensions:
46
+ await self._upsert_document(f)
47
+
48
+ async def _upsert_document(self, file: Path) -> Document | None:
49
+ try:
50
+ uri = file.as_uri()
51
+ existing_doc = await self.client.get_document_by_uri(uri)
52
+ print(uri)
53
+ if existing_doc:
54
+ doc = await self.client.create_document_from_source(str(file))
55
+ logger.info(f"Updated document {existing_doc.id} from {file}")
56
+ return doc
57
+ else:
58
+ doc = await self.client.create_document_from_source(str(file))
59
+ logger.info(f"Created new document {doc.id} from {file}")
60
+ return doc
61
+ except Exception as e:
62
+ logger.error(f"Failed to upsert document from {file}: {e}")
63
+ return None
64
+
65
+ async def _delete_document(self, file: Path):
66
+ try:
67
+ uri = file.as_uri()
68
+ existing_doc = await self.client.get_document_by_uri(uri)
69
+
70
+ if existing_doc and existing_doc.id:
71
+ await self.client.delete_document(existing_doc.id)
72
+ logger.info(f"Deleted document {existing_doc.id} for {file}")
73
+ except Exception as e:
74
+ logger.error(f"Failed to delete document for {file}: {e}")
@@ -98,7 +98,7 @@ async def test_client_create_document_from_source():
98
98
 
99
99
  assert doc.id is not None
100
100
  assert doc.content == test_content
101
- assert doc.uri == str(temp_path.resolve())
101
+ assert doc.uri == temp_path.as_uri()
102
102
  assert doc.metadata["source_type"] == "file"
103
103
  assert "contentType" in doc.metadata
104
104
  assert "md5" in doc.metadata
@@ -109,7 +109,7 @@ async def test_client_create_document_from_source():
109
109
 
110
110
  assert doc2.id is not None
111
111
  assert doc2.content == test_content
112
- assert doc2.uri == str(temp_path.resolve())
112
+ assert doc2.uri == temp_path.as_uri()
113
113
  assert "contentType" in doc2.metadata
114
114
  assert "md5" in doc2.metadata
115
115
 
@@ -0,0 +1,128 @@
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from haiku.rag.embeddings import get_embedder
5
+
6
+
7
+ @pytest.mark.asyncio
8
+ async def test_embedder():
9
+ embedder = get_embedder()
10
+ embedding = await embedder.embed("hello world")
11
+ assert len(embedding) == embedder._vector_dim
12
+
13
+
14
+ @pytest.mark.asyncio
15
+ async def test_similarity():
16
+ embedder = get_embedder()
17
+ phrases = [
18
+ "I enjoy eating great food.",
19
+ "Python is my favorite programming language.",
20
+ "I love to travel and see new places.",
21
+ ]
22
+ embeddings = [np.array(await embedder.embed(phrase)) for phrase in phrases]
23
+
24
+ # Calculate cosine similarity
25
+ def similarities(embeddings, test_embedding):
26
+ return [
27
+ np.dot(embedding, test_embedding)
28
+ / (np.linalg.norm(embedding) * np.linalg.norm(test_embedding))
29
+ for embedding in embeddings
30
+ ]
31
+
32
+ test_phrase = "I am going for a camping trip."
33
+ test_embedding = await embedder.embed(test_phrase)
34
+
35
+ sims = similarities(embeddings, test_embedding)
36
+ assert max(sims) == sims[2]
37
+
38
+ test_phrase = "When is dinner ready?"
39
+ test_embedding = await embedder.embed(test_phrase)
40
+
41
+ sims = similarities(embeddings, test_embedding)
42
+ assert max(sims) == sims[0]
43
+
44
+ test_phrase = "I work as a software developer."
45
+ test_embedding = await embedder.embed(test_phrase)
46
+
47
+ sims = similarities(embeddings, test_embedding)
48
+ assert max(sims) == sims[1]
49
+
50
+
51
+ @pytest.mark.asyncio
52
+ async def test_openai_embedder(monkeypatch):
53
+ monkeypatch.setenv("EMBEDDINGS_PROVIDER", "openai")
54
+ monkeypatch.setenv("EMBEDDINGS_MODEL", "text-embedding-3-small")
55
+
56
+ try:
57
+ from haiku.rag.embeddings.openai import Embedder as OpenAIEmbedder
58
+
59
+ embedder = OpenAIEmbedder("text-embedding-3-small", 1536)
60
+
61
+ # Mock the OpenAI client
62
+ class MockEmbeddingData:
63
+ def __init__(self, embedding):
64
+ self.embedding = embedding
65
+
66
+ class MockResponse:
67
+ def __init__(self, embedding):
68
+ self.data = [MockEmbeddingData(embedding)]
69
+
70
+ class MockAsyncOpenAI:
71
+ class MockEmbeddings:
72
+ async def create(self, model, input):
73
+ return MockResponse([0.1] * 1536)
74
+
75
+ def __init__(self):
76
+ self.embeddings = self.MockEmbeddings()
77
+
78
+ # Patch the AsyncOpenAI import
79
+ import haiku.rag.embeddings.openai
80
+
81
+ original_client = haiku.rag.embeddings.openai.AsyncOpenAI
82
+ haiku.rag.embeddings.openai.AsyncOpenAI = MockAsyncOpenAI
83
+
84
+ try:
85
+ embedding = await embedder.embed("test text")
86
+ assert len(embedding) == 1536
87
+ assert all(isinstance(x, float) for x in embedding)
88
+ finally:
89
+ haiku.rag.embeddings.openai.AsyncOpenAI = original_client
90
+
91
+ except ImportError:
92
+ pytest.skip("OpenAI package not installed")
93
+
94
+
95
+ @pytest.mark.asyncio
96
+ async def test_voyageai_embedder(monkeypatch):
97
+ monkeypatch.setenv("EMBEDDINGS_PROVIDER", "voyageai")
98
+ monkeypatch.setenv("EMBEDDINGS_MODEL", "voyage-3.5")
99
+
100
+ try:
101
+ from haiku.rag.embeddings.voyageai import Embedder as VoyageAIEmbedder
102
+
103
+ embedder = VoyageAIEmbedder("voyage-3.5", 1024)
104
+
105
+ # Mock the VoyageAI client
106
+ class MockEmbeddings:
107
+ def __init__(self, embeddings):
108
+ self.embeddings = embeddings
109
+
110
+ class MockClient:
111
+ def embed(self, texts, model, output_dtype):
112
+ return MockEmbeddings([[0.1] * 1024])
113
+
114
+ # Patch the Client import
115
+ import haiku.rag.embeddings.voyageai
116
+
117
+ original_client = haiku.rag.embeddings.voyageai.Client
118
+ haiku.rag.embeddings.voyageai.Client = MockClient
119
+
120
+ try:
121
+ embedding = await embedder.embed("test text")
122
+ assert len(embedding) == 1024
123
+ assert all(isinstance(x, float) for x in embedding)
124
+ finally:
125
+ haiku.rag.embeddings.voyageai.Client = original_client
126
+
127
+ except ImportError:
128
+ pytest.skip("VoyageAI package not installed")
@@ -0,0 +1,99 @@
1
+ import tempfile
2
+ from pathlib import Path
3
+ from unittest.mock import AsyncMock
4
+
5
+ import pytest
6
+
7
+ from haiku.rag.client import HaikuRAG
8
+ from haiku.rag.monitor import FileWatcher
9
+ from haiku.rag.store.models.document import Document
10
+
11
+
12
+ @pytest.mark.asyncio
13
+ async def test_file_watcher_upsert_document():
14
+ """Test FileWatcher._upsert_document method."""
15
+
16
+ with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
17
+ f.write("Test content for file watcher")
18
+ temp_path = Path(f.name)
19
+
20
+ try:
21
+ mock_client = AsyncMock(spec=HaikuRAG)
22
+ mock_doc = Document(id=1, content="Test content", uri=temp_path.as_uri())
23
+ mock_client.create_document_from_source.return_value = mock_doc
24
+ mock_client.get_document_by_uri.return_value = None # No existing document
25
+
26
+ watcher = FileWatcher(paths=[temp_path.parent], client=mock_client)
27
+
28
+ result = await watcher._upsert_document(temp_path)
29
+
30
+ assert result is not None
31
+ assert result.id == 1
32
+ mock_client.get_document_by_uri.assert_called_once_with(temp_path.as_uri())
33
+ mock_client.create_document_from_source.assert_called_once_with(str(temp_path))
34
+
35
+ finally:
36
+ temp_path.unlink(missing_ok=True)
37
+
38
+
39
+ @pytest.mark.asyncio
40
+ async def test_file_watcher_upsert_existing_document():
41
+ """Test FileWatcher._upsert_document with existing document."""
42
+
43
+ with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
44
+ f.write("Test content for file watcher")
45
+ temp_path = Path(f.name)
46
+
47
+ try:
48
+ mock_client = AsyncMock(spec=HaikuRAG)
49
+ existing_doc = Document(id=1, content="Old content", uri=temp_path.as_uri())
50
+ updated_doc = Document(id=1, content="Updated content", uri=temp_path.as_uri())
51
+
52
+ mock_client.get_document_by_uri.return_value = existing_doc
53
+ mock_client.create_document_from_source.return_value = updated_doc
54
+
55
+ watcher = FileWatcher(paths=[temp_path.parent], client=mock_client)
56
+
57
+ result = await watcher._upsert_document(temp_path)
58
+
59
+ assert result is not None
60
+ assert result.content == "Updated content"
61
+ mock_client.get_document_by_uri.assert_called_once_with(temp_path.as_uri())
62
+ mock_client.create_document_from_source.assert_called_once_with(str(temp_path))
63
+
64
+ finally:
65
+ temp_path.unlink(missing_ok=True)
66
+
67
+
68
+ @pytest.mark.asyncio
69
+ async def test_file_watcher_delete_document():
70
+ """Test FileWatcher._delete_document method."""
71
+ temp_path = Path("/tmp/test_file.txt")
72
+
73
+ mock_client = AsyncMock(spec=HaikuRAG)
74
+ existing_doc = Document(id=1, content="Content to delete", uri=temp_path.as_uri())
75
+ mock_client.get_document_by_uri.return_value = existing_doc
76
+ mock_client.delete_document.return_value = True
77
+
78
+ watcher = FileWatcher(paths=[temp_path.parent], client=mock_client)
79
+
80
+ await watcher._delete_document(temp_path)
81
+
82
+ mock_client.get_document_by_uri.assert_called_once_with(temp_path.as_uri())
83
+ mock_client.delete_document.assert_called_once_with(1)
84
+
85
+
86
+ @pytest.mark.asyncio
87
+ async def test_file_watcher_delete_nonexistent_document():
88
+ """Test FileWatcher._delete_document with non-existent document."""
89
+ temp_path = Path("/tmp/nonexistent_file.txt")
90
+
91
+ mock_client = AsyncMock(spec=HaikuRAG)
92
+ mock_client.get_document_by_uri.return_value = None
93
+
94
+ watcher = FileWatcher(paths=[temp_path.parent], client=mock_client)
95
+
96
+ await watcher._delete_document(temp_path)
97
+
98
+ mock_client.get_document_by_uri.assert_called_once_with(temp_path.as_uri())
99
+ mock_client.delete_document.assert_not_called()
@@ -577,6 +577,15 @@ wheels = [
577
577
  { url = "https://files.pythonhosted.org/packages/91/a1/cf2472db20f7ce4a6be1253a81cfdf85ad9c7885ffbed7047fb72c24cf87/distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87", size = 468973, upload-time = "2024-10-09T18:35:44.272Z" },
578
578
  ]
579
579
 
580
+ [[package]]
581
+ name = "distro"
582
+ version = "1.9.0"
583
+ source = { registry = "https://pypi.org/simple" }
584
+ sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722, upload-time = "2023-12-24T09:54:32.31Z" }
585
+ wheels = [
586
+ { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" },
587
+ ]
588
+
580
589
  [[package]]
581
590
  name = "et-xmlfile"
582
591
  version = "2.0.0"
@@ -754,7 +763,7 @@ wheels = [
754
763
 
755
764
  [[package]]
756
765
  name = "haiku-rag"
757
- version = "0.1.0"
766
+ version = "0.2.0"
758
767
  source = { editable = "." }
759
768
  dependencies = [
760
769
  { name = "fastmcp" },
@@ -771,6 +780,9 @@ dependencies = [
771
780
  ]
772
781
 
773
782
  [package.optional-dependencies]
783
+ openai = [
784
+ { name = "openai" },
785
+ ]
774
786
  voyageai = [
775
787
  { name = "voyageai" },
776
788
  ]
@@ -792,6 +804,7 @@ requires-dist = [
792
804
  { name = "httpx", specifier = ">=0.28.1" },
793
805
  { name = "markitdown", extras = ["audio-transcription", "docx", "pdf", "pptx", "xlsx"], specifier = ">=0.1.2" },
794
806
  { name = "ollama", specifier = ">=0.5.1" },
807
+ { name = "openai", marker = "extra == 'openai'", specifier = ">=1.0.0" },
795
808
  { name = "pydantic", specifier = ">=2.11.7" },
796
809
  { name = "python-dotenv", specifier = ">=1.1.0" },
797
810
  { name = "rich", specifier = ">=14.0.0" },
@@ -801,7 +814,7 @@ requires-dist = [
801
814
  { name = "voyageai", marker = "extra == 'voyageai'", specifier = ">=0.3.2" },
802
815
  { name = "watchfiles", specifier = ">=1.1.0" },
803
816
  ]
804
- provides-extras = ["voyageai"]
817
+ provides-extras = ["voyageai", "openai"]
805
818
 
806
819
  [package.metadata.requires-dev]
807
820
  dev = [
@@ -924,6 +937,78 @@ wheels = [
924
937
  { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" },
925
938
  ]
926
939
 
940
+ [[package]]
941
+ name = "jiter"
942
+ version = "0.10.0"
943
+ source = { registry = "https://pypi.org/simple" }
944
+ sdist = { url = "https://files.pythonhosted.org/packages/ee/9d/ae7ddb4b8ab3fb1b51faf4deb36cb48a4fbbd7cb36bad6a5fca4741306f7/jiter-0.10.0.tar.gz", hash = "sha256:07a7142c38aacc85194391108dc91b5b57093c978a9932bd86a36862759d9500", size = 162759, upload-time = "2025-05-18T19:04:59.73Z" }
945
+ wheels = [
946
+ { url = "https://files.pythonhosted.org/packages/be/7e/4011b5c77bec97cb2b572f566220364e3e21b51c48c5bd9c4a9c26b41b67/jiter-0.10.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:cd2fb72b02478f06a900a5782de2ef47e0396b3e1f7d5aba30daeb1fce66f303", size = 317215, upload-time = "2025-05-18T19:03:04.303Z" },
947
+ { url = "https://files.pythonhosted.org/packages/8a/4f/144c1b57c39692efc7ea7d8e247acf28e47d0912800b34d0ad815f6b2824/jiter-0.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:32bb468e3af278f095d3fa5b90314728a6916d89ba3d0ffb726dd9bf7367285e", size = 322814, upload-time = "2025-05-18T19:03:06.433Z" },
948
+ { url = "https://files.pythonhosted.org/packages/63/1f/db977336d332a9406c0b1f0b82be6f71f72526a806cbb2281baf201d38e3/jiter-0.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa8b3e0068c26ddedc7abc6fac37da2d0af16b921e288a5a613f4b86f050354f", size = 345237, upload-time = "2025-05-18T19:03:07.833Z" },
949
+ { url = "https://files.pythonhosted.org/packages/d7/1c/aa30a4a775e8a672ad7f21532bdbfb269f0706b39c6ff14e1f86bdd9e5ff/jiter-0.10.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:286299b74cc49e25cd42eea19b72aa82c515d2f2ee12d11392c56d8701f52224", size = 370999, upload-time = "2025-05-18T19:03:09.338Z" },
950
+ { url = "https://files.pythonhosted.org/packages/35/df/f8257abc4207830cb18880781b5f5b716bad5b2a22fb4330cfd357407c5b/jiter-0.10.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6ed5649ceeaeffc28d87fb012d25a4cd356dcd53eff5acff1f0466b831dda2a7", size = 491109, upload-time = "2025-05-18T19:03:11.13Z" },
951
+ { url = "https://files.pythonhosted.org/packages/06/76/9e1516fd7b4278aa13a2cc7f159e56befbea9aa65c71586305e7afa8b0b3/jiter-0.10.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2ab0051160cb758a70716448908ef14ad476c3774bd03ddce075f3c1f90a3d6", size = 388608, upload-time = "2025-05-18T19:03:12.911Z" },
952
+ { url = "https://files.pythonhosted.org/packages/6d/64/67750672b4354ca20ca18d3d1ccf2c62a072e8a2d452ac3cf8ced73571ef/jiter-0.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03997d2f37f6b67d2f5c475da4412be584e1cec273c1cfc03d642c46db43f8cf", size = 352454, upload-time = "2025-05-18T19:03:14.741Z" },
953
+ { url = "https://files.pythonhosted.org/packages/96/4d/5c4e36d48f169a54b53a305114be3efa2bbffd33b648cd1478a688f639c1/jiter-0.10.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c404a99352d839fed80d6afd6c1d66071f3bacaaa5c4268983fc10f769112e90", size = 391833, upload-time = "2025-05-18T19:03:16.426Z" },
954
+ { url = "https://files.pythonhosted.org/packages/0b/de/ce4a6166a78810bd83763d2fa13f85f73cbd3743a325469a4a9289af6dae/jiter-0.10.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:66e989410b6666d3ddb27a74c7e50d0829704ede652fd4c858e91f8d64b403d0", size = 523646, upload-time = "2025-05-18T19:03:17.704Z" },
955
+ { url = "https://files.pythonhosted.org/packages/a2/a6/3bc9acce53466972964cf4ad85efecb94f9244539ab6da1107f7aed82934/jiter-0.10.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b532d3af9ef4f6374609a3bcb5e05a1951d3bf6190dc6b176fdb277c9bbf15ee", size = 514735, upload-time = "2025-05-18T19:03:19.44Z" },
956
+ { url = "https://files.pythonhosted.org/packages/b4/d8/243c2ab8426a2a4dea85ba2a2ba43df379ccece2145320dfd4799b9633c5/jiter-0.10.0-cp310-cp310-win32.whl", hash = "sha256:da9be20b333970e28b72edc4dff63d4fec3398e05770fb3205f7fb460eb48dd4", size = 210747, upload-time = "2025-05-18T19:03:21.184Z" },
957
+ { url = "https://files.pythonhosted.org/packages/37/7a/8021bd615ef7788b98fc76ff533eaac846322c170e93cbffa01979197a45/jiter-0.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:f59e533afed0c5b0ac3eba20d2548c4a550336d8282ee69eb07b37ea526ee4e5", size = 207484, upload-time = "2025-05-18T19:03:23.046Z" },
958
+ { url = "https://files.pythonhosted.org/packages/1b/dd/6cefc6bd68b1c3c979cecfa7029ab582b57690a31cd2f346c4d0ce7951b6/jiter-0.10.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:3bebe0c558e19902c96e99217e0b8e8b17d570906e72ed8a87170bc290b1e978", size = 317473, upload-time = "2025-05-18T19:03:25.942Z" },
959
+ { url = "https://files.pythonhosted.org/packages/be/cf/fc33f5159ce132be1d8dd57251a1ec7a631c7df4bd11e1cd198308c6ae32/jiter-0.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:558cc7e44fd8e507a236bee6a02fa17199ba752874400a0ca6cd6e2196cdb7dc", size = 321971, upload-time = "2025-05-18T19:03:27.255Z" },
960
+ { url = "https://files.pythonhosted.org/packages/68/a4/da3f150cf1d51f6c472616fb7650429c7ce053e0c962b41b68557fdf6379/jiter-0.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d613e4b379a07d7c8453c5712ce7014e86c6ac93d990a0b8e7377e18505e98d", size = 345574, upload-time = "2025-05-18T19:03:28.63Z" },
961
+ { url = "https://files.pythonhosted.org/packages/84/34/6e8d412e60ff06b186040e77da5f83bc158e9735759fcae65b37d681f28b/jiter-0.10.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f62cf8ba0618eda841b9bf61797f21c5ebd15a7a1e19daab76e4e4b498d515b2", size = 371028, upload-time = "2025-05-18T19:03:30.292Z" },
962
+ { url = "https://files.pythonhosted.org/packages/fb/d9/9ee86173aae4576c35a2f50ae930d2ccb4c4c236f6cb9353267aa1d626b7/jiter-0.10.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:919d139cdfa8ae8945112398511cb7fca58a77382617d279556b344867a37e61", size = 491083, upload-time = "2025-05-18T19:03:31.654Z" },
963
+ { url = "https://files.pythonhosted.org/packages/d9/2c/f955de55e74771493ac9e188b0f731524c6a995dffdcb8c255b89c6fb74b/jiter-0.10.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:13ddbc6ae311175a3b03bd8994881bc4635c923754932918e18da841632349db", size = 388821, upload-time = "2025-05-18T19:03:33.184Z" },
964
+ { url = "https://files.pythonhosted.org/packages/81/5a/0e73541b6edd3f4aada586c24e50626c7815c561a7ba337d6a7eb0a915b4/jiter-0.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c440ea003ad10927a30521a9062ce10b5479592e8a70da27f21eeb457b4a9c5", size = 352174, upload-time = "2025-05-18T19:03:34.965Z" },
965
+ { url = "https://files.pythonhosted.org/packages/1c/c0/61eeec33b8c75b31cae42be14d44f9e6fe3ac15a4e58010256ac3abf3638/jiter-0.10.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dc347c87944983481e138dea467c0551080c86b9d21de6ea9306efb12ca8f606", size = 391869, upload-time = "2025-05-18T19:03:36.436Z" },
966
+ { url = "https://files.pythonhosted.org/packages/41/22/5beb5ee4ad4ef7d86f5ea5b4509f680a20706c4a7659e74344777efb7739/jiter-0.10.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:13252b58c1f4d8c5b63ab103c03d909e8e1e7842d302473f482915d95fefd605", size = 523741, upload-time = "2025-05-18T19:03:38.168Z" },
967
+ { url = "https://files.pythonhosted.org/packages/ea/10/768e8818538e5817c637b0df52e54366ec4cebc3346108a4457ea7a98f32/jiter-0.10.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7d1bbf3c465de4a24ab12fb7766a0003f6f9bce48b8b6a886158c4d569452dc5", size = 514527, upload-time = "2025-05-18T19:03:39.577Z" },
968
+ { url = "https://files.pythonhosted.org/packages/73/6d/29b7c2dc76ce93cbedabfd842fc9096d01a0550c52692dfc33d3cc889815/jiter-0.10.0-cp311-cp311-win32.whl", hash = "sha256:db16e4848b7e826edca4ccdd5b145939758dadf0dc06e7007ad0e9cfb5928ae7", size = 210765, upload-time = "2025-05-18T19:03:41.271Z" },
969
+ { url = "https://files.pythonhosted.org/packages/c2/c9/d394706deb4c660137caf13e33d05a031d734eb99c051142e039d8ceb794/jiter-0.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:9c9c1d5f10e18909e993f9641f12fe1c77b3e9b533ee94ffa970acc14ded3812", size = 209234, upload-time = "2025-05-18T19:03:42.918Z" },
970
+ { url = "https://files.pythonhosted.org/packages/6d/b5/348b3313c58f5fbfb2194eb4d07e46a35748ba6e5b3b3046143f3040bafa/jiter-0.10.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:1e274728e4a5345a6dde2d343c8da018b9d4bd4350f5a472fa91f66fda44911b", size = 312262, upload-time = "2025-05-18T19:03:44.637Z" },
971
+ { url = "https://files.pythonhosted.org/packages/9c/4a/6a2397096162b21645162825f058d1709a02965606e537e3304b02742e9b/jiter-0.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7202ae396446c988cb2a5feb33a543ab2165b786ac97f53b59aafb803fef0744", size = 320124, upload-time = "2025-05-18T19:03:46.341Z" },
972
+ { url = "https://files.pythonhosted.org/packages/2a/85/1ce02cade7516b726dd88f59a4ee46914bf79d1676d1228ef2002ed2f1c9/jiter-0.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23ba7722d6748b6920ed02a8f1726fb4b33e0fd2f3f621816a8b486c66410ab2", size = 345330, upload-time = "2025-05-18T19:03:47.596Z" },
973
+ { url = "https://files.pythonhosted.org/packages/75/d0/bb6b4f209a77190ce10ea8d7e50bf3725fc16d3372d0a9f11985a2b23eff/jiter-0.10.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:371eab43c0a288537d30e1f0b193bc4eca90439fc08a022dd83e5e07500ed026", size = 369670, upload-time = "2025-05-18T19:03:49.334Z" },
974
+ { url = "https://files.pythonhosted.org/packages/a0/f5/a61787da9b8847a601e6827fbc42ecb12be2c925ced3252c8ffcb56afcaf/jiter-0.10.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c675736059020365cebc845a820214765162728b51ab1e03a1b7b3abb70f74c", size = 489057, upload-time = "2025-05-18T19:03:50.66Z" },
975
+ { url = "https://files.pythonhosted.org/packages/12/e4/6f906272810a7b21406c760a53aadbe52e99ee070fc5c0cb191e316de30b/jiter-0.10.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0c5867d40ab716e4684858e4887489685968a47e3ba222e44cde6e4a2154f959", size = 389372, upload-time = "2025-05-18T19:03:51.98Z" },
976
+ { url = "https://files.pythonhosted.org/packages/e2/ba/77013b0b8ba904bf3762f11e0129b8928bff7f978a81838dfcc958ad5728/jiter-0.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:395bb9a26111b60141757d874d27fdea01b17e8fac958b91c20128ba8f4acc8a", size = 352038, upload-time = "2025-05-18T19:03:53.703Z" },
977
+ { url = "https://files.pythonhosted.org/packages/67/27/c62568e3ccb03368dbcc44a1ef3a423cb86778a4389e995125d3d1aaa0a4/jiter-0.10.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6842184aed5cdb07e0c7e20e5bdcfafe33515ee1741a6835353bb45fe5d1bd95", size = 391538, upload-time = "2025-05-18T19:03:55.046Z" },
978
+ { url = "https://files.pythonhosted.org/packages/c0/72/0d6b7e31fc17a8fdce76164884edef0698ba556b8eb0af9546ae1a06b91d/jiter-0.10.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:62755d1bcea9876770d4df713d82606c8c1a3dca88ff39046b85a048566d56ea", size = 523557, upload-time = "2025-05-18T19:03:56.386Z" },
979
+ { url = "https://files.pythonhosted.org/packages/2f/09/bc1661fbbcbeb6244bd2904ff3a06f340aa77a2b94e5a7373fd165960ea3/jiter-0.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:533efbce2cacec78d5ba73a41756beff8431dfa1694b6346ce7af3a12c42202b", size = 514202, upload-time = "2025-05-18T19:03:57.675Z" },
980
+ { url = "https://files.pythonhosted.org/packages/1b/84/5a5d5400e9d4d54b8004c9673bbe4403928a00d28529ff35b19e9d176b19/jiter-0.10.0-cp312-cp312-win32.whl", hash = "sha256:8be921f0cadd245e981b964dfbcd6fd4bc4e254cdc069490416dd7a2632ecc01", size = 211781, upload-time = "2025-05-18T19:03:59.025Z" },
981
+ { url = "https://files.pythonhosted.org/packages/9b/52/7ec47455e26f2d6e5f2ea4951a0652c06e5b995c291f723973ae9e724a65/jiter-0.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:a7c7d785ae9dda68c2678532a5a1581347e9c15362ae9f6e68f3fdbfb64f2e49", size = 206176, upload-time = "2025-05-18T19:04:00.305Z" },
982
+ { url = "https://files.pythonhosted.org/packages/2e/b0/279597e7a270e8d22623fea6c5d4eeac328e7d95c236ed51a2b884c54f70/jiter-0.10.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:e0588107ec8e11b6f5ef0e0d656fb2803ac6cf94a96b2b9fc675c0e3ab5e8644", size = 311617, upload-time = "2025-05-18T19:04:02.078Z" },
983
+ { url = "https://files.pythonhosted.org/packages/91/e3/0916334936f356d605f54cc164af4060e3e7094364add445a3bc79335d46/jiter-0.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cafc4628b616dc32530c20ee53d71589816cf385dd9449633e910d596b1f5c8a", size = 318947, upload-time = "2025-05-18T19:04:03.347Z" },
984
+ { url = "https://files.pythonhosted.org/packages/6a/8e/fd94e8c02d0e94539b7d669a7ebbd2776e51f329bb2c84d4385e8063a2ad/jiter-0.10.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:520ef6d981172693786a49ff5b09eda72a42e539f14788124a07530f785c3ad6", size = 344618, upload-time = "2025-05-18T19:04:04.709Z" },
985
+ { url = "https://files.pythonhosted.org/packages/6f/b0/f9f0a2ec42c6e9c2e61c327824687f1e2415b767e1089c1d9135f43816bd/jiter-0.10.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:554dedfd05937f8fc45d17ebdf298fe7e0c77458232bcb73d9fbbf4c6455f5b3", size = 368829, upload-time = "2025-05-18T19:04:06.912Z" },
986
+ { url = "https://files.pythonhosted.org/packages/e8/57/5bbcd5331910595ad53b9fd0c610392ac68692176f05ae48d6ce5c852967/jiter-0.10.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5bc299da7789deacf95f64052d97f75c16d4fc8c4c214a22bf8d859a4288a1c2", size = 491034, upload-time = "2025-05-18T19:04:08.222Z" },
987
+ { url = "https://files.pythonhosted.org/packages/9b/be/c393df00e6e6e9e623a73551774449f2f23b6ec6a502a3297aeeece2c65a/jiter-0.10.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5161e201172de298a8a1baad95eb85db4fb90e902353b1f6a41d64ea64644e25", size = 388529, upload-time = "2025-05-18T19:04:09.566Z" },
988
+ { url = "https://files.pythonhosted.org/packages/42/3e/df2235c54d365434c7f150b986a6e35f41ebdc2f95acea3036d99613025d/jiter-0.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e2227db6ba93cb3e2bf67c87e594adde0609f146344e8207e8730364db27041", size = 350671, upload-time = "2025-05-18T19:04:10.98Z" },
989
+ { url = "https://files.pythonhosted.org/packages/c6/77/71b0b24cbcc28f55ab4dbfe029f9a5b73aeadaba677843fc6dc9ed2b1d0a/jiter-0.10.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:15acb267ea5e2c64515574b06a8bf393fbfee6a50eb1673614aa45f4613c0cca", size = 390864, upload-time = "2025-05-18T19:04:12.722Z" },
990
+ { url = "https://files.pythonhosted.org/packages/6a/d3/ef774b6969b9b6178e1d1e7a89a3bd37d241f3d3ec5f8deb37bbd203714a/jiter-0.10.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:901b92f2e2947dc6dfcb52fd624453862e16665ea909a08398dde19c0731b7f4", size = 522989, upload-time = "2025-05-18T19:04:14.261Z" },
991
+ { url = "https://files.pythonhosted.org/packages/0c/41/9becdb1d8dd5d854142f45a9d71949ed7e87a8e312b0bede2de849388cb9/jiter-0.10.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d0cb9a125d5a3ec971a094a845eadde2db0de85b33c9f13eb94a0c63d463879e", size = 513495, upload-time = "2025-05-18T19:04:15.603Z" },
992
+ { url = "https://files.pythonhosted.org/packages/9c/36/3468e5a18238bdedae7c4d19461265b5e9b8e288d3f86cd89d00cbb48686/jiter-0.10.0-cp313-cp313-win32.whl", hash = "sha256:48a403277ad1ee208fb930bdf91745e4d2d6e47253eedc96e2559d1e6527006d", size = 211289, upload-time = "2025-05-18T19:04:17.541Z" },
993
+ { url = "https://files.pythonhosted.org/packages/7e/07/1c96b623128bcb913706e294adb5f768fb7baf8db5e1338ce7b4ee8c78ef/jiter-0.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:75f9eb72ecb640619c29bf714e78c9c46c9c4eaafd644bf78577ede459f330d4", size = 205074, upload-time = "2025-05-18T19:04:19.21Z" },
994
+ { url = "https://files.pythonhosted.org/packages/54/46/caa2c1342655f57d8f0f2519774c6d67132205909c65e9aa8255e1d7b4f4/jiter-0.10.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:28ed2a4c05a1f32ef0e1d24c2611330219fed727dae01789f4a335617634b1ca", size = 318225, upload-time = "2025-05-18T19:04:20.583Z" },
995
+ { url = "https://files.pythonhosted.org/packages/43/84/c7d44c75767e18946219ba2d703a5a32ab37b0bc21886a97bc6062e4da42/jiter-0.10.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14a4c418b1ec86a195f1ca69da8b23e8926c752b685af665ce30777233dfe070", size = 350235, upload-time = "2025-05-18T19:04:22.363Z" },
996
+ { url = "https://files.pythonhosted.org/packages/01/16/f5a0135ccd968b480daad0e6ab34b0c7c5ba3bc447e5088152696140dcb3/jiter-0.10.0-cp313-cp313t-win_amd64.whl", hash = "sha256:d7bfed2fe1fe0e4dda6ef682cee888ba444b21e7a6553e03252e4feb6cf0adca", size = 207278, upload-time = "2025-05-18T19:04:23.627Z" },
997
+ { url = "https://files.pythonhosted.org/packages/1c/9b/1d646da42c3de6c2188fdaa15bce8ecb22b635904fc68be025e21249ba44/jiter-0.10.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:5e9251a5e83fab8d87799d3e1a46cb4b7f2919b895c6f4483629ed2446f66522", size = 310866, upload-time = "2025-05-18T19:04:24.891Z" },
998
+ { url = "https://files.pythonhosted.org/packages/ad/0e/26538b158e8a7c7987e94e7aeb2999e2e82b1f9d2e1f6e9874ddf71ebda0/jiter-0.10.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:023aa0204126fe5b87ccbcd75c8a0d0261b9abdbbf46d55e7ae9f8e22424eeb8", size = 318772, upload-time = "2025-05-18T19:04:26.161Z" },
999
+ { url = "https://files.pythonhosted.org/packages/7b/fb/d302893151caa1c2636d6574d213e4b34e31fd077af6050a9c5cbb42f6fb/jiter-0.10.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c189c4f1779c05f75fc17c0c1267594ed918996a231593a21a5ca5438445216", size = 344534, upload-time = "2025-05-18T19:04:27.495Z" },
1000
+ { url = "https://files.pythonhosted.org/packages/01/d8/5780b64a149d74e347c5128d82176eb1e3241b1391ac07935693466d6219/jiter-0.10.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:15720084d90d1098ca0229352607cd68256c76991f6b374af96f36920eae13c4", size = 369087, upload-time = "2025-05-18T19:04:28.896Z" },
1001
+ { url = "https://files.pythonhosted.org/packages/e8/5b/f235a1437445160e777544f3ade57544daf96ba7e96c1a5b24a6f7ac7004/jiter-0.10.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e4f2fb68e5f1cfee30e2b2a09549a00683e0fde4c6a2ab88c94072fc33cb7426", size = 490694, upload-time = "2025-05-18T19:04:30.183Z" },
1002
+ { url = "https://files.pythonhosted.org/packages/85/a9/9c3d4617caa2ff89cf61b41e83820c27ebb3f7b5fae8a72901e8cd6ff9be/jiter-0.10.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ce541693355fc6da424c08b7edf39a2895f58d6ea17d92cc2b168d20907dee12", size = 388992, upload-time = "2025-05-18T19:04:32.028Z" },
1003
+ { url = "https://files.pythonhosted.org/packages/68/b1/344fd14049ba5c94526540af7eb661871f9c54d5f5601ff41a959b9a0bbd/jiter-0.10.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31c50c40272e189d50006ad5c73883caabb73d4e9748a688b216e85a9a9ca3b9", size = 351723, upload-time = "2025-05-18T19:04:33.467Z" },
1004
+ { url = "https://files.pythonhosted.org/packages/41/89/4c0e345041186f82a31aee7b9d4219a910df672b9fef26f129f0cda07a29/jiter-0.10.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fa3402a2ff9815960e0372a47b75c76979d74402448509ccd49a275fa983ef8a", size = 392215, upload-time = "2025-05-18T19:04:34.827Z" },
1005
+ { url = "https://files.pythonhosted.org/packages/55/58/ee607863e18d3f895feb802154a2177d7e823a7103f000df182e0f718b38/jiter-0.10.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:1956f934dca32d7bb647ea21d06d93ca40868b505c228556d3373cbd255ce853", size = 522762, upload-time = "2025-05-18T19:04:36.19Z" },
1006
+ { url = "https://files.pythonhosted.org/packages/15/d0/9123fb41825490d16929e73c212de9a42913d68324a8ce3c8476cae7ac9d/jiter-0.10.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:fcedb049bdfc555e261d6f65a6abe1d5ad68825b7202ccb9692636c70fcced86", size = 513427, upload-time = "2025-05-18T19:04:37.544Z" },
1007
+ { url = "https://files.pythonhosted.org/packages/d8/b3/2bd02071c5a2430d0b70403a34411fc519c2f227da7b03da9ba6a956f931/jiter-0.10.0-cp314-cp314-win32.whl", hash = "sha256:ac509f7eccca54b2a29daeb516fb95b6f0bd0d0d8084efaf8ed5dfc7b9f0b357", size = 210127, upload-time = "2025-05-18T19:04:38.837Z" },
1008
+ { url = "https://files.pythonhosted.org/packages/03/0c/5fe86614ea050c3ecd728ab4035534387cd41e7c1855ef6c031f1ca93e3f/jiter-0.10.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5ed975b83a2b8639356151cef5c0d597c68376fc4922b45d0eb384ac058cfa00", size = 318527, upload-time = "2025-05-18T19:04:40.612Z" },
1009
+ { url = "https://files.pythonhosted.org/packages/b3/4a/4175a563579e884192ba6e81725fc0448b042024419be8d83aa8a80a3f44/jiter-0.10.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3aa96f2abba33dc77f79b4cf791840230375f9534e5fac927ccceb58c5e604a5", size = 354213, upload-time = "2025-05-18T19:04:41.894Z" },
1010
+ ]
1011
+
927
1012
  [[package]]
928
1013
  name = "lxml"
929
1014
  version = "5.4.0"
@@ -1436,6 +1521,25 @@ wheels = [
1436
1521
  { url = "https://files.pythonhosted.org/packages/c3/16/873b955beda7bada5b0d798d3a601b2ff210e44ad5169f6d405b93892103/onnxruntime-1.22.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:64845709f9e8a2809e8e009bc4c8f73b788cee9c6619b7d9930344eae4c9cd36", size = 16427482, upload-time = "2025-05-09T20:26:20.376Z" },
1437
1522
  ]
1438
1523
 
1524
+ [[package]]
1525
+ name = "openai"
1526
+ version = "1.88.0"
1527
+ source = { registry = "https://pypi.org/simple" }
1528
+ dependencies = [
1529
+ { name = "anyio" },
1530
+ { name = "distro" },
1531
+ { name = "httpx" },
1532
+ { name = "jiter" },
1533
+ { name = "pydantic" },
1534
+ { name = "sniffio" },
1535
+ { name = "tqdm" },
1536
+ { name = "typing-extensions" },
1537
+ ]
1538
+ sdist = { url = "https://files.pythonhosted.org/packages/5a/ea/bbeef604d1fe0f7e9111745bb8a81362973a95713b28855beb9a9832ab12/openai-1.88.0.tar.gz", hash = "sha256:122d35e42998255cf1fc84560f6ee49a844e65c054cd05d3e42fda506b832bb1", size = 470963, upload-time = "2025-06-17T05:04:45.856Z" }
1539
+ wheels = [
1540
+ { url = "https://files.pythonhosted.org/packages/f4/03/ef68d77a38dd383cbed7fc898857d394d5a8b0520a35f054e7fe05dc3ac1/openai-1.88.0-py3-none-any.whl", hash = "sha256:7edd7826b3b83f5846562a6f310f040c79576278bf8e3687b30ba05bb5dff978", size = 734293, upload-time = "2025-06-17T05:04:43.858Z" },
1541
+ ]
1542
+
1439
1543
  [[package]]
1440
1544
  name = "openapi-pydantic"
1441
1545
  version = "0.5.1"
@@ -1,15 +0,0 @@
1
- {
2
- "permissions": {
3
- "allow": [
4
- "Bash(pytest:*)",
5
- "Bash(uv add:*)",
6
- "Bash(source:*)",
7
- "Bash(pyright:*)",
8
- "Bash(ruff check:*)",
9
- "Bash(find:*)",
10
- "Bash(. .venv/bin/activate)",
11
- "Bash(mv:*)"
12
- ],
13
- "deny": []
14
- }
15
- }
@@ -1,28 +0,0 @@
1
- name: build-docs
2
- on:
3
- push:
4
- branches:
5
- - main
6
- permissions:
7
- contents: write
8
- jobs:
9
- deploy:
10
- runs-on: ubuntu-latest
11
- steps:
12
- - uses: actions/checkout@v4
13
- - name: Configure Git Credentials
14
- run: |
15
- git config user.name github-actions[bot]
16
- git config user.email 41898282+github-actions[bot]@users.noreply.github.com
17
- - uses: actions/setup-python@v5
18
- with:
19
- python-version: 3.x
20
- - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV
21
- - uses: actions/cache@v4
22
- with:
23
- key: mkdocs-material-${{ env.cache_id }}
24
- path: .cache
25
- restore-keys: |
26
- mkdocs-material-
27
- - run: pip install mkdocs-material
28
- - run: mkdocs gh-deploy --force
haiku_rag-0.1.0/CLAUDE.md DELETED
@@ -1,35 +0,0 @@
1
- # Claude AI Assistant Configuration
2
-
3
- This file contains project-specific information and preferences for Claude AI assistant interactions.
4
-
5
- ## Project Overview
6
-
7
- This is a SQLite-based RAG (Retrieval-Augmented Generation) system built with Haiku.
8
-
9
- ## Development Commands
10
-
11
- - Install dependencies: `uv sync`
12
- - Run tests: `pytest`
13
- - Run specific test: `pytest path/to/test_file.py`
14
- - Run with coverage: `pytest --cov`
15
- - Type checking: `pyright`
16
- - Run MCP server: `python -m haiku.rag.mcp`
17
-
18
- ## Project Structure
19
-
20
- - `src/` - Source code
21
- - `tests/` - Test files
22
- - `README.md` - Documentation
23
-
24
- ## Notes
25
-
26
- - This is a Python project using uv for dependency management
27
- - Use pytest for testing
28
- - Prefer editing existing files over creating new ones
29
- - Follow existing code patterns and conventions
30
- - Remember to activate the .venv when you start working
31
- - Never use relative imports
32
- - Always run ruff as well as pyright after you are done
33
- - Do not be verbose with comments!
34
- - When you change something check if the README needs an update too.
35
-
@@ -1,28 +0,0 @@
1
- import os
2
- from pathlib import Path
3
-
4
- from dotenv import load_dotenv
5
- from pydantic import BaseModel
6
-
7
- from haiku.rag.utils import get_default_data_dir
8
-
9
- load_dotenv()
10
-
11
-
12
- class AppConfig(BaseModel):
13
- ENV: str = "development"
14
-
15
- DEFAULT_DATA_DIR: Path = get_default_data_dir()
16
-
17
- EMBEDDING_PROVIDER: str = "ollama"
18
- EMBEDDING_MODEL: str = "mxbai-embed-large"
19
- EMBEDDING_VECTOR_DIM: int = 1024
20
-
21
- CHUNK_SIZE: int = 256
22
- CHUNK_OVERLAP: int = 32
23
-
24
- OLLAMA_BASE_URL: str = "http://localhost:11434"
25
-
26
-
27
- # Expose Config object for app to import
28
- Config = AppConfig.model_validate(os.environ)
@@ -1,24 +0,0 @@
1
- from haiku.rag.config import Config
2
- from haiku.rag.embeddings.base import EmbedderBase
3
- from haiku.rag.embeddings.ollama import Embedder as OllamaEmbedder
4
-
5
-
6
- def get_embedder() -> EmbedderBase:
7
- """
8
- Factory function to get the appropriate embedder based on the configuration.
9
- """
10
-
11
- if Config.EMBEDDING_PROVIDER == "ollama":
12
- return OllamaEmbedder(Config.EMBEDDING_MODEL, Config.EMBEDDING_VECTOR_DIM)
13
-
14
- if Config.EMBEDDING_PROVIDER == "voyageai":
15
- try:
16
- from haiku.rag.embeddings.voyageai import Embedder as VoyageAIEmbedder
17
- except ImportError:
18
- raise ImportError(
19
- "VoyageAI embedder requires the 'voyageai' package. "
20
- "Please install haiku.rag with the 'voyageai' extra:"
21
- "uv pip install haiku.rag --extra voyageai"
22
- )
23
- return VoyageAIEmbedder(Config.EMBEDDING_MODEL, Config.EMBEDDING_VECTOR_DIM)
24
- raise ValueError(f"Unsupported embedding provider: {Config.EMBEDDING_PROVIDER}")
@@ -1,48 +0,0 @@
1
- import numpy as np
2
- import pytest
3
-
4
- from haiku.rag.embeddings import get_embedder
5
-
6
-
7
- @pytest.mark.asyncio
8
- async def test_embedder():
9
- embedder = get_embedder()
10
- embedding = await embedder.embed("hello world")
11
- assert len(embedding) == embedder._vector_dim
12
-
13
-
14
- @pytest.mark.asyncio
15
- async def test_similarity():
16
- embedder = get_embedder()
17
- phrases = [
18
- "I enjoy eating great food.",
19
- "Python is my favorite programming language.",
20
- "I love to travel and see new places.",
21
- ]
22
- embeddings = [np.array(await embedder.embed(phrase)) for phrase in phrases]
23
-
24
- # Calculate cosine similarity
25
- def similarities(embeddings, test_embedding):
26
- return [
27
- np.dot(embedding, test_embedding)
28
- / (np.linalg.norm(embedding) * np.linalg.norm(test_embedding))
29
- for embedding in embeddings
30
- ]
31
-
32
- test_phrase = "I am going for a camping trip."
33
- test_embedding = await embedder.embed(test_phrase)
34
-
35
- sims = similarities(embeddings, test_embedding)
36
- assert max(sims) == sims[2]
37
-
38
- test_phrase = "When is dinner ready?"
39
- test_embedding = await embedder.embed(test_phrase)
40
-
41
- sims = similarities(embeddings, test_embedding)
42
- assert max(sims) == sims[0]
43
-
44
- test_phrase = "I work as a software developer."
45
- test_embedding = await embedder.embed(test_phrase)
46
-
47
- sims = similarities(embeddings, test_embedding)
48
- assert max(sims) == sims[1]
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes