haiku.rag 0.7.1__py3-none-any.whl → 0.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of haiku.rag might be problematic. Click here for more details.
- haiku/rag/config.py +3 -0
- haiku/rag/embeddings/base.py +1 -1
- haiku/rag/embeddings/ollama.py +11 -5
- haiku/rag/embeddings/openai.py +5 -2
- haiku/rag/embeddings/vllm.py +19 -0
- haiku/rag/embeddings/voyageai.py +7 -3
- haiku/rag/qa/agent.py +8 -0
- haiku/rag/reranking/vllm.py +44 -0
- haiku/rag/store/repositories/chunk.py +1 -7
- {haiku_rag-0.7.1.dist-info → haiku_rag-0.7.3.dist-info}/METADATA +3 -3
- {haiku_rag-0.7.1.dist-info → haiku_rag-0.7.3.dist-info}/RECORD +14 -12
- {haiku_rag-0.7.1.dist-info → haiku_rag-0.7.3.dist-info}/WHEEL +0 -0
- {haiku_rag-0.7.1.dist-info → haiku_rag-0.7.3.dist-info}/entry_points.txt +0 -0
- {haiku_rag-0.7.1.dist-info → haiku_rag-0.7.3.dist-info}/licenses/LICENSE +0 -0
haiku/rag/config.py
CHANGED
|
@@ -33,6 +33,9 @@ class AppConfig(BaseModel):
|
|
|
33
33
|
CONTEXT_CHUNK_RADIUS: int = 0
|
|
34
34
|
|
|
35
35
|
OLLAMA_BASE_URL: str = "http://localhost:11434"
|
|
36
|
+
VLLM_EMBEDDINGS_BASE_URL: str = ""
|
|
37
|
+
VLLM_RERANK_BASE_URL: str = ""
|
|
38
|
+
VLLM_QA_BASE_URL: str = ""
|
|
36
39
|
|
|
37
40
|
# Provider keys
|
|
38
41
|
VOYAGE_API_KEY: str = ""
|
haiku/rag/embeddings/base.py
CHANGED
|
@@ -9,7 +9,7 @@ class EmbedderBase:
|
|
|
9
9
|
self._model = model
|
|
10
10
|
self._vector_dim = vector_dim
|
|
11
11
|
|
|
12
|
-
async def embed(self, text: str) -> list[float]:
|
|
12
|
+
async def embed(self, text: str | list[str]) -> list[float] | list[list[float]]:
|
|
13
13
|
raise NotImplementedError(
|
|
14
14
|
"Embedder is an abstract class. Please implement the embed method in a subclass."
|
|
15
15
|
)
|
haiku/rag/embeddings/ollama.py
CHANGED
|
@@ -1,11 +1,17 @@
|
|
|
1
|
-
from
|
|
1
|
+
from openai import AsyncOpenAI
|
|
2
2
|
|
|
3
3
|
from haiku.rag.config import Config
|
|
4
4
|
from haiku.rag.embeddings.base import EmbedderBase
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class Embedder(EmbedderBase):
|
|
8
|
-
async def embed(self, text: str) -> list[float]:
|
|
9
|
-
client =
|
|
10
|
-
|
|
11
|
-
|
|
8
|
+
async def embed(self, text: str | list[str]) -> list[float] | list[list[float]]:
|
|
9
|
+
client = AsyncOpenAI(base_url=f"{Config.OLLAMA_BASE_URL}/v1", api_key="dummy")
|
|
10
|
+
response = await client.embeddings.create(
|
|
11
|
+
model=self._model,
|
|
12
|
+
input=text,
|
|
13
|
+
)
|
|
14
|
+
if isinstance(text, str):
|
|
15
|
+
return response.data[0].embedding
|
|
16
|
+
else:
|
|
17
|
+
return [item.embedding for item in response.data]
|
haiku/rag/embeddings/openai.py
CHANGED
|
@@ -4,10 +4,13 @@ from haiku.rag.embeddings.base import EmbedderBase
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class Embedder(EmbedderBase):
|
|
7
|
-
async def embed(self, text: str) -> list[float]:
|
|
7
|
+
async def embed(self, text: str | list[str]) -> list[float] | list[list[float]]:
|
|
8
8
|
client = AsyncOpenAI()
|
|
9
9
|
response = await client.embeddings.create(
|
|
10
10
|
model=self._model,
|
|
11
11
|
input=text,
|
|
12
12
|
)
|
|
13
|
-
|
|
13
|
+
if isinstance(text, str):
|
|
14
|
+
return response.data[0].embedding
|
|
15
|
+
else:
|
|
16
|
+
return [item.embedding for item in response.data]
|
|
haiku/rag/embeddings/vllm.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from openai import AsyncOpenAI
|
|
2
|
+
|
|
3
|
+
from haiku.rag.config import Config
|
|
4
|
+
from haiku.rag.embeddings.base import EmbedderBase
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Embedder(EmbedderBase):
|
|
8
|
+
async def embed(self, text: str | list[str]) -> list[float] | list[list[float]]:
|
|
9
|
+
client = AsyncOpenAI(
|
|
10
|
+
base_url=f"{Config.VLLM_EMBEDDINGS_BASE_URL}/v1", api_key="dummy"
|
|
11
|
+
)
|
|
12
|
+
response = await client.embeddings.create(
|
|
13
|
+
model=self._model,
|
|
14
|
+
input=text,
|
|
15
|
+
)
|
|
16
|
+
if isinstance(text, str):
|
|
17
|
+
return response.data[0].embedding
|
|
18
|
+
else:
|
|
19
|
+
return [item.embedding for item in response.data]
|
haiku/rag/embeddings/voyageai.py
CHANGED
|
@@ -4,10 +4,14 @@ try:
|
|
|
4
4
|
from haiku.rag.embeddings.base import EmbedderBase
|
|
5
5
|
|
|
6
6
|
class Embedder(EmbedderBase):
|
|
7
|
-
async def embed(self, text: str) -> list[float]:
|
|
7
|
+
async def embed(self, text: str | list[str]) -> list[float] | list[list[float]]:
|
|
8
8
|
client = Client()
|
|
9
|
-
|
|
10
|
-
|
|
9
|
+
if isinstance(text, str):
|
|
10
|
+
res = client.embed([text], model=self._model, output_dtype="float")
|
|
11
|
+
return res.embeddings[0] # type: ignore[return-value]
|
|
12
|
+
else:
|
|
13
|
+
res = client.embed(text, model=self._model, output_dtype="float")
|
|
14
|
+
return res.embeddings # type: ignore[return-value]
|
|
11
15
|
|
|
12
16
|
except ImportError:
|
|
13
17
|
pass
|
haiku/rag/qa/agent.py
CHANGED
|
@@ -2,6 +2,7 @@ from pydantic import BaseModel, Field
|
|
|
2
2
|
from pydantic_ai import Agent, RunContext
|
|
3
3
|
from pydantic_ai.models.openai import OpenAIChatModel
|
|
4
4
|
from pydantic_ai.providers.ollama import OllamaProvider
|
|
5
|
+
from pydantic_ai.providers.openai import OpenAIProvider
|
|
5
6
|
|
|
6
7
|
from haiku.rag.client import HaikuRAG
|
|
7
8
|
from haiku.rag.config import Config
|
|
@@ -65,6 +66,13 @@ class QuestionAnswerAgent:
|
|
|
65
66
|
model_name=model,
|
|
66
67
|
provider=OllamaProvider(base_url=f"{Config.OLLAMA_BASE_URL}/v1"),
|
|
67
68
|
)
|
|
69
|
+
elif provider == "vllm":
|
|
70
|
+
return OpenAIChatModel(
|
|
71
|
+
model_name=model,
|
|
72
|
+
provider=OpenAIProvider(
|
|
73
|
+
base_url=f"{Config.VLLM_QA_BASE_URL}/v1", api_key="none"
|
|
74
|
+
),
|
|
75
|
+
)
|
|
68
76
|
else:
|
|
69
77
|
# For all other providers, use the provider:model format
|
|
70
78
|
return f"{provider}:{model}"
|
|
haiku/rag/reranking/vllm.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import httpx
|
|
2
|
+
|
|
3
|
+
from haiku.rag.config import Config
|
|
4
|
+
from haiku.rag.reranking.base import RerankerBase
|
|
5
|
+
from haiku.rag.store.models.chunk import Chunk
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class VLLMReranker(RerankerBase):
|
|
9
|
+
def __init__(self, model: str):
|
|
10
|
+
self._model = model
|
|
11
|
+
self._base_url = Config.VLLM_RERANK_BASE_URL
|
|
12
|
+
|
|
13
|
+
async def rerank(
|
|
14
|
+
self, query: str, chunks: list[Chunk], top_n: int = 10
|
|
15
|
+
) -> list[tuple[Chunk, float]]:
|
|
16
|
+
if not chunks:
|
|
17
|
+
return []
|
|
18
|
+
|
|
19
|
+
# Prepare documents for reranking
|
|
20
|
+
documents = [chunk.content for chunk in chunks]
|
|
21
|
+
|
|
22
|
+
async with httpx.AsyncClient() as client:
|
|
23
|
+
response = await client.post(
|
|
24
|
+
f"{self._base_url}/v1/rerank",
|
|
25
|
+
json={"model": self._model, "query": query, "documents": documents},
|
|
26
|
+
headers={
|
|
27
|
+
"accept": "application/json",
|
|
28
|
+
"Content-Type": "application/json",
|
|
29
|
+
},
|
|
30
|
+
)
|
|
31
|
+
response.raise_for_status()
|
|
32
|
+
|
|
33
|
+
result = response.json()
|
|
34
|
+
|
|
35
|
+
# Extract scores and pair with chunks
|
|
36
|
+
scored_chunks = []
|
|
37
|
+
for item in result.get("results", []):
|
|
38
|
+
index = item["index"]
|
|
39
|
+
score = item["relevance_score"]
|
|
40
|
+
scored_chunks.append((chunks[index], score))
|
|
41
|
+
|
|
42
|
+
# Sort by score (descending) and return top_n
|
|
43
|
+
scored_chunks.sort(key=lambda x: x[1], reverse=True)
|
|
44
|
+
return scored_chunks[:top_n]
|
|
haiku/rag/store/repositories/chunk.py
CHANGED
|
@@ -154,13 +154,7 @@ class ChunkRepository:
|
|
|
154
154
|
"""Create chunks and embeddings for a document from DoclingDocument."""
|
|
155
155
|
chunk_texts = await chunker.chunk(document)
|
|
156
156
|
|
|
157
|
-
|
|
158
|
-
embeddings_tasks = []
|
|
159
|
-
for chunk_text in chunk_texts:
|
|
160
|
-
embeddings_tasks.append(self.embedder.embed(chunk_text))
|
|
161
|
-
|
|
162
|
-
# Wait for all embeddings to complete
|
|
163
|
-
embeddings = await asyncio.gather(*embeddings_tasks)
|
|
157
|
+
embeddings = await self.embedder.embed(chunk_texts)
|
|
164
158
|
|
|
165
159
|
# Prepare all chunk records for batch insertion
|
|
166
160
|
chunk_records = []
|
|
{haiku_rag-0.7.1.dist-info → haiku_rag-0.7.3.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: haiku.rag
|
|
3
|
-
Version: 0.7.1
|
|
3
|
+
Version: 0.7.3
|
|
4
4
|
Summary: Retrieval Augmented Generation (RAG) with LanceDB
|
|
5
5
|
Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -47,10 +47,10 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
|
|
|
47
47
|
## Features
|
|
48
48
|
|
|
49
49
|
- **Local LanceDB**: No external servers required, supports also LanceDB cloud storage, S3, Google Cloud & Azure
|
|
50
|
-
- **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
|
|
50
|
+
- **Multiple embedding providers**: Ollama, VoyageAI, OpenAI, vLLM
|
|
51
51
|
- **Multiple QA providers**: Any provider/model supported by Pydantic AI
|
|
52
52
|
- **Native hybrid search**: Vector + full-text search with native LanceDB RRF reranking
|
|
53
|
-
- **Reranking**: Default search result reranking with MixedBread AI or Cohere
|
|
53
|
+
- **Reranking**: Default search result reranking with MixedBread AI, Cohere, or vLLM
|
|
54
54
|
- **Question answering**: Built-in QA agents on your documents
|
|
55
55
|
- **File monitoring**: Auto-index files when run as server
|
|
56
56
|
- **40+ file formats**: PDF, DOCX, HTML, Markdown, code files, URLs
|
|
{haiku_rag-0.7.1.dist-info → haiku_rag-0.7.3.dist-info}/RECORD
CHANGED
|
@@ -3,7 +3,7 @@ haiku/rag/app.py,sha256=GmuZxH7BMutWt8Mdu0RSateRBaKiqXh7Z9tV7cZX6n0,7655
|
|
|
3
3
|
haiku/rag/chunker.py,sha256=PVe6ysv8UlacUd4Zb3_8RFWIaWDXnzBAy2VDJ4TaUsE,1555
|
|
4
4
|
haiku/rag/cli.py,sha256=UY9Vh5RsIxSCV14eQbNOiwToKmbFAvqTOAnxjieaYBs,6399
|
|
5
5
|
haiku/rag/client.py,sha256=N4zkWjE9Rsw9YgPvNo83xptHUQR2ognfOnjkoV_w6hc,20999
|
|
6
|
-
haiku/rag/config.py,sha256=
|
|
6
|
+
haiku/rag/config.py,sha256=3H41da9BU1R1y2JJHD0cOSErX_VSM1UXA7M2JSOxFXE,1795
|
|
7
7
|
haiku/rag/logging.py,sha256=DOQi9QMpQRl8h17Vu4nQh8HxpHdeIu29n8-HZaT3SRQ,786
|
|
8
8
|
haiku/rag/mcp.py,sha256=bR9Y-Nz-hvjiql20Y0KE0hwNGwyjmPGX8K9d-qmXptY,4683
|
|
9
9
|
haiku/rag/migration.py,sha256=gWxQwiKo0YulRhogYz4K8N98kHN9LQXIx9FeTmT24v4,10915
|
|
@@ -11,29 +11,31 @@ haiku/rag/monitor.py,sha256=r386nkhdlsU8UECwIuVwnrSlgMk3vNIuUZGNIzkZuec,2770
|
|
|
11
11
|
haiku/rag/reader.py,sha256=qkPTMJuQ_o4sK-8zpDl9WFYe_MJ7aL_gUw6rczIpW-g,3274
|
|
12
12
|
haiku/rag/utils.py,sha256=c8F0ECsFSqvQxzxINAOAnvShoOnJPLsOaNE3JEY2JSc,3230
|
|
13
13
|
haiku/rag/embeddings/__init__.py,sha256=n7aHW3BxHlpGxU4ze4YYDOsljzFpEep8dwVE2n45JoE,1218
|
|
14
|
-
haiku/rag/embeddings/base.py,sha256=
|
|
15
|
-
haiku/rag/embeddings/ollama.py,sha256=
|
|
16
|
-
haiku/rag/embeddings/openai.py,sha256=
|
|
17
|
-
haiku/rag/embeddings/
|
|
14
|
+
haiku/rag/embeddings/base.py,sha256=BnSviKrlzjv3L0sZJs_T-pxfawd-bcTak-rsX-D2f3A,497
|
|
15
|
+
haiku/rag/embeddings/ollama.py,sha256=LuLlHH6RGoO9_gFCIlbmesuXOj017gTw6z-p8Ez0CfE,595
|
|
16
|
+
haiku/rag/embeddings/openai.py,sha256=fIFCk-jpUtaW0xsnrQnJ824O0UCjaGG2sgvBzREhilc,503
|
|
17
|
+
haiku/rag/embeddings/vllm.py,sha256=vhaUnCn6VMkfSluLhWKtSV-sekFaPsp4pKo2N7-SBCY,626
|
|
18
|
+
haiku/rag/embeddings/voyageai.py,sha256=UW-MW4tJKnPB6Fs2P7A3yt-ZeRm46H9npckchSriPX8,661
|
|
18
19
|
haiku/rag/qa/__init__.py,sha256=Sl7Kzrg9CuBOcMF01wc1NtQhUNWjJI0MhIHfCWrb8V4,434
|
|
19
|
-
haiku/rag/qa/agent.py,sha256=
|
|
20
|
+
haiku/rag/qa/agent.py,sha256=15-jMuF08U0uxGdqgQysKMZLr8BUWssI76PtyQ2Ngd8,2912
|
|
20
21
|
haiku/rag/qa/prompts.py,sha256=xdT4cyrOrAK9UDgVqyev1wHF49jD57Bh40gx2sH4NPI,3341
|
|
21
22
|
haiku/rag/reranking/__init__.py,sha256=IRXHs4qPu6VbGJQpzSwhgtVWWumURH_vEoVFE-extlo,894
|
|
22
23
|
haiku/rag/reranking/base.py,sha256=LM9yUSSJ414UgBZhFTgxGprlRqzfTe4I1vgjricz2JY,405
|
|
23
24
|
haiku/rag/reranking/cohere.py,sha256=1iTdiaa8vvb6oHVB2qpWzUOVkyfUcimVSZp6Qr4aq4c,1049
|
|
24
25
|
haiku/rag/reranking/mxbai.py,sha256=46sVTsTIkzIX9THgM3u8HaEmgY7evvEyB-N54JTHvK8,867
|
|
26
|
+
haiku/rag/reranking/vllm.py,sha256=xVGH9ss-ISWdJ5SKUUHUbTqBo7PIEmA_SQv0ScdJ6XA,1479
|
|
25
27
|
haiku/rag/store/__init__.py,sha256=hq0W0DAC7ysqhWSP2M2uHX8cbG6kbr-sWHxhq6qQcY0,103
|
|
26
28
|
haiku/rag/store/engine.py,sha256=XHGo5Xl-dCFdQHrOdMo64xVK5n0k8-LoUl5V-tlA0HI,7131
|
|
27
29
|
haiku/rag/store/models/__init__.py,sha256=s0E72zneGlowvZrFWaNxHYjOAUjgWdLxzdYsnvNRVlY,88
|
|
28
30
|
haiku/rag/store/models/chunk.py,sha256=ZNyTfO6lh3rXWLVYO3TZcitbL4LSUGr42fR6jQQ5iQc,364
|
|
29
31
|
haiku/rag/store/models/document.py,sha256=zSSpt6pyrMJAIXGQvIcqojcqUzwZnhp3WxVokaWxNRc,396
|
|
30
32
|
haiku/rag/store/repositories/__init__.py,sha256=Olv5dLfBQINRV3HrsfUpjzkZ7Qm7goEYyMNykgo_DaY,291
|
|
31
|
-
haiku/rag/store/repositories/chunk.py,sha256=
|
|
33
|
+
haiku/rag/store/repositories/chunk.py,sha256=v4y4eh4yIf6zJaWfHxljvnmb12dmvwdinzmxQt8Lvhs,13343
|
|
32
34
|
haiku/rag/store/repositories/document.py,sha256=lP8Lo82KTP-qwXFRpYZ46WjeAdAsHwZ5pJcrXdz4g0U,6988
|
|
33
35
|
haiku/rag/store/repositories/settings.py,sha256=dqnAvm-98nQrWpLBbf9QghJw673QD80-iqQhRMP5t0c,5025
|
|
34
36
|
haiku/rag/store/upgrades/__init__.py,sha256=wUiEoSiHTahvuagx93E4FB07v123AhdbOjwUkPusiIg,14
|
|
35
|
-
haiku_rag-0.7.
|
|
36
|
-
haiku_rag-0.7.
|
|
37
|
-
haiku_rag-0.7.
|
|
38
|
-
haiku_rag-0.7.
|
|
39
|
-
haiku_rag-0.7.
|
|
37
|
+
haiku_rag-0.7.3.dist-info/METADATA,sha256=PAvA6VZuyZp9IekXhYCLWDxM1wMZMmujtntxZE2lBoE,4610
|
|
38
|
+
haiku_rag-0.7.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
39
|
+
haiku_rag-0.7.3.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
|
|
40
|
+
haiku_rag-0.7.3.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
|
|
41
|
+
haiku_rag-0.7.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|