haiku.rag 0.7.1__py3-none-any.whl → 0.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of haiku.rag might be problematic. Click here for more details.

haiku/rag/config.py CHANGED
@@ -33,6 +33,9 @@ class AppConfig(BaseModel):
33
33
  CONTEXT_CHUNK_RADIUS: int = 0
34
34
 
35
35
  OLLAMA_BASE_URL: str = "http://localhost:11434"
36
+ VLLM_EMBEDDINGS_BASE_URL: str = ""
37
+ VLLM_RERANK_BASE_URL: str = ""
38
+ VLLM_QA_BASE_URL: str = ""
36
39
 
37
40
  # Provider keys
38
41
  VOYAGE_API_KEY: str = ""
haiku/rag/embeddings/base.py CHANGED
@@ -9,7 +9,7 @@ class EmbedderBase:
9
9
  self._model = model
10
10
  self._vector_dim = vector_dim
11
11
 
12
- async def embed(self, text: str) -> list[float]:
12
+ async def embed(self, text: str | list[str]) -> list[float] | list[list[float]]:
13
13
  raise NotImplementedError(
14
14
  "Embedder is an abstract class. Please implement the embed method in a subclass."
15
15
  )
haiku/rag/embeddings/ollama.py CHANGED
@@ -1,11 +1,17 @@
1
- from ollama import AsyncClient
1
+ from openai import AsyncOpenAI
2
2
 
3
3
  from haiku.rag.config import Config
4
4
  from haiku.rag.embeddings.base import EmbedderBase
5
5
 
6
6
 
7
7
  class Embedder(EmbedderBase):
8
- async def embed(self, text: str) -> list[float]:
9
- client = AsyncClient(host=Config.OLLAMA_BASE_URL)
10
- res = await client.embeddings(model=self._model, prompt=text)
11
- return list(res["embedding"])
8
+ async def embed(self, text: str | list[str]) -> list[float] | list[list[float]]:
9
+ client = AsyncOpenAI(base_url=f"{Config.OLLAMA_BASE_URL}/v1", api_key="dummy")
10
+ response = await client.embeddings.create(
11
+ model=self._model,
12
+ input=text,
13
+ )
14
+ if isinstance(text, str):
15
+ return response.data[0].embedding
16
+ else:
17
+ return [item.embedding for item in response.data]
haiku/rag/embeddings/openai.py CHANGED
@@ -4,10 +4,13 @@ from haiku.rag.embeddings.base import EmbedderBase
4
4
 
5
5
 
6
6
  class Embedder(EmbedderBase):
7
- async def embed(self, text: str) -> list[float]:
7
+ async def embed(self, text: str | list[str]) -> list[float] | list[list[float]]:
8
8
  client = AsyncOpenAI()
9
9
  response = await client.embeddings.create(
10
10
  model=self._model,
11
11
  input=text,
12
12
  )
13
- return response.data[0].embedding
13
+ if isinstance(text, str):
14
+ return response.data[0].embedding
15
+ else:
16
+ return [item.embedding for item in response.data]
haiku/rag/embeddings/vllm.py ADDED
@@ -0,0 +1,19 @@
1
+ from openai import AsyncOpenAI
2
+
3
+ from haiku.rag.config import Config
4
+ from haiku.rag.embeddings.base import EmbedderBase
5
+
6
+
7
+ class Embedder(EmbedderBase):
8
+ async def embed(self, text: str | list[str]) -> list[float] | list[list[float]]:
9
+ client = AsyncOpenAI(
10
+ base_url=f"{Config.VLLM_EMBEDDINGS_BASE_URL}/v1", api_key="dummy"
11
+ )
12
+ response = await client.embeddings.create(
13
+ model=self._model,
14
+ input=text,
15
+ )
16
+ if isinstance(text, str):
17
+ return response.data[0].embedding
18
+ else:
19
+ return [item.embedding for item in response.data]
haiku/rag/embeddings/voyageai.py CHANGED
@@ -4,10 +4,14 @@ try:
4
4
  from haiku.rag.embeddings.base import EmbedderBase
5
5
 
6
6
  class Embedder(EmbedderBase):
7
- async def embed(self, text: str) -> list[float]:
7
+ async def embed(self, text: str | list[str]) -> list[float] | list[list[float]]:
8
8
  client = Client()
9
- res = client.embed([text], model=self._model, output_dtype="float")
10
- return res.embeddings[0] # type: ignore[return-value]
9
+ if isinstance(text, str):
10
+ res = client.embed([text], model=self._model, output_dtype="float")
11
+ return res.embeddings[0] # type: ignore[return-value]
12
+ else:
13
+ res = client.embed(text, model=self._model, output_dtype="float")
14
+ return res.embeddings # type: ignore[return-value]
11
15
 
12
16
  except ImportError:
13
17
  pass
haiku/rag/qa/agent.py CHANGED
@@ -2,6 +2,7 @@ from pydantic import BaseModel, Field
2
2
  from pydantic_ai import Agent, RunContext
3
3
  from pydantic_ai.models.openai import OpenAIChatModel
4
4
  from pydantic_ai.providers.ollama import OllamaProvider
5
+ from pydantic_ai.providers.openai import OpenAIProvider
5
6
 
6
7
  from haiku.rag.client import HaikuRAG
7
8
  from haiku.rag.config import Config
@@ -65,6 +66,13 @@ class QuestionAnswerAgent:
65
66
  model_name=model,
66
67
  provider=OllamaProvider(base_url=f"{Config.OLLAMA_BASE_URL}/v1"),
67
68
  )
69
+ elif provider == "vllm":
70
+ return OpenAIChatModel(
71
+ model_name=model,
72
+ provider=OpenAIProvider(
73
+ base_url=f"{Config.VLLM_QA_BASE_URL}/v1", api_key="none"
74
+ ),
75
+ )
68
76
  else:
69
77
  # For all other providers, use the provider:model format
70
78
  return f"{provider}:{model}"
haiku/rag/reranking/vllm.py ADDED
@@ -0,0 +1,44 @@
1
+ import httpx
2
+
3
+ from haiku.rag.config import Config
4
+ from haiku.rag.reranking.base import RerankerBase
5
+ from haiku.rag.store.models.chunk import Chunk
6
+
7
+
8
+ class VLLMReranker(RerankerBase):
9
+ def __init__(self, model: str):
10
+ self._model = model
11
+ self._base_url = Config.VLLM_RERANK_BASE_URL
12
+
13
+ async def rerank(
14
+ self, query: str, chunks: list[Chunk], top_n: int = 10
15
+ ) -> list[tuple[Chunk, float]]:
16
+ if not chunks:
17
+ return []
18
+
19
+ # Prepare documents for reranking
20
+ documents = [chunk.content for chunk in chunks]
21
+
22
+ async with httpx.AsyncClient() as client:
23
+ response = await client.post(
24
+ f"{self._base_url}/v1/rerank",
25
+ json={"model": self._model, "query": query, "documents": documents},
26
+ headers={
27
+ "accept": "application/json",
28
+ "Content-Type": "application/json",
29
+ },
30
+ )
31
+ response.raise_for_status()
32
+
33
+ result = response.json()
34
+
35
+ # Extract scores and pair with chunks
36
+ scored_chunks = []
37
+ for item in result.get("results", []):
38
+ index = item["index"]
39
+ score = item["relevance_score"]
40
+ scored_chunks.append((chunks[index], score))
41
+
42
+ # Sort by score (descending) and return top_n
43
+ scored_chunks.sort(key=lambda x: x[1], reverse=True)
44
+ return scored_chunks[:top_n]
haiku/rag/store/repositories/chunk.py CHANGED
@@ -154,13 +154,7 @@ class ChunkRepository:
154
154
  """Create chunks and embeddings for a document from DoclingDocument."""
155
155
  chunk_texts = await chunker.chunk(document)
156
156
 
157
- # Generate embeddings in parallel for all chunks
158
- embeddings_tasks = []
159
- for chunk_text in chunk_texts:
160
- embeddings_tasks.append(self.embedder.embed(chunk_text))
161
-
162
- # Wait for all embeddings to complete
163
- embeddings = await asyncio.gather(*embeddings_tasks)
157
+ embeddings = await self.embedder.embed(chunk_texts)
164
158
 
165
159
  # Prepare all chunk records for batch insertion
166
160
  chunk_records = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: haiku.rag
3
- Version: 0.7.1
3
+ Version: 0.7.3
4
4
  Summary: Retrieval Augmented Generation (RAG) with LanceDB
5
5
  Author-email: Yiorgis Gozadinos <ggozadinos@gmail.com>
6
6
  License: MIT
@@ -47,10 +47,10 @@ Retrieval-Augmented Generation (RAG) library built on LanceDB.
47
47
  ## Features
48
48
 
49
49
  - **Local LanceDB**: No external servers required, supports also LanceDB cloud storage, S3, Google Cloud & Azure
50
- - **Multiple embedding providers**: Ollama, VoyageAI, OpenAI
50
+ - **Multiple embedding providers**: Ollama, VoyageAI, OpenAI, vLLM
51
51
  - **Multiple QA providers**: Any provider/model supported by Pydantic AI
52
52
  - **Native hybrid search**: Vector + full-text search with native LanceDB RRF reranking
53
- - **Reranking**: Default search result reranking with MixedBread AI or Cohere
53
+ - **Reranking**: Default search result reranking with MixedBread AI, Cohere, or vLLM
54
54
  - **Question answering**: Built-in QA agents on your documents
55
55
  - **File monitoring**: Auto-index files when run as server
56
56
  - **40+ file formats**: PDF, DOCX, HTML, Markdown, code files, URLs
@@ -3,7 +3,7 @@ haiku/rag/app.py,sha256=GmuZxH7BMutWt8Mdu0RSateRBaKiqXh7Z9tV7cZX6n0,7655
3
3
  haiku/rag/chunker.py,sha256=PVe6ysv8UlacUd4Zb3_8RFWIaWDXnzBAy2VDJ4TaUsE,1555
4
4
  haiku/rag/cli.py,sha256=UY9Vh5RsIxSCV14eQbNOiwToKmbFAvqTOAnxjieaYBs,6399
5
5
  haiku/rag/client.py,sha256=N4zkWjE9Rsw9YgPvNo83xptHUQR2ognfOnjkoV_w6hc,20999
6
- haiku/rag/config.py,sha256=9Mv0QJ3c6VF1oVRSXJlSsG234dCd_sKnJO-ybMaTpDQ,1690
6
+ haiku/rag/config.py,sha256=3H41da9BU1R1y2JJHD0cOSErX_VSM1UXA7M2JSOxFXE,1795
7
7
  haiku/rag/logging.py,sha256=DOQi9QMpQRl8h17Vu4nQh8HxpHdeIu29n8-HZaT3SRQ,786
8
8
  haiku/rag/mcp.py,sha256=bR9Y-Nz-hvjiql20Y0KE0hwNGwyjmPGX8K9d-qmXptY,4683
9
9
  haiku/rag/migration.py,sha256=gWxQwiKo0YulRhogYz4K8N98kHN9LQXIx9FeTmT24v4,10915
@@ -11,29 +11,31 @@ haiku/rag/monitor.py,sha256=r386nkhdlsU8UECwIuVwnrSlgMk3vNIuUZGNIzkZuec,2770
11
11
  haiku/rag/reader.py,sha256=qkPTMJuQ_o4sK-8zpDl9WFYe_MJ7aL_gUw6rczIpW-g,3274
12
12
  haiku/rag/utils.py,sha256=c8F0ECsFSqvQxzxINAOAnvShoOnJPLsOaNE3JEY2JSc,3230
13
13
  haiku/rag/embeddings/__init__.py,sha256=n7aHW3BxHlpGxU4ze4YYDOsljzFpEep8dwVE2n45JoE,1218
14
- haiku/rag/embeddings/base.py,sha256=NTQvuzbZPu0LBo5wAu3qGyJ4xXUaRAt1fjBO0ygWn_Y,465
15
- haiku/rag/embeddings/ollama.py,sha256=y6-lp0XpbnyIjoOEdtSzMdEVkU5glOwnWQ1FkpUZnpI,370
16
- haiku/rag/embeddings/openai.py,sha256=iA-DewCOSip8PLU_RhEJHFHBle4DtmCCIGNfGs58Wvk,357
17
- haiku/rag/embeddings/voyageai.py,sha256=0hiRTIqu-bpl-4OaCtMHvWfPdgbrzhnfZJowSV8pLRA,415
14
+ haiku/rag/embeddings/base.py,sha256=BnSviKrlzjv3L0sZJs_T-pxfawd-bcTak-rsX-D2f3A,497
15
+ haiku/rag/embeddings/ollama.py,sha256=LuLlHH6RGoO9_gFCIlbmesuXOj017gTw6z-p8Ez0CfE,595
16
+ haiku/rag/embeddings/openai.py,sha256=fIFCk-jpUtaW0xsnrQnJ824O0UCjaGG2sgvBzREhilc,503
17
+ haiku/rag/embeddings/vllm.py,sha256=vhaUnCn6VMkfSluLhWKtSV-sekFaPsp4pKo2N7-SBCY,626
18
+ haiku/rag/embeddings/voyageai.py,sha256=UW-MW4tJKnPB6Fs2P7A3yt-ZeRm46H9npckchSriPX8,661
18
19
  haiku/rag/qa/__init__.py,sha256=Sl7Kzrg9CuBOcMF01wc1NtQhUNWjJI0MhIHfCWrb8V4,434
19
- haiku/rag/qa/agent.py,sha256=gZ12vLUSHHCMl0HyPoLlPDbhUWoyEUydXG7u8lG1eqg,2602
20
+ haiku/rag/qa/agent.py,sha256=15-jMuF08U0uxGdqgQysKMZLr8BUWssI76PtyQ2Ngd8,2912
20
21
  haiku/rag/qa/prompts.py,sha256=xdT4cyrOrAK9UDgVqyev1wHF49jD57Bh40gx2sH4NPI,3341
21
22
  haiku/rag/reranking/__init__.py,sha256=IRXHs4qPu6VbGJQpzSwhgtVWWumURH_vEoVFE-extlo,894
22
23
  haiku/rag/reranking/base.py,sha256=LM9yUSSJ414UgBZhFTgxGprlRqzfTe4I1vgjricz2JY,405
23
24
  haiku/rag/reranking/cohere.py,sha256=1iTdiaa8vvb6oHVB2qpWzUOVkyfUcimVSZp6Qr4aq4c,1049
24
25
  haiku/rag/reranking/mxbai.py,sha256=46sVTsTIkzIX9THgM3u8HaEmgY7evvEyB-N54JTHvK8,867
26
+ haiku/rag/reranking/vllm.py,sha256=xVGH9ss-ISWdJ5SKUUHUbTqBo7PIEmA_SQv0ScdJ6XA,1479
25
27
  haiku/rag/store/__init__.py,sha256=hq0W0DAC7ysqhWSP2M2uHX8cbG6kbr-sWHxhq6qQcY0,103
26
28
  haiku/rag/store/engine.py,sha256=XHGo5Xl-dCFdQHrOdMo64xVK5n0k8-LoUl5V-tlA0HI,7131
27
29
  haiku/rag/store/models/__init__.py,sha256=s0E72zneGlowvZrFWaNxHYjOAUjgWdLxzdYsnvNRVlY,88
28
30
  haiku/rag/store/models/chunk.py,sha256=ZNyTfO6lh3rXWLVYO3TZcitbL4LSUGr42fR6jQQ5iQc,364
29
31
  haiku/rag/store/models/document.py,sha256=zSSpt6pyrMJAIXGQvIcqojcqUzwZnhp3WxVokaWxNRc,396
30
32
  haiku/rag/store/repositories/__init__.py,sha256=Olv5dLfBQINRV3HrsfUpjzkZ7Qm7goEYyMNykgo_DaY,291
31
- haiku/rag/store/repositories/chunk.py,sha256=5S77mGh6pWxPHjaXriJGmvbSOhoNM8tLwygE2GXPlbU,13586
33
+ haiku/rag/store/repositories/chunk.py,sha256=v4y4eh4yIf6zJaWfHxljvnmb12dmvwdinzmxQt8Lvhs,13343
32
34
  haiku/rag/store/repositories/document.py,sha256=lP8Lo82KTP-qwXFRpYZ46WjeAdAsHwZ5pJcrXdz4g0U,6988
33
35
  haiku/rag/store/repositories/settings.py,sha256=dqnAvm-98nQrWpLBbf9QghJw673QD80-iqQhRMP5t0c,5025
34
36
  haiku/rag/store/upgrades/__init__.py,sha256=wUiEoSiHTahvuagx93E4FB07v123AhdbOjwUkPusiIg,14
35
- haiku_rag-0.7.1.dist-info/METADATA,sha256=yIz4nATa_b2vDstrCZe9CzV_77Vw74QIhRWqCZCdaes,4597
36
- haiku_rag-0.7.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
37
- haiku_rag-0.7.1.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
38
- haiku_rag-0.7.1.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
39
- haiku_rag-0.7.1.dist-info/RECORD,,
37
+ haiku_rag-0.7.3.dist-info/METADATA,sha256=PAvA6VZuyZp9IekXhYCLWDxM1wMZMmujtntxZE2lBoE,4610
38
+ haiku_rag-0.7.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
39
+ haiku_rag-0.7.3.dist-info/entry_points.txt,sha256=G1U3nAkNd5YDYd4v0tuYFbriz0i-JheCsFuT9kIoGCI,48
40
+ haiku_rag-0.7.3.dist-info/licenses/LICENSE,sha256=eXZrWjSk9PwYFNK9yUczl3oPl95Z4V9UXH7bPN46iPo,1065
41
+ haiku_rag-0.7.3.dist-info/RECORD,,