ragit 0.7.4__py3-none-any.whl → 0.7.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragit/config.py +1 -1
- ragit/providers/ollama.py +62 -77
- ragit/version.py +1 -1
- {ragit-0.7.4.dist-info → ragit-0.7.5.dist-info}/METADATA +1 -1
- {ragit-0.7.4.dist-info → ragit-0.7.5.dist-info}/RECORD +8 -8
- {ragit-0.7.4.dist-info → ragit-0.7.5.dist-info}/WHEEL +0 -0
- {ragit-0.7.4.dist-info → ragit-0.7.5.dist-info}/licenses/LICENSE +0 -0
- {ragit-0.7.4.dist-info → ragit-0.7.5.dist-info}/top_level.txt +0 -0
ragit/config.py
CHANGED
@@ -41,7 +41,7 @@ class Config:
 
     # Default Models
     DEFAULT_LLM_MODEL: str = os.getenv("RAGIT_DEFAULT_LLM_MODEL", "qwen3-vl:235b-instruct")
-    DEFAULT_EMBEDDING_MODEL: str = os.getenv("RAGIT_DEFAULT_EMBEDDING_MODEL", "
+    DEFAULT_EMBEDDING_MODEL: str = os.getenv("RAGIT_DEFAULT_EMBEDDING_MODEL", "nomic-embed-text:latest")
 
     # Logging
     LOG_LEVEL: str = os.getenv("RAGIT_LOG_LEVEL", "INFO")
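Both defaults are resolved with os.getenv when the class body executes, so they can be overridden per environment without code changes. A minimal sketch of the override path, assuming the variables are set before ragit is first imported (the model names below are illustrative, not shipped defaults):

import os

# Must run before the first import of ragit.config, because the Config
# attributes call os.getenv at class-definition time.
os.environ["RAGIT_DEFAULT_LLM_MODEL"] = "llama3"
os.environ["RAGIT_DEFAULT_EMBEDDING_MODEL"] = "mxbai-embed-large"

from ragit.config import config  # now reflects the overrides above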
ragit/providers/ollama.py
CHANGED
@@ -19,7 +19,6 @@ from typing import Any
 
 import httpx
 import requests
-import trio
 
 from ragit.config import config
 from ragit.providers.base import (
@@ -39,17 +38,17 @@ def _cached_embedding(text: str, model: str, embedding_url: str, timeout: int) -
     text = text[: OllamaProvider.MAX_EMBED_CHARS]
 
     response = requests.post(
-        f"{embedding_url}/api/
+        f"{embedding_url}/api/embed",
         headers={"Content-Type": "application/json"},
-        json={"model": model, "
+        json={"model": model, "input": text},
         timeout=timeout,
     )
     response.raise_for_status()
     data = response.json()
-
-    if not
+    embeddings = data.get("embeddings", [])
+    if not embeddings or not embeddings[0]:
         raise ValueError("Empty embedding returned from Ollama")
-    return tuple(
+    return tuple(embeddings[0])
 
 
 class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
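The switch from /api/embeddings to /api/embed changes both the request key ("input" instead of "prompt") and the response key ("embeddings", a list of vectors, instead of a single "embedding"). A minimal sketch of the contract the new code relies on; the URL and model name are illustrative assumptions:

import requests

# /api/embed accepts a single string or a list of strings as "input".
resp = requests.post(
    "http://localhost:11434/api/embed",
    json={"model": "nomic-embed-text", "input": ["first text", "second text"]},
    timeout=30,
)
resp.raise_for_status()
data = resp.json()
# "embeddings" is a list of vectors, one per input, even for a single string.
print(len(data["embeddings"]), len(data["embeddings"][0]))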
@@ -58,7 +57,7 @@ class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
 
     Performance features:
     - Connection pooling via requests.Session() for faster sequential requests
-    -
+    - Native batch embedding via /api/embed endpoint (single API call)
     - LRU cache for repeated embedding queries (2048 entries)
 
     Parameters
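The LRU cache listed above is why _cached_embedding is a module-level function with hashable arguments and a tuple return value: functools.lru_cache keys on the argument tuple, and the immutable result is safe to share between callers. A condensed sketch of that pattern (the decorator placement is not shown in this diff, and truncation to MAX_EMBED_CHARS is omitted here):

from functools import lru_cache

import requests

@lru_cache(maxsize=2048)  # 2048 entries, matching the docstring above
def _cached_embedding(text: str, model: str, embedding_url: str, timeout: int) -> tuple:
    # All arguments are hashable, so lru_cache can key on them directly.
    response = requests.post(
        f"{embedding_url}/api/embed",
        headers={"Content-Type": "application/json"},
        json={"model": model, "input": text},
        timeout=timeout,
    )
    response.raise_for_status()
    embeddings = response.json().get("embeddings", [])
    if not embeddings or not embeddings[0]:
        raise ValueError("Empty embedding returned from Ollama")
    return tuple(embeddings[0])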
@@ -78,8 +77,8 @@ class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
     >>> response = provider.generate("What is RAG?", model="llama3")
     >>> print(response.text)
 
-    >>> #
-    >>> embeddings =
+    >>> # Batch embedding (single API call)
+    >>> embeddings = provider.embed_batch(texts, "mxbai-embed-large")
     """
 
     # Known embedding model dimensions
@@ -234,16 +233,16 @@ class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
         # Direct call without cache
         truncated = text[: self.MAX_EMBED_CHARS] if len(text) > self.MAX_EMBED_CHARS else text
         response = self.session.post(
-            f"{self.embedding_url}/api/
-            json={"model": model, "
+            f"{self.embedding_url}/api/embed",
+            json={"model": model, "input": truncated},
             timeout=self.timeout,
         )
         response.raise_for_status()
         data = response.json()
-
-        if not
+        embeddings = data.get("embeddings", [])
+        if not embeddings or not embeddings[0]:
             raise ValueError("Empty embedding returned from Ollama")
-        embedding = tuple(
+        embedding = tuple(embeddings[0])
 
         # Update dimensions from actual response
         self._current_dimensions = len(embedding)
@@ -258,34 +257,32 @@ class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
             raise ConnectionError(f"Ollama embed failed: {e}") from e
 
     def embed_batch(self, texts: list[str], model: str) -> list[EmbeddingResponse]:
-        """Generate embeddings for multiple texts
+        """Generate embeddings for multiple texts in a single API call.
 
-
-
-        Note: Ollama /api/embeddings only supports single prompts, so we loop.
+        The /api/embed endpoint supports batch inputs natively.
         """
         self._current_embed_model = model
         self._current_dimensions = self.EMBEDDING_DIMENSIONS.get(model, 768)
 
-
+        # Truncate oversized inputs
+        truncated_texts = [text[: self.MAX_EMBED_CHARS] if len(text) > self.MAX_EMBED_CHARS else text for text in texts]
+
         try:
-
-
-
+            response = self.session.post(
+                f"{self.embedding_url}/api/embed",
+                json={"model": model, "input": truncated_texts},
+                timeout=self.timeout,
+            )
+            response.raise_for_status()
+            data = response.json()
+            embeddings_list = data.get("embeddings", [])
 
-
-
-            else:
-                response = self.session.post(
-                    f"{self.embedding_url}/api/embeddings",
-                    json={"model": model, "prompt": truncated},
-                    timeout=self.timeout,
-                )
-                response.raise_for_status()
-                data = response.json()
-                embedding_list = data.get("embedding", [])
-                embedding = tuple(embedding_list) if embedding_list else ()
+            if not embeddings_list:
+                raise ValueError("Empty embeddings returned from Ollama")
 
+            results = []
+            for embedding_data in embeddings_list:
+                embedding = tuple(embedding_data) if embedding_data else ()
                 if embedding:
                     self._current_dimensions = len(embedding)
 
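After this change one POST carries the whole batch, replacing the previous per-text loop, while the caller-facing API stays a single method call. A usage sketch; constructing the provider with defaults is an assumption, since the constructor arguments are not shown in this diff:

from ragit.providers.ollama import OllamaProvider

provider = OllamaProvider()  # default construction assumed
texts = ["What is RAG?", "How are embeddings cached?"]
responses = provider.embed_batch(texts, "nomic-embed-text:latest")
for r in responses:
    # Fields per the EmbeddingResponse construction shown in this file
    print(r.provider, r.model, r.dimensions)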
@@ -305,12 +302,12 @@ class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
         self,
         texts: list[str],
         model: str,
-        max_concurrent: int = 10,
+        max_concurrent: int = 10,  # kept for API compatibility, no longer used
     ) -> list[EmbeddingResponse]:
-        """Generate embeddings for multiple texts
+        """Generate embeddings for multiple texts asynchronously.
 
-
-
+        The /api/embed endpoint supports batch inputs natively, so this
+        makes a single async HTTP request for all texts.
 
         Parameters
         ----------
@@ -319,8 +316,8 @@ class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
         model : str
             Embedding model name.
         max_concurrent : int
-
-
+            Deprecated, kept for API compatibility. No longer used since
+            the API now supports native batching.
 
         Returns
         -------
@@ -335,52 +332,40 @@ class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
         self._current_embed_model = model
         self._current_dimensions = self.EMBEDDING_DIMENSIONS.get(model, 768)
 
-        #
-
-        errors: list[Exception] = []
-
-        # Semaphore to limit concurrency
-        limiter = trio.CapacityLimiter(max_concurrent)
+        # Truncate oversized inputs
+        truncated_texts = [text[: self.MAX_EMBED_CHARS] if len(text) > self.MAX_EMBED_CHARS else text for text in texts]
 
-
-
-
-
-
-
-
-
-
-                    json={"model": model, "prompt": truncated},
-                    timeout=self.timeout,
-                )
-                response.raise_for_status()
-                data = response.json()
+        try:
+            async with httpx.AsyncClient() as client:
+                response = await client.post(
+                    f"{self.embedding_url}/api/embed",
+                    json={"model": model, "input": truncated_texts},
+                    timeout=self.timeout,
+                )
+                response.raise_for_status()
+                data = response.json()
 
-
-
+                embeddings_list = data.get("embeddings", [])
+                if not embeddings_list:
+                    raise ValueError("Empty embeddings returned from Ollama")
 
-
-
+                results = []
+                for embedding_data in embeddings_list:
+                    embedding = tuple(embedding_data) if embedding_data else ()
+                    if embedding:
+                        self._current_dimensions = len(embedding)
 
-
+                    results.append(
+                        EmbeddingResponse(
                         embedding=embedding,
                         model=model,
                         provider=self.provider_name,
                         dimensions=len(embedding),
                     )
-
-
-
-
-            for i, text in enumerate(texts):
-                nursery.start_soon(fetch_embedding, client, i, text)
-
-            if errors:
-                raise ConnectionError(f"Ollama async batch embed failed: {errors[0]}") from errors[0]
-
-            # Return results in original order
-            return [results[i] for i in range(len(texts))]
+                )
+            return results
+        except httpx.HTTPError as e:
+            raise ConnectionError(f"Ollama async batch embed failed: {e}") from e
 
     def chat(
         self,
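With trio removed and httpx.AsyncClient used internally, the async batch method runs under a plain asyncio event loop. A driving sketch; the method name embed_batch_async is an assumption (this hunk shows only its parameters, not its name), and default construction is assumed as above:

import asyncio

from ragit.providers.ollama import OllamaProvider

async def main() -> None:
    provider = OllamaProvider()  # default construction assumed
    # One awaited call issues a single POST for the whole batch.
    responses = await provider.embed_batch_async(
        ["first text", "second text"], "nomic-embed-text:latest"
    )
    print([r.dimensions for r in responses])

asyncio.run(main())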
ragit/version.py
CHANGED

{ragit-0.7.4.dist-info → ragit-0.7.5.dist-info}/RECORD
CHANGED

@@ -1,18 +1,18 @@
 ragit/__init__.py,sha256=PjQogIWMlydZFWVECqhmxw-X9i7lEXdUTe2XlT6qYUQ,2213
 ragit/assistant.py,sha256=lXjZRUr_WsYLP3XLOktabgfPVyKOZPdREzyL7cSRufk,11251
-ragit/config.py,sha256=
+ragit/config.py,sha256=aSGWQGiaRm6hrjssvCjhqZOa76pxegeOtcFbFRlQx4M,1501
 ragit/loaders.py,sha256=keusuPzXPBiLDVj4hKfPCcge-rm-cnzNRk50fGXvTJs,5571
-ragit/version.py,sha256=
+ragit/version.py,sha256=Vj5ogQMaioIPZOEL7StQIcdzW1RI4gnuLlRkcVqW7qk,97
 ragit/core/__init__.py,sha256=j53PFfoSMXwSbK1rRHpMbo8mX2i4R1LJ5kvTxBd7-0w,100
 ragit/core/experiment/__init__.py,sha256=4vAPOOYlY5Dcr2gOolyhBSPGIUxZKwEkgQffxS9BodA,452
 ragit/core/experiment/experiment.py,sha256=Qh1NJkY9LbKaidRfiI8GOwBZqopjK-MSVBuD_JEgO-k,16582
 ragit/core/experiment/results.py,sha256=KHpN3YSLJ83_JUfIMccRPS-q7LEt0S9p8ehDRawk_4k,3487
 ragit/providers/__init__.py,sha256=iliJt74Lt3mFUlKGfSFW-D0cMonUygY6sRZ6lLjeU7M,435
 ragit/providers/base.py,sha256=MJ8mVeXuGWhkX2XGTbkWIY3cVoTOPr4h5XBXw8rAX2Q,3434
-ragit/providers/ollama.py,sha256=
+ragit/providers/ollama.py,sha256=bGZfcmlfchnVP5851noWaf3c1weMhknGOs7Fu69Oz4E,15404
 ragit/utils/__init__.py,sha256=-UsE5oJSnmEnBDswl-ph0A09Iu8yKNbPhd1-_7Lcb8Y,3051
-ragit-0.7.
-ragit-0.7.
-ragit-0.7.
-ragit-0.7.
-ragit-0.7.
+ragit-0.7.5.dist-info/licenses/LICENSE,sha256=tAkwu8-AdEyGxGoSvJ2gVmQdcicWw3j1ZZueVV74M-E,11357
+ragit-0.7.5.dist-info/METADATA,sha256=T_wNuarfzzkfhViVmigIe8n4Kz5FLFCbVj3oWAA_D9w,15528
+ragit-0.7.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ragit-0.7.5.dist-info/top_level.txt,sha256=pkPbG7yrw61wt9_y_xcLE2vq2a55fzockASD0yq0g4s,6
+ragit-0.7.5.dist-info/RECORD,,
{ragit-0.7.4.dist-info → ragit-0.7.5.dist-info}/WHEEL
File without changes

{ragit-0.7.4.dist-info → ragit-0.7.5.dist-info}/licenses/LICENSE
File without changes

{ragit-0.7.4.dist-info → ragit-0.7.5.dist-info}/top_level.txt
File without changes