ragit 0.7.4__tar.gz → 0.7.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ragit
-Version: 0.7.4
+Version: 0.7.5
 Summary: Automatic RAG Pattern Optimization Engine
 Author: RODMENA LIMITED
 Maintainer-email: RODMENA LIMITED <info@rodmena.co.uk>
@@ -41,7 +41,7 @@ class Config:
 
     # Default Models
     DEFAULT_LLM_MODEL: str = os.getenv("RAGIT_DEFAULT_LLM_MODEL", "qwen3-vl:235b-instruct")
-    DEFAULT_EMBEDDING_MODEL: str = os.getenv("RAGIT_DEFAULT_EMBEDDING_MODEL", "mxbai-embed-large")
+    DEFAULT_EMBEDDING_MODEL: str = os.getenv("RAGIT_DEFAULT_EMBEDDING_MODEL", "nomic-embed-text:latest")
 
     # Logging
     LOG_LEVEL: str = os.getenv("RAGIT_LOG_LEVEL", "INFO")
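The default embedding model moves from mxbai-embed-large to nomic-embed-text:latest. Deployments that want to keep the old model can pin it through the RAGIT_DEFAULT_EMBEDDING_MODEL variable read above; a minimal sketch, assuming the class-level os.getenv call runs at import time (as the hunk suggests), so the variable must be set before ragit is imported:

    import os

    # Must be set before `from ragit.config import config` runs, since
    # Config evaluates os.getenv at class-definition time.
    os.environ["RAGIT_DEFAULT_EMBEDDING_MODEL"] = "mxbai-embed-large"

    from ragit.config import config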
@@ -19,7 +19,6 @@ from typing import Any
 
 import httpx
 import requests
-import trio
 
 from ragit.config import config
 from ragit.providers.base import (
@@ -39,17 +38,17 @@ def _cached_embedding(text: str, model: str, embedding_url: str, timeout: int) -
     text = text[: OllamaProvider.MAX_EMBED_CHARS]
 
     response = requests.post(
-        f"{embedding_url}/api/embeddings",
+        f"{embedding_url}/api/embed",
         headers={"Content-Type": "application/json"},
-        json={"model": model, "prompt": text},
+        json={"model": model, "input": text},
         timeout=timeout,
     )
     response.raise_for_status()
     data = response.json()
-    embedding = data.get("embedding", [])
-    if not embedding:
+    embeddings = data.get("embeddings", [])
+    if not embeddings or not embeddings[0]:
         raise ValueError("Empty embedding returned from Ollama")
-    return tuple(embedding)
+    return tuple(embeddings[0])
 
 
 class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
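For context, the two Ollama endpoints used above differ in both request and response shape: the legacy /api/embeddings takes a single "prompt" and returns one "embedding" vector, while /api/embed takes an "input" (a string or a list of strings) and returns a list of "embeddings". A minimal sketch of the difference, assuming a local Ollama server at its default address:

    import requests

    base = "http://localhost:11434"  # assumed default Ollama address

    # Legacy endpoint: one prompt in, one vector out
    old = requests.post(f"{base}/api/embeddings",
                        json={"model": "nomic-embed-text", "prompt": "hello"})
    vector = old.json()["embedding"]      # [0.12, -0.03, ...]

    # Newer endpoint: string or list of strings in, list of vectors out
    new = requests.post(f"{base}/api/embed",
                        json={"model": "nomic-embed-text", "input": ["hello", "world"]})
    vectors = new.json()["embeddings"]    # [[...], [...]]

This is why every call site below swaps both the request field ("prompt" → "input") and the response key ("embedding" → "embeddings"[0]).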
@@ -58,7 +57,7 @@ class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
 
     Performance features:
     - Connection pooling via requests.Session() for faster sequential requests
-    - Async parallel embedding via embed_batch_async() using trio + httpx
+    - Native batch embedding via /api/embed endpoint (single API call)
     - LRU cache for repeated embedding queries (2048 entries)
 
     Parameters
@@ -78,8 +77,8 @@ class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
     >>> response = provider.generate("What is RAG?", model="llama3")
     >>> print(response.text)
 
-    >>> # Async batch embedding (5-10x faster for large batches)
-    >>> embeddings = trio.run(provider.embed_batch_async, texts, "mxbai-embed-large")
+    >>> # Batch embedding (single API call)
+    >>> embeddings = provider.embed_batch(texts, "mxbai-embed-large")
     """
 
     # Known embedding model dimensions
@@ -234,16 +233,16 @@ class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
         # Direct call without cache
         truncated = text[: self.MAX_EMBED_CHARS] if len(text) > self.MAX_EMBED_CHARS else text
         response = self.session.post(
-            f"{self.embedding_url}/api/embeddings",
-            json={"model": model, "prompt": truncated},
+            f"{self.embedding_url}/api/embed",
+            json={"model": model, "input": truncated},
             timeout=self.timeout,
         )
         response.raise_for_status()
         data = response.json()
-        embedding_list = data.get("embedding", [])
-        if not embedding_list:
+        embeddings = data.get("embeddings", [])
+        if not embeddings or not embeddings[0]:
             raise ValueError("Empty embedding returned from Ollama")
-        embedding = tuple(embedding_list)
+        embedding = tuple(embeddings[0])
 
         # Update dimensions from actual response
         self._current_dimensions = len(embedding)
@@ -258,34 +257,32 @@ class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
             raise ConnectionError(f"Ollama embed failed: {e}") from e
 
     def embed_batch(self, texts: list[str], model: str) -> list[EmbeddingResponse]:
-        """Generate embeddings for multiple texts sequentially.
+        """Generate embeddings for multiple texts in a single API call.
 
-        For better performance with large batches, use embed_batch_async().
-
-        Note: Ollama /api/embeddings only supports single prompts, so we loop.
+        The /api/embed endpoint supports batch inputs natively.
         """
         self._current_embed_model = model
         self._current_dimensions = self.EMBEDDING_DIMENSIONS.get(model, 768)
 
-        results = []
+        # Truncate oversized inputs
+        truncated_texts = [text[: self.MAX_EMBED_CHARS] if len(text) > self.MAX_EMBED_CHARS else text for text in texts]
+
         try:
-            for text in texts:
-                # Truncate oversized inputs
-                truncated = text[: self.MAX_EMBED_CHARS] if len(text) > self.MAX_EMBED_CHARS else text
+            response = self.session.post(
+                f"{self.embedding_url}/api/embed",
+                json={"model": model, "input": truncated_texts},
+                timeout=self.timeout,
+            )
+            response.raise_for_status()
+            data = response.json()
+            embeddings_list = data.get("embeddings", [])
 
-                if self.use_cache:
-                    embedding = _cached_embedding(truncated, model, self.embedding_url, self.timeout)
-                else:
-                    response = self.session.post(
-                        f"{self.embedding_url}/api/embeddings",
-                        json={"model": model, "prompt": truncated},
-                        timeout=self.timeout,
-                    )
-                    response.raise_for_status()
-                    data = response.json()
-                    embedding_list = data.get("embedding", [])
-                    embedding = tuple(embedding_list) if embedding_list else ()
+            if not embeddings_list:
+                raise ValueError("Empty embeddings returned from Ollama")
 
+            results = []
+            for embedding_data in embeddings_list:
+                embedding = tuple(embedding_data) if embedding_data else ()
                 if embedding:
                     self._current_dimensions = len(embedding)
 
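The embed_batch signature is unchanged, so existing callers keep working; only the transport moved from a per-text loop to one POST. A minimal usage sketch (the module path ragit.providers.ollama and the sample inputs are assumptions for illustration):

    from ragit.providers.ollama import OllamaProvider  # assumed module path

    provider = OllamaProvider()
    texts = ["What is RAG?", "How are embeddings batched?"]
    responses = provider.embed_batch(texts, "nomic-embed-text:latest")
    for r in responses:
        print(r.model, r.dimensions)  # one EmbeddingResponse per input text

Note that this batch path no longer consults the _cached_embedding LRU cache: the whole list goes to the server in a single request.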
@@ -305,12 +302,12 @@ class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
         self,
         texts: list[str],
         model: str,
-        max_concurrent: int = 10,
+        max_concurrent: int = 10,  # kept for API compatibility, no longer used
     ) -> list[EmbeddingResponse]:
-        """Generate embeddings for multiple texts in parallel using trio.
+        """Generate embeddings for multiple texts asynchronously.
 
-        This method is 5-10x faster than embed_batch() for large batches
-        by making concurrent HTTP requests.
+        The /api/embed endpoint supports batch inputs natively, so this
+        makes a single async HTTP request for all texts.
 
         Parameters
         ----------
@@ -319,8 +316,8 @@ class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
         model : str
             Embedding model name.
         max_concurrent : int
-            Maximum concurrent requests (default: 10).
-            Higher values = faster but more server load.
+            Deprecated, kept for API compatibility. No longer used since
+            the API now supports native batching.
 
         Returns
         -------
@@ -335,52 +332,40 @@ class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
         self._current_embed_model = model
         self._current_dimensions = self.EMBEDDING_DIMENSIONS.get(model, 768)
 
-        # Results storage (index -> embedding)
-        results: dict[int, EmbeddingResponse] = {}
-        errors: list[Exception] = []
-
-        # Semaphore to limit concurrency
-        limiter = trio.CapacityLimiter(max_concurrent)
+        # Truncate oversized inputs
+        truncated_texts = [text[: self.MAX_EMBED_CHARS] if len(text) > self.MAX_EMBED_CHARS else text for text in texts]
 
-        async def fetch_embedding(client: httpx.AsyncClient, index: int, text: str) -> None:
-            """Fetch a single embedding."""
-            async with limiter:
-                try:
-                    # Truncate oversized inputs
-                    truncated = text[: self.MAX_EMBED_CHARS] if len(text) > self.MAX_EMBED_CHARS else text
-
-                    response = await client.post(
-                        f"{self.embedding_url}/api/embeddings",
-                        json={"model": model, "prompt": truncated},
-                        timeout=self.timeout,
-                    )
-                    response.raise_for_status()
-                    data = response.json()
+        try:
+            async with httpx.AsyncClient() as client:
+                response = await client.post(
+                    f"{self.embedding_url}/api/embed",
+                    json={"model": model, "input": truncated_texts},
+                    timeout=self.timeout,
+                )
+                response.raise_for_status()
+                data = response.json()
 
-                    embedding_list = data.get("embedding", [])
-                    embedding = tuple(embedding_list) if embedding_list else ()
+                embeddings_list = data.get("embeddings", [])
+                if not embeddings_list:
+                    raise ValueError("Empty embeddings returned from Ollama")
 
-                    if embedding:
-                        self._current_dimensions = len(embedding)
+                results = []
+                for embedding_data in embeddings_list:
+                    embedding = tuple(embedding_data) if embedding_data else ()
+                    if embedding:
+                        self._current_dimensions = len(embedding)
 
-                    results[index] = EmbeddingResponse(
+                    results.append(
+                        EmbeddingResponse(
                             embedding=embedding,
                             model=model,
                             provider=self.provider_name,
                             dimensions=len(embedding),
                         )
-                except Exception as e:
-                    errors.append(e)
-
-        async with httpx.AsyncClient() as client, trio.open_nursery() as nursery:
-            for i, text in enumerate(texts):
-                nursery.start_soon(fetch_embedding, client, i, text)
-
-        if errors:
-            raise ConnectionError(f"Ollama async batch embed failed: {errors[0]}") from errors[0]
-
-        # Return results in original order
-        return [results[i] for i in range(len(texts))]
+                    )
+                return results
+        except httpx.HTTPError as e:
+            raise ConnectionError(f"Ollama async batch embed failed: {e}") from e
 
     def chat(
         self,
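With trio gone, embed_batch_async is a plain httpx coroutine, so it can now be driven by the standard asyncio loop instead of trio.run(). A minimal sketch, under the same assumed module path as above:

    import asyncio

    from ragit.providers.ollama import OllamaProvider  # assumed module path

    async def main() -> None:
        provider = OllamaProvider()
        texts = ["first chunk", "second chunk"]
        # One awaited POST to /api/embed embeds the whole batch;
        # max_concurrent is accepted but ignored in 0.7.5.
        responses = await provider.embed_batch_async(texts, "nomic-embed-text:latest")
        print(len(responses), responses[0].dimensions)

    asyncio.run(main())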
@@ -2,4 +2,4 @@
 # Copyright RODMENA LIMITED 2025
 # SPDX-License-Identifier: Apache-2.0
 #
-__version__ = "0.7.4"
+__version__ = "0.7.5"
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ragit
-Version: 0.7.4
+Version: 0.7.5
 Summary: Automatic RAG Pattern Optimization Engine
 Author: RODMENA LIMITED
 Maintainer-email: RODMENA LIMITED <info@rodmena.co.uk>