ragit 0.7.4__py3-none-any.whl → 0.7.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragit/config.py +1 -1
- ragit/providers/ollama.py +62 -77
- ragit/version.py +1 -1
- {ragit-0.7.4.dist-info → ragit-0.7.5.dist-info}/METADATA +1 -1
- {ragit-0.7.4.dist-info → ragit-0.7.5.dist-info}/RECORD +8 -8
- {ragit-0.7.4.dist-info → ragit-0.7.5.dist-info}/WHEEL +0 -0
- {ragit-0.7.4.dist-info → ragit-0.7.5.dist-info}/licenses/LICENSE +0 -0
- {ragit-0.7.4.dist-info → ragit-0.7.5.dist-info}/top_level.txt +0 -0
ragit/config.py
CHANGED
@@ -41,7 +41,7 @@ class Config:
 
     # Default Models
     DEFAULT_LLM_MODEL: str = os.getenv("RAGIT_DEFAULT_LLM_MODEL", "qwen3-vl:235b-instruct")
-    DEFAULT_EMBEDDING_MODEL: str = os.getenv("RAGIT_DEFAULT_EMBEDDING_MODEL", "
+    DEFAULT_EMBEDDING_MODEL: str = os.getenv("RAGIT_DEFAULT_EMBEDDING_MODEL", "nomic-embed-text:latest")
 
     # Logging
     LOG_LEVEL: str = os.getenv("RAGIT_LOG_LEVEL", "INFO")
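Both defaults are resolved with os.getenv when the class body executes, so they can be overridden per environment without code changes. A minimal sketch of the override path, assuming the variables are set before ragit is first imported (the model names below are illustrative, not shipped defaults):

import os

# Must run before the first import of ragit.config, because the Config
# attributes call os.getenv at class-definition time.
os.environ["RAGIT_DEFAULT_LLM_MODEL"] = "llama3"
os.environ["RAGIT_DEFAULT_EMBEDDING_MODEL"] = "mxbai-embed-large"

from ragit.config import config  # now reflects the overrides above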
ragit/providers/ollama.py
CHANGED
@@ -19,7 +19,6 @@ from typing import Any
 
 import httpx
 import requests
-import trio
 
 from ragit.config import config
 from ragit.providers.base import (
@@ -39,17 +38,17 @@ def _cached_embedding(text: str, model: str, embedding_url: str, timeout: int) -
     text = text[: OllamaProvider.MAX_EMBED_CHARS]
 
     response = requests.post(
-        f"{embedding_url}/api/
+        f"{embedding_url}/api/embed",
         headers={"Content-Type": "application/json"},
-        json={"model": model, "
+        json={"model": model, "input": text},
         timeout=timeout,
     )
     response.raise_for_status()
     data = response.json()
-
-    if not
+    embeddings = data.get("embeddings", [])
+    if not embeddings or not embeddings[0]:
         raise ValueError("Empty embedding returned from Ollama")
-    return tuple(
+    return tuple(embeddings[0])
 
 
 class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
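The switch from /api/embeddings to /api/embed changes both the request key ("input" instead of "prompt") and the response key ("embeddings", a list of vectors, instead of a single "embedding"). A minimal sketch of the contract the new code relies on; the URL and model name are illustrative assumptions:

import requests

# /api/embed accepts a single string or a list of strings as "input".
resp = requests.post(
    "http://localhost:11434/api/embed",
    json={"model": "nomic-embed-text", "input": ["first text", "second text"]},
    timeout=30,
)
resp.raise_for_status()
data = resp.json()
# "embeddings" is a list of vectors, one per input, even for a single string.
print(len(data["embeddings"]), len(data["embeddings"][0]))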
@@ -58,7 +57,7 @@ class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
 
     Performance features:
     - Connection pooling via requests.Session() for faster sequential requests
-    -
+    - Native batch embedding via /api/embed endpoint (single API call)
     - LRU cache for repeated embedding queries (2048 entries)
 
     Parameters
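The LRU cache listed above is why _cached_embedding is a module-level function with hashable arguments and a tuple return value: functools.lru_cache keys on the argument tuple, and the immutable result is safe to share between callers. A condensed sketch of that pattern (the decorator placement is not shown in this diff, and truncation to MAX_EMBED_CHARS is omitted here):

from functools import lru_cache

import requests

@lru_cache(maxsize=2048)  # 2048 entries, matching the docstring above
def _cached_embedding(text: str, model: str, embedding_url: str, timeout: int) -> tuple:
    # All arguments are hashable, so lru_cache can key on them directly.
    response = requests.post(
        f"{embedding_url}/api/embed",
        headers={"Content-Type": "application/json"},
        json={"model": model, "input": text},
        timeout=timeout,
    )
    response.raise_for_status()
    embeddings = response.json().get("embeddings", [])
    if not embeddings or not embeddings[0]:
        raise ValueError("Empty embedding returned from Ollama")
    return tuple(embeddings[0])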
@@ -78,8 +77,8 @@ class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
     >>> response = provider.generate("What is RAG?", model="llama3")
     >>> print(response.text)
 
-    >>> #
-    >>> embeddings =
+    >>> # Batch embedding (single API call)
+    >>> embeddings = provider.embed_batch(texts, "mxbai-embed-large")
     """
 
     # Known embedding model dimensions
@@ -234,16 +233,16 @@ class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
         # Direct call without cache
         truncated = text[: self.MAX_EMBED_CHARS] if len(text) > self.MAX_EMBED_CHARS else text
         response = self.session.post(
-            f"{self.embedding_url}/api/
-            json={"model": model, "
+            f"{self.embedding_url}/api/embed",
+            json={"model": model, "input": truncated},
             timeout=self.timeout,
         )
         response.raise_for_status()
         data = response.json()
-
-        if not
+        embeddings = data.get("embeddings", [])
+        if not embeddings or not embeddings[0]:
             raise ValueError("Empty embedding returned from Ollama")
-        embedding = tuple(
+        embedding = tuple(embeddings[0])
 
         # Update dimensions from actual response
         self._current_dimensions = len(embedding)
@@ -258,34 +257,32 @@ class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
             raise ConnectionError(f"Ollama embed failed: {e}") from e
 
     def embed_batch(self, texts: list[str], model: str) -> list[EmbeddingResponse]:
-        """Generate embeddings for multiple texts
+        """Generate embeddings for multiple texts in a single API call.
 
-
-
-        Note: Ollama /api/embeddings only supports single prompts, so we loop.
+        The /api/embed endpoint supports batch inputs natively.
         """
         self._current_embed_model = model
         self._current_dimensions = self.EMBEDDING_DIMENSIONS.get(model, 768)
 
-
+        # Truncate oversized inputs
+        truncated_texts = [text[: self.MAX_EMBED_CHARS] if len(text) > self.MAX_EMBED_CHARS else text for text in texts]
+
         try:
-
-
-
+            response = self.session.post(
+                f"{self.embedding_url}/api/embed",
+                json={"model": model, "input": truncated_texts},
+                timeout=self.timeout,
+            )
+            response.raise_for_status()
+            data = response.json()
+            embeddings_list = data.get("embeddings", [])
 
-
-
-            else:
-                response = self.session.post(
-                    f"{self.embedding_url}/api/embeddings",
-                    json={"model": model, "prompt": truncated},
-                    timeout=self.timeout,
-                )
-                response.raise_for_status()
-                data = response.json()
-                embedding_list = data.get("embedding", [])
-                embedding = tuple(embedding_list) if embedding_list else ()
+            if not embeddings_list:
+                raise ValueError("Empty embeddings returned from Ollama")
 
+            results = []
+            for embedding_data in embeddings_list:
+                embedding = tuple(embedding_data) if embedding_data else ()
                 if embedding:
                     self._current_dimensions = len(embedding)
 
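After this change one POST carries the whole batch, replacing the previous per-text loop, while the caller-facing API stays a single method call. A usage sketch; constructing the provider with defaults is an assumption, since the constructor arguments are not shown in this diff:

from ragit.providers.ollama import OllamaProvider

provider = OllamaProvider()  # default construction assumed
texts = ["What is RAG?", "How are embeddings cached?"]
responses = provider.embed_batch(texts, "nomic-embed-text:latest")
for r in responses:
    # Fields per the EmbeddingResponse construction shown in this file
    print(r.provider, r.model, r.dimensions)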
@@ -305,12 +302,12 @@ class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
         self,
         texts: list[str],
         model: str,
-        max_concurrent: int = 10,
+        max_concurrent: int = 10,  # kept for API compatibility, no longer used
     ) -> list[EmbeddingResponse]:
-        """Generate embeddings for multiple texts
+        """Generate embeddings for multiple texts asynchronously.
 
-
-
+        The /api/embed endpoint supports batch inputs natively, so this
+        makes a single async HTTP request for all texts.
 
         Parameters
         ----------
@@ -319,8 +316,8 @@ class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
         model : str
             Embedding model name.
         max_concurrent : int
-
-
+            Deprecated, kept for API compatibility. No longer used since
+            the API now supports native batching.
 
         Returns
         -------
@@ -335,52 +332,40 @@ class OllamaProvider(BaseLLMProvider, BaseEmbeddingProvider):
         self._current_embed_model = model
         self._current_dimensions = self.EMBEDDING_DIMENSIONS.get(model, 768)
 
-        #
-
-        errors: list[Exception] = []
-
-        # Semaphore to limit concurrency
-        limiter = trio.CapacityLimiter(max_concurrent)
+        # Truncate oversized inputs
+        truncated_texts = [text[: self.MAX_EMBED_CHARS] if len(text) > self.MAX_EMBED_CHARS else text for text in texts]
 
-
-
-
-
-
-
-
-
-
-                    json={"model": model, "prompt": truncated},
-                    timeout=self.timeout,
-                )
-                response.raise_for_status()
-                data = response.json()
+        try:
+            async with httpx.AsyncClient() as client:
+                response = await client.post(
+                    f"{self.embedding_url}/api/embed",
+                    json={"model": model, "input": truncated_texts},
+                    timeout=self.timeout,
+                )
+                response.raise_for_status()
+                data = response.json()
 
-
-
+                embeddings_list = data.get("embeddings", [])
+                if not embeddings_list:
+                    raise ValueError("Empty embeddings returned from Ollama")
 
-
-
+                results = []
+                for embedding_data in embeddings_list:
+                    embedding = tuple(embedding_data) if embedding_data else ()
+                    if embedding:
+                        self._current_dimensions = len(embedding)
 
-
+                    results.append(
+                        EmbeddingResponse(
                         embedding=embedding,
                         model=model,
                         provider=self.provider_name,
                         dimensions=len(embedding),
                     )
-
-
-
-
-            for i, text in enumerate(texts):
-                nursery.start_soon(fetch_embedding, client, i, text)
-
-            if errors:
-                raise ConnectionError(f"Ollama async batch embed failed: {errors[0]}") from errors[0]
-
-            # Return results in original order
-            return [results[i] for i in range(len(texts))]
+                )
+            return results
+        except httpx.HTTPError as e:
+            raise ConnectionError(f"Ollama async batch embed failed: {e}") from e
 
     def chat(
         self,
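With trio removed and httpx.AsyncClient used internally, the async batch method runs under a plain asyncio event loop. A driving sketch; the method name embed_batch_async is an assumption (this hunk shows only its parameters, not its name), and default construction is assumed as above:

import asyncio

from ragit.providers.ollama import OllamaProvider

async def main() -> None:
    provider = OllamaProvider()  # default construction assumed
    # One awaited call issues a single POST for the whole batch.
    responses = await provider.embed_batch_async(
        ["first text", "second text"], "nomic-embed-text:latest"
    )
    print([r.dimensions for r in responses])

asyncio.run(main())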
ragit/version.py
CHANGED

{ragit-0.7.4.dist-info → ragit-0.7.5.dist-info}/RECORD
CHANGED

@@ -1,18 +1,18 @@
 ragit/__init__.py,sha256=PjQogIWMlydZFWVECqhmxw-X9i7lEXdUTe2XlT6qYUQ,2213
 ragit/assistant.py,sha256=lXjZRUr_WsYLP3XLOktabgfPVyKOZPdREzyL7cSRufk,11251
-ragit/config.py,sha256=
+ragit/config.py,sha256=aSGWQGiaRm6hrjssvCjhqZOa76pxegeOtcFbFRlQx4M,1501
 ragit/loaders.py,sha256=keusuPzXPBiLDVj4hKfPCcge-rm-cnzNRk50fGXvTJs,5571
-ragit/version.py,sha256=
+ragit/version.py,sha256=Vj5ogQMaioIPZOEL7StQIcdzW1RI4gnuLlRkcVqW7qk,97
 ragit/core/__init__.py,sha256=j53PFfoSMXwSbK1rRHpMbo8mX2i4R1LJ5kvTxBd7-0w,100
 ragit/core/experiment/__init__.py,sha256=4vAPOOYlY5Dcr2gOolyhBSPGIUxZKwEkgQffxS9BodA,452
 ragit/core/experiment/experiment.py,sha256=Qh1NJkY9LbKaidRfiI8GOwBZqopjK-MSVBuD_JEgO-k,16582
 ragit/core/experiment/results.py,sha256=KHpN3YSLJ83_JUfIMccRPS-q7LEt0S9p8ehDRawk_4k,3487
 ragit/providers/__init__.py,sha256=iliJt74Lt3mFUlKGfSFW-D0cMonUygY6sRZ6lLjeU7M,435
 ragit/providers/base.py,sha256=MJ8mVeXuGWhkX2XGTbkWIY3cVoTOPr4h5XBXw8rAX2Q,3434
-ragit/providers/ollama.py,sha256=
+ragit/providers/ollama.py,sha256=bGZfcmlfchnVP5851noWaf3c1weMhknGOs7Fu69Oz4E,15404
 ragit/utils/__init__.py,sha256=-UsE5oJSnmEnBDswl-ph0A09Iu8yKNbPhd1-_7Lcb8Y,3051
-ragit-0.7.
-ragit-0.7.
-ragit-0.7.
-ragit-0.7.
-ragit-0.7.
+ragit-0.7.5.dist-info/licenses/LICENSE,sha256=tAkwu8-AdEyGxGoSvJ2gVmQdcicWw3j1ZZueVV74M-E,11357
+ragit-0.7.5.dist-info/METADATA,sha256=T_wNuarfzzkfhViVmigIe8n4Kz5FLFCbVj3oWAA_D9w,15528
+ragit-0.7.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ragit-0.7.5.dist-info/top_level.txt,sha256=pkPbG7yrw61wt9_y_xcLE2vq2a55fzockASD0yq0g4s,6
+ragit-0.7.5.dist-info/RECORD,,
{ragit-0.7.4.dist-info → ragit-0.7.5.dist-info}/WHEEL
File without changes

{ragit-0.7.4.dist-info → ragit-0.7.5.dist-info}/licenses/LICENSE
File without changes

{ragit-0.7.4.dist-info → ragit-0.7.5.dist-info}/top_level.txt
File without changes